From 5e2e1cc4131cf4d21629c94331f2351b7dc8b87c Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:52:14 -0700 Subject: [PATCH 01/34] zram: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211015235219.2191207-9-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a68297fb51a2..876bf191e2ca 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1949,7 +1949,9 @@ static int zram_add(void) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); - device_add_disk(NULL, zram->disk, zram_disk_attr_groups); + ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups); + if (ret) + goto out_cleanup_disk; strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); @@ -1957,6 +1959,8 @@ static int zram_add(void) pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; +out_cleanup_disk: + blk_cleanup_disk(zram->disk); out_free_idr: idr_remove(&zram_index_idr, device_id); out_free_dev: From ff4cbe0fcf5d749f76040f782f0618656cd23e33 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:52:16 -0700 Subject: [PATCH 02/34] ps3disk: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Tested-by: Geoff Levand Link: https://lore.kernel.org/r/20211015235219.2191207-11-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 8d51efbe045d..3054adf77460 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -467,9 +467,13 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); - device_add_disk(&dev->sbd.core, gendisk, NULL); - return 0; + error = device_add_disk(&dev->sbd.core, gendisk, NULL); + if (error) + goto fail_cleanup_disk; + return 0; +fail_cleanup_disk: + blk_cleanup_disk(gendisk); fail_free_tag_set: blk_mq_free_tag_set(&priv->tag_set); fail_teardown: From 3c30883acab1d20ecbd3c48dc12b147b51548742 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:52:17 -0700 Subject: [PATCH 03/34] ps3vram: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Acked-by: Geoff Levand Link: https://lore.kernel.org/r/20211015235219.2191207-12-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ps3vram.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index d1ebf193cb9a..c1876646a4cb 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -753,9 +753,14 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); - device_add_disk(&dev->core, gendisk, NULL); + error = device_add_disk(&dev->core, gendisk, NULL); + if (error) + goto out_cleanup_disk; + return 0; +out_cleanup_disk: + blk_cleanup_disk(gendisk); out_cache_cleanup: remove_proc_entry(DEVICE_NAME, NULL); ps3vram_cache_cleanup(dev); From e1528830bd4ebf435d91c154e309e6e028336210 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:52:07 -0700 Subject: [PATCH 04/34] block/brd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015235219.2191207-2-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/brd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index aa0472718dce..a896ee175d86 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -370,6 +370,7 @@ static int brd_alloc(int i) struct brd_device *brd; struct gendisk *disk; char buf[DISK_NAME_LEN]; + int err = -ENOMEM; mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { @@ -420,16 +421,20 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; return 0; +out_cleanup_disk: + blk_cleanup_disk(disk); out_free_dev: mutex_lock(&brd_devices_mutex); list_del(&brd->brd_list); mutex_unlock(&brd_devices_mutex); kfree(brd); - return -ENOMEM; + return err; } static void brd_probe(dev_t dev) From e4c4871a73944353ea23e319de27ef73ce546623 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 2 Nov 2021 09:52:34 +0800 Subject: [PATCH 05/34] nbd: fix max value for 'first_minor' commit b1a811633f73 ("block: nbd: add sanity check for first_minor") checks that 'first_minor' should not be greater than 0xff, which is wrong. Whitout the commit, the details that when user pass 0x100000, it ends up create sysfs dir "/sys/block/43:0" are as follows: nbd_dev_add disk->first_minor = index << part_shift -> default part_shift is 5, first_minor is 0x2000000 device_add_disk ddev->devt = MKDEV(disk->major, disk->first_minor) -> (0x2b << 20) | (0x2000000) = 0x2b00000 device_add device_create_sys_dev_entry format_dev_t sprintf(buffer, "%u:%u", MAJOR(dev), MINOR(dev)); -> got 43:0 sysfs_create_link -> /sys/block/43:0 By the way, with the wrong fix, when part_shift is the default value, only 8 ndb devices can be created since 8 << 5 is greater than 0xff. Since the max bits for 'first_minor' should be the same as what MKDEV() does, which is 20. Change the upper bound of 'first_minor' from 0xff to 0xfffff. Fixes: b1a811633f73 ("block: nbd: add sanity check for first_minor") Signed-off-by: Yu Kuai Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211102015237.2309763-2-yebin10@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0ee104fbb628..29f9ac8817d7 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1808,11 +1808,11 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->major = NBD_MAJOR; /* Too big first_minor can cause duplicate creation of - * sysfs files/links, since first_minor will be truncated to - * byte in __device_add_disk(). + * sysfs files/links, since MKDEV() expect that the max bits of + * first_minor is 20. */ disk->first_minor = index << part_shift; - if (disk->first_minor > 0xff) { + if (disk->first_minor > MINORMASK) { err = -EINVAL; goto out_free_idr; } From 940c264984fd1457918393c49674f6b39ee16506 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 2 Nov 2021 09:52:35 +0800 Subject: [PATCH 06/34] nbd: fix possible overflow for 'first_minor' in nbd_dev_add() If 'part_shift' is not zero, then 'index << part_shift' might overflow to a value that is not greater than '0xfffff', then sysfs might complains about duplicate creation. Fixes: b0d9111a2d53 ("nbd: use an idr to keep track of nbd devices") Signed-off-by: Yu Kuai Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211102015237.2309763-3-yebin10@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 29f9ac8817d7..a5e3ee28006f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1808,11 +1808,11 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->major = NBD_MAJOR; /* Too big first_minor can cause duplicate creation of - * sysfs files/links, since MKDEV() expect that the max bits of - * first_minor is 20. + * sysfs files/links, since index << part_shift might overflow, or + * MKDEV() expect that the max bits of first_minor is 20. */ disk->first_minor = index << part_shift; - if (disk->first_minor > MINORMASK) { + if (disk->first_minor < index || disk->first_minor > MINORMASK) { err = -EINVAL; goto out_free_idr; } From 69beb62ff0d1723a750eebe1c4d01da573d7cd19 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 2 Nov 2021 09:52:36 +0800 Subject: [PATCH 07/34] nbd: Fix incorrect error handle when first_minor is illegal in nbd_dev_add If first_minor is illegal will goto out_free_idr label, this will miss cleanup disk. Fixes: b1a811633f73 ("block: nbd: add sanity check for first_minor") Signed-off-by: Ye Bin Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211102015237.2309763-4-yebin10@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a5e3ee28006f..f201c40d1dc9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1814,7 +1814,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->first_minor = index << part_shift; if (disk->first_minor < index || disk->first_minor > MINORMASK) { err = -EINVAL; - goto out_free_idr; + goto out_err_disk; } disk->minors = 1 << part_shift; From e2daec488c57069a4a431d5b752f50294c4bf273 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 2 Nov 2021 09:52:37 +0800 Subject: [PATCH 08/34] nbd: Fix hungtask when nbd_config_put I got follow issue: [ 247.381177] INFO: task kworker/u10:0:47 blocked for more than 120 seconds. [ 247.382644] Not tainted 4.19.90-dirty #140 [ 247.383502] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 247.385027] Call Trace: [ 247.388384] schedule+0xb8/0x3c0 [ 247.388966] schedule_timeout+0x2b4/0x380 [ 247.392815] wait_for_completion+0x367/0x510 [ 247.397713] flush_workqueue+0x32b/0x1340 [ 247.402700] drain_workqueue+0xda/0x3c0 [ 247.403442] destroy_workqueue+0x7b/0x690 [ 247.405014] nbd_config_put.cold+0x2f9/0x5b6 [ 247.405823] recv_work+0x1fd/0x2b0 [ 247.406485] process_one_work+0x70b/0x1610 [ 247.407262] worker_thread+0x5a9/0x1060 [ 247.408699] kthread+0x35e/0x430 [ 247.410918] ret_from_fork+0x1f/0x30 We can reproduce issue as follows: 1. Inject memory fault in nbd_start_device -1244,10 +1248,18 @@ static int nbd_start_device(struct nbd_device *nbd) nbd_dev_dbg_init(nbd); for (i = 0; i < num_connections; i++) { struct recv_thread_args *args; - - args = kzalloc(sizeof(*args), GFP_KERNEL); + + if (i == 1) { + args = NULL; + printk("%s: inject malloc error\n", __func__); + } + else + args = kzalloc(sizeof(*args), GFP_KERNEL); 2. Inject delay in recv_work -757,6 +760,8 @@ static void recv_work(struct work_struct *work) blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } + printk("%s: comm=%s pid=%d\n", __func__, current->comm, current->pid); + mdelay(5 * 1000); nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); 3. Create nbd server nbd-server 8000 /tmp/disk 4. Create nbd client nbd-client localhost 8000 /dev/nbd1 Then will trigger above issue. Reason is when add delay in recv_work, lead to release the last reference of 'nbd->config_refs'. nbd_config_put will call flush_workqueue to make all work finish. Obviously, it will lead to deadloop. To solve this issue, according to Josef's suggestion move 'recv_work' init from start device to nbd_dev_add, then destroy 'recv_work'when nbd device teardown. Signed-off-by: Ye Bin Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211102015237.2309763-5-yebin10@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index f201c40d1dc9..60751b185e6d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -260,7 +260,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, nbd->index); mutex_unlock(&nbd_index_mutex); - + destroy_workqueue(nbd->recv_workq); kfree(nbd); } @@ -1319,10 +1319,6 @@ static void nbd_config_put(struct nbd_device *nbd) kfree(nbd->config); nbd->config = NULL; - if (nbd->recv_workq) - destroy_workqueue(nbd->recv_workq); - nbd->recv_workq = NULL; - nbd->tag_set.timeout = 0; nbd->disk->queue->limits.discard_granularity = 0; nbd->disk->queue->limits.discard_alignment = 0; @@ -1351,14 +1347,6 @@ static int nbd_start_device(struct nbd_device *nbd) return -EINVAL; } - nbd->recv_workq = alloc_workqueue("knbd%d-recv", - WQ_MEM_RECLAIM | WQ_HIGHPRI | - WQ_UNBOUND, 0, nbd->index); - if (!nbd->recv_workq) { - dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n"); - return -ENOMEM; - } - blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); nbd->pid = task_pid_nr(current); @@ -1784,6 +1772,15 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) } nbd->disk = disk; + nbd->recv_workq = alloc_workqueue("nbd%d-recv", + WQ_MEM_RECLAIM | WQ_HIGHPRI | + WQ_UNBOUND, 0, nbd->index); + if (!nbd->recv_workq) { + dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n"); + err = -ENOMEM; + goto out_err_disk; + } + /* * Tell the block layer that we are not a rotational device */ @@ -1814,7 +1811,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->first_minor = index << part_shift; if (disk->first_minor < index || disk->first_minor > MINORMASK) { err = -EINVAL; - goto out_err_disk; + goto out_free_work; } disk->minors = 1 << part_shift; @@ -1823,7 +1820,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) sprintf(disk->disk_name, "nbd%d", index); err = add_disk(disk); if (err) - goto out_err_disk; + goto out_free_work; /* * Now publish the device. @@ -1832,6 +1829,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) nbd_total_devices++; return nbd; +out_free_work: + destroy_workqueue(nbd->recv_workq); out_err_disk: blk_cleanup_disk(disk); out_free_idr: @@ -2087,13 +2086,10 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) nbd_disconnect(nbd); sock_shutdown(nbd); /* - * Make sure recv thread has finished, so it does not drop the last - * config ref and try to destroy the workqueue from inside the work - * queue. And this also ensure that we can safely call nbd_clear_que() + * Make sure recv thread has finished, we can safely call nbd_clear_que() * to cancel the inflight I/Os. */ - if (nbd->recv_workq) - flush_workqueue(nbd->recv_workq); + flush_workqueue(nbd->recv_workq); nbd_clear_que(nbd); nbd->task_setup = NULL; mutex_unlock(&nbd->config_lock); From 8c13ab115b577bd09097b9d77916732e97e31b7b Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 17 Oct 2021 21:50:17 +0800 Subject: [PATCH 09/34] md/bitmap: don't set max_write_behind if there is no write mostly device We shouldn't set it since write behind IO should only happen to write mostly device. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md-bitmap.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index e29c6298ef5c..bfd6026d7809 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2469,11 +2469,30 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len) { unsigned long backlog; unsigned long old_mwb = mddev->bitmap_info.max_write_behind; + struct md_rdev *rdev; + bool has_write_mostly = false; int rv = kstrtoul(buf, 10, &backlog); if (rv) return rv; if (backlog > COUNTER_MAX) return -EINVAL; + + /* + * Without write mostly device, it doesn't make sense to set + * backlog for max_write_behind. + */ + rdev_for_each(rdev, mddev) { + if (test_bit(WriteMostly, &rdev->flags)) { + has_write_mostly = true; + break; + } + } + if (!has_write_mostly) { + pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", + mdname(mddev)); + return -EINVAL; + } + mddev->bitmap_info.max_write_behind = backlog; if (!backlog && mddev->serial_info_pool) { /* serial_info_pool is not needed if backlog is zero */ From 1e37799b50eccb79c59c660b330746a7848c346b Mon Sep 17 00:00:00 2001 From: Yang Guang Date: Thu, 28 Oct 2021 08:48:05 +0000 Subject: [PATCH 10/34] raid5-ppl: use swap() to make code cleaner Use the macro `swap()` defined in `include/linux/minmax.h` to avoid opencoding it. Reported-by: Zeal Robot Signed-off-by: Yang Guang Signed-off-by: Song Liu --- drivers/md/raid5-ppl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 3ddc2aa0b530..4ab417915d7f 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1081,7 +1081,7 @@ static int ppl_load_distributed(struct ppl_log *log) struct ppl_conf *ppl_conf = log->ppl_conf; struct md_rdev *rdev = log->rdev; struct mddev *mddev = rdev->mddev; - struct page *page, *page2, *tmp; + struct page *page, *page2; struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL; u32 crc, crc_stored; u32 signature; @@ -1156,9 +1156,7 @@ static int ppl_load_distributed(struct ppl_log *log) prev_pplhdr_offset = pplhdr_offset; prev_pplhdr = pplhdr; - tmp = page; - page = page2; - page2 = tmp; + swap(page, page2); /* calculate next potential ppl offset */ for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) From 494dbee341e7a02529ce776ee9a5e0b7733ca280 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 1 Nov 2021 17:25:38 +0800 Subject: [PATCH 11/34] nbd: error out if socket index doesn't match in nbd_handle_reply() commit fcf3d633d8e1 ("nbd: check sock index in nbd_read_stat()") just add error message when socket index doesn't match. Since the request and reply must be transmitted over the same socket, it's ok to error out in such situation. Signed-off-by: Yu Kuai Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211101092538.1155842-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60751b185e6d..c62240883ea1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -762,6 +762,8 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, if (cmd->index != index) { dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)", tag, index, cmd->index); + ret = -ENOENT; + goto out; } if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", From 6f1637795f2827d36aec9e0246487f5852e8abf7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:23 +0800 Subject: [PATCH 12/34] zram: fix race between zram_reset_device() and disksize_store() When the ->init_lock is released in zram_reset_device(), disksize_store() can come in and try to allocate meta, but zram_reset_device() is freeing free meta, so cause races. Link: https://lore.kernel.org/linux-block/20210927163805.808907-1-mcgrof@kernel.org/T/#mc617f865a3fa2778e40f317ddf48f6447c20c073 Reported-by: Luis Chamberlain Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 876bf191e2ca..9b38e5f749c6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1704,12 +1704,13 @@ static void zram_reset_device(struct zram *zram) set_capacity_and_notify(zram->disk, 0); part_stat_set_all(zram->disk->part0, 0); - up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ zram_meta_free(zram, disksize); memset(&zram->stats, 0, sizeof(zram->stats)); zcomp_destroy(comp); reset_bdev(zram); + + up_write(&zram->init_lock); } static ssize_t disksize_store(struct device *dev, From 8c54499a59b026a3dc2afccf6e1b36d5700d2fef Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:24 +0800 Subject: [PATCH 13/34] zram: don't fail to remove zram during unloading module When the zram module is being unloaded, no one should be using the zram disks. However even while being unloaded the zram module's sysfs attributes might be poked at to re-configure zram devices. This is expected, and kernfs ensures that these operations complete before device_del() completes. But reset_store() may set ->claim which will fail zram_remove(), when this happens, zram_reset_device() is bypassed, and zram->comp can't be destroyed, so the warning of 'Error: Removing state 63 which has instances left.' is triggered during unloading module, together with memory leak and sort of thing. Fixes the issue by not failing zram_remove() if ->claim is set, and we actually need to do nothing in case that zram_reset() is running since del_gendisk() will wait until zram_reset() is done. Reported-by: Luis Chamberlain Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9b38e5f749c6..13b65ebbab8d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1972,25 +1972,40 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { struct block_device *bdev = zram->disk->part0; + bool claimed; mutex_lock(&bdev->bd_disk->open_mutex); - if (bdev->bd_openers || zram->claim) { + if (bdev->bd_openers) { mutex_unlock(&bdev->bd_disk->open_mutex); return -EBUSY; } - zram->claim = true; + claimed = zram->claim; + if (!claimed) + zram->claim = true; mutex_unlock(&bdev->bd_disk->open_mutex); zram_debugfs_unregister(zram); - /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); - zram_reset_device(zram); + if (claimed) { + /* + * If we were claimed by reset_store(), del_gendisk() will + * wait until reset_store() is done, so nothing need to do. + */ + ; + } else { + /* Make sure all the pending I/O are finished */ + fsync_bdev(bdev); + zram_reset_device(zram); + } pr_info("Removed device: %s\n", zram->disk->disk_name); del_gendisk(zram->disk); + + /* del_gendisk drains pending reset_store */ + WARN_ON_ONCE(claimed && zram->claim); + blk_cleanup_disk(zram->disk); kfree(zram); return 0; @@ -2067,7 +2082,7 @@ static struct class zram_control_class = { static int zram_remove_cb(int id, void *ptr, void *data) { - zram_remove(ptr); + WARN_ON_ONCE(zram_remove(ptr)); return 0; } From 5a4b653655d554b5f51a5d2252882708c56a6f7e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:25 +0800 Subject: [PATCH 14/34] zram: avoid race between zram_remove and disksize_store After resetting device in zram_remove(), disksize_store still may come and allocate resources again before deleting gendisk, fix the race by resetting zram after del_gendisk() returns. At that time, disksize_store can't come any more. Reported-by: Luis Chamberlain Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 13b65ebbab8d..2dfa3a396c7c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2006,6 +2006,13 @@ static int zram_remove(struct zram *zram) /* del_gendisk drains pending reset_store */ WARN_ON_ONCE(claimed && zram->claim); + /* + * disksize_store() may be called in between zram_reset_device() + * and del_gendisk(), so run the last reset to avoid leaking + * anything allocated with disksize_store() + */ + zram_reset_device(zram); + blk_cleanup_disk(zram->disk); kfree(zram); return 0; From 00c5495c54f785beb0f6a34f7a3674d3ea0997d5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Oct 2021 10:54:26 +0800 Subject: [PATCH 15/34] zram: replace fsync_bdev with sync_blockdev When calling fsync_bdev(), zram driver guarantees that the bdev won't be opened by anyone, then there can't be one active fs/superblock over the zram bdev, so replace fsync_bdev with sync_blockdev. Reviewed-by: Luis Chamberlain Signed-off-by: Ming Lei Acked-by: Minchan Kim Link: https://lore.kernel.org/r/20211025025426.2815424-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2dfa3a396c7c..edc6bd640559 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1790,7 +1790,7 @@ static ssize_t reset_store(struct device *dev, mutex_unlock(&bdev->bd_disk->open_mutex); /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); + sync_blockdev(bdev); zram_reset_device(zram); mutex_lock(&bdev->bd_disk->open_mutex); @@ -1995,7 +1995,7 @@ static int zram_remove(struct zram *zram) ; } else { /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); + sync_blockdev(bdev); zram_reset_device(zram); } From 8468f45091d2866affed6f6a7aecc20779139173 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 3 Nov 2021 14:49:17 +0800 Subject: [PATCH 16/34] bcache: fix use-after-free problem in bcache_device_free() In bcache_device_free(), pointer disk is referenced still in ida_simple_remove() after blk_cleanup_disk() gets called on this pointer. This may cause a potential panic by use-after-free on the disk pointer. This patch fixes the problem by calling blk_cleanup_disk() after ida_simple_remove(). Fixes: bc70852fd104 ("bcache: convert to blk_alloc_disk/blk_cleanup_disk") Signed-off-by: Coly Li Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ulf Hansson Cc: stable@vger.kernel.org # v5.14+ Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211103064917.67383-1-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 84a48eed8e24..a7bb3355b776 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -885,9 +885,9 @@ static void bcache_device_free(struct bcache_device *d) bcache_device_detach(d); if (disk) { - blk_cleanup_disk(disk); ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); + blk_cleanup_disk(disk); } bioset_exit(&d->bio_split); From 3aefb5ee843fbe4789d03bb181e190d462df95e4 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 09:58:43 -0700 Subject: [PATCH 17/34] nvdimm/btt: do not call del_gendisk() if not needed del_gendisk() should not called if the disk has not been added. Fix this. Fixes: 41cd8b70c37a ("libnvdimm, btt: add support for blk integrity") Reviewed-by: Dan Williams Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103165843.1402142-1-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/btt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 4295fa809420..9b9d5e506e11 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1537,7 +1537,6 @@ static int btt_blk_init(struct btt *btt) int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); if (rc) { - del_gendisk(btt->btt_disk); blk_cleanup_disk(btt->btt_disk); return rc; } From 27548088ac628109f70eb0b1eb521d035844dba8 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Thu, 4 Nov 2021 16:07:09 +0800 Subject: [PATCH 18/34] drbd: Fix double free problem in drbd_create_device In drbd_create_device(), the 'out_no_io_page' lable has called blk_cleanup_disk() when return failed. So remove the 'out_cleanup_disk' lable to avoid double free the disk pointer. Fixes: e92ab4eda516 ("drbd: add error handling support for add_disk()") Signed-off-by: Wu Bo Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/1636013229-26309-1-git-send-email-wubo40@huawei.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 19db80a1e409..53ba2dddba6e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2796,7 +2796,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig err = add_disk(disk); if (err) - goto out_cleanup_disk; + goto out_idr_remove_vol; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2810,8 +2810,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_cleanup_disk: - blk_cleanup_disk(disk); out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: From 18c6c96897a3b1ba44ab4df7324bf0b3454e090b Mon Sep 17 00:00:00 2001 From: luo penghao Date: Thu, 4 Nov 2021 06:45:46 +0000 Subject: [PATCH 19/34] loop: Remove duplicate assignments The assignment and operation there will be overwritten later, so it should be deleted. The clang_analyzer complains as follows: drivers/block/loop.c:2330:2 warning: Value stored to 'err' is never read change in v2: Repair the sending email box Reported-by: Zeal Robot Signed-off-by: luo penghao Link: https://lore.kernel.org/r/20211104064546.3074-1-luo.penghao@zte.com.cn Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 302ac8f4f8ac..686b0a19f794 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2003,7 +2003,6 @@ static int loop_add(int i) goto out_free_dev; i = err; - err = -ENOMEM; lo->tag_set.ops = &loop_mq_ops; lo->tag_set.nr_hw_queues = 1; lo->tag_set.queue_depth = 128; From 2762ff06aa49e3a13fb4b779120f4f8c12c39fd1 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:24 -0700 Subject: [PATCH 20/34] nvdimm/btt: use goto error labels on btt_blk_init() This will make it easier to share common error paths. Reviewed-by: Dan Williams Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-2-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/btt.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9b9d5e506e11..5cb6d7ac6e36 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1519,6 +1519,7 @@ static int btt_blk_init(struct btt *btt) { struct nd_btt *nd_btt = btt->nd_btt; struct nd_namespace_common *ndns = nd_btt->ndns; + int rc = -ENOMEM; btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE); if (!btt->btt_disk) @@ -1534,19 +1535,22 @@ static int btt_blk_init(struct btt *btt) blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); if (btt_meta_size(btt)) { - int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); - - if (rc) { - blk_cleanup_disk(btt->btt_disk); - return rc; - } + rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); + if (rc) + goto out_cleanup_disk; } + set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); + btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; nvdimm_check_and_set_ro(btt->btt_disk); return 0; + +out_cleanup_disk: + blk_cleanup_disk(btt->btt_disk); + return rc; } static void btt_blk_cleanup(struct btt *btt) From 16be7974ff5d0a5cd9f345571c3eac1c3f6ba6de Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:25 -0700 Subject: [PATCH 21/34] nvdimm/btt: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-3-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/btt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 5cb6d7ac6e36..38ed53eeea5e 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1541,7 +1541,9 @@ static int btt_blk_init(struct btt *btt) } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); + rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); + if (rc) + goto out_cleanup_disk; btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; nvdimm_check_and_set_ro(btt->btt_disk); From b7421afcec0c77ab58633587ddc29d53e6eb95af Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:26 -0700 Subject: [PATCH 22/34] nvdimm/blk: avoid calling del_gendisk() on early failures If nd_integrity_init() fails we'd get del_gendisk() called, but that's not correct as we should only call that if we're done with device_add_disk(). Fix this by providing unwinding prior to the devm call being registered and moving the devm registration to the very end. This should fix calling del_gendisk() if nd_integrity_init() fails. I only spotted this issue through code inspection. It does not fix any real world bug. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-4-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/blk.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index b6c6866f9259..4eef67918a7e 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -239,6 +239,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) resource_size_t available_disk_size; struct gendisk *disk; u64 internal_nlba; + int rc; internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk)); available_disk_size = internal_nlba * nsblk_sector_size(nsblk); @@ -255,20 +256,26 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) blk_queue_logical_block_size(disk->queue, nsblk_sector_size(nsblk)); blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) - return -ENOMEM; - if (nsblk_meta_size(nsblk)) { - int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk)); + rc = nd_integrity_init(disk, nsblk_meta_size(nsblk)); if (rc) - return rc; + goto out_before_devm_err; } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); device_add_disk(dev, disk, NULL); + + /* nd_blk_release_disk() is called if this fails */ + if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) + return -ENOMEM; + nvdimm_check_and_set_ro(disk); return 0; + +out_before_devm_err: + blk_cleanup_disk(disk); + return rc; } static int nd_blk_probe(struct device *dev) From dc104f4bb2d0a652dee010e47bc89c1ad2ab37c9 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:27 -0700 Subject: [PATCH 23/34] nvdimm/blk: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Since nvdimm/blk uses devm we just need to move the devm registration towards the end. And in hindsight, that seems to also provide a fix given del_gendisk() should not be called unless the disk was already added via add_disk(). The probably of that issue happening is low though, like OOM while calling devm_add_action(), so the fix is minor. We manually unwind in case of add_disk() failure prior to the devm registration. Reviewed-by: Dan Williams Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-5-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4eef67918a7e..228c33b8d1d6 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -264,7 +264,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); - device_add_disk(dev, disk, NULL); + rc = device_add_disk(dev, disk, NULL); + if (rc) + goto out_before_devm_err; /* nd_blk_release_disk() is called if this fails */ if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) From accf58afb689f81daadde24080ea1164ad2db75f Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:28 -0700 Subject: [PATCH 24/34] nvdimm/pmem: cleanup the disk if pmem_release_disk() is yet assigned Prior to devm being able to use pmem_release_disk() there are other failure which can occur for which we must account for and release the disk for. Address those few cases. Fixes: 3dd60fb9d95d ("nvdimm/pmem: stop using q_usage_count as external pgmap refcount") Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-6-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/pmem.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index a67a3ad1d413..cbb3662391c2 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -457,8 +457,10 @@ static int pmem_attach_disk(struct device *dev, bb_range.end = res->end; } - if (IS_ERR(addr)) - return PTR_ERR(addr); + if (IS_ERR(addr)) { + rc = PTR_ERR(addr); + goto out; + } pmem->virt_addr = addr; blk_queue_write_cache(q, true, fua); @@ -483,7 +485,8 @@ static int pmem_attach_disk(struct device *dev, flags = DAXDEV_F_SYNC; dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); if (IS_ERR(dax_dev)) { - return PTR_ERR(dax_dev); + rc = PTR_ERR(dax_dev); + goto out; } dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; @@ -498,8 +501,10 @@ static int pmem_attach_disk(struct device *dev, "badblocks"); if (!pmem->bb_state) dev_warn(dev, "'badblocks' notification disabled\n"); - return 0; +out: + blk_cleanup_disk(pmem->disk); + return rc; } static int nd_pmem_probe(struct device *dev) From 5a192ccc32e2981f721343c750b8cfb4c3f41007 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:29 -0700 Subject: [PATCH 25/34] nvdimm/pmem: use add_disk() error handling Now that device_add_disk() supports returning an error, use that. We must unwind alloc_dax() on error. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-7-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/nvdimm/pmem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index cbb3662391c2..6052091f6f59 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -491,7 +491,9 @@ static int pmem_attach_disk(struct device *dev, dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; - device_add_disk(dev, disk, pmem_attribute_groups); + rc = device_add_disk(dev, disk, pmem_attribute_groups); + if (rc) + goto out_cleanup_dax; if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; @@ -502,6 +504,10 @@ static int pmem_attach_disk(struct device *dev, if (!pmem->bb_state) dev_warn(dev, "'badblocks' notification disabled\n"); return 0; + +out_cleanup_dax: + kill_dax(pmem->dax_dev); + put_dax(pmem->dax_dev); out: blk_cleanup_disk(pmem->disk); return rc; From 15733754ccf35c49d2f36a7ac51adc8b975c1c78 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:30 -0700 Subject: [PATCH 26/34] z2ram: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Only the disk is cleaned up inside z2ram_register_disk() as the caller deals with the rest. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-8-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/z2ram.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 4eef218108c6..ccc52c935faf 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -318,6 +318,7 @@ static const struct blk_mq_ops z2_mq_ops = { static int z2ram_register_disk(int minor) { struct gendisk *disk; + int err; disk = blk_mq_alloc_disk(&tag_set, NULL); if (IS_ERR(disk)) @@ -333,8 +334,10 @@ static int z2ram_register_disk(int minor) sprintf(disk->disk_name, "z2ram"); z2ram_gendisk[minor] = disk; - add_disk(disk); - return 0; + err = add_disk(disk); + if (err) + blk_cleanup_disk(disk); + return err; } static int __init z2_init(void) From f583eaef0af39b792d74e39721b5ba4b6948a270 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:31 -0700 Subject: [PATCH 27/34] block/sunvdc: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. We re-use the same free tag call, so we also add a label for that as well. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-9-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/sunvdc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 4d4bb810c2ae..6f45a53f7cbf 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -826,8 +826,8 @@ static int probe_disk(struct vdc_port *port) if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); - blk_mq_free_tag_set(&port->tag_set); - return PTR_ERR(g); + err = PTR_ERR(g); + goto out_free_tag; } port->disk = g; @@ -879,9 +879,17 @@ static int probe_disk(struct vdc_port *port) port->vdisk_size, (port->vdisk_size >> (20 - 9)), port->vio.ver.major, port->vio.ver.minor); - device_add_disk(&port->vio.vdev->dev, g, NULL); + err = device_add_disk(&port->vio.vdev->dev, g, NULL); + if (err) + goto out_cleanup_disk; return 0; + +out_cleanup_disk: + blk_cleanup_disk(g); +out_free_tag: + blk_mq_free_tag_set(&port->tag_set); + return err; } static struct ldc_channel_config vdc_ldc_cfg = { From ed73919124b2e48490adbbe48ffe885a2a4c6fee Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:32 -0700 Subject: [PATCH 28/34] mtd/ubi/block: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-10-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/mtd/ubi/block.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index e003b4b44ffa..062e6c2c45f5 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -447,12 +447,18 @@ int ubiblock_create(struct ubi_volume_info *vi) list_add_tail(&dev->list, &ubiblock_devices); /* Must be the last step: anyone can call file ops from now on */ - add_disk(dev->gd); + ret = add_disk(dev->gd); + if (ret) + goto out_destroy_wq; + dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); mutex_unlock(&devices_mutex); return 0; +out_destroy_wq: + list_del(&dev->list); + destroy_workqueue(dev->wq); out_remove_minor: idr_remove(&ubiblock_minor_idr, gd->first_minor); out_cleanup_disk: From 4ddb85d36613c45bde00d368bf9f357bd0708a0c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 3 Nov 2021 16:04:33 -0700 Subject: [PATCH 29/34] ataflop: remove ataflop_probe_lock mutex Commit bf9c0538e485b591 ("ataflop: use a separate gendisk for each media format") introduced ataflop_probe_lock mutex, but forgot to unlock the mutex when atari_floppy_init() (i.e. module loading) succeeded. This will result in double lock deadlock if ataflop_probe() is called. Also, unregister_blkdev() must not be called from atari_floppy_init() with ataflop_probe_lock held when atari_floppy_init() failed, for ataflop_probe() waits for ataflop_probe_lock with major_names_lock held (i.e. AB-BA deadlock). __register_blkdev() needs to be called last in order to avoid calling ataflop_probe() when atari_floppy_init() is about to fail, for memory for completing already-started ataflop_probe() safely will be released as soon as atari_floppy_init() released ataflop_probe_lock mutex. As with commit 8b52d8be86d72308 ("loop: reorder loop_exit"), unregister_blkdev() needs to be called first in order to avoid calling ataflop_alloc_disk() from ataflop_probe() after del_gendisk() from atari_floppy_exit(). By relocating __register_blkdev() / unregister_blkdev() as explained above, we can remove ataflop_probe_lock mutex, for probe function and __exit function are serialized by major_names_lock mutex. Signed-off-by: Tetsuo Handa Fixes: bf9c0538e485b591 ("ataflop: use a separate gendisk for each media format") Reviewed-by: Luis Chamberlain Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20211103230437.1639990-11-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 47 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index d14bdc3589b2..170dd193cef6 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2008,8 +2008,6 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type) return 0; } -static DEFINE_MUTEX(ataflop_probe_lock); - static void ataflop_probe(dev_t dev) { int drive = MINOR(dev) & 3; @@ -2020,14 +2018,32 @@ static void ataflop_probe(dev_t dev) if (drive >= FD_MAX_UNITS || type >= NUM_DISK_MINORS) return; - mutex_lock(&ataflop_probe_lock); if (!unit[drive].disk[type]) { if (ataflop_alloc_disk(drive, type) == 0) { add_disk(unit[drive].disk[type]); unit[drive].registered[type] = true; } } - mutex_unlock(&ataflop_probe_lock); +} + +static void atari_floppy_cleanup(void) +{ + int i; + int type; + + for (i = 0; i < FD_MAX_UNITS; i++) { + for (type = 0; type < NUM_DISK_MINORS; type++) { + if (!unit[i].disk[type]) + continue; + del_gendisk(unit[i].disk[type]); + blk_cleanup_queue(unit[i].disk[type]->queue); + put_disk(unit[i].disk[type]); + } + blk_mq_free_tag_set(&unit[i].tag_set); + } + + del_timer_sync(&fd_timer); + atari_stram_free(DMABuffer); } static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs) @@ -2053,11 +2069,6 @@ static int __init atari_floppy_init (void) /* Amiga, Mac, ... don't have Atari-compatible floppy :-) */ return -ENODEV; - mutex_lock(&ataflop_probe_lock); - ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); - if (ret) - goto out_unlock; - for (i = 0; i < FD_MAX_UNITS; i++) { memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); unit[i].tag_set.ops = &ataflop_mq_ops; @@ -2113,7 +2124,12 @@ static int __init atari_floppy_init (void) UseTrackbuffer ? "" : "no "); config_types(); - return 0; + ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); + if (ret) { + printk(KERN_ERR "atari_floppy_init: cannot register block device\n"); + atari_floppy_cleanup(); + } + return ret; err_out_dma: atari_stram_free(DMABuffer); @@ -2121,9 +2137,6 @@ static int __init atari_floppy_init (void) while (--i >= 0) atari_cleanup_floppy_disk(&unit[i]); - unregister_blkdev(FLOPPY_MAJOR, "fd"); -out_unlock: - mutex_unlock(&ataflop_probe_lock); return ret; } @@ -2168,14 +2181,8 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { - int i; - - for (i = 0; i < FD_MAX_UNITS; i++) - atari_cleanup_floppy_disk(&unit[i]); unregister_blkdev(FLOPPY_MAJOR, "fd"); - - del_timer_sync(&fd_timer); - atari_stram_free( DMABuffer ); + atari_floppy_cleanup(); } module_init(atari_floppy_init) From 26e06f5b13671d194d67ae8e2b66f524ab174153 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:34 -0700 Subject: [PATCH 30/34] block: update __register_blkdev() probe documentation __register_blkdev() is used to register a probe callback, and that callback is typically used to call add_disk(). Now that we are able to capture errors for add_disk(), we need to fix those probe calls where add_disk() fails and clean up resources. We don't extend the probe call to return the error given: 1) we'd have to always special-case the case where the disk was already present, as otherwise concurrent requests to open an existing block device would fail, and this would be a userspace visible change 2) the error from ilookup() on blkdev_get_no_open() is sufficient 3) The only thing the probe call is used for is to support pre-devtmpfs, pre-udev semantics that want to create disks when their pre-created device node is accessed, and so we don't care for failures on probe there. Expand documentation for the probe callback to ensure users cleanup resources if add_disk() is used and to clarify this interface may be removed in the future. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-12-mcgrof@kernel.org Signed-off-by: Jens Axboe --- block/genhd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index 759bc06810f8..13494ed79675 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -212,7 +212,10 @@ void blkdev_show(struct seq_file *seqf, off_t offset) * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string - * @probe: allback that is called on access to any minor number of @major + * @probe: pre-devtmpfs / pre-udev callback used to create disks when their + * pre-created device node is accessed. When a probe call uses + * add_disk() and it fails the driver must cleanup resources. This + * interface may soon be removed. * * The @name must be unique within the system. * From 46a7db492e7a27408bc164cbe6424683e79529b0 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:35 -0700 Subject: [PATCH 31/34] ataflop: address add_disk() error handling on probe We need to cleanup resources on the probe() callback registered with __register_blkdev(), now that add_disk() error handling is supported. Address this. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-13-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 170dd193cef6..de8c3785899a 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2018,12 +2018,18 @@ static void ataflop_probe(dev_t dev) if (drive >= FD_MAX_UNITS || type >= NUM_DISK_MINORS) return; - if (!unit[drive].disk[type]) { - if (ataflop_alloc_disk(drive, type) == 0) { - add_disk(unit[drive].disk[type]); - unit[drive].registered[type] = true; - } - } + if (unit[drive].disk[type]) + return + if (ataflop_alloc_disk(drive, type)) + return; + if (add_disk(unit[drive].disk[type])) + goto cleanup_disk; + unit[drive].registered[type] = true; + return; + +cleanup_disk: + blk_cleanup_disk(unit[drive].disk[type]); + unit[drive].disk[type] = NULL; } static void atari_floppy_cleanup(void) From ec28fcc6cfcd418d20038ad2c492e87bf3a9f026 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 3 Nov 2021 16:04:36 -0700 Subject: [PATCH 32/34] floppy: address add_disk() error handling on probe We need to cleanup resources on the probe() callback registered with __register_blkdev(), now that add_disk() error handling is supported. Address this. Reviewed-by: Christoph Hellwig Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211103230437.1639990-14-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3873e789478e..c4267da716fe 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4528,10 +4528,19 @@ static void floppy_probe(dev_t dev) return; mutex_lock(&floppy_probe_lock); - if (!disks[drive][type]) { - if (floppy_alloc_disk(drive, type) == 0) - add_disk(disks[drive][type]); - } + if (disks[drive][type]) + goto out; + if (floppy_alloc_disk(drive, type)) + goto out; + if (add_disk(disks[drive][type])) + goto cleanup_disk; +out: + mutex_unlock(&floppy_probe_lock); + return; + +cleanup_disk: + blk_cleanup_disk(disks[drive][type]); + disks[drive][type] = NULL; mutex_unlock(&floppy_probe_lock); } From 38987a872b313e72f7a64e91ec0b8084eaec0f10 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 6 Nov 2021 19:55:49 +0100 Subject: [PATCH 33/34] ataflop: Add missing semicolon to return statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/block/ataflop.c: In function ‘ataflop_probe’: drivers/block/ataflop.c:2023:2: error: expected expression before ‘if’ 2023 | if (ataflop_alloc_disk(drive, type)) | ^~ drivers/block/ataflop.c:2023:2: error: ‘return’ with a value, in function returning void [-Werror=return-type] drivers/block/ataflop.c:2011:13: note: declared here 2011 | static void ataflop_probe(dev_t dev) | ^~~~~~~~~~~~~ Fixes: 46a7db492e7a2740 ("ataflop: address add_disk() error handling on probe") Reported-by: noreply@ellerman.id.au Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211106185549.1578444-1-geert@linux-m68k.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index de8c3785899a..bf769e6e32fe 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2019,7 +2019,7 @@ static void ataflop_probe(dev_t dev) if (drive >= FD_MAX_UNITS || type >= NUM_DISK_MINORS) return; if (unit[drive].disk[type]) - return + return; if (ataflop_alloc_disk(drive, type)) return; if (add_disk(unit[drive].disk[type])) From 2878feaed543c35f9dbbe6d8ce36fb67ac803eef Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 3 Nov 2021 23:10:41 +0800 Subject: [PATCH 34/34] bcache: Revert "bcache: use bvec_virt" This reverts commit 2fd3e5efe791946be0957c8e1eed9560b541fe46. The above commit replaces page_address(bv->bv_page) by bvec_virt(bv) to avoid directly access to bv->bv_page, but in situation bv->bv_offset is not zero and page_address(bv->bv_page) is not equal to bvec_virt(bv). In such case a memory corruption may happen because memory in next page is tainted by following line in do_btree_node_write(), memcpy(bvec_virt(bv), addr, PAGE_SIZE); This patch reverts the mentioned commit to avoid the memory corruption. Fixes: 2fd3e5efe791 ("bcache: use bvec_virt") Signed-off-by: Coly Li Cc: Christoph Hellwig Cc: stable@vger.kernel.org # 5.15 Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211103151041.70516-1-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 93b67b8d31c3..88c573eeb598 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b) struct bvec_iter_all iter_all; bio_for_each_segment_all(bv, b->bio, iter_all) { - memcpy(bvec_virt(bv), addr, PAGE_SIZE); + memcpy(page_address(bv->bv_page), addr, PAGE_SIZE); addr += PAGE_SIZE; }