X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=c2ff77ccec5079ded33ef9e440744fcd2eaff004;hb=43de804df8d6002059bf4af4522fa9273a19b8aa;hp=19f646a7640243b8049635c12e9dce1eba7966b9;hpb=e7debaa4951b37d6c9ace4c6b984cd4805c5bfbb;p=linux-2.6 diff --git a/drivers/md/md.c b/drivers/md/md.c index 19f646a764..c2ff77ccec 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -273,6 +273,7 @@ static mddev_t * mddev_find(dev_t unit) INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); atomic_set(&new->active, 1); + atomic_set(&new->openers, 0); spin_lock_init(&new->write_lock); init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->recovery_wait); @@ -347,15 +348,16 @@ static struct mdk_personality *find_pers(int level, char *clevel) return NULL; } +/* return the offset of the super block in 512byte sectors */ static inline sector_t calc_dev_sboffset(struct block_device *bdev) { - sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; - return MD_NEW_SIZE_BLOCKS(size); + sector_t num_sectors = bdev->bd_inode->i_size / 512; + return MD_NEW_SIZE_SECTORS(num_sectors); } static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size) { - sector_t num_sectors = rdev->sb_offset * 2; + sector_t num_sectors = rdev->sb_start; if (chunk_size) num_sectors &= ~((sector_t)chunk_size/512 - 1); @@ -382,7 +384,7 @@ static void free_disk_sb(mdk_rdev_t * rdev) put_page(rdev->sb_page); rdev->sb_loaded = 0; rdev->sb_page = NULL; - rdev->sb_offset = 0; + rdev->sb_start = 0; rdev->size = 0; } } @@ -528,7 +530,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size) return 0; - if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) + if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0; @@ -654,7 +656,7 @@ struct super_type { int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev); unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev, - unsigned long long size); + sector_t num_sectors); }; /* @@ -665,16 +667,14 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; mdp_super_t *sb; int ret; - sector_t sb_offset; /* - * Calculate the position of the superblock, + * Calculate the position of the superblock (512byte sectors), * it's at the end of the disk. * * It also happens to be a multiple of 4Kb. */ - sb_offset = calc_dev_sboffset(rdev->bdev); - rdev->sb_offset = sb_offset; + rdev->sb_start = calc_dev_sboffset(rdev->bdev); ret = read_disk_sb(rdev, MD_SB_BYTES); if (ret) return ret; @@ -999,20 +999,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) * rdev_size_change for 0.90.0 */ static unsigned long long -super_90_rdev_size_change(mdk_rdev_t *rdev, unsigned long long size) +super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) { - if (size && size < rdev->mddev->size) + if (num_sectors && num_sectors < rdev->mddev->size * 2) return 0; /* component must fit device */ - size *= 2; /* convert to sectors */ if (rdev->mddev->bitmap_offset) return 0; /* can't move bitmap */ - rdev->sb_offset = calc_dev_sboffset(rdev->bdev); - if (!size || size > rdev->sb_offset*2) - size = rdev->sb_offset*2; - md_super_write(rdev->mddev, rdev, rdev->sb_offset << 1, rdev->sb_size, + rdev->sb_start = calc_dev_sboffset(rdev->bdev); + if (!num_sectors || num_sectors > rdev->sb_start) + num_sectors = rdev->sb_start; + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); md_super_wait(rdev->mddev); - return size/2; /* kB for sysfs */ + return num_sectors / 2; /* kB for sysfs */ } @@ -1047,12 +1046,12 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) { struct mdp_superblock_1 *sb; int ret; - sector_t sb_offset; + sector_t sb_start; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; /* - * Calculate the position of the superblock. + * Calculate the position of the superblock in 512byte sectors. * It is always aligned to a 4K boundary and * depeding on minor_version, it can be: * 0: At least 8K, but less than 12K, from end of device @@ -1061,22 +1060,20 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) */ switch(minor_version) { case 0: - sb_offset = rdev->bdev->bd_inode->i_size >> 9; - sb_offset -= 8*2; - sb_offset &= ~(sector_t)(4*2-1); - /* convert from sectors to K */ - sb_offset /= 2; + sb_start = rdev->bdev->bd_inode->i_size >> 9; + sb_start -= 8*2; + sb_start &= ~(sector_t)(4*2-1); break; case 1: - sb_offset = 0; + sb_start = 0; break; case 2: - sb_offset = 4; + sb_start = 8; break; default: return -EINVAL; } - rdev->sb_offset = sb_offset; + rdev->sb_start = sb_start; /* superblock is rarely larger than 1K, but it can be larger, * and it is safe to read 4k, so we do that @@ -1090,7 +1087,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || sb->major_version != cpu_to_le32(1) || le32_to_cpu(sb->max_dev) > (4096-256)/2 || - le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || + le64_to_cpu(sb->super_offset) != rdev->sb_start || (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) return -EINVAL; @@ -1126,7 +1123,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->sb_size = (rdev->sb_size | bmask) + 1; if (minor_version - && rdev->data_offset < sb_offset + (rdev->sb_size/512)) + && rdev->data_offset < sb_start + (rdev->sb_size/512)) return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) @@ -1162,7 +1159,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (minor_version) rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; else - rdev->size = rdev->sb_offset; + rdev->size = rdev->sb_start / 2; if (rdev->size < le64_to_cpu(sb->data_size)/2) return -EINVAL; rdev->size = le64_to_cpu(sb->data_size)/2; @@ -1342,40 +1339,39 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) } static unsigned long long -super_1_rdev_size_change(mdk_rdev_t *rdev, unsigned long long size) +super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) { struct mdp_superblock_1 *sb; - unsigned long long max_size; - if (size && size < rdev->mddev->size) + sector_t max_sectors; + if (num_sectors && num_sectors < rdev->mddev->size * 2) return 0; /* component must fit device */ - size *= 2; /* convert to sectors */ - if (rdev->sb_offset < rdev->data_offset/2) { + if (rdev->sb_start < rdev->data_offset) { /* minor versions 1 and 2; superblock before data */ - max_size = (rdev->bdev->bd_inode->i_size >> 9); - max_size -= rdev->data_offset; - if (!size || size > max_size) - size = max_size; + max_sectors = rdev->bdev->bd_inode->i_size >> 9; + max_sectors -= rdev->data_offset; + if (!num_sectors || num_sectors > max_sectors) + num_sectors = max_sectors; } else if (rdev->mddev->bitmap_offset) { /* minor version 0 with bitmap we can't move */ return 0; } else { /* minor version 0; superblock after data */ - sector_t sb_offset; - sb_offset = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; - sb_offset &= ~(sector_t)(4*2 - 1); - max_size = rdev->size*2 + sb_offset - rdev->sb_offset*2; - if (!size || size > max_size) - size = max_size; - rdev->sb_offset = sb_offset/2; + sector_t sb_start; + sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; + sb_start &= ~(sector_t)(4*2 - 1); + max_sectors = rdev->size * 2 + sb_start - rdev->sb_start; + if (!num_sectors || num_sectors > max_sectors) + num_sectors = max_sectors; + rdev->sb_start = sb_start; } sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page); - sb->data_size = cpu_to_le64(size); - sb->super_offset = rdev->sb_offset*2; + sb->data_size = cpu_to_le64(num_sectors); + sb->super_offset = rdev->sb_start; sb->sb_csum = calc_sb_1_csum(sb); - md_super_write(rdev->mddev, rdev, rdev->sb_offset << 1, rdev->sb_size, + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); md_super_wait(rdev->mddev); - return size/2; /* kB for sysfs */ + return num_sectors / 2; /* kB for sysfs */ } static struct super_type super_types[] = { @@ -1399,15 +1395,17 @@ static struct super_type super_types[] = { static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp, *tmp2; mdk_rdev_t *rdev, *rdev2; - rdev_for_each(rdev, tmp, mddev1) - rdev_for_each(rdev2, tmp2, mddev2) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev1) + rdev_for_each_rcu(rdev2, mddev2) if (rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) + rdev2->bdev->bd_contains) { + rcu_read_unlock(); return 1; - + } + rcu_read_unlock(); return 0; } @@ -1474,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) kobject_del(&rdev->kobj); goto fail; } - list_add(&rdev->same_set, &mddev->disks); + list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); return 0; @@ -1499,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) return; } bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); - list_del_init(&rdev->same_set); + list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); /* We need to delay this, otherwise we can deadlock when - * writing to 'remove' to "dev/state" + * writing to 'remove' to "dev/state". We also need + * to delay it due to rcu usage. */ + synchronize_rcu(); INIT_WORK(&rdev->del_work, md_delayed_delete); kobject_get(&rdev->kobj); schedule_work(&rdev->del_work); @@ -1562,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev) if (rdev->mddev) MD_BUG(); free_disk_sb(rdev); - list_del_init(&rdev->same_set); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); @@ -1809,11 +1808,11 @@ repeat: dprintk("%s ", bdevname(rdev->bdev,b)); if (!test_bit(Faulty, &rdev->flags)) { md_super_write(mddev,rdev, - rdev->sb_offset<<1, rdev->sb_size, + rdev->sb_start, rdev->sb_size, rdev->sb_page); dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", bdevname(rdev->bdev,b), - (unsigned long long)rdev->sb_offset); + (unsigned long long)rdev->sb_start); rdev->sb_events = mddev->events; } else @@ -2104,29 +2103,30 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) static ssize_t rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) { - char *e; - unsigned long long size = simple_strtoull(buf, &e, 10); + unsigned long long size; unsigned long long oldsize = rdev->size; mddev_t *my_mddev = rdev->mddev; - if (e==buf || (*e && *e != '\n')) + if (strict_strtoull(buf, 10, &size) < 0) + return -EINVAL; + if (size < my_mddev->size) return -EINVAL; if (my_mddev->pers && rdev->raid_disk >= 0) { - if (rdev->mddev->persistent) { - size = super_types[rdev->mddev->major_version]. - rdev_size_change(rdev, size); + if (my_mddev->persistent) { + size = super_types[my_mddev->major_version]. + rdev_size_change(rdev, size * 2); if (!size) return -EBUSY; } else if (!size) { size = (rdev->bdev->bd_inode->i_size >> 10); size -= rdev->data_offset/2; } - if (size < rdev->mddev->size) + if (size < my_mddev->size) return -EINVAL; /* component must fit device */ } rdev->size = size; - if (size > oldsize && rdev->mddev->external) { + if (size > oldsize && my_mddev->external) { /* need to check that all other rdevs with the same ->bdev * do not overlap. We need to unlock the mddev to avoid * a deadlock. We have already changed rdev->size, and if @@ -2145,8 +2145,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (test_bit(AllReserved, &rdev2->flags) || (rdev->bdev == rdev2->bdev && rdev != rdev2 && - overlaps(rdev->data_offset, rdev->size, - rdev2->data_offset, rdev2->size))) { + overlaps(rdev->data_offset, rdev->size * 2, + rdev2->data_offset, + rdev2->size * 2))) { overlap = 1; break; } @@ -2168,8 +2169,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) return -EBUSY; } } - if (size < my_mddev->size || my_mddev->size == 0) - my_mddev->size = size; return len; } @@ -2700,14 +2699,14 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) break; case clear: /* stopping an active array */ - if (atomic_read(&mddev->active) > 1) + if (atomic_read(&mddev->openers) > 0) return -EBUSY; err = do_md_stop(mddev, 0, 0); break; case inactive: /* stopping an active array */ if (mddev->pers) { - if (atomic_read(&mddev->active) > 1) + if (atomic_read(&mddev->openers) > 0) return -EBUSY; err = do_md_stop(mddev, 2, 0); } else @@ -3576,16 +3575,16 @@ static int do_md_run(mddev_t * mddev) * We don't want the data to overlap the metadata, * Internal Bitmap issues has handled elsewhere. */ - if (rdev->data_offset < rdev->sb_offset) { + if (rdev->data_offset < rdev->sb_start) { if (mddev->size && rdev->data_offset + mddev->size*2 - > rdev->sb_offset*2) { + > rdev->sb_start) { printk("md: %s: data overlaps metadata\n", mdname(mddev)); return -EINVAL; } } else { - if (rdev->sb_offset*2 + rdev->sb_size/512 + if (rdev->sb_start + rdev->sb_size/512 > rdev->data_offset) { printk("md: %s: metadata overlaps data\n", mdname(mddev)); @@ -3709,7 +3708,7 @@ static int do_md_run(mddev_t * mddev) if (mddev->flags) md_update_sb(mddev, 0); - set_capacity(disk, mddev->array_size<<1); + set_capacity(disk, mddev->array_sectors); /* If we call blk_queue_make_request here, it will * re-initialise max_sectors etc which may have been @@ -3821,7 +3820,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) int err = 0; struct gendisk *disk = mddev->gendisk; - if (atomic_read(&mddev->active) > 1 + is_open) { + if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); return -EBUSY; } @@ -3910,7 +3909,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) export_array(mddev); - mddev->array_size = 0; + mddev->array_sectors = 0; mddev->size = 0; mddev->raid_disks = 0; mddev->recovery_cp = 0; @@ -4066,8 +4065,10 @@ static void autorun_devices(int part) /* on success, candidates will be empty, on error * it won't... */ - rdev_for_each_list(rdev, tmp, candidates) + rdev_for_each_list(rdev, tmp, candidates) { + list_del_init(&rdev->same_set); export_rdev(rdev); + } mddev_put(mddev); } printk(KERN_INFO "md: ... autorun DONE.\n"); @@ -4354,9 +4355,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (!mddev->persistent) { printk(KERN_INFO "md: nonpersistent superblock ...\n"); - rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; + rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; } else - rdev->sb_offset = calc_dev_sboffset(rdev->bdev); + rdev->sb_start = calc_dev_sboffset(rdev->bdev); rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; err = bind_rdev_to_array(rdev, mddev); @@ -4423,10 +4424,9 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) } if (mddev->persistent) - rdev->sb_offset = calc_dev_sboffset(rdev->bdev); + rdev->sb_start = calc_dev_sboffset(rdev->bdev); else - rdev->sb_offset = - rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; + rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; @@ -4627,7 +4627,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) * linear and raid0 always use whatever space is available. We can only * consider changing this number if no resync or reconstruction is * happening, and if the new size is acceptable. It must fit before the - * sb_offset or, if that is gendisk, 0); if (bdev) { mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10); + i_size_write(bdev->bd_inode, + (loff_t)mddev->array_sectors << 9); mutex_unlock(&bdev->bd_inode->i_mutex); bdput(bdev); } @@ -5019,6 +5020,7 @@ static int md_open(struct inode *inode, struct file *file) err = 0; mddev_get(mddev); + atomic_inc(&mddev->openers); mddev_unlock(mddev); check_disk_change(inode->i_bdev); @@ -5031,6 +5033,7 @@ static int md_release(struct inode *inode, struct file * file) mddev_t *mddev = inode->i_bdev->bd_disk->private_data; BUG_ON(!mddev); + atomic_dec(&mddev->openers); mddev_put(mddev); return 0; @@ -5397,10 +5400,11 @@ static int md_seq_show(struct seq_file *seq, void *v) if (!list_empty(&mddev->disks)) { if (mddev->pers) seq_printf(seq, "\n %llu blocks", - (unsigned long long)mddev->array_size); + (unsigned long long) + mddev->array_sectors / 2); else seq_printf(seq, "\n %llu blocks", - (unsigned long long)size); + (unsigned long long)size); } if (mddev->persistent) { if (mddev->major_version != 0 || @@ -5530,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p) static int is_mddev_idle(mddev_t *mddev) { mdk_rdev_t * rdev; - struct list_head *tmp; int idle; long curr_events; idle = 1; - rdev_for_each(rdev, tmp, mddev) { + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; curr_events = disk_stat_read(disk, sectors[0]) + disk_stat_read(disk, sectors[1]) - @@ -5567,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev) idle = 0; } } + rcu_read_unlock(); return idle; }