* Any code which breaks out of this loop while own
* a reference to the current mddev and must mddev_put it.
*/
-#define ITERATE_MDDEV(mddev,tmp) \
+#define for_each_mddev(mddev,tmp) \
\
for (({ spin_lock(&all_mddevs_lock); \
tmp = all_mddevs.next; \
mdk_rdev_t * rdev;
struct list_head *tmp;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->desc_nr == nr)
return rdev;
}
struct list_head *tmp;
mdk_rdev_t *rdev;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->bdev->bd_dev == dev)
return rdev;
}
__u64 ev1 = md_event(sb);
rdev->raid_disk = -1;
- rdev->flags = 0;
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
+ clear_bit(BarriersNotsupp, &rdev->flags);
+
if (mddev->raid_disks == 0) {
mddev->major_version = 0;
mddev->minor_version = sb->minor_version;
mddev->patch_version = sb->patch_version;
- mddev->persistent = 1;
mddev->external = 0;
mddev->chunk_size = sb->chunk_size;
mddev->ctime = sb->ctime;
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED);
- ITERATE_RDEV(mddev,rdev2,tmp) {
+ rdev_for_each(rdev2, tmp, mddev) {
mdp_disk_t *d;
int desc_nr;
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
__u64 ev1 = le64_to_cpu(sb->events);
rdev->raid_disk = -1;
- rdev->flags = 0;
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
+ clear_bit(BarriersNotsupp, &rdev->flags);
+
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
mddev->patch_version = 0;
- mddev->persistent = 1;
mddev->external = 0;
mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
}
max_dev = 0;
- ITERATE_RDEV(mddev,rdev2,tmp)
+ rdev_for_each(rdev2, tmp, mddev)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
- ITERATE_RDEV(mddev,rdev2,tmp) {
+ rdev_for_each(rdev2, tmp, mddev) {
i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(0xfffe);
struct list_head *tmp, *tmp2;
mdk_rdev_t *rdev, *rdev2;
- ITERATE_RDEV(mddev1,rdev,tmp)
- ITERATE_RDEV(mddev2, rdev2, tmp2)
+ rdev_for_each(rdev, tmp, mddev1)
+ rdev_for_each(rdev2, tmp2, mddev2)
if (rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains)
return 1;
goto fail;
}
list_add(&rdev->same_set, &mddev->disks);
- bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
+ bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
return 0;
fail:
return err;
}
-static void delayed_delete(struct work_struct *ws)
+static void md_delayed_delete(struct work_struct *ws)
{
mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
kobject_del(&rdev->kobj);
+ kobject_put(&rdev->kobj);
}
static void unbind_rdev_from_array(mdk_rdev_t * rdev)
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state"
*/
- INIT_WORK(&rdev->del_work, delayed_delete);
+ INIT_WORK(&rdev->del_work, md_delayed_delete);
+ kobject_get(&rdev->kobj);
schedule_work(&rdev->del_work);
}
* otherwise reused by a RAID array (or any other kernel
* subsystem), by bd_claiming the device.
*/
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
+static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- err = bd_claim(bdev, rdev);
+ err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (err) {
printk(KERN_ERR "md: could not bd_claim %s.\n",
bdevname(bdev, b));
blkdev_put(bdev);
return err;
}
+ if (!shared)
+ set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
return err;
}
struct list_head *tmp;
mdk_rdev_t *rdev;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (!rdev->mddev) {
MD_BUG();
continue;
printk("md: **********************************\n");
printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
printk("md: **********************************\n");
- ITERATE_MDDEV(mddev,tmp) {
+ for_each_mddev(mddev, tmp) {
if (mddev->bitmap)
bitmap_print_sb(mddev->bitmap);
else
printk("%s: ", mdname(mddev));
- ITERATE_RDEV(mddev,rdev,tmp2)
+ rdev_for_each(rdev, tmp2, mddev)
printk("<%s>", bdevname(rdev->bdev,b));
printk("\n");
- ITERATE_RDEV(mddev,rdev,tmp2)
+ rdev_for_each(rdev, tmp2, mddev)
print_rdev(rdev);
}
printk("md: **********************************\n");
mdk_rdev_t *rdev;
struct list_head *tmp;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->sb_events == mddev->events ||
(nospares &&
rdev->raid_disk < 0 &&
mdname(mddev),mddev->in_sync);
bitmap_update_sb(mddev->bitmap);
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
char b[BDEVNAME_SIZE];
dprintk(KERN_INFO "md: ");
if (rdev->sb_loaded != 1)
state_show(mdk_rdev_t *rdev, char *page)
{
char *sep = "";
- int len=0;
+ size_t len = 0;
if (test_bit(Faulty, &rdev->flags)) {
len+= sprintf(page+len, "%sfaulty",sep);
return -ENOSPC;
rdev->raid_disk = slot;
/* assume it is working */
- rdev->flags = 0;
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
}
return len;
return -EINVAL;
if (rdev->mddev->pers)
return -EBUSY;
+ if (rdev->size && rdev->mddev->external)
+ /* Must set offset before size, so overlap checks
+ * can be sane */
+ return -EBUSY;
rdev->data_offset = offset;
return len;
}
return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
}
+static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+{
+ /* check if two start/length pairs overlap */
+ if (s1+l1 <= s2)
+ return 0;
+ if (s2+l2 <= s1)
+ return 0;
+ return 1;
+}
+
static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
char *e;
unsigned long long size = simple_strtoull(buf, &e, 10);
+ unsigned long long oldsize = rdev->size;
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
if (rdev->mddev->pers)
return -EBUSY;
rdev->size = size;
+ if (size > oldsize && rdev->mddev->external) {
+ /* need to check that all other rdevs with the same ->bdev
+ * do not overlap. We need to unlock the mddev to avoid
+ * a deadlock. We have already changed rdev->size, and if
+ * we have to change it back, we will have the lock again.
+ */
+ mddev_t *mddev;
+ int overlap = 0;
+ struct list_head *tmp, *tmp2;
+
+ mddev_unlock(rdev->mddev);
+ for_each_mddev(mddev, tmp) {
+ mdk_rdev_t *rdev2;
+
+ mddev_lock(mddev);
+ rdev_for_each(rdev2, tmp2, mddev)
+ if (test_bit(AllReserved, &rdev2->flags) ||
+ (rdev->bdev == rdev2->bdev &&
+ rdev != rdev2 &&
+ overlaps(rdev->data_offset, rdev->size,
+ rdev2->data_offset, rdev2->size))) {
+ overlap = 1;
+ break;
+ }
+ mddev_unlock(mddev);
+ if (overlap) {
+ mddev_put(mddev);
+ break;
+ }
+ }
+ mddev_lock(rdev->mddev);
+ if (overlap) {
+ /* Someone else could have slipped in a size
+ * change here, but doing so is just silly.
+ * We put oldsize back because we *know* it is
+ * safe, and trust userspace not to race with
+ * itself
+ */
+ rdev->size = oldsize;
+ return -EBUSY;
+ }
+ }
if (size < rdev->mddev->size || rdev->mddev->size == 0)
rdev->mddev->size = size;
return len;
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+ int rv;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- return entry->store(rdev, page, length);
+ rv = mddev_lock(rdev->mddev);
+ if (!rv) {
+ rv = entry->store(rdev, page, length);
+ mddev_unlock(rdev->mddev);
+ }
+ return rv;
}
static void rdev_free(struct kobject *ko)
if ((err = alloc_disk_sb(rdev)))
goto abort_free;
- err = lock_rdev(rdev, newdev);
+ err = lock_rdev(rdev, newdev, super_format == -2);
if (err)
goto abort_free;
char b[BDEVNAME_SIZE];
freshest = NULL;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
switch (super_types[mddev->major_version].
load_super(rdev, freshest, mddev->minor_version)) {
case 1:
validate_super(mddev, freshest);
i = 0;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev != freshest)
if (super_types[mddev->major_version].
validate_super(mddev, rdev)) {
static ssize_t
level_store(mddev_t *mddev, const char *buf, size_t len)
{
- int rv = len;
+ ssize_t rv = len;
if (mddev->pers)
return -EBUSY;
if (len == 0)
if (err < 0)
goto out;
}
- } else
+ } else if (mddev->external)
+ rdev = md_import_device(dev, -2, -1);
+ else
rdev = md_import_device(dev, -1, -1);
if (IS_ERR(rdev))
return len;
}
if (strncmp(buf, "external:", 9) == 0) {
- int namelen = len-9;
+ size_t namelen = len-9;
if (namelen >= sizeof(mddev->metadata_type))
namelen = sizeof(mddev->metadata_type)-1;
strncpy(mddev->metadata_type, buf+9, namelen);
/*
* Analyze all RAID superblock(s)
*/
- if (!mddev->raid_disks)
+ if (!mddev->raid_disks) {
+ if (!mddev->persistent)
+ return -EINVAL;
analyze_sbs(mddev);
+ }
chunk_size = mddev->chunk_size;
}
/* devices must have minimum size of one chunk */
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
if (rdev->size < chunk_size / 1024) {
* the only valid external interface is through the md
* device.
*/
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
sync_blockdev(rdev->bdev);
mdk_rdev_t *rdev2;
struct list_head *tmp2;
int warned = 0;
- ITERATE_RDEV(mddev, rdev, tmp) {
- ITERATE_RDEV(mddev, rdev2, tmp2) {
+ rdev_for_each(rdev, tmp, mddev) {
+ rdev_for_each(rdev2, tmp2, mddev) {
if (rdev < rdev2 &&
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
if (mddev->degraded && !mddev->sync_thread) {
struct list_head *rtmp;
int spares = 0;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
}
mddev->bitmap_offset = 0;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
- /* make sure all delayed_delete calls have finished */
+ /* make sure all md_delayed_delete calls have finished */
flush_scheduled_work();
export_array(mddev);
mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
mddev->external = 0;
+ mddev->persistent = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
printk(KERN_INFO "md: running: ");
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
char b[BDEVNAME_SIZE];
printk("<%s>", bdevname(rdev->bdev,b));
}
printk(KERN_INFO "md: considering %s ...\n",
bdevname(rdev0->bdev,b));
INIT_LIST_HEAD(&candidates);
- ITERATE_RDEV_PENDING(rdev,tmp)
+ rdev_for_each_list(rdev, tmp, pending_raid_disks)
if (super_90_load(rdev, rdev0, 0) >= 0) {
printk(KERN_INFO "md: adding %s ...\n",
bdevname(rdev->bdev,b));
mddev_unlock(mddev);
} else {
printk(KERN_INFO "md: created %s\n", mdname(mddev));
- ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
+ mddev->persistent = 1;
+ rdev_for_each_list(rdev, tmp, candidates) {
list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev))
export_rdev(rdev);
/* on success, candidates will be empty, on error
* it won't...
*/
- ITERATE_RDEV_GENERIC(candidates,rdev,tmp)
+ rdev_for_each_list(rdev, tmp, candidates)
export_rdev(rdev);
mddev_put(mddev);
}
struct list_head *tmp;
nr=working=active=failed=spare=0;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
nr++;
if (test_bit(Faulty, &rdev->flags))
failed++;
else
rdev->raid_disk = -1;
- rdev->flags = 0;
-
if (rdev->raid_disk < mddev->raid_disks)
if (info->state & (1<<MD_DISK_SYNC))
set_bit(In_sync, &rdev->flags);
*/
if (mddev->sync_thread)
return -EBUSY;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
sector_t avail;
avail = rdev->size * 2;
*/
/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
* RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
- if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
- && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
- && cmd != GET_BITMAP_FILE) {
+ if ((!mddev->raid_disks && !mddev->external)
+ && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
+ && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
+ && cmd != GET_BITMAP_FILE) {
err = -ENODEV;
goto abort_unlock;
}
seq_printf(seq, "unused devices: ");
- ITERATE_RDEV_PENDING(rdev,tmp) {
+ rdev_for_each_list(rdev, tmp, pending_raid_disks) {
char b[BDEVNAME_SIZE];
i++;
seq_printf(seq, "%s ",
}
size = 0;
- ITERATE_RDEV(mddev,rdev,tmp2) {
+ rdev_for_each(rdev, tmp2, mddev) {
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr);
chunk_kb ? "KB" : "B");
if (bitmap->file) {
seq_printf(seq, ", file: ");
- seq_path(seq, bitmap->file->f_path.mnt,
- bitmap->file->f_path.dentry," \t\n");
+ seq_path(seq, &bitmap->file->f_path, " \t\n");
}
seq_printf(seq, "\n");
long curr_events;
idle = 1;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
curr_events = disk_stat_read(disk, sectors[0]) +
disk_stat_read(disk, sectors[1]) -
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto skip;
}
- ITERATE_MDDEV(mddev2,tmp) {
+ for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
if (mddev2->curr_resync &&
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1;
j = MaxSector;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
} else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
struct list_head *rtmp;
int spares = 0;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!mddev->external &&
(test_bit(Faulty, &rdev->flags) ||
}
if (mddev->degraded) {
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
* information must be scrapped
*/
if (!mddev->degraded)
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
rdev->saved_raid_disk = -1;
mddev->recovery = 0;
printk(KERN_INFO "md: stopping all md devices.\n");
- ITERATE_MDDEV(mddev,tmp)
+ for_each_mddev(mddev, tmp)
if (mddev_trylock(mddev)) {
do_md_stop (mddev, 1);
mddev_unlock(mddev);
unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL);
- ITERATE_MDDEV(mddev,tmp) {
+ for_each_mddev(mddev, tmp) {
struct gendisk *disk = mddev->gendisk;
if (!disk)
continue;