return ret;
}
-static int read_disk_sb(mdk_rdev_t * rdev)
+static int read_disk_sb(mdk_rdev_t * rdev, int size)
{
char b[BDEVNAME_SIZE];
if (!rdev->sb_page) {
return 0;
- if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
+ if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ))
goto fail;
rdev->sb_loaded = 1;
return 0;
sb_offset = calc_dev_sboffset(rdev->bdev);
rdev->sb_offset = sb_offset;
- ret = read_disk_sb(rdev);
+ ret = read_disk_sb(rdev, MD_SB_BYTES);
if (ret) return ret;
ret = -EINVAL;
rdev->preferred_minor = sb->md_minor;
rdev->data_offset = 0;
+ rdev->sb_size = MD_SB_BYTES;
if (sb->level == LEVEL_MULTIPATH)
rdev->desc_nr = -1;
mddev->size = sb->size;
mddev->events = md_event(sb);
mddev->bitmap_offset = 0;
+ mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
if (sb->state & (1<<MD_SB_CLEAN))
mddev->recovery_cp = MaxSector;
if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
mddev->bitmap_file == NULL) {
- if (mddev->level != 1) {
+ if (mddev->level != 1 && mddev->level != 5) {
/* FIXME use a better test */
printk(KERN_WARNING "md: bitmaps only support for raid1\n");
return -EINVAL;
}
- mddev->bitmap_offset = (MD_SB_BYTES >> 9);
+ mddev->bitmap_offset = mddev->default_bitmap_offset;
}
} else if (mddev->pers == NULL) {
if (mddev->level != LEVEL_MULTIPATH) {
rdev->faulty = 0;
+ rdev->flags = 0;
desc = sb->disks + rdev->desc_nr;
if (desc->state & (1<<MD_DISK_FAULTY))
rdev->in_sync = 1;
rdev->raid_disk = desc->raid_disk;
}
+ if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
+ set_bit(WriteMostly, &rdev->flags);
} else /* MULTIPATH are always insync */
rdev->in_sync = 1;
return 0;
spare++;
working++;
}
+ if (test_bit(WriteMostly, &rdev2->flags))
+ d->state |= (1<<MD_DISK_WRITEMOSTLY);
}
/* now set the "removed" and "faulty" bits on any missing devices */
int ret;
sector_t sb_offset;
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+ int bmask;
/*
* Calculate the position of the superblock.
}
rdev->sb_offset = sb_offset;
- ret = read_disk_sb(rdev);
+ /* superblock is rarely larger than 1K, but it can be larger,
+ * and it is safe to read 4k, so we do that
+ */
+ ret = read_disk_sb(rdev, 4096);
if (ret) return ret;
sb->major_version != cpu_to_le32(1) ||
le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) ||
- sb->feature_map != 0)
+ (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
return -EINVAL;
if (calc_sb_1_csum(sb) != sb->sb_csum) {
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
+ rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
+ bmask = block_size(rdev->bdev)-1;
+ if (rdev->sb_size & bmask)
+ rdev-> sb_size = (rdev->sb_size | bmask)+1;
+
if (refdev == 0)
return 1;
else {
mddev->size = le64_to_cpu(sb->size)/2;
mddev->events = le64_to_cpu(sb->events);
mddev->bitmap_offset = 0;
+ mddev->default_bitmap_offset = 0;
+ if (mddev->minor_version == 0)
+ mddev->default_bitmap_offset = -(64*1024)/512;
mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
memcpy(mddev->uuid, sb->set_uuid, 16);
mddev->max_disks = (4096-256)/2;
- if ((le32_to_cpu(sb->feature_map) & 1) &&
+ if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
mddev->bitmap_file == NULL ) {
if (mddev->level != 1) {
printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
rdev->raid_disk = role;
break;
}
+ rdev->flags = 0;
+ if (sb->devflags & WriteMostly1)
+ set_bit(WriteMostly, &rdev->flags);
} else /* MULTIPATH are always insync */
rdev->in_sync = 1;
if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
- sb->feature_map = cpu_to_le32(1);
+ sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
}
max_dev = 0;
dprintk("%s ", bdevname(rdev->bdev,b));
if (!rdev->faulty) {
md_super_write(mddev,rdev,
- rdev->sb_offset<<1, MD_SB_BYTES,
+ rdev->sb_offset<<1, rdev->sb_size,
rdev->sb_page);
dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
bdevname(rdev->bdev,b),
mddev->pers = pers[pnum];
spin_unlock(&pers_lock);
+ mddev->recovery = 0;
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
/* before we start the array running, initialise the bitmap */
info.state = 0;
if (mddev->in_sync)
info.state = (1<<MD_SB_CLEAN);
+ if (mddev->bitmap && mddev->bitmap_offset)
+ info.state = (1<<MD_SB_BITMAP_PRESENT);
info.active_disks = active;
info.working_disks = working;
info.failed_disks = failed;
return 0;
}
-static int get_bitmap_file(mddev_t * mddev, void * arg)
+static int get_bitmap_file(mddev_t * mddev, void __user * arg)
{
mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
char *ptr, *buf = NULL;
info.state |= (1<<MD_DISK_ACTIVE);
info.state |= (1<<MD_DISK_SYNC);
}
+ if (test_bit(WriteMostly, &rdev->flags))
+ info.state |= (1<<MD_DISK_WRITEMOSTLY);
} else {
info.major = info.minor = 0;
info.raid_disk = -1;
mdname(mddev));
return -EINVAL;
}
- rdev = md_import_device(dev, mddev->major_version,
- mddev->minor_version);
+ if (mddev->persistent)
+ rdev = md_import_device(dev, mddev->major_version,
+ mddev->minor_version);
+ else
+ rdev = md_import_device(dev, -1, -1);
if (IS_ERR(rdev)) {
printk(KERN_WARNING
"md: md_import_device returned %ld\n",
rdev->saved_raid_disk = rdev->raid_disk;
rdev->in_sync = 0; /* just to be sure */
+ if (info->state & (1<<MD_DISK_WRITEMOSTLY))
+ set_bit(WriteMostly, &rdev->flags);
+
rdev->raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev);
if (err)
else
rdev->in_sync = 0;
+ if (info->state & (1<<MD_DISK_WRITEMOSTLY))
+ set_bit(WriteMostly, &rdev->flags);
+
err = bind_rdev_to_array(rdev, mddev);
if (err) {
export_rdev(rdev);
{
int err;
- if (mddev->pers)
- return -EBUSY;
+ if (mddev->pers) {
+ if (!mddev->pers->quiesce)
+ return -EBUSY;
+ if (mddev->recovery || mddev->sync_thread)
+ return -EBUSY;
+ /* we should be able to change the bitmap.. */
+ }
- mddev->bitmap_file = fget(fd);
- if (mddev->bitmap_file == NULL) {
- printk(KERN_ERR "%s: error: failed to get bitmap file\n",
- mdname(mddev));
- return -EBADF;
- }
+ if (fd >= 0) {
+ if (mddev->bitmap)
+ return -EEXIST; /* cannot add when bitmap is present */
+ mddev->bitmap_file = fget(fd);
- err = deny_bitmap_write_access(mddev->bitmap_file);
- if (err) {
- printk(KERN_ERR "%s: error: bitmap file is already in use\n",
- mdname(mddev));
- fput(mddev->bitmap_file);
- mddev->bitmap_file = NULL;
- } else
+ if (mddev->bitmap_file == NULL) {
+ printk(KERN_ERR "%s: error: failed to get bitmap file\n",
+ mdname(mddev));
+ return -EBADF;
+ }
+
+ err = deny_bitmap_write_access(mddev->bitmap_file);
+ if (err) {
+ printk(KERN_ERR "%s: error: bitmap file is already in use\n",
+ mdname(mddev));
+ fput(mddev->bitmap_file);
+ mddev->bitmap_file = NULL;
+ return err;
+ }
mddev->bitmap_offset = 0; /* file overrides offset */
+ } else if (mddev->bitmap == NULL)
+ return -ENOENT; /* cannot remove what isn't there */
+ err = 0;
+ if (mddev->pers) {
+ mddev->pers->quiesce(mddev, 1);
+ if (fd >= 0)
+ err = bitmap_create(mddev);
+ if (fd < 0 || err)
+ bitmap_destroy(mddev);
+ mddev->pers->quiesce(mddev, 0);
+ } else if (fd < 0) {
+ if (mddev->bitmap_file)
+ fput(mddev->bitmap_file);
+ mddev->bitmap_file = NULL;
+ }
+
return err;
}
{
int rv = 0;
int cnt = 0;
+ int state = 0;
+
+ /* calculate expected state,ignoring low bits */
+ if (mddev->bitmap && mddev->bitmap_offset)
+ state |= (1 << MD_SB_BITMAP_PRESENT);
if (mddev->major_version != info->major_version ||
mddev->minor_version != info->minor_version ||
mddev->level != info->level ||
/* mddev->layout != info->layout || */
!mddev->persistent != info->not_persistent||
- mddev->chunk_size != info->chunk_size )
+ mddev->chunk_size != info->chunk_size ||
+ /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
+ ((state^info->state) & 0xfffffe00)
+ )
return -EINVAL;
/* Check there is only one change */
if (mddev->size != info->size) cnt++;
if (mddev->raid_disks != info->raid_disks) cnt++;
if (mddev->layout != info->layout) cnt++;
+ if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
if (cnt == 0) return 0;
if (cnt > 1) return -EINVAL;
}
}
}
+ if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
+ if (mddev->pers->quiesce == NULL)
+ return -EINVAL;
+ if (mddev->recovery || mddev->sync_thread)
+ return -EBUSY;
+ if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+ /* add the bitmap */
+ if (mddev->bitmap)
+ return -EEXIST;
+ if (mddev->default_bitmap_offset == 0)
+ return -EINVAL;
+ mddev->bitmap_offset = mddev->default_bitmap_offset;
+ mddev->pers->quiesce(mddev, 1);
+ rv = bitmap_create(mddev);
+ if (rv)
+ bitmap_destroy(mddev);
+ mddev->pers->quiesce(mddev, 0);
+ } else {
+ /* remove the bitmap */
+ if (!mddev->bitmap)
+ return -ENOENT;
+ if (mddev->bitmap->file)
+ return -EINVAL;
+ mddev->pers->quiesce(mddev, 1);
+ bitmap_destroy(mddev);
+ mddev->pers->quiesce(mddev, 0);
+ mddev->bitmap_offset = 0;
+ }
+ }
md_update_sb(mddev);
return rv;
}
goto done_unlock;
case GET_BITMAP_FILE:
- err = get_bitmap_file(mddev, (void *)arg);
+ err = get_bitmap_file(mddev, argp);
goto done_unlock;
case GET_DISK_INFO:
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr);
+ if (test_bit(WriteMostly, &rdev->flags))
+ seq_printf(seq, "(W)");
if (rdev->faulty) {
seq_printf(seq, "(F)");
continue;
*/
void md_write_start(mddev_t *mddev, struct bio *bi)
{
- DEFINE_WAIT(w);
if (bio_data_dir(bi) != WRITE)
return;
EXPORT_SYMBOL(md_check_recovery);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md");
+MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);