X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Focfs2%2Fsuper.c;h=df63ba20ae9016d7957930da638b729d9ab242bb;hb=34b4a8731f50fb6fe772f1e47432bfb1da1f4edd;hp=5ee7754206651baceff8735e57b4b7382e2617e9;hpb=980110c5da56cb56d3356f5a5251fdc920f83ba6;p=linux-2.6 diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5ee7754206..df63ba20ae 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -40,8 +40,7 @@ #include #include #include - -#include +#include #define MLOG_MASK_PREFIX ML_SUPER #include @@ -65,7 +64,6 @@ #include "sysfile.h" #include "uptodate.h" #include "ver.h" -#include "vote.h" #include "buffer_head_io.h" @@ -84,9 +82,12 @@ MODULE_LICENSE("GPL"); struct mount_options { + unsigned long commit_interval; unsigned long mount_opt; unsigned int atime_quantum; signed short slot; + unsigned int localalloc_opt; + char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; }; static int ocfs2_parse_options(struct super_block *sb, char *options, @@ -108,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait); static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); static void ocfs2_release_system_inodes(struct ocfs2_super *osb); -static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); static int ocfs2_check_volume(struct ocfs2_super *osb); static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, @@ -150,6 +150,10 @@ enum { Opt_data_writeback, Opt_atime_quantum, Opt_slot, + Opt_commit, + Opt_localalloc, + Opt_localflocks, + Opt_stack, Opt_err, }; @@ -165,6 +169,10 @@ static match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, {Opt_slot, "preferred_slot=%u"}, + {Opt_commit, "commit=%u"}, + {Opt_localalloc, "localalloc=%d"}, + {Opt_localflocks, "localflocks"}, + {Opt_stack, "cluster_stack=%s"}, {Opt_err, NULL} }; @@ -213,7 +221,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) mlog_entry_void(); - new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); + new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); if (IS_ERR(new)) { status = PTR_ERR(new); mlog_errno(status); @@ -221,7 +229,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) } osb->root_inode = new; - new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); + new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); if (IS_ERR(new)) { status = PTR_ERR(new); mlog_errno(status); @@ -443,6 +451,8 @@ unlock_osb: osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; + if (parsed_options.commit_interval) + osb->osb_commit_interval = parsed_options.commit_interval; if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); @@ -542,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) } } + if (ocfs2_userspace_stack(osb)) { + if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + mlog(ML_ERROR, "Userspace stack expected, but " + "o2cb heartbeat arguments passed to mount\n"); + return -EINVAL; + } + } + if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { - if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { + if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && + !ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Heartbeat has to be started to mount " "a read-write clustered device.\n"); return -EINVAL; @@ -553,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) return 0; } +/* + * If we're using a userspace stack, mount should have passed + * a name that matches the disk. If not, mount should not + * have passed a stack. + */ +static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, + struct mount_options *mopt) +{ + if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { + mlog(ML_ERROR, + "cluster stack passed to mount, but this filesystem " + "does not support it\n"); + return -EINVAL; + } + + if (ocfs2_userspace_stack(osb) && + strncmp(osb->osb_cluster_stack, mopt->cluster_stack, + OCFS2_STACK_LABEL_LEN)) { + mlog(ML_ERROR, + "cluster stack passed to mount (\"%s\") does not " + "match the filesystem (\"%s\")\n", + mopt->cluster_stack, + osb->osb_cluster_stack); + return -EINVAL; + } + + return 0; +} + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -570,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { - if (!o2hb_check_local_node_heartbeating()) { - status = -EINVAL; - goto read_super_error; - } - } - /* probe for superblock */ status = ocfs2_sb_probe(sb, &bh, §or_size); if (status < 0) { @@ -597,6 +636,12 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; + osb->osb_commit_interval = parsed_options.commit_interval; + osb->local_alloc_size = parsed_options.localalloc_opt; + + status = ocfs2_verify_userspace_stack(osb, &parsed_options); + if (status) + goto read_super_error; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -683,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else - snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); + snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " "with %s data mode.\n", @@ -747,9 +792,12 @@ static int ocfs2_parse_options(struct super_block *sb, mlog_entry("remount: %d, options: \"%s\"\n", is_remount, options ? options : "(none)"); + mopt->commit_interval = 0; mopt->mount_opt = 0; mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; mopt->slot = OCFS2_INVALID_SLOT; + mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; + mopt->cluster_stack[0] = '\0'; if (!options) { status = 1; @@ -816,6 +864,60 @@ static int ocfs2_parse_options(struct super_block *sb, if (option) mopt->slot = (s16)option; break; + case Opt_commit: + option = 0; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option < 0) + return 0; + if (option == 0) + option = JBD_DEFAULT_MAX_COMMIT_AGE; + mopt->commit_interval = HZ * option; + break; + case Opt_localalloc: + option = 0; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) + mopt->localalloc_opt = option; + break; + case Opt_localflocks: + /* + * Changing this during remount could race + * flock() requests, or "unbalance" existing + * ones (e.g., a lock is taken in one mode but + * dropped in the other). If users care enough + * to flip locking modes during remount, we + * could add a "local" flag to individual + * flock structures for proper tracking of + * state. + */ + if (!is_remount) + mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; + break; + case Opt_stack: + /* Check both that the option we were passed + * is of the right length and that it is a proper + * string of the right length. + */ + if (((args[0].to - args[0].from) != + OCFS2_STACK_LABEL_LEN) || + (strnlen(args[0].from, + OCFS2_STACK_LABEL_LEN) != + OCFS2_STACK_LABEL_LEN)) { + mlog(ML_ERROR, + "Invalid cluster_stack option\n"); + status = 0; + goto bail; + } + memcpy(mopt->cluster_stack, args[0].from, + OCFS2_STACK_LABEL_LEN); + mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -864,6 +966,20 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); + if (osb->osb_commit_interval) + seq_printf(s, ",commit=%u", + (unsigned) (osb->osb_commit_interval / HZ)); + + if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) + seq_printf(s, ",localalloc=%d", osb->local_alloc_size); + + if (opts & OCFS2_MOUNT_LOCALFLOCKS) + seq_printf(s, ",localflocks,"); + + if (osb->osb_cluster_stack[0]) + seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, + osb->osb_cluster_stack); + return 0; } @@ -899,6 +1015,8 @@ static int __init ocfs2_init(void) mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); } + ocfs2_set_locking_protocol(); + leave: if (status < 0) { ocfs2_free_mem_caches(); @@ -965,7 +1083,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) goto bail; } - status = ocfs2_meta_lock(inode, &bh, 0); + status = ocfs2_inode_lock(inode, &bh, 0); if (status < 0) { mlog_errno(status); goto bail; @@ -989,7 +1107,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) brelse(bh); - ocfs2_meta_unlock(inode, 0); + ocfs2_inode_unlock(inode, 0); status = 0; bail: if (inode) @@ -1020,8 +1138,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) oi->ip_clusters = 0; ocfs2_lock_res_init_once(&oi->ip_rw_lockres); - ocfs2_lock_res_init_once(&oi->ip_meta_lockres); - ocfs2_lock_res_init_once(&oi->ip_data_lockres); + ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres); ocfs2_metadata_cache_init(&oi->vfs_inode); @@ -1075,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb, return 0; } -/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ -static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) -{ - int status; - - /* XXX hold a ref on the node while mounte? easy enough, if - * desirable. */ - if (ocfs2_mount_local(osb)) - osb->node_num = 0; - else - osb->node_num = o2nm_this_node(); - - if (osb->node_num == O2NM_MAX_NODES) { - mlog(ML_ERROR, "could not find this host's node number\n"); - status = -ENOENT; - goto bail; - } - - mlog(0, "I am node %d\n", osb->node_num); - - status = 0; -bail: - return status; -} - static int ocfs2_mount_volume(struct super_block *sb) { int status = 0; @@ -1111,31 +1203,12 @@ static int ocfs2_mount_volume(struct super_block *sb) if (ocfs2_is_hard_readonly(osb)) goto leave; - status = ocfs2_fill_local_node_info(osb); - if (status < 0) { - mlog_errno(status); - goto leave; - } - - status = ocfs2_register_hb_callbacks(osb); - if (status < 0) { - mlog_errno(status); - goto leave; - } - status = ocfs2_dlm_init(osb); if (status < 0) { mlog_errno(status); goto leave; } - /* requires vote_thread to be running. */ - status = ocfs2_register_net_handlers(osb); - if (status < 0) { - mlog_errno(status); - goto leave; - } - status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1150,8 +1223,6 @@ static int ocfs2_mount_volume(struct super_block *sb) goto leave; } - ocfs2_populate_mounted_map(osb); - /* load all node-local system inodes */ status = ocfs2_init_local_system_inodes(osb); if (status < 0) { @@ -1174,15 +1245,6 @@ static int ocfs2_mount_volume(struct super_block *sb) if (ocfs2_mount_local(osb)) goto leave; - /* This should be sent *after* we recovered our journal as it - * will cause other nodes to unmark us as needing - * recovery. However, we need to send it *before* dropping the - * super block lock as otherwise their recovery threads might - * try to clean us up while we're live! */ - status = ocfs2_request_mount_vote(osb); - if (status < 0) - mlog_errno(status); - leave: if (unlock_super) ocfs2_super_unlock(osb, 1); @@ -1191,18 +1253,9 @@ leave: return status; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ -static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) -{ - mb(); - return osb->recovery_thread_task != NULL; -} - static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { - int tmp; + int tmp, hangup_needed = 0; struct ocfs2_super *osb = NULL; char nodestr[8]; @@ -1216,63 +1269,54 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_truncate_log_shutdown(osb); - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ - mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; - mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ - flush_workqueue(ocfs2_wq); + /* This will disable recovery and flush any recovery work. */ + ocfs2_recovery_exit(osb); ocfs2_journal_shutdown(osb); ocfs2_sync_blockdev(sb); - /* No dlm means we've failed during mount, so skip all the - * steps which depended on that to complete. */ - if (osb->dlm) { + /* No cluster connection means we've failed during mount, so skip + * all the steps which depended on that to complete. */ + if (osb->cconn) { tmp = ocfs2_super_lock(osb, 1); if (tmp < 0) { mlog_errno(tmp); return; } - - tmp = ocfs2_request_umount_vote(osb); - if (tmp < 0) - mlog_errno(tmp); } if (osb->slot_num != OCFS2_INVALID_SLOT) ocfs2_put_slot(osb); - if (osb->dlm) + if (osb->cconn) ocfs2_super_unlock(osb, 1); ocfs2_release_system_inodes(osb); - if (osb->dlm) { - ocfs2_unregister_net_handlers(osb); - - ocfs2_dlm_shutdown(osb); - } + /* + * If we're dismounting due to mount error, mount.ocfs2 will clean + * up heartbeat. If we're a local mount, there is no heartbeat. + * If we failed before we got a uuid_str yet, we can't stop + * heartbeat. Otherwise, do it. + */ + if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) + hangup_needed = 1; - ocfs2_clear_hb_callbacks(osb); + if (osb->cconn) + ocfs2_dlm_shutdown(osb, hangup_needed); debugfs_remove(osb->osb_debug_root); - if (!mnt_err) - ocfs2_stop_heartbeat(osb); + if (hangup_needed) + ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else - snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); + snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", osb->dev_str, nodestr); @@ -1315,7 +1359,6 @@ static int ocfs2_initialize_super(struct super_block *sb, int i, cbits, bbits; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; - struct buffer_head *bitmap_bh = NULL; struct ocfs2_journal *journal; __le32 uuid_net_key; struct ocfs2_super *osb; @@ -1344,20 +1387,14 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->s_sectsize_bits = blksize_bits(sector_size); BUG_ON(!osb->s_sectsize_bits); - osb->net_response_ids = 0; - spin_lock_init(&osb->net_response_lock); - INIT_LIST_HEAD(&osb->net_response_list); - - INIT_LIST_HEAD(&osb->osb_net_handlers); - init_waitqueue_head(&osb->recovery_event); - spin_lock_init(&osb->vote_task_lock); - init_waitqueue_head(&osb->vote_event); - osb->vote_work_sequence = 0; - osb->vote_wake_sequence = 0; + spin_lock_init(&osb->dc_task_lock); + init_waitqueue_head(&osb->dc_event); + osb->dc_work_sequence = 0; + osb->dc_wake_sequence = 0; INIT_LIST_HEAD(&osb->blocked_lock_list); osb->blocked_lock_count = 0; - INIT_LIST_HEAD(&osb->vote_list); spin_lock_init(&osb->osb_lock); + ocfs2_init_inode_steal_slot(osb); atomic_set(&osb->alloc_stats.moves, 0); atomic_set(&osb->alloc_stats.local_data, 0); @@ -1370,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb, snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); - mutex_init(&osb->recovery_lock); - - osb->disable_recovery = 0; - osb->recovery_thread_task = NULL; + status = ocfs2_recovery_init(osb); + if (status) { + mlog(ML_ERROR, "Unable to initialize recovery state\n"); + mlog_errno(status); + goto bail; + } init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; - osb->node_num = O2NM_INVALID_NODE_NUM; osb->slot_num = OCFS2_INVALID_SLOT; osb->local_alloc_state = OCFS2_LA_UNUSED; osb->local_alloc_bh = NULL; - ocfs2_setup_hb_callbacks(osb); - init_waitqueue_head(&osb->osb_mount_event); osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); @@ -1437,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } + if (ocfs2_userspace_stack(osb)) { + memcpy(osb->osb_cluster_stack, + OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, + OCFS2_STACK_LABEL_LEN); + osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; + if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { + mlog(ML_ERROR, + "couldn't mount because of an invalid " + "cluster stack label (%s) \n", + osb->osb_cluster_stack); + status = -EINVAL; + goto bail; + } + } else { + /* The empty string is identical with classic tools that + * don't know about s_cluster_info. */ + osb->osb_cluster_stack[0] = '\0'; + } + get_random_bytes(&osb->s_next_generation, sizeof(u32)); /* FIXME @@ -1496,7 +1551,6 @@ static int ocfs2_initialize_super(struct super_block *sb, } memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); - osb->net_key = le32_to_cpu(uuid_net_key); strncpy(osb->vol_label, di->id2.i_super.s_label, 63); osb->vol_label[63] = '\0'; @@ -1539,25 +1593,9 @@ static int ocfs2_initialize_super(struct super_block *sb, } osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; - - /* We don't have a cluster lock on the bitmap here because - * we're only interested in static information and the extra - * complexity at mount time isn't worht it. Don't pass the - * inode in to the read function though as we don't want it to - * be put in the cache. */ - status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, - NULL); iput(inode); - if (status < 0) { - mlog_errno(status); - goto bail; - } - di = (struct ocfs2_dinode *) bitmap_bh->b_data; - osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); - brelse(bitmap_bh); - mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", - (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); + osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; status = ocfs2_init_slot_info(osb); if (status < 0) { @@ -1723,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) /* This function assumes that the caller has the main osb resource */ - if (osb->slot_info) - ocfs2_free_slot_info(osb->slot_info); + ocfs2_free_slot_info(osb); kfree(osb->osb_orphan_wipes); /* FIXME