err.no Git - linux-2.6/blobdiff - fs/ocfs2/dlm/dlmdomain.c
Merge master.kernel.org:/pub/scm/linux/kernel/git/gregkh/driver-2.6
[linux-2.6] / fs / ocfs2 / dlm / dlmdomain.c
index 19b57a6bcb1a72c257b81d6a6fc40e4f1401486f..d836b98dd99a42614ab0e1afbb4c5f459ef708ae 100644 (file)
@@ -138,8 +138,10 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
 
 void __dlm_unhash_lockres(struct dlm_lock_resource *lockres)
 {
-       hlist_del_init(&lockres->hash_node);
-       dlm_lockres_put(lockres);
+       if (!hlist_unhashed(&lockres->hash_node)) {
+               hlist_del_init(&lockres->hash_node);
+               dlm_lockres_put(lockres);
+       }
 }
 
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
@@ -428,11 +430,10 @@ redo_bucket:
 
                        dlm_lockres_put(res);
 
-                       cond_resched_lock(&dlm->spinlock);
-
                        if (dropped)
                                goto redo_bucket;
                }
+               cond_resched_lock(&dlm->spinlock);
                num += n;
                mlog(0, "%s: touched %d lockreses in bucket %d "
                     "(tot=%d)\n", dlm->name, n, i, num);
@@ -655,6 +656,8 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
                dlm_kick_thread(dlm, NULL);
 
                while (dlm_migrate_all_locks(dlm)) {
+                       /* Give dlm_thread time to purge the lockres' */
+                       msleep(500);
                        mlog(0, "%s: more migration to do\n", dlm->name);
                }
                dlm_mark_domain_leaving(dlm);
@@ -707,6 +710,9 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
        while (nodenum < O2NM_MAX_NODES) {
                if (test_bit(nodenum, dlm->domain_map)) {
                        if (!byte_test_bit(nodenum, query->node_map)) {
+                               mlog(0, "disallow join as node %u does not "
+                                    "have node %u in its nodemap\n",
+                                    query->node_idx, nodenum);
                                response = JOIN_DISALLOW;
                                goto unlock_respond;
                        }
@@ -732,15 +738,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
                        /* Disallow parallel joins. */
                        response = JOIN_DISALLOW;
                } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
-                       mlog(ML_NOTICE, "node %u trying to join, but recovery "
+                       mlog(0, "node %u trying to join, but recovery "
                             "is ongoing.\n", bit);
                        response = JOIN_DISALLOW;
                } else if (test_bit(bit, dlm->recovery_map)) {
-                       mlog(ML_NOTICE, "node %u trying to join, but it "
+                       mlog(0, "node %u trying to join, but it "
                             "still needs recovery.\n", bit);
                        response = JOIN_DISALLOW;
                } else if (test_bit(bit, dlm->domain_map)) {
-                       mlog(ML_NOTICE, "node %u trying to join, but it "
+                       mlog(0, "node %u trying to join, but it "
                             "is still in the domain! needs recovery?\n",
                             bit);
                        response = JOIN_DISALLOW;
@@ -1028,7 +1034,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
 {
        int status = 0, tmpstat, node;
        struct domain_join_ctxt *ctxt;
-       enum dlm_query_join_response response;
+       enum dlm_query_join_response response = JOIN_DISALLOW;
 
        mlog_entry("%p", dlm);
 
@@ -1261,6 +1267,8 @@ bail:
 static int dlm_join_domain(struct dlm_ctxt *dlm)
 {
        int status;
+       unsigned int backoff;
+       unsigned int total_backoff = 0;
 
        BUG_ON(!dlm);
 
@@ -1292,18 +1300,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
        }
 
        do {
-               unsigned int backoff;
                status = dlm_try_to_join_domain(dlm);
 
                /* If we're racing another node to the join, then we
                 * need to back off temporarily and let them
                 * complete. */
+#define        DLM_JOIN_TIMEOUT_MSECS  90000
                if (status == -EAGAIN) {
                        if (signal_pending(current)) {
                                status = -ERESTARTSYS;
                                goto bail;
                        }
 
+                       /* total_backoff and the timeout are both in msecs */
+                       if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
+                               status = -ERESTARTSYS;
+                               mlog(ML_NOTICE, "Timed out joining dlm domain "
+                                    "%s after %u msecs\n", dlm->name,
+                                    total_backoff);
+                               goto bail;
+                       }
+
                        /*
                         * <chip> After you!
                         * <dale> No, after you!
@@ -1313,6 +1330,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
                         */
                        backoff = (unsigned int)(jiffies & 0x3);
                        backoff *= DLM_DOMAIN_BACKOFF_MS;
+                       total_backoff += backoff;
                        mlog(0, "backoff %d\n", backoff);
                        msleep(backoff);
                }