LIST_HEAD(dlm_domains);
static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
+/*
+ * The supported protocol version for DLM communication. Running domains
+ * will have a negotiated version with the same major number and a minor
+ * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should
+ * be used to determine what a running domain is actually using.
+ *
+ * dlm_register_domain() copies this value into dlm->dlm_locking_proto
+ * before it calls dlm_join_domain(), so this is only the starting point
+ * of the negotiation.
+ */
+static const struct dlm_protocol_version dlm_protocol = {
+ .pv_major = 1, /* dlm_protocol_compare() rejects a differing major */
+ .pv_minor = 0, /* a join may lower the minor actually in use */
+};
+
#define DLM_DOMAIN_BACKOFF_MS 200
static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
+static int dlm_protocol_compare(struct dlm_protocol_version *existing,
+ struct dlm_protocol_version *request);
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
dlm_lockres_put(res);
- cond_resched_lock(&dlm->spinlock);
-
if (dropped)
goto redo_bucket;
}
+ cond_resched_lock(&dlm->spinlock);
num += n;
mlog(0, "%s: touched %d lockreses in bucket %d "
"(tot=%d)\n", dlm->name, n, i, num);
}
EXPORT_SYMBOL_GPL(dlm_unregister_domain);
+/*
+ * Decide whether the locking protocol requested by a joining node can be
+ * accepted against the version in *ours.
+ *
+ * proto_type only labels the log messages and node identifies the joiner.
+ * The comparison runs on a private copy of *request so that both the
+ * requested and the negotiated versions can be logged.
+ *
+ * Returns 0 when the versions are compatible; request->pv_minor is then
+ * overwritten with the negotiated minor. Returns 1 when they are not
+ * compatible; *request is left untouched in that case.
+ */
+static int dlm_query_join_proto_check(char *proto_type, int node,
+				      struct dlm_protocol_version *ours,
+				      struct dlm_protocol_version *request)
+{
+	struct dlm_protocol_version negotiated = *request;
+
+	if (dlm_protocol_compare(ours, &negotiated)) {
+		mlog(ML_NOTICE,
+		     "Node %u wanted to join with %s locking "
+		     "protocol %u.%u, but we have %u.%u, disallowing\n",
+		     node, proto_type,
+		     request->pv_major, request->pv_minor,
+		     ours->pv_major, ours->pv_minor);
+		return 1;
+	}
+
+	mlog(0,
+	     "node %u wanted to join with %s locking protocol "
+	     "%u.%u, we respond with %u.%u\n",
+	     node, proto_type,
+	     request->pv_major, request->pv_minor,
+	     negotiated.pv_major, negotiated.pv_minor);
+
+	/* Hand the negotiated minor back to the caller. */
+	request->pv_minor = negotiated.pv_minor;
+
+	return 0;
+}
+
+/*
+ * struct dlm_query_join_packet is made up of four one-byte fields. They
+ * are effectively in big-endian order already. However, little-endian
+ * machines swap them before putting the packet on the wire (because
+ * query_join's response is a status, and that status is treated as a u32
+ * on the wire). Thus, big-endian and little-endian machines will treat
+ * this structure differently.
+ *
+ * The solution is to have little-endian machines swap the structure when
+ * converting from the structure to the u32 representation. This will
+ * result in the structure having the correct format on the wire no matter
+ * the host endian format.
+ */
+static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet,
+					  u32 *wire)
+{
+	/* Overlay the packet on the union so it can be read back as intval. */
+	union dlm_query_join_response overlay = {
+		.packet = *packet,
+	};
+
+	/* A no-op on big-endian hosts, a byte swap on little-endian ones. */
+	*wire = cpu_to_be32(overlay.intval);
+}
+
+static void dlm_query_join_wire_to_packet(u32 wire,
+					  struct dlm_query_join_packet *packet)
+{
+	/*
+	 * Apply the same 32-bit conversion used when the packet was put on
+	 * the wire, then read the result back through the packet view.
+	 */
+	union dlm_query_join_response overlay = {
+		.intval = cpu_to_be32(wire),
+	};
+
+	*packet = overlay.packet;
+}
+
static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data)
{
struct dlm_query_join_request *query;
- enum dlm_query_join_response response;
+ struct dlm_query_join_packet packet = {
+ .code = JOIN_DISALLOW,
+ };
struct dlm_ctxt *dlm = NULL;
+ u32 response;
u8 nodenum;
query = (struct dlm_query_join_request *) msg->buf;
mlog(0, "node %u is not in our live map yet\n",
query->node_idx);
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
goto respond;
}
- response = JOIN_OK_NO_MAP;
+ packet.code = JOIN_OK_NO_MAP;
spin_lock(&dlm_domain_lock);
dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
mlog(0, "disallow join as node %u does not "
"have node %u in its nodemap\n",
query->node_idx, nodenum);
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
goto unlock_respond;
}
}
/*If this is a brand new context and we
* haven't started our join process yet, then
* the other node won the race. */
- response = JOIN_OK_NO_MAP;
+ packet.code = JOIN_OK_NO_MAP;
} else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
/* Disallow parallel joins. */
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
} else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
mlog(0, "node %u trying to join, but recovery "
"is ongoing.\n", bit);
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->recovery_map)) {
mlog(0, "node %u trying to join, but it "
"still needs recovery.\n", bit);
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->domain_map)) {
mlog(0, "node %u trying to join, but it "
"is still in the domain! needs recovery?\n",
bit);
- response = JOIN_DISALLOW;
+ packet.code = JOIN_DISALLOW;
} else {
/* Alright we're fully a part of this domain
* so we keep some state as to who's joining
* and indicate to him that needs to be fixed
* up. */
- response = JOIN_OK;
- __dlm_set_joining_node(dlm, query->node_idx);
+
+ /* Make sure we speak compatible locking protocols. */
+ if (dlm_query_join_proto_check("DLM", bit,
+ &dlm->dlm_locking_proto,
+ &query->dlm_proto)) {
+ packet.code = JOIN_PROTOCOL_MISMATCH;
+ } else if (dlm_query_join_proto_check("fs", bit,
+ &dlm->fs_locking_proto,
+ &query->fs_proto)) {
+ packet.code = JOIN_PROTOCOL_MISMATCH;
+ } else {
+ packet.dlm_minor = query->dlm_proto.pv_minor;
+ packet.fs_minor = query->fs_proto.pv_minor;
+ packet.code = JOIN_OK;
+ __dlm_set_joining_node(dlm, query->node_idx);
+ }
}
spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
respond:
- mlog(0, "We respond with %u\n", response);
+ mlog(0, "We respond with %u\n", packet.code);
+ dlm_query_join_packet_to_wire(&packet, &response);
return response;
}
sizeof(unsigned long))) {
mlog(ML_ERROR,
"map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n",
- map_size, BITS_TO_LONGS(O2NM_MAX_NODES));
+ map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES));
return -EINVAL;
}
static int dlm_request_join(struct dlm_ctxt *dlm,
int node,
- enum dlm_query_join_response *response)
+ enum dlm_query_join_response_code *response)
{
- int status, retval;
+ int status;
struct dlm_query_join_request join_msg;
+ struct dlm_query_join_packet packet;
+ u32 join_resp;
mlog(0, "querying node %d\n", node);
join_msg.node_idx = dlm->node_num;
join_msg.name_len = strlen(dlm->name);
memcpy(join_msg.domain, dlm->name, join_msg.name_len);
+ join_msg.dlm_proto = dlm->dlm_locking_proto;
+ join_msg.fs_proto = dlm->fs_locking_proto;
/* copy live node map to join message */
byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
- sizeof(join_msg), node, &retval);
+ sizeof(join_msg), node,
+ &join_resp);
if (status < 0 && status != -ENOPROTOOPT) {
mlog_errno(status);
goto bail;
}
+ dlm_query_join_wire_to_packet(join_resp, &packet);
/* -ENOPROTOOPT from the net code means the other side isn't
listening for our message type -- that's fine, it means
if (status == -ENOPROTOOPT) {
status = 0;
*response = JOIN_OK_NO_MAP;
- } else if (retval == JOIN_DISALLOW ||
- retval == JOIN_OK ||
- retval == JOIN_OK_NO_MAP) {
- *response = retval;
+ } else if (packet.code == JOIN_DISALLOW ||
+ packet.code == JOIN_OK_NO_MAP) {
+ *response = packet.code;
+ } else if (packet.code == JOIN_PROTOCOL_MISMATCH) {
+ mlog(ML_NOTICE,
+ "This node requested DLM locking protocol %u.%u and "
+ "filesystem locking protocol %u.%u. At least one of "
+ "the protocol versions on node %d is not compatible, "
+ "disconnecting\n",
+ dlm->dlm_locking_proto.pv_major,
+ dlm->dlm_locking_proto.pv_minor,
+ dlm->fs_locking_proto.pv_major,
+ dlm->fs_locking_proto.pv_minor,
+ node);
+ status = -EPROTO;
+ *response = packet.code;
+ } else if (packet.code == JOIN_OK) {
+ *response = packet.code;
+ /* Use the same locking protocol as the remote node */
+ dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
+ dlm->fs_locking_proto.pv_minor = packet.fs_minor;
+ mlog(0,
+ "Node %d responds JOIN_OK with DLM locking protocol "
+ "%u.%u and fs locking protocol %u.%u\n",
+ node,
+ dlm->dlm_locking_proto.pv_major,
+ dlm->dlm_locking_proto.pv_minor,
+ dlm->fs_locking_proto.pv_major,
+ dlm->fs_locking_proto.pv_minor);
} else {
status = -EINVAL;
- mlog(ML_ERROR, "invalid response %d from node %u\n", retval,
- node);
+ mlog(ML_ERROR, "invalid response %d from node %u\n",
+ packet.code, node);
}
mlog(0, "status %d, node %d response is %d\n", status, node,
- *response);
+ *response);
bail:
return status;
static int dlm_should_restart_join(struct dlm_ctxt *dlm,
struct domain_join_ctxt *ctxt,
- enum dlm_query_join_response response)
+ enum dlm_query_join_response_code response)
{
int ret;
{
int status = 0, tmpstat, node;
struct domain_join_ctxt *ctxt;
- enum dlm_query_join_response response;
+ enum dlm_query_join_response_code response = JOIN_DISALLOW;
mlog_entry("%p", dlm);
/*
 * Unregister the heartbeat up and down callbacks stored in this dlm_ctxt,
 * then unregister the o2net handlers on dlm->dlm_domain_handlers.
 *
 * NOTE(review): the meaning of the NULL first argument passed to
 * o2hb_unregister_callback() is not visible from here -- confirm against
 * the heartbeat API.
 */
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
{
- o2hb_unregister_callback(&dlm->dlm_hb_up);
- o2hb_unregister_callback(&dlm->dlm_hb_down);
+ o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
+ o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
}
o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
- status = o2hb_register_callback(&dlm->dlm_hb_down);
+ status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
if (status)
goto bail;
o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
- status = o2hb_register_callback(&dlm->dlm_hb_up);
+ status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
if (status)
goto bail;
}
/*
- * dlm_register_domain: one-time setup per "domain"
+ * Compare a requested locking protocol version against the current one.
+ *
+ * If the major numbers are different, they are incompatible.
+ * If the current minor is greater than the request, they are incompatible.
+ * If the current minor is less than or equal to the request, they are
+ * compatible, and the requester should run at the current minor version.
+ */
+static int dlm_protocol_compare(struct dlm_protocol_version *existing,
+				struct dlm_protocol_version *request)
+{
+	/* A different major, or a request older than what is running, fails. */
+	if (existing->pv_major != request->pv_major ||
+	    existing->pv_minor > request->pv_minor)
+		return 1;
+
+	/* Compatible: cap the requester at the minor currently in use. */
+	if (request->pv_minor > existing->pv_minor)
+		request->pv_minor = existing->pv_minor;
+
+	return 0;
+}
+
+/*
+ * dlm_register_domain: one-time setup per "domain".
+ *
+ * The filesystem passes in the requested locking version via proto.
+ * If registration was successful, proto will contain the negotiated
+ * locking protocol.
*/
struct dlm_ctxt * dlm_register_domain(const char *domain,
- u32 key)
+ u32 key,
+ struct dlm_protocol_version *fs_proto)
{
int ret;
struct dlm_ctxt *dlm = NULL;
goto retry;
}
+ if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+ mlog(ML_ERROR,
+ "Requested locking protocol version is not "
+ "compatible with already registered domain "
+ "\"%s\"\n", domain);
+ ret = -EPROTO;
+ goto leave;
+ }
+
__dlm_get(dlm);
dlm->num_joins++;
list_add_tail(&dlm->list, &dlm_domains);
spin_unlock(&dlm_domain_lock);
+ /*
+ * Pass the locking protocol version into the join. If the join
+ * succeeds, it will have the negotiated protocol set.
+ */
+ dlm->dlm_locking_proto = dlm_protocol;
+ dlm->fs_locking_proto = *fs_proto;
+
ret = dlm_join_domain(dlm);
if (ret) {
mlog_errno(ret);
goto leave;
}
+ /* Tell the caller what locking protocol we negotiated */
+ *fs_proto = dlm->fs_locking_proto;
+
ret = 0;
leave:
if (new_ctxt)