2 * linux/fs/lockd/svclock.c
4 * Handling of server-side locks, mostly of the blocked variety.
5 * This is the ugliest part of lockd because we tread on very thin ice.
6 * GRANT and CANCEL calls may get stuck, meet in mid-flight, etc.
7 * IMNSHO introducing the grant callback into the NLM protocol was one
8 * of the worst ideas Sun ever had. Except maybe for the idea of doing
9 * NFS file locking at all.
11 * I'm trying hard to avoid race conditions by protecting most accesses
12 * to a file's list of blocked locks through a semaphore. The global
13 * list of blocked locks is not protected in this fashion however.
14 * Therefore, some functions (such as the RPC callback for the async grant
15 * call) move blocked locks towards the head of the list *while some other
16 * process might be traversing it*. This should not be a problem in
17 * practice, because this will only cause functions traversing the list
18 * to visit some blocks twice.
20 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
23 #include <linux/types.h>
24 #include <linux/errno.h>
25 #include <linux/kernel.h>
26 #include <linux/sched.h>
27 #include <linux/smp_lock.h>
28 #include <linux/sunrpc/clnt.h>
29 #include <linux/sunrpc/svc.h>
30 #include <linux/lockd/nlm.h>
31 #include <linux/lockd/lockd.h>
/* Debug facility tag for this file (presumably what dprintk() keys on - confirm in the lockd debug header). */
33 #define NLMDBG_FACILITY NLMDBG_SVCLOCK
/*
 * nlm_deadlock is mapped to nlm4_deadlock when CONFIG_LOCKD_V4 is defined.
 * NOTE(review): the nlm_lck_denied definition looks like the fallback for
 * the non-V4 case, but the lines separating the two definitions are not
 * visible in this listing - confirm.
 */
35 #ifdef CONFIG_LOCKD_V4
36 #define nlm_deadlock nlm4_deadlock
38 #define nlm_deadlock nlm_lck_denied
/* Forward declarations of helpers and of the grant callback table defined further down in this file. */
41 static void nlmsvc_release_block(struct nlm_block *block);
42 static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
43 static void nlmsvc_remove_block(struct nlm_block *block);
45 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
46 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
47 static const struct rpc_call_ops nlmsvc_grant_ops;
50 * The list of blocked locks to retry
/* nlmsvc_insert_block() places timed entries ahead of any entry whose b_when is NLM_NEVER. */
52 static LIST_HEAD(nlm_blocked);
55 * Insert a blocked lock into the global list
/*
 * Queue @block on the global nlm_blocked list.  @when is a delay that is
 * made absolute by adding jiffies, unless it is NLM_NEVER, which is left
 * untouched.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (the embedded numbering has gaps), among them whatever follows
 * the overflow test and the initial value of pos.  Only the visible
 * statements are described here.
 */
58 nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
61 struct list_head *pos;
63 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
/*
 * A block that is not linked yet gains a reference; nlmsvc_remove_block()
 * drops one when it unlinks.  The list_del_init() presumably belongs to
 * the opposite branch (already linked) - the branch line is elided.
 */
64 if (list_empty(&block->b_list)) {
65 kref_get(&block->b_count);
67 list_del_init(&block->b_list);
71 if (when != NLM_NEVER) {
/* Guards against the absolute time colliding with the NLM_NEVER marker */
72 if ((when += jiffies) == NLM_NEVER)
/* Stop at the first queued block that is due later than @when, or never */
74 list_for_each(pos, &nlm_blocked) {
75 b = list_entry(pos, struct nlm_block, b_list);
76 if (time_after(b->b_when,when) || b->b_when == NLM_NEVER)
79 /* On normal exit from the loop, pos == &nlm_blocked,
80 * so we will be adding to the end of the list - good
84 list_add_tail(&block->b_list, pos);
89 * Remove a block from the global list
/*
 * Take @block off the global list if it is currently linked there, and
 * drop one reference on it via nlmsvc_release_block().  A block that is
 * not linked is left alone.
 */
92 nlmsvc_remove_block(struct nlm_block *block)
94 if (!list_empty(&block->b_list)) {
95 list_del_init(&block->b_list);
96 nlmsvc_release_block(block);
101 * Find a block for a given lock
/*
 * Scan the global nlm_blocked list for a block that belongs to @file and
 * whose queued lock matches @lock according to nlm_compare_locks().
 * A reference is taken on the matching block.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the referenced block is returned on a match and NULL
 * otherwise - confirm.
 */
103 static struct nlm_block *
104 nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
106 struct nlm_block *block;
107 struct file_lock *fl;
109 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
110 file, lock->fl.fl_pid,
111 (long long)lock->fl.fl_start,
112 (long long)lock->fl.fl_end, lock->fl.fl_type);
113 list_for_each_entry(block, &nlm_blocked, b_list) {
/* The lock to compare against is the one stored in the block's callback arguments */
114 fl = &block->b_call->a_args.lock.fl;
115 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
116 block->b_file, fl->fl_pid,
117 (long long)fl->fl_start,
118 (long long)fl->fl_end, fl->fl_type,
119 nlmdbg_cookie2a(&block->b_call->a_args.cookie));
120 if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
121 kref_get(&block->b_count);
/*
 * Compare two NLM cookies.  The visible check compares a->len bytes of the
 * two data buffers with memcmp().
 *
 * NOTE(review): the return statements, and any comparison of the two
 * lengths, are not visible in this listing.  nlmsvc_find_block() appears
 * to treat a non-zero result as a match - confirm.
 */
129 static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
133 if(memcmp(a->data,b->data,a->len))
139 * Find a block with a given NLM cookie.
/*
 * Walk the global nlm_blocked list looking for a block whose callback
 * cookie matches @cookie (see nlm_cookie_match()).  The visible tail of
 * the function logs the block and takes a reference on it.
 *
 * NOTE(review): the lines that leave the loop and the return statements
 * are not visible in this listing; nlmsvc_grant_reply() treats a NULL
 * result as not found.
 */
141 static inline struct nlm_block *
142 nlmsvc_find_block(struct nlm_cookie *cookie)
144 struct nlm_block *block;
146 list_for_each_entry(block, &nlm_blocked, b_list) {
147 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
154 dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
155 kref_get(&block->b_count);
160 * Create a block and initialize it.
162 * Note: we explicitly set the cookie of the grant reply to that of
163 * the blocked lock request. The spec explicitly mentions that the client
164 * should _not_ rely on the callback containing the same cookie as the
165 * request, but (as I found out later) that's because some implementations
166 * do just this. Never mind the standards committees, they support our
167 * logging industries.
169 * 10 years later: I hope we can safely ignore these old and broken
170 * clients by now. Let's fix this so we can uniquely identify an incoming
171 * GRANTED_RES message by cookie, without having to rely on the client's IP
/*
 * Allocate and set up a block for a lock request that has to wait.
 *
 * Visible steps: look up the callback host for the requesting client,
 * allocate the nlm_rqst used for the grant callback, allocate the zeroed
 * block itself, copy @lock into the callback arguments, and link block,
 * call, host and @file together.  The block is also added to the file's
 * f_blocks list.
 *
 * @cookie is not referenced by any visible line; the callback cookie is
 * generated with nlmclnt_next_cookie() instead.
 *
 * NOTE(review): the failure checks and return statements are not visible
 * in this listing.  nlmsvc_lock() treats a NULL result as failure, and the
 * trailing nlm_release_call() presumably belongs to an error path.
 */
174 static inline struct nlm_block *
175 nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
176 struct nlm_lock *lock, struct nlm_cookie *cookie)
178 struct nlm_block *block;
179 struct nlm_host *host;
180 struct nlm_rqst *call = NULL;
182 /* Create host handle for callback */
183 host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
187 call = nlm_alloc_call(host);
191 /* Allocate memory for block, and initialize arguments */
192 block = kzalloc(sizeof(*block), GFP_KERNEL);
/* Reference count and both list links start out in their initial, unlinked state */
195 kref_init(&block->b_count);
196 INIT_LIST_HEAD(&block->b_list);
197 INIT_LIST_HEAD(&block->b_flist);
/* A zero return from nlmsvc_setgrantargs() is tested for here, presumably as the failure case */
199 if (!nlmsvc_setgrantargs(call, lock))
202 /* Set notifier function for VFS, and init args */
203 call->a_args.lock.fl.fl_flags |= FL_SLEEP;
204 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
205 nlmclnt_next_cookie(&call->a_args.cookie);
207 dprintk("lockd: created block %p...\n", block);
209 /* Create and initialize the block */
210 block->b_daemon = rqstp->rq_server;
211 block->b_host = host;
212 block->b_file = file;
215 /* Add to file's list of blocks */
216 list_add(&block->b_flist, &file->f_blocks);
218 /* Set up RPC arguments for callback */
219 block->b_call = call;
220 call->a_flags = RPC_TASK_ASYNC;
221 call->a_block = block;
228 nlm_release_call(call);
233 * Delete a block: cancel the blocked POSIX lock request and remove the
234 * block from the global list of blocked locks.
235 * It is the caller's responsibility to check whether the file
236 * can be closed hereafter.
/*
 * Detach a block: cancel the pending POSIX lock request with
 * posix_unblock_lock() and take the block off the global list.
 *
 * NOTE(review): the declaration of status and the return statement are
 * not visible in this listing.  nlmsvc_cancel_blocked() uses the return
 * value as a status and reports non-zero as nlm_lck_denied.
 */
238 static int nlmsvc_unlink_block(struct nlm_block *block)
241 dprintk("lockd: unlinking block %p...\n", block);
243 /* Remove block from list */
244 status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
245 nlmsvc_remove_block(block);
/*
 * kref release function for nlm_block.b_count; nlmsvc_release_block()
 * passes it to kref_put().  It unlinks the block from its file's block
 * list under f_mutex, frees the grant arguments, and releases the call and
 * the file.
 *
 * Because f_mutex is taken here, the final reference must not be dropped
 * while the caller holds the same file's f_mutex.
 *
 * NOTE(review): the end of this function is not visible in this listing;
 * presumably the block memory itself is freed there - confirm.
 */
249 static void nlmsvc_free_block(struct kref *kref)
251 struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
252 struct nlm_file *file = block->b_file;
254 dprintk("lockd: freeing block %p...\n", block);
256 /* Remove block from file's list of blocks */
257 mutex_lock(&file->f_mutex);
258 list_del_init(&block->b_flist);
259 mutex_unlock(&file->f_mutex);
261 nlmsvc_freegrantargs(block->b_call);
262 nlm_release_call(block->b_call);
263 nlm_release_file(block->b_file);
/*
 * Drop one reference on @block.  nlmsvc_free_block() is the kref release
 * function and tears the block down once the last reference is gone.
 *
 * NOTE(review): a line between the signature and the kref_put() call is
 * not visible in this listing.  nlmsvc_lock() calls this with a pointer
 * that was initialised to NULL, so a NULL guard presumably lives there -
 * confirm.
 */
268 static void nlmsvc_release_block(struct nlm_block *block)
271 kref_put(&block->b_count, nlmsvc_free_block);
275 * Loop over all blocks and delete blocks held by
/*
 * Walk the blocks attached to @file and unlink those whose host is
 * accepted by match(block->b_host, host) and which are on the global
 * list.
 *
 * For such a block a temporary reference is taken and f_mutex is dropped
 * before nlmsvc_unlink_block() and nlmsvc_release_block() run.  The mutex
 * cannot stay held there because nlmsvc_free_block() takes f_mutex itself.
 *
 * NOTE(review): the statements that follow the two tests and the code
 * that resumes the walk after the mutex was dropped are not visible in
 * this listing.
 */
278 void nlmsvc_traverse_blocks(struct nlm_host *host,
279 struct nlm_file *file,
280 nlm_host_match_fn_t match)
282 struct nlm_block *block, *next;
285 mutex_lock(&file->f_mutex);
286 list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
287 if (!match(block->b_host, host))
289 /* Do not destroy blocks that are not on
290 * the global retry list - why? */
291 if (list_empty(&block->b_list))
/* Keep the block alive while f_mutex is not held */
293 kref_get(&block->b_count);
294 mutex_unlock(&file->f_mutex);
295 nlmsvc_unlink_block(block);
296 nlmsvc_release_block(block);
299 mutex_unlock(&file->f_mutex);
303 * Initialize arguments for GRANTED call. The nlm_rqst structure
304 * has been cleared already.
/*
 * Fill the arguments of a GRANTED call from @lock: the file_lock and the
 * file handle are copied, the caller name is set to this machine's
 * nodename, and svid is taken from the lock's fl_pid.
 *
 * The owner handle is copied as well.  Its data normally lives in the
 * a_owner area of @call; when lock->oh.len exceeds NLMCLNT_OHSIZE a
 * separate buffer is allocated with kmalloc() instead, which
 * nlmsvc_freegrantargs() later frees.
 *
 * NOTE(review): the allocation-failure check and the return statements
 * are not visible in this listing; nlmsvc_create_block() tests for a zero
 * return, presumably as the failure case.
 */
306 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
308 locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
309 memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
310 call->a_args.lock.caller = utsname()->nodename;
311 call->a_args.lock.oh.len = lock->oh.len;
313 /* set default data area */
314 call->a_args.lock.oh.data = call->a_owner;
315 call->a_args.lock.svid = lock->fl.fl_pid;
/* Owner handle too large for the default area: use a heap buffer */
317 if (lock->oh.len > NLMCLNT_OHSIZE) {
318 void *data = kmalloc(lock->oh.len, GFP_KERNEL);
321 call->a_args.lock.oh.data = (u8 *) data;
324 memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
328 static void nlmsvc_freegrantargs(struct nlm_rqst *call)
330 if (call->a_args.lock.oh.data != call->a_owner)
331 kfree(call->a_args.lock.oh.data);
335 * Deferred lock request handling for non-blocking lock
/*
 * Defer the reply to a lock request.
 *
 * The block is flagged B_QUEUED and queued on the global list with the
 * NLM_TIMEOUT delay.  If the request's cache handle provides a defer
 * hook, it is called and its result is stored in b_deferred_req.  The
 * status becomes nlm_drop_reply when that result is non-NULL and stays
 * nlm_lck_denied_nolocks otherwise.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably status is returned - confirm.
 */
338 nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
340 u32 status = nlm_lck_denied_nolocks;
342 block->b_flags |= B_QUEUED;
344 nlmsvc_insert_block(block, NLM_TIMEOUT);
346 block->b_cache_req = &rqstp->rq_chandle;
347 if (rqstp->rq_chandle.defer) {
348 block->b_deferred_req =
349 rqstp->rq_chandle.defer(block->b_cache_req);
350 if (block->b_deferred_req != NULL)
351 status = nlm_drop_reply;
353 dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
354 block, block->b_flags, status);
360 * Attempt to establish a lock, and if it can't be granted, block it
/*
 * Try to establish @lock on @file through posix_lock_file().
 *
 * Visible flow: FL_SLEEP is cleared on the incoming lock, f_mutex is
 * taken, and an existing block for the same lock is looked up.  The lock
 * is then attempted, and the outcome is mapped onto an NLM status.  The
 * statuses visible here are nlm_lck_denied_nolocks (the default case, and
 * the early return when no block can be allocated), nlm_lck_denied and
 * nlm_lck_blocked.
 *
 * When no block exists yet, f_mutex is dropped and one is created with
 * nlmsvc_create_block().  A new block is queued with NLM_NEVER.  On the
 * way out f_mutex is released and the references on newblock and block
 * are dropped.
 *
 * NOTE(review): many lines of this function are not visible in this
 * listing, including the case labels, the handling of @wait, the jump
 * that retries after allocating a block and the final return.  Only the
 * visible statements are described.
 */
364 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
365 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
367 struct nlm_block *block, *newblock = NULL;
371 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
372 file->f_file->f_path.dentry->d_inode->i_sb->s_id,
373 file->f_file->f_path.dentry->d_inode->i_ino,
374 lock->fl.fl_type, lock->fl.fl_pid,
375 (long long)lock->fl.fl_start,
376 (long long)lock->fl.fl_end,
380 lock->fl.fl_flags &= ~FL_SLEEP;
382 /* Lock file against concurrent access */
383 mutex_lock(&file->f_mutex);
384 /* Get existing block (in case client is busy-waiting) */
385 block = nlmsvc_lookup_block(file, lock);
/* From here on, operate on the lock copy stored inside a block's callback arguments */
387 if (newblock != NULL)
388 lock = &newblock->b_call->a_args.lock;
390 lock = &block->b_call->a_args.lock;
392 error = posix_lock_file(file->f_file, &lock->fl, NULL);
393 lock->fl.fl_flags &= ~FL_SLEEP;
395 dprintk("lockd: posix_lock_file returned %d\n", error);
406 default: /* includes ENOLCK */
407 ret = nlm_lck_denied_nolocks;
411 ret = nlm_lck_denied;
415 ret = nlm_lck_blocked;
419 /* If we don't have a block, create and initialize it. Then
420 * retry because we may have slept in kmalloc. */
421 /* We have to release f_mutex as nlmsvc_create_block may try to
422 * claim it while doing host garbage collection */
423 if (newblock == NULL) {
424 mutex_unlock(&file->f_mutex);
425 dprintk("lockd: blocking on this lock (allocating).\n");
426 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
427 return nlm_lck_denied_nolocks;
431 /* Append to list of blocked */
432 nlmsvc_insert_block(newblock, NLM_NEVER);
434 mutex_unlock(&file->f_mutex);
/* Drop the references obtained from block creation and from the lookup above */
435 nlmsvc_release_block(newblock);
436 nlmsvc_release_block(block);
437 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
442 * Test for presence of a conflicting lock.
/*
 * Test whether @lock conflicts with a lock already held on @file.
 *
 * When posix_test_lock() returns non-zero, @conflock is filled in and
 * nlm_lck_denied is returned.  The conflicting lock's type, range and pid
 * are read back from lock->fl, so posix_test_lock() presumably overwrites
 * it with the conflicting lock - confirm.  The caller name is a fixed
 * placeholder string and no owner handle is returned.
 *
 * @rqstp and @cookie are not referenced by any visible line.
 *
 * NOTE(review): the no-conflict path and its return value are not visible
 * in this listing.
 */
445 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
446 struct nlm_lock *lock, struct nlm_lock *conflock,
447 struct nlm_cookie *cookie)
449 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
450 file->f_file->f_path.dentry->d_inode->i_sb->s_id,
451 file->f_file->f_path.dentry->d_inode->i_ino,
453 (long long)lock->fl.fl_start,
454 (long long)lock->fl.fl_end);
456 if (posix_test_lock(file->f_file, &lock->fl)) {
457 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
459 (long long)lock->fl.fl_start,
460 (long long)lock->fl.fl_end);
461 conflock->caller = "somehost"; /* FIXME */
462 conflock->len = strlen(conflock->caller);
463 conflock->oh.len = 0; /* don't return OH info */
464 conflock->svid = lock->fl.fl_pid;
465 conflock->fl.fl_type = lock->fl.fl_type;
466 conflock->fl.fl_start = lock->fl.fl_start;
467 conflock->fl.fl_end = lock->fl.fl_end;
468 return nlm_lck_denied;
476 * This implies a CANCEL call: We send a GRANT_MSG, the client replies
477 * with a GRANT_RES call which gets lost, and calls UNLOCK immediately
478 * afterwards. In this case the block will still be there, and hence
/*
 * Remove @lock from @file.
 *
 * Any blocked request for the same lock is cancelled first.  The lock
 * type is then switched to F_UNLCK and handed to posix_lock_file().
 * Returns nlm_lck_denied_nolocks when posix_lock_file() reports a
 * negative error, and nlm_granted otherwise.
 */
482 nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
486 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
487 file->f_file->f_path.dentry->d_inode->i_sb->s_id,
488 file->f_file->f_path.dentry->d_inode->i_ino,
490 (long long)lock->fl.fl_start,
491 (long long)lock->fl.fl_end);
493 /* First, cancel any lock that might be there */
494 nlmsvc_cancel_blocked(file, lock);
496 lock->fl.fl_type = F_UNLCK;
497 error = posix_lock_file(file->f_file, &lock->fl, NULL);
499 return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
503 * Cancel a previously blocked request.
505 * A cancel request always overrides any grant that may currently
507 * The calling procedure must check whether the file can be closed.
/*
 * Cancel a blocked request for @lock on @file.
 *
 * The block is looked up under f_mutex; the lookup takes a reference.
 * The block is then unlinked and that reference is dropped, both outside
 * the mutex.  Returns nlm_lck_denied when status is non-zero and
 * nlm_granted otherwise.
 *
 * NOTE(review): the declaration of status and the test on the lookup
 * result are not visible in this listing, so the behaviour when no block
 * is found cannot be read from here.
 */
510 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
512 struct nlm_block *block;
515 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
516 file->f_file->f_path.dentry->d_inode->i_sb->s_id,
517 file->f_file->f_path.dentry->d_inode->i_ino,
519 (long long)lock->fl.fl_start,
520 (long long)lock->fl.fl_end);
522 mutex_lock(&file->f_mutex);
523 block = nlmsvc_lookup_block(file, lock);
524 mutex_unlock(&file->f_mutex);
526 status = nlmsvc_unlink_block(block);
527 nlmsvc_release_block(block);
529 return status ? nlm_lck_denied : nlm_granted;
533 * This is a callback from the filesystem for VFS file lock requests.
534 * It will be used if fl_grant is defined and the filesystem can not
535 * respond to the request immediately.
536 * For GETLK request it will copy the reply to the nlm_block.
537 * For SETLK or SETLKW request it will get the local posix lock.
538 * In all cases it will move the block to the head of nlm_blocked q where
539 * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the
540 * deferred rpc for GETLK and SETLK.
/*
 * Record the outcome of a deferred lock request in @block.
 *
 * Visible effects: B_GOT_CALLBACK is always set, b_granted may be set to
 * 1, B_TIMED_OUT may be set, and a zeroed file_lock may be allocated into
 * b_fl and filled from @conf with locks_copy_lock().
 *
 * NOTE(review): the rest of the parameter list and every condition that
 * selects between these effects are not visible in this listing;
 * nlmsvc_grant_deferred() passes a third argument named result.
 */
543 nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
546 block->b_flags |= B_GOT_CALLBACK;
548 block->b_granted = 1;
550 block->b_flags |= B_TIMED_OUT;
552 block->b_fl = kzalloc(sizeof(struct file_lock), GFP_KERNEL);
554 locks_copy_lock(block->b_fl, conf);
/*
 * fl_grant callback, installed through nlmsvc_lock_operations.
 *
 * Looks for the block on nlm_blocked whose queued lock matches @fl.  For
 * a block flagged B_QUEUED the deferred state is updated via
 * nlmsvc_update_deferred_block(); the B_TIMED_OUT test in front of it
 * guards lines that are not visible here.  For other blocks b_granted is
 * set when result is 0.  The block is then re-queued with a delay of 0
 * and the owning daemon is woken up.  A warning is printed for an
 * unknown block.
 *
 * NOTE(review): the rest of the parameter list, the loop exit and the
 * return values are not visible in this listing.  The debug message below
 * names nlmsvc_notify_blocked although it is emitted from this function.
 */
558 static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
561 struct nlm_block *block;
565 list_for_each_entry(block, &nlm_blocked, b_list) {
566 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
567 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
568 block, block->b_flags);
569 if (block->b_flags & B_QUEUED) {
570 if (block->b_flags & B_TIMED_OUT) {
574 nlmsvc_update_deferred_block(block, conf, result);
575 } else if (result == 0)
576 block->b_granted = 1;
/* A delay of 0 makes the block due immediately */
578 nlmsvc_insert_block(block, 0);
579 svc_wake_up(block->b_daemon);
586 printk(KERN_WARNING "lockd: grant for unknown block\n");
591 * Unblock a blocked lock request. This is a callback invoked from the
592 * VFS layer when a lock on which we blocked is removed.
594 * This function doesn't grant the blocked lock instantly, but rather moves
595 * the block to the head of nlm_blocked where it can be picked up by lockd.
/*
 * fl_notify callback, installed through nlmsvc_lock_operations.
 *
 * Finds the block on nlm_blocked whose queued lock matches @fl, re-queues
 * it with a delay of 0 so that it is due immediately, and wakes up the
 * daemon recorded in the block.  A warning is printed when no block
 * matches.
 *
 * NOTE(review): the statement that leaves the function after a match is
 * not visible in this listing.
 */
598 nlmsvc_notify_blocked(struct file_lock *fl)
600 struct nlm_block *block;
602 dprintk("lockd: VFS unblock notification for block %p\n", fl);
603 list_for_each_entry(block, &nlm_blocked, b_list) {
604 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
605 nlmsvc_insert_block(block, 0);
606 svc_wake_up(block->b_daemon);
611 printk(KERN_WARNING "lockd: notification for unknown block!\n");
614 static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
616 return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
/*
 * Lock manager callbacks for lockd's server-side locks.
 * nlmsvc_create_block() stores a pointer to this table in the fl_lmops
 * field of each blocked lock.
 */
619 struct lock_manager_operations nlmsvc_lock_operations = {
620 .fl_compare_owner = nlmsvc_same_owner,
621 .fl_notify = nlmsvc_notify_blocked,
622 .fl_grant = nlmsvc_grant_deferred,
626 * Try to claim a lock that was previously blocked.
628 * Note that we use both the RPC_GRANTED_MSG call _and_ an async
629 * RPC thread when notifying the client. This seems like overkill...
631 * - we don't want to use a synchronous RPC thread, otherwise
632 * we might find ourselves hanging on a dead portmapper.
633 * - Some lockd implementations (e.g. HP) don't react to
634 * RPC_GRANTED calls; they seem to insist on RPC_GRANTED_MSG calls.
/*
 * Try to claim the lock of a previously blocked request and notify the
 * client.
 *
 * Visible flow: a reference is taken and the block is unlinked from the
 * global list.  A block that was already granted only has its host
 * rebound.  Otherwise the lock is retried through posix_lock_file() with
 * FL_SLEEP set.  If it is still blocked the block is re-queued with
 * NLM_NEVER; on an unexpected error it is re-queued for 10 * HZ.  Both of
 * those paths drop the reference again.  When the lock is granted,
 * b_granted is set, the block is re-queued for 30 * HZ and the
 * asynchronous NLMPROC_GRANTED_MSG call is issued.
 *
 * NOTE(review): the case labels, jumps and returns connecting these steps
 * are not visible in this listing.
 */
637 nlmsvc_grant_blocked(struct nlm_block *block)
639 struct nlm_file *file = block->b_file;
640 struct nlm_lock *lock = &block->b_call->a_args.lock;
643 dprintk("lockd: grant blocked lock %p\n", block);
/*
 * Extra reference so the block survives being unlinked below.  The error
 * paths drop it explicitly; on the grant path it is presumably dropped by
 * nlmsvc_grant_release() once the RPC is done - confirm.
 */
645 kref_get(&block->b_count);
647 /* Unlink block request from list */
648 nlmsvc_unlink_block(block);
650 /* If b_granted is true this means we've been here before.
651 * Just retry the grant callback, possibly refreshing the RPC
653 if (block->b_granted) {
654 nlm_rebind_host(block->b_host);
658 /* Try the lock operation again */
659 lock->fl.fl_flags |= FL_SLEEP;
660 error = posix_lock_file(file->f_file, &lock->fl, NULL);
661 lock->fl.fl_flags &= ~FL_SLEEP;
667 dprintk("lockd: lock still blocked\n");
668 nlmsvc_insert_block(block, NLM_NEVER);
669 nlmsvc_release_block(block);
672 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
673 -error, __FUNCTION__);
674 nlmsvc_insert_block(block, 10 * HZ);
675 nlmsvc_release_block(block);
680 /* Lock was granted by VFS. */
681 dprintk("lockd: GRANTing blocked lock.\n");
682 block->b_granted = 1;
684 /* Schedule next grant callback in 30 seconds */
685 nlmsvc_insert_block(block, 30 * HZ);
687 /* Call the client */
688 nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
692 * This is the callback from the RPC layer when the NLM_GRANTED_MSG
693 * RPC call has succeeded or timed out.
694 * Like all RPC callbacks, it is invoked by the rpciod process, so it
695 * better not sleep. Therefore, we put the blocked lock on the nlm_blocked
696 * chain once more in order to have it removed by lockd itself (which can
697 * then sleep on the file semaphore without disrupting e.g. the nfs client).
/*
 * rpc_call_done hook of nlmsvc_grant_ops.  @data is the nlm_rqst of the
 * GRANTED_MSG call; its block is re-queued with a timeout chosen
 * according to whether the RPC failed (task->tk_status < 0), and the
 * daemon recorded in the block is woken up.
 *
 * NOTE(review): the two assignments to timeout are not visible in this
 * listing, so the actual delays cannot be read from here.
 */
699 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
701 struct nlm_rqst *call = data;
702 struct nlm_block *block = call->a_block;
703 unsigned long timeout;
705 dprintk("lockd: GRANT_MSG RPC callback\n");
707 /* Technically, we should down the file semaphore here. Since we
708 * move the block towards the head of the queue only, no harm
709 * can be done, though. */
710 if (task->tk_status < 0) {
711 /* RPC error: Re-insert for retransmission */
714 /* Call was successful, now wait for client callback */
717 nlmsvc_insert_block(block, timeout);
718 svc_wake_up(block->b_daemon);
721 static void nlmsvc_grant_release(void *data)
723 struct nlm_rqst *call = data;
725 nlmsvc_release_block(call->a_block);
/*
 * RPC callbacks for the asynchronous NLMPROC_GRANTED_MSG call issued by
 * nlmsvc_grant_blocked(): one hook for completion of the call and one for
 * releasing the block reference.
 */
728 static const struct rpc_call_ops nlmsvc_grant_ops = {
729 .rpc_call_done = nlmsvc_grant_callback,
730 .rpc_release = nlmsvc_grant_release,
734 * We received a GRANT_RES callback. Try to find the corresponding
/*
 * Handle a GRANT_RES reply from a client.
 *
 * The block is looked up by @cookie; the lookup takes a reference, which
 * is dropped at the end.  If the client answered
 * nlm_lck_denied_grace_period, the block is re-queued for another attempt
 * in 10 * HZ.  Otherwise the block is unlinked.
 *
 * NOTE(review): the statement executed when no block is found is not
 * visible in this listing.  The debug message prints the first bytes of
 * the cookie through an unsigned int cast and prints the __be32 status
 * with %d.
 */
738 nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
740 struct nlm_block *block;
742 dprintk("grant_reply: looking for cookie %x, s=%d \n",
743 *(unsigned int *)(cookie->data), status);
744 if (!(block = nlmsvc_find_block(cookie)))
748 if (status == nlm_lck_denied_grace_period) {
749 /* Try again in a couple of seconds */
750 nlmsvc_insert_block(block, 10 * HZ);
752 /* Lock is now held by client, or has been rejected.
753 * In both cases, the block should be removed. */
754 nlmsvc_unlink_block(block);
757 nlmsvc_release_block(block);
760 /* Helper function to handle retry of a deferred block.
761 * If it is a blocking lock, call grant_blocked.
762 * For a non-blocking lock or test lock, revisit the request.
/*
 * Retry a deferred (B_QUEUED) block.
 *
 * A block that has not received its callback yet is marked B_TIMED_OUT.
 * The block is queued again with the NLM_TIMEOUT delay.  If a deferred
 * request is attached, its revisit hook is invoked with 0 as the second
 * argument, and b_deferred_req is cleared so that the request is not
 * revisited twice.
 */
765 retry_deferred_block(struct nlm_block *block)
767 if (!(block->b_flags & B_GOT_CALLBACK))
768 block->b_flags |= B_TIMED_OUT;
769 nlmsvc_insert_block(block, NLM_TIMEOUT);
770 dprintk("revisit block %p flags %d\n", block, block->b_flags);
771 if (block->b_deferred_req) {
772 block->b_deferred_req->revisit(block->b_deferred_req, 0);
773 block->b_deferred_req = NULL;
778 * Retry all blocked locks that have been notified. This is where lockd
779 * picks up locks that can be granted, or grant notifications that must
783 nlmsvc_retry_blocked(void)
785 unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
786 struct nlm_block *block;
788 while (!list_empty(&nlm_blocked)) {
789 block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
791 if (block->b_when == NLM_NEVER)
793 if (time_after(block->b_when,jiffies)) {
794 timeout = block->b_when - jiffies;
798 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
799 block, block->b_when);
800 if (block->b_flags & B_QUEUED) {
801 dprintk("nlmsvc_retry_blocked delete block (%p, granted=%d, flags=%d)\n",
802 block, block->b_granted, block->b_flags);
803 retry_deferred_block(block);
805 nlmsvc_grant_blocked(block);