2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
/* Slab cache used for struct fuse_req allocations; it is created in
   fuse_dev_init() and destroyed in fuse_dev_cleanup(). */
22 static kmem_cache_t *fuse_req_cachep;
/*
 * Read the connection pointer stored in file->private_data while
 * holding fuse_lock.
 *
 * NOTE(review): any validation of the pointer and the return statement
 * are not visible in this extract; callers use the result as a
 * struct fuse_conn pointer.
 */
24 static inline struct fuse_conn *fuse_get_conn(struct file *file)
27 spin_lock(&fuse_lock);
28 fc = file->private_data;
31 spin_unlock(&fuse_lock);
/*
 * Put a request into its initial state: the whole structure is zeroed,
 * the list head and wait queue are initialised and the reference count
 * is set to 1.
 */
35 static inline void fuse_request_init(struct fuse_req *req)
37 memset(req, 0, sizeof(*req));
38 INIT_LIST_HEAD(&req->list);
39 init_waitqueue_head(&req->waitq);
40 atomic_set(&req->count, 1);
/*
 * Allocate a request from fuse_req_cachep (SLAB_KERNEL) and initialise
 * it with fuse_request_init().
 *
 * NOTE(review): no NULL check on the allocation is visible in this
 * extract; confirm fuse_request_init() is never reached with a NULL req.
 */
43 struct fuse_req *fuse_request_alloc(void)
45 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
47 fuse_request_init(req);
/* Give a request back to the fuse_req_cachep slab cache. */
51 void fuse_request_free(struct fuse_req *req)
53 kmem_cache_free(fuse_req_cachep, req);
/*
 * Block the signals in a mask built as the inverse of the SIGKILL set,
 * i.e. everything except SIGKILL.  The previous mask is stored in
 * *oldset so that restore_sigs() can reinstate it.
 */
56 static inline void block_sigs(sigset_t *oldset)
60 siginitsetinv(&mask, sigmask(SIGKILL));
61 sigprocmask(SIG_BLOCK, &mask, oldset);
/* Reinstate the signal mask previously saved in *oldset. */
64 static inline void restore_sigs(sigset_t *oldset)
66 sigprocmask(SIG_SETMASK, oldset, NULL);
/*
 * Re-initialise a request for reuse.  The request must hold exactly one
 * reference (enforced with BUG_ON).  fuse_request_init() zeroes the
 * structure, so the preallocated flag is saved first and written back
 * afterwards.
 */
69 void fuse_reset_request(struct fuse_req *req)
71 int preallocated = req->preallocated;
72 BUG_ON(atomic_read(&req->count) != 1);
73 fuse_request_init(req);
74 req->preallocated = preallocated;
/* Take an additional reference on the request. */
77 static void __fuse_get_request(struct fuse_req *req)
79 atomic_inc(&req->count);
82 /* Must be called with > 1 refcount */
/* Drops one reference without ever releasing the last one: a count
   below 2 on entry triggers BUG_ON, so no putback/free happens here. */
83 static void __fuse_put_request(struct fuse_req *req)
85 BUG_ON(atomic_read(&req->count) < 2);
86 atomic_dec(&req->count);
/*
 * Take the first request off fc->unused_list (under fuse_lock; an empty
 * list is a BUG), re-initialise it, mark it as preallocated and record
 * the caller's fsuid, fsgid and pid in the request header.
 *
 * NOTE(review): the return statement is not visible in this extract;
 * fuse_get_request() returns this function's result as the request.
 */
89 static struct fuse_req *do_get_request(struct fuse_conn *fc)
93 spin_lock(&fuse_lock);
94 BUG_ON(list_empty(&fc->unused_list));
95 req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 list_del_init(&req->list);
97 spin_unlock(&fuse_lock);
98 fuse_request_init(req);
99 req->preallocated = 1;
100 req->in.h.uid = current->fsuid;
101 req->in.h.gid = current->fsgid;
102 req->in.h.pid = current->pid;
/*
 * Acquire fc->outstanding_sem with an interruptible wait and then obtain
 * a request through do_get_request().
 *
 * NOTE(review): the statement executed when down_interruptible() fails
 * is not visible in this extract; presumably it returns NULL - confirm.
 */
106 struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108 if (down_interruptible(&fc->outstanding_sem))
110 return do_get_request(fc);
114 * Non-interruptible version of the above function is for operations
115 * which can't legally return -ERESTART{SYS,NOINTR}. This can still
116 * return NULL, but only in case the signal is SIGKILL.
/*
 * Acquire fc->outstanding_sem, restore the saved signal mask, and return
 * NULL if the wait was interrupted or a request from do_get_request()
 * otherwise.
 *
 * NOTE(review): the code that fills oldset (presumably a block_sigs()
 * call before the wait) is not visible in this extract.
 */
118 struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc)
124 intr = down_interruptible(&fc->outstanding_sem);
125 restore_sigs(&oldset);
126 return intr ? NULL : do_get_request(fc);
/*
 * Dispose of a request whose last reference is gone and account for it
 * on the connection, all under fuse_lock: the request is put on
 * fc->unused_list or released with fuse_request_free(), and either
 * fc->outstanding_debt is decremented or fc->outstanding_sem is upped.
 *
 * NOTE(review): the lines joining the two halves of each choice
 * (presumably "else") are missing from this extract.
 */
129 static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
131 spin_lock(&fuse_lock);
132 if (req->preallocated)
133 list_add(&req->list, &fc->unused_list);
135 fuse_request_free(req);
137 /* If we are in debt decrease that first */
138 if (fc->outstanding_debt)
139 fc->outstanding_debt--;
141 up(&fc->outstanding_sem);
142 spin_unlock(&fuse_lock);
/*
 * Drop one reference on the request; when the count reaches zero the
 * request is handed to fuse_putback_request().
 */
145 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
147 if (atomic_dec_and_test(&req->count))
148 fuse_putback_request(fc, req);
152 * This function is called when a request is finished. Either a reply
153 * has arrived or it was interrupted (and not yet sent) or some error
154 * occurred during communication with userspace, or the device file was
155 * closed. It decreases the reference count for the request. In case
156 * of a background request the references to the stored objects are
157 * released. The requester thread is woken up (if still waiting), and
158 * finally the request is either freed or put on the unused_list
160 * Called with fuse_lock, unlocks it
/*
 * Finish a request.  One reference is dropped (putback records whether
 * it was the last one) and fuse_lock is released.  Waiters on req->waitq
 * are woken.  For a FUSE_INIT request the major version in the reply
 * buffer is compared with FUSE_KERNEL_VERSION and outstanding_sem is
 * upped FUSE_MAX_OUTSTANDING - 1 times.  Finally the request goes to
 * fuse_putback_request().
 *
 * NOTE(review): the body of the req->background branch, the action
 * taken on a version mismatch and the condition guarding the final
 * putback are not visible in this extract.
 */
162 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
166 putback = atomic_dec_and_test(&req->count);
167 spin_unlock(&fuse_lock);
168 if (req->background) {
176 wake_up(&req->waitq);
177 if (req->in.h.opcode == FUSE_INIT) {
180 if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
183 /* After INIT reply is received other requests can go
184 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
185 up()s on outstanding_sem. The last up() is done in
186 fuse_putback_request() */
187 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
188 up(&fc->outstanding_sem);
191 fuse_putback_request(fc, req);
/*
 * Pin the objects a request refers to: req->inode and req->inode2 are
 * each replaced by the result of igrab() on them.
 *
 * NOTE(review): the guards around the igrab() calls and any handling of
 * the file mentioned in the inline comment are not visible here.
 */
194 static void background_request(struct fuse_req *req)
196 /* Need to get hold of the inode(s) and/or file used in the
197 request, so FORGET and RELEASE are not sent too early */
200 req->inode = igrab(req->inode);
202 req->inode2 = igrab(req->inode2);
/*
 * Sleep on req->waitq until req->finished is set, then restore the
 * saved signal mask.
 *
 * NOTE(review): the code that fills oldset (presumably block_sigs()) and
 * the return of err are not visible in this extract.
 */
207 static int request_wait_answer_nonint(struct fuse_req *req)
212 err = wait_event_interruptible(req->waitq, req->finished);
213 restore_sigs(&oldset);
217 /* Called with fuse_lock held. Releases, and then reacquires it. */
/*
 * Wait for the answer to a queued request.
 *
 * The wait is either a plain wait_event_interruptible() or
 * request_wait_answer_nonint().  If an interruptible wait is interrupted
 * after the request was sent, the wait is repeated through
 * request_wait_answer_nonint().  When the wait ends in an interruption,
 * the error is set to -EINTR (non-interruptible caller or request
 * already sent) or -ERESTARTNOINTR, req->interrupted is set, and the
 * function waits without signals until req->locked is clear.  A request
 * that was not sent and is still on a list is unlinked and loses one
 * reference; a sent but unfinished request is passed to
 * background_request().
 *
 * NOTE(review): several branch and return lines are missing from this
 * extract, so the exact nesting of the steps above should be confirmed.
 */
218 static void request_wait_answer(struct fuse_req *req, int interruptible)
222 spin_unlock(&fuse_lock);
224 intr = wait_event_interruptible(req->waitq, req->finished);
226 intr = request_wait_answer_nonint(req);
227 spin_lock(&fuse_lock);
228 if (intr && interruptible && req->sent) {
229 /* If request is already in userspace, only allow KILL
230 signal to interrupt */
231 spin_unlock(&fuse_lock);
232 intr = request_wait_answer_nonint(req);
233 spin_lock(&fuse_lock);
238 if (!interruptible || req->sent)
239 req->out.h.error = -EINTR;
241 req->out.h.error = -ERESTARTNOINTR;
243 req->interrupted = 1;
245 /* This is uninterruptible sleep, because data is
246 being copied to/from the buffers of req. During
247 locked state, there mustn't be any filesystem
248 operation (e.g. page fault), since that could lead
250 spin_unlock(&fuse_lock);
251 wait_event(req->waitq, !req->locked);
252 spin_lock(&fuse_lock);
254 if (!req->sent && !list_empty(&req->list)) {
255 list_del(&req->list);
256 __fuse_put_request(req);
257 } else if (!req->finished && req->sent)
258 background_request(req);
/*
 * Add up the size fields of the first numargs entries of args.  Callers
 * use the result as a byte count when computing message lengths.
 */
261 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
266 for (i = 0; i < numargs; i++)
267 nbytes += args[i].size;
/*
 * Prepare a request for delivery and append it to fc->pending: the
 * header gets its unique id from fc->reqctr and its total length
 * (header plus input arguments).  Requests that are not preallocated
 * try to take outstanding_sem without blocking and, failing that,
 * increase fc->outstanding_debt.
 *
 * NOTE(review): the update of fc->reqctr implied by "zero is special"
 * and any wake-up of readers are not visible in this extract.
 */
272 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
275 /* zero is special */
278 req->in.h.unique = fc->reqctr;
279 req->in.h.len = sizeof(struct fuse_in_header) +
280 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
281 if (!req->preallocated) {
282 /* If request is not preallocated (either FORGET or
283 RELEASE), then still decrease outstanding_sem, so
284 user can't open infinite number of files while not
285 processing the RELEASE requests. However for
286 efficiency do it without blocking, so if down()
287 would block, just increase the debt instead */
288 if (down_trylock(&fc->outstanding_sem))
289 fc->outstanding_debt++;
291 list_add_tail(&req->list, &fc->pending);
/*
 * Send a request and wait for its answer, under fuse_lock.  The request
 * fails with -ENOTCONN or, when fc->conn_error is set, -ECONNREFUSED;
 * otherwise it is queued, an extra reference is taken and
 * request_wait_answer() is called with the interruptible flag.
 *
 * NOTE(review): the condition selecting -ENOTCONN and the remaining
 * parameter list are not visible in this extract.
 */
295 static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
299 spin_lock(&fuse_lock);
301 req->out.h.error = -ENOTCONN;
302 else if (fc->conn_error)
303 req->out.h.error = -ECONNREFUSED;
305 queue_request(fc, req);
306 /* acquire extra reference, since request is still needed
307 after request_end() */
308 __fuse_get_request(req);
310 request_wait_answer(req, interruptible);
312 spin_unlock(&fuse_lock);
/* Send a request and wait for the answer with the interruptible flag
   set (third argument of request_send_wait() is 1). */
315 void request_send(struct fuse_conn *fc, struct fuse_req *req)
317 request_send_wait(fc, req, 1);
321 * Non-interruptible version of the above function is for operations
322 * which can't legally return -ERESTART{SYS,NOINTR}. This can still
323 * be interrupted but only with SIGKILL.
/* Send a request and wait for the answer with the interruptible flag
   cleared (third argument of request_send_wait() is 0). */
325 void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
327 request_send_wait(fc, req, 0);
/*
 * Queue a request without waiting for an answer.  Under fuse_lock the
 * request is either queued with queue_request(), after which the lock
 * is dropped, or it is failed with -ENOTCONN and finished through
 * request_end().
 *
 * NOTE(review): the condition choosing between the two paths is not
 * visible in this extract.
 */
330 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
332 spin_lock(&fuse_lock);
334 queue_request(fc, req);
335 spin_unlock(&fuse_lock);
337 req->out.h.error = -ENOTCONN;
338 request_end(fc, req);
/*
 * Hand the request to request_send_nowait().
 *
 * NOTE(review): any request setup done before the call is not visible
 * in this extract.
 */
342 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
345 request_send_nowait(fc, req);
/*
 * Pin the request's objects with background_request() and queue it with
 * request_send_nowait(), so the caller does not wait for the answer.
 */
348 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
351 background_request(req);
352 request_send_nowait(fc, req);
/*
 * Build and send the FUSE_INIT request.  The same fuse_init_in_out
 * buffer, embedded in the request, is used as the single input and the
 * single output argument; it is filled with the kernel's major and
 * minor protocol version before being sent in the background.
 */
355 void fuse_send_init(struct fuse_conn *fc)
357 /* This is called from fuse_read_super() so there's guaranteed
358 to be a request available */
359 struct fuse_req *req = do_get_request(fc);
360 struct fuse_init_in_out *arg = &req->misc.init_in_out;
361 arg->major = FUSE_KERNEL_VERSION;
362 arg->minor = FUSE_KERNEL_MINOR_VERSION;
363 req->in.h.opcode = FUSE_INIT;
365 req->in.args[0].size = sizeof(*arg);
366 req->in.args[0].value = arg;
367 req->out.numargs = 1;
368 req->out.args[0].size = sizeof(*arg);
369 req->out.args[0].value = arg;
370 request_send_background(fc, req);
374 * Lock the request. Up to the next unlock_request() there mustn't be
375 * anything that could cause a page-fault. If the request was already
376 * interrupted bail out.
/*
 * Check req->interrupted under fuse_lock.
 *
 * NOTE(review): the statements that set the locked state and compute
 * the return value are not visible in this extract; fuse_copy_fill()
 * returns this function's result as its error code.
 */
378 static inline int lock_request(struct fuse_req *req)
382 spin_lock(&fuse_lock);
383 if (req->interrupted)
387 spin_unlock(&fuse_lock);
393 * Unlock request. If it was interrupted while it was locked, the
394 * requester thread is currently waiting for it to be unlocked, so wake it up.
/*
 * Under fuse_lock, wake up waiters on req->waitq if the request has been
 * marked interrupted.
 *
 * NOTE(review): the statement that clears the locked state is not
 * visible in this extract.
 */
397 static inline void unlock_request(struct fuse_req *req)
400 spin_lock(&fuse_lock);
402 if (req->interrupted)
403 wake_up(&req->waitq);
404 spin_unlock(&fuse_lock);
/*
 * Cursor shared by the fuse_copy_*() helpers while data is moved between
 * a request and a userspace iovec.
 *
 * NOTE(review): only some members are visible in this extract; the
 * helpers also use write, pg, mapaddr, buf, len and addr.
 */
408 struct fuse_copy_state {
/* Request being copied; fuse_dev_writev() passes NULL here initially. */
410 struct fuse_req *req;
/* Userspace segments; fuse_copy_fill() reads the length and base of
   the first entry. */
411 const struct iovec *iov;
412 unsigned long nr_segs;
/* Length taken from the current segment, reduced by each mapped chunk. */
413 unsigned long seglen;
/*
 * Reset a copy state to all zeroes and record the segment count.
 *
 * NOTE(review): the assignments of the other arguments (write, req,
 * iov) are not visible in this extract.
 */
421 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
422 struct fuse_req *req, const struct iovec *iov,
423 unsigned long nr_segs)
425 memset(cs, 0, sizeof(*cs));
429 cs->nr_segs = nr_segs;
432 /* Unmap and put previous page of userspace buffer */
/* The KM_USER0 mapping made by fuse_copy_fill() is undone here; the
   page is also flushed from the dcache and marked dirty.
   NOTE(review): the conditions guarding these steps and the page
   release are not visible in this extract. */
433 static inline void fuse_copy_finish(struct fuse_copy_state *cs)
436 kunmap_atomic(cs->mapaddr, KM_USER0);
438 flush_dcache_page(cs->pg);
439 set_page_dirty_lock(cs->pg);
447 * Get another pagefull of userspace buffer, and map it to kernel
448 * address space, and lock request
/*
 * Advance the copy state to the next chunk of the userspace buffer.
 *
 * The request is unlocked and the previous page released.  The segment
 * length and address are loaded from the first iovec entry (having no
 * segments left is a BUG).  One user page at cs->addr is pinned with
 * get_user_pages() while current->mm->mmap_sem is held for reading.
 * The page is mapped with kmap_atomic(KM_USER0), and cs->buf / cs->len
 * are set to the part of the page covered by the segment.  The result
 * of lock_request() is returned.
 *
 * The two mmap_sem calls previously read "¤t->mm->mmap_sem": the text
 * "&current" had been mis-decoded as the HTML entity "&curren" followed
 * by "t".  Both are restored to "&current".
 *
 * NOTE(review): several lines (conditions around the segment advance,
 * the tail of the get_user_pages() call and its error check) are
 * missing from this extract and are left untouched.
 */
450 static int fuse_copy_fill(struct fuse_copy_state *cs)
452 unsigned long offset;
455 unlock_request(cs->req);
456 fuse_copy_finish(cs);
458 BUG_ON(!cs->nr_segs);
459 cs->seglen = cs->iov[0].iov_len;
460 cs->addr = (unsigned long) cs->iov[0].iov_base;
464 down_read(&current->mm->mmap_sem);
465 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
467 up_read(&current->mm->mmap_sem);
471 offset = cs->addr % PAGE_SIZE;
472 cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
473 cs->buf = cs->mapaddr + offset;
474 cs->len = min(PAGE_SIZE - offset, cs->seglen);
475 cs->seglen -= cs->len;
478 return lock_request(cs->req);
481 /* Do as much copy to/from userspace buffer as we can */
/* The amount copied is the smaller of *size and cs->len; data moves
   either from *val into cs->buf or from cs->buf into *val.
   NOTE(review): the direction test, the cursor updates and the return
   value are not visible in this extract; fuse_copy_page() adds the
   result to an offset. */
482 static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
485 unsigned ncpy = min(*size, cs->len);
488 memcpy(cs->buf, *val, ncpy);
490 memcpy(*val, cs->buf, ncpy);
500 * Copy a page in the request to/from the userspace buffer. Must be
/*
 * Copy up to count bytes between one page of the request and the
 * userspace buffer, starting at offset within the page.
 *
 * When zeroing is requested and fewer than PAGE_SIZE bytes are to be
 * copied, the whole page is cleared first.  The userspace window is
 * refilled with fuse_copy_fill() whenever cs->len is zero.  With a page
 * present the data goes through a KM_USER1 mapping; a call with a NULL
 * buffer is made otherwise.  A page that received data (cs->write
 * clear) is flushed from the dcache at the end.
 *
 * NOTE(review): the loop header, the error return and the final return
 * are not visible in this extract.
 */
503 static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
504 unsigned offset, unsigned count, int zeroing)
506 if (page && zeroing && count < PAGE_SIZE) {
507 void *mapaddr = kmap_atomic(page, KM_USER1);
508 memset(mapaddr, 0, PAGE_SIZE);
509 kunmap_atomic(mapaddr, KM_USER1);
513 if (!cs->len && (err = fuse_copy_fill(cs)))
516 void *mapaddr = kmap_atomic(page, KM_USER1);
517 void *buf = mapaddr + offset;
518 offset += fuse_copy_do(cs, &buf, &count);
519 kunmap_atomic(mapaddr, KM_USER1);
521 offset += fuse_copy_do(cs, NULL, &count);
523 if (page && !cs->write)
524 flush_dcache_page(page);
528 /* Copy pages in the request to/from userspace buffer */
/* The first page starts at req->page_offset, so its count is capped at
   the space left in that page; later counts are capped at PAGE_SIZE.
   The loop stops when the pages run out, or when no bytes remain and
   zeroing is not requested.
   NOTE(review): the error check, the nbytes/offset updates and the
   return are not visible in this extract. */
529 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
533 struct fuse_req *req = cs->req;
534 unsigned offset = req->page_offset;
535 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
537 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
538 struct page *page = req->pages[i];
539 int err = fuse_copy_page(cs, page, offset, count, zeroing);
544 count = min(nbytes, (unsigned) PAGE_SIZE);
550 /* Copy a single argument in the request to/from userspace buffer */
/* The userspace window is refilled with fuse_copy_fill() whenever
   cs->len is zero, then fuse_copy_do() moves as much of val as fits.
   NOTE(review): the enclosing loop and the returns are not visible in
   this extract. */
551 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
555 if (!cs->len && (err = fuse_copy_fill(cs)))
557 fuse_copy_do(cs, &val, &size);
562 /* Copy request arguments to/from userspace buffer */
/* Arguments are processed in order until one fails.  When argpages is
   set the last argument is transferred through the request's pages
   with fuse_copy_pages(); fuse_copy_one() is the other copy path.
   NOTE(review): the "else" joining the two paths and the return are not
   visible in this extract. */
563 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
564 unsigned argpages, struct fuse_arg *args,
570 for (i = 0; !err && i < numargs; i++) {
571 struct fuse_arg *arg = &args[i];
572 if (i == numargs - 1 && argpages)
573 err = fuse_copy_pages(cs, arg->size, zeroing);
575 err = fuse_copy_one(cs, arg->value, arg->size);
580 /* Wait until a request is available on the pending list */
/* The current task is added to fc->waitq as an exclusive waiter and
   loops while fc->sb is set and fc->pending is empty.  fuse_lock is
   dropped and retaken inside the loop, so it is held on entry.
   NOTE(review): the action taken when a signal is pending and the call
   that actually sleeps are not visible in this extract. */
581 static void request_wait(struct fuse_conn *fc)
583 DECLARE_WAITQUEUE(wait, current);
585 add_wait_queue_exclusive(&fc->waitq, &wait);
586 while (fc->sb && list_empty(&fc->pending)) {
587 set_current_state(TASK_INTERRUPTIBLE);
588 if (signal_pending(current))
591 spin_unlock(&fuse_lock);
593 spin_lock(&fuse_lock);
595 set_current_state(TASK_RUNNING);
596 remove_wait_queue(&fc->waitq, &wait);
600 * Read a single request into the userspace filesystem's buffer. This
601 * function waits until a request is available, then removes it from
602 * the pending list and copies request data to userspace buffer. If
603 * no reply is needed (FORGET) or request has been interrupted or
604 * there was an error during the copying then it's finished by calling
605 * request_end(). Otherwise add it to the processing list, and set
/*
 * readv() handler of the FUSE device.
 *
 * Under fuse_lock the connection is read from file->private_data and
 * the first request on fc->pending is unlinked.  With the lock dropped,
 * the request header and input arguments are copied to the user iovec,
 * provided the iovec is at least in.h.len bytes long.  The lock is then
 * retaken and the request is either finished through request_end() or
 * moved to fc->processing.
 *
 * NOTE(review): the waiting step, the error codes, the conditions
 * selecting between the request_end() calls and the processing list,
 * and the return values are not visible in this extract.
 */
608 static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
609 unsigned long nr_segs, loff_t *off)
612 struct fuse_conn *fc;
613 struct fuse_req *req;
615 struct fuse_copy_state cs;
618 spin_lock(&fuse_lock);
619 fc = file->private_data;
628 if (list_empty(&fc->pending))
631 req = list_entry(fc->pending.next, struct fuse_req, list);
632 list_del_init(&req->list);
633 spin_unlock(&fuse_lock);
636 reqsize = req->in.h.len;
637 fuse_copy_init(&cs, 1, req, iov, nr_segs);
639 if (iov_length(iov, nr_segs) >= reqsize) {
640 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
642 err = fuse_copy_args(&cs, in->numargs, in->argpages,
643 (struct fuse_arg *) in->args, 0);
645 fuse_copy_finish(&cs);
647 spin_lock(&fuse_lock);
649 if (!err && req->interrupted)
652 if (!req->interrupted)
653 req->out.h.error = -EIO;
654 request_end(fc, req);
658 request_end(fc, req);
661 list_add_tail(&req->list, &fc->processing);
662 spin_unlock(&fuse_lock);
667 spin_unlock(&fuse_lock);
/*
 * read() handler: wraps the flat buffer in a single-element iovec and
 * delegates to fuse_dev_readv().
 *
 * NOTE(review): the iovec declaration and the iov_base assignment are
 * not visible in this extract.
 */
671 static ssize_t fuse_dev_read(struct file *file, char __user *buf,
672 size_t nbytes, loff_t *off)
675 iov.iov_len = nbytes;
677 return fuse_dev_readv(file, &iov, 1, off);
680 /* Look up request on processing list by unique ID */
/* Walks fc->processing and compares each request's in.h.unique with the
   given id.  fuse_dev_writev() calls this with fuse_lock held.
   NOTE(review): the return statements (match and no-match) are not
   visible in this extract. */
681 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
683 struct list_head *entry;
685 list_for_each(entry, &fc->processing) {
686 struct fuse_req *req;
687 req = list_entry(entry, struct fuse_req, list);
688 if (req->in.h.unique == unique)
694 /* fget() needs to be done in this context */
/* Resolves the descriptor number in the first output argument into a
   file reference: arg->file is set to fget(arg->fd). */
695 static void process_getdir(struct fuse_req *req)
697 struct fuse_getdir_out_i *arg = req->out.args[0].value;
698 arg->file = fget(arg->fd);
/*
 * Validate the size of a reply and copy its arguments into the request.
 *
 * The expected size is the output header plus the sum of the output
 * argument sizes.  One path accepts only an exactly header-sized reply
 * (-EINVAL otherwise).  A reply longer than expected, or shorter
 * without out->argvar, is singled out by the size test.  With argvar a
 * shorter reply shrinks the last argument by the missing amount, as
 * long as that does not exceed the argument's size.  The arguments are
 * then copied with fuse_copy_args().
 *
 * NOTE(review): the condition selecting the header-only path and the
 * error returns of the size checks are not visible in this extract.
 */
701 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
704 unsigned reqsize = sizeof(struct fuse_out_header);
707 return nbytes != reqsize ? -EINVAL : 0;
709 reqsize += len_args(out->numargs, out->args);
711 if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
713 else if (reqsize > nbytes) {
714 struct fuse_arg *lastarg = &out->args[out->numargs-1];
715 unsigned diffsize = reqsize - nbytes;
716 if (diffsize > lastarg->size)
718 lastarg->size -= diffsize;
720 return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
725 * Write a single reply to a request. First the header is copied from
726 * the write buffer. The request is then searched on the processing
727 * list by the unique ID found in the header. If found, then remove
728 * it from the list and copy the rest of the buffer to the request.
729 * The request is finished by calling request_end()
/*
 * writev() handler of the FUSE device: accepts one reply.
 *
 * The total length must cover at least a fuse_out_header, which is
 * copied in first and sanity-checked (unique id non-zero, error code
 * greater than -1000 and not positive).  Under fuse_lock the matching
 * request is looked up on the processing list and unlinked.  A request
 * that is already interrupted is finished immediately.  Otherwise the
 * lock is dropped while copy_out_args() copies the reply body, then
 * retaken before the request is finished with request_end().  A failed
 * copy sets -EIO on a request that was not interrupted.  The return
 * value is the error, or nbytes on success.
 *
 * NOTE(review): the error codes, the rest of the header check, the
 * not-found path and parts of the post-copy handling are not visible in
 * this extract.
 */
731 static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
732 unsigned long nr_segs, loff_t *off)
735 unsigned nbytes = iov_length(iov, nr_segs);
736 struct fuse_req *req;
737 struct fuse_out_header oh;
738 struct fuse_copy_state cs;
739 struct fuse_conn *fc = fuse_get_conn(file);
743 fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
744 if (nbytes < sizeof(struct fuse_out_header))
747 err = fuse_copy_one(&cs, &oh, sizeof(oh));
751 if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
755 spin_lock(&fuse_lock);
756 req = request_find(fc, oh.unique);
761 list_del_init(&req->list);
762 if (req->interrupted) {
763 request_end(fc, req);
764 fuse_copy_finish(&cs);
770 spin_unlock(&fuse_lock);
772 err = copy_out_args(&cs, &req->out, nbytes);
773 fuse_copy_finish(&cs);
775 spin_lock(&fuse_lock);
778 if (req->interrupted)
780 else if (req->in.h.opcode == FUSE_GETDIR && !oh.error)
782 } else if (!req->interrupted)
783 req->out.h.error = -EIO;
784 request_end(fc, req);
786 return err ? err : nbytes;
789 spin_unlock(&fuse_lock);
791 fuse_copy_finish(&cs);
/*
 * write() handler: wraps the flat buffer in a single-element iovec and
 * delegates to fuse_dev_writev().
 */
795 static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
796 size_t nbytes, loff_t *off)
799 iov.iov_len = nbytes;
/* The cast drops const because iov_base is not const-qualified. */
800 iov.iov_base = (char __user *) buf;
801 return fuse_dev_writev(file, &iov, 1, off);
/*
 * poll() handler.  The mask starts as writable (POLLOUT | POLLWRNORM).
 * The caller is registered on fc->waitq, and readability
 * (POLLIN | POLLRDNORM) is added when fc->pending is non-empty, checked
 * under fuse_lock.
 *
 * NOTE(review): the handling of a missing connection and the return of
 * mask are not visible in this extract.
 */
804 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806 struct fuse_conn *fc = fuse_get_conn(file);
807 unsigned mask = POLLOUT | POLLWRNORM;
812 poll_wait(file, &fc->waitq, wait);
814 spin_lock(&fuse_lock);
815 if (!list_empty(&fc->pending))
816 mask |= POLLIN | POLLRDNORM;
817 spin_unlock(&fuse_lock);
822 /* Abort all requests on the given list (pending or processing) */
/* Each request is unlinked, failed with -ECONNABORTED and finished via
   request_end().  request_end() releases fuse_lock, so the lock is
   retaken after every call; it is therefore held on entry and exit. */
823 static void end_requests(struct fuse_conn *fc, struct list_head *head)
825 while (!list_empty(head)) {
826 struct fuse_req *req;
827 req = list_entry(head->next, struct fuse_req, list);
828 list_del_init(&req->list);
829 req->out.h.error = -ECONNABORTED;
830 request_end(fc, req);
831 spin_lock(&fuse_lock);
/*
 * release() handler of the FUSE device.  Under fuse_lock, all requests
 * on the connection's pending and processing lists are aborted and
 * fuse_release_conn() is called on the connection.
 *
 * NOTE(review): the guard on fc, any clearing of the connection's file
 * pointer and the return value are not visible in this extract.
 */
835 static int fuse_dev_release(struct inode *inode, struct file *file)
837 struct fuse_conn *fc;
839 spin_lock(&fuse_lock);
840 fc = file->private_data;
843 end_requests(fc, &fc->pending);
844 end_requests(fc, &fc->processing);
845 fuse_release_conn(fc);
847 spin_unlock(&fuse_lock);
/* File operations of the FUSE device: the scalar and vectored read and
   write handlers, poll and release defined in this file. */
851 struct file_operations fuse_dev_operations = {
852 .owner = THIS_MODULE,
854 .read = fuse_dev_read,
855 .readv = fuse_dev_readv,
856 .write = fuse_dev_write,
857 .writev = fuse_dev_writev,
858 .poll = fuse_dev_poll,
859 .release = fuse_dev_release,
/* Misc device descriptor registered in fuse_dev_init(); it routes file
   operations to fuse_dev_operations.
   NOTE(review): the minor and name fields are not visible in this
   extract. */
862 static struct miscdevice fuse_miscdevice = {
865 .fops = &fuse_dev_operations,
/*
 * Module initialisation for the device side: create the "fuse_request"
 * slab cache sized for struct fuse_req and register the misc device.
 * The out_cache_clean path destroys the cache again.
 *
 * NOTE(review): the remaining kmem_cache_create() arguments, the error
 * codes and the return statements are not visible in this extract.
 */
868 int __init fuse_dev_init(void)
871 fuse_req_cachep = kmem_cache_create("fuse_request",
872 sizeof(struct fuse_req),
874 if (!fuse_req_cachep)
877 err = misc_register(&fuse_miscdevice);
879 goto out_cache_clean;
884 kmem_cache_destroy(fuse_req_cachep);
/* Undo fuse_dev_init(): deregister the misc device, then destroy the
   request slab cache. */
889 void fuse_dev_cleanup(void)
891 misc_deregister(&fuse_miscdevice);
892 kmem_cache_destroy(fuse_req_cachep);