X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmadvise.c;h=93ee375b38e7edc7b414b7a2b6891411a45d0b0c;hb=08291429cfa6258c4cd95d8833beb40f828b194e;hp=ae0ae3ea299a7a83b72939236ef4931eb5ac16d0;hpb=cc918c7ab7da017bfaf9661420bb5c462e057cfb;p=linux-2.6 diff --git a/mm/madvise.c b/mm/madvise.c index ae0ae3ea29..93ee375b38 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -10,6 +10,25 @@ #include #include #include +#include + +/* + * Any behaviour which results in changes to the vma->vm_flags needs to + * take mmap_sem for writing. Others, which simply traverse vmas, need + * to only take it for reading. + */ +static int madvise_need_mmap_write(int behavior) +{ + switch (behavior) { + case MADV_REMOVE: + case MADV_WILLNEED: + case MADV_DONTNEED: + return 0; + default: + /* be safe, default to 1. list exceptions explicitly */ + return 1; + } +} /* * We can potentially split a vm area into separate @@ -22,16 +41,23 @@ static long madvise_behavior(struct vm_area_struct * vma, struct mm_struct * mm = vma->vm_mm; int error = 0; pgoff_t pgoff; - int new_flags = vma->vm_flags & ~VM_READHINTMASK; + int new_flags = vma->vm_flags; switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; case MADV_SEQUENTIAL: - new_flags |= VM_SEQ_READ; + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; break; case MADV_RANDOM: - new_flags |= VM_RAND_READ; + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; break; - default: + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + new_flags &= ~VM_DONTCOPY; break; } @@ -148,10 +174,14 @@ static long madvise_dontneed(struct vm_area_struct * vma, * Other filesystems return -ENOSYS. */ static long madvise_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct address_space *mapping; - loff_t offset, endoff; + loff_t offset, endoff; + int error; + + *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) return -EINVAL; @@ -161,13 +191,21 @@ static long madvise_remove(struct vm_area_struct *vma, return -EINVAL; } + if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) + return -EACCES; + mapping = vma->vm_file->f_mapping; offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - return vmtruncate_range(mapping->host, offset, endoff); + + /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + up_read(¤t->mm->mmap_sem); + error = vmtruncate_range(mapping->host, offset, endoff); + down_read(¤t->mm->mmap_sem); + return error; } static long @@ -177,13 +215,19 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, long error; switch (behavior) { + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) { + error = -EINVAL; + break; + } + case MADV_DONTFORK: case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: error = madvise_behavior(vma, prev, start, end, behavior); break; case MADV_REMOVE: - error = madvise_remove(vma, start, end); + error = madvise_remove(vma, prev, start, end); break; case MADV_WILLNEED: @@ -243,9 +287,14 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) struct vm_area_struct * vma, *prev; int unmapped_error = 0; int error = -EINVAL; + int write; size_t len; - down_write(¤t->mm->mmap_sem); + write = madvise_need_mmap_write(behavior); + if (write) + down_write(¤t->mm->mmap_sem); + else + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -296,14 +345,21 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) if (error) goto out; start = tmp; - if (start < prev->vm_end) + if (prev && start < prev->vm_end) start = prev->vm_end; error = unmapped_error; if (start >= end) goto out; - vma = prev->vm_next; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_sem */ + vma = find_vma(current->mm, start); } out: - up_write(¤t->mm->mmap_sem); + if (write) + up_write(¤t->mm->mmap_sem); + else + up_read(¤t->mm->mmap_sem); + return error; }