#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
+#include <linux/seq_file.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
SGP_WRITE, /* may exceed i_size, may allocate page */
};
+#ifdef CONFIG_TMPFS
+static unsigned long shmem_default_max_blocks(void)
+{
+ return totalram_pages / 2;
+}
+
+static unsigned long shmem_default_max_inodes(void)
+{
+ return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
+}
+#endif
+
static int shmem_getpage(struct inode *inode, unsigned long idx,
struct page **pagep, enum sgp_type sgp, int *type);
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
.unplug_io_fn = default_unplug_io_fn,
};
}
}
-/*
+/**
* shmem_recalc_inode - recalculate the size of an inode
- *
* @inode: inode to recalc
*
* We have to calculate the free blocks since the mm can drop
}
}
-/*
+/**
* shmem_swp_entry - find the swap vector position in the info structure
- *
* @info: info structure for the inode
* @index: index of the page to find
* @page: optional page to add to the structure. Has to be preset to
}
}
-/*
+/**
* shmem_swp_alloc - get the position of the swap entry for the page.
- * If it does not exist allocate the entry.
- *
* @info: info structure for the inode
* @index: index of the page to find
* @sgp: check and recheck i_size? skip allocation?
+ *
+ * If the entry does not exist, allocate it.
*/
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
return entry;
}
-/*
+/**
* shmem_free_swp - free some swap entries in a directory
- *
* @dir: pointer to the directory
* @edir: pointer after last entry of the directory
* @punch_lock: pointer to spinlock when needed for the holepunch case
error = 1;
if (!inode)
goto out;
- error = radix_tree_preload(GFP_KERNEL);
+ /* Precharge page while we can wait, compensate afterwards */
+ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
+ error = radix_tree_preload(GFP_KERNEL);
+ if (error)
+ goto uncharge;
error = 1;
spin_lock(&info->lock);
shmem_swp_unmap(ptr);
spin_unlock(&info->lock);
radix_tree_preload_end();
+uncharge:
+ mem_cgroup_uncharge_page(page);
out:
unlock_page(page);
page_cache_release(page);
}
#ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+#ifdef CONFIG_TMPFS
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
- char *nodelist = strchr(value, ':');
- int err = 1;
+ char buffer[64];
- if (nodelist) {
- /* NUL-terminate policy string */
- *nodelist++ = '\0';
- if (nodelist_parse(nodelist, *policy_nodes))
- goto out;
- if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
- goto out;
- }
- if (!strcmp(value, "default")) {
- *policy = MPOL_DEFAULT;
- /* Don't allow a nodelist */
- if (!nodelist)
- err = 0;
- } else if (!strcmp(value, "prefer")) {
- *policy = MPOL_PREFERRED;
- /* Insist on a nodelist of one node only */
- if (nodelist) {
- char *rest = nodelist;
- while (isdigit(*rest))
- rest++;
- if (!*rest)
- err = 0;
- }
- } else if (!strcmp(value, "bind")) {
- *policy = MPOL_BIND;
- /* Insist on a nodelist */
- if (nodelist)
- err = 0;
- } else if (!strcmp(value, "interleave")) {
- *policy = MPOL_INTERLEAVE;
- /*
- * Default to online nodes with memory if no nodelist
- */
- if (!nodelist)
- *policy_nodes = node_states[N_HIGH_MEMORY];
- err = 0;
+ if (!mpol || mpol->mode == MPOL_DEFAULT)
+ return; /* show nothing */
+
+ mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+
+ seq_printf(seq, ",mpol=%s", buffer);
+}
+
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ struct mempolicy *mpol = NULL;
+ if (sbinfo->mpol) {
+ spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
+ mpol = sbinfo->mpol;
+ mpol_get(mpol);
+ spin_unlock(&sbinfo->stat_lock);
}
-out:
- /* Restore string for error message */
- if (nodelist)
- *--nodelist = ':';
- return err;
+ return mpol;
}
+#endif /* CONFIG_TMPFS */
static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
struct shmem_inode_info *info, unsigned long idx)
{
+ struct mempolicy mpol, *spol;
struct vm_area_struct pvma;
struct page *page;
+ spol = mpol_cond_copy(&mpol,
+ mpol_shared_policy_lookup(&info->policy, idx));
+
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
pvma.vm_pgoff = idx;
pvma.vm_ops = NULL;
- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+ pvma.vm_policy = spol;
page = swapin_readahead(entry, gfp, &pvma, 0);
- mpol_free(pvma.vm_policy);
return page;
}
struct shmem_inode_info *info, unsigned long idx)
{
struct vm_area_struct pvma;
- struct page *page;
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
pvma.vm_pgoff = idx;
pvma.vm_ops = NULL;
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
- page = alloc_page_vma(gfp, &pvma, 0);
- mpol_free(pvma.vm_policy);
- return page;
+
+ /*
+ * alloc_page_vma() will drop the shared policy reference
+ */
+ return alloc_page_vma(gfp, &pvma, 0);
}
-#else
-static inline int shmem_parse_mpol(char *value, int *policy,
- nodemask_t *policy_nodes)
+#else /* !CONFIG_NUMA */
+#ifdef CONFIG_TMPFS
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
{
- return 1;
}
+#endif /* CONFIG_TMPFS */
static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
struct shmem_inode_info *info, unsigned long idx)
{
return alloc_page(gfp);
}
+#endif /* CONFIG_NUMA */
+
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ return NULL;
+}
#endif
/*
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
+ if (error == -ENOMEM) {
+ /* allow reclaim from this memory cgroup */
+ error = mem_cgroup_cache_charge(swappage,
+ current->mm, gfp & ~__GFP_HIGHMEM);
+ if (error) {
+ page_cache_release(swappage);
+ goto failed;
+ }
+ mem_cgroup_uncharge_page(swappage);
+ }
page_cache_release(swappage);
goto repeat;
}
goto failed;
}
+ /* Precharge page while we can wait, compensate after */
+ error = mem_cgroup_cache_charge(filepage, current->mm,
+ gfp & ~__GFP_HIGHMEM);
+ if (error) {
+ page_cache_release(filepage);
+ shmem_unacct_blocks(info->flags, 1);
+ shmem_free_blocks(inode, 1);
+ filepage = NULL;
+ goto failed;
+ }
+
spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry))
if (error || swap.val || 0 != add_to_page_cache_lru(
filepage, mapping, idx, GFP_NOWAIT)) {
spin_unlock(&info->lock);
+ mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
goto failed;
goto repeat;
}
+ mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN;
}
case S_IFREG:
inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations;
- mpol_shared_policy_init(&info->policy, sbinfo->policy,
- &sbinfo->policy_nodes);
+ mpol_shared_policy_init(&info->policy,
+ shmem_get_sbmpol(sbinfo));
break;
case S_IFDIR:
inc_nlink(inode);
* Must not load anything in the rbtree,
* mpol_free_shared_policy will not be called.
*/
- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
- NULL);
+ mpol_shared_policy_init(&info->policy, NULL);
break;
}
} else
};
#ifdef CONFIG_TMPFS_POSIX_ACL
-/**
+/*
* Superblocks without xattr inode operations will get security.* xattr
* support from the VFS "for free". As soon as we have any other xattrs
* like ACLs, we also need to implement the security.* handlers at
.fh_to_dentry = shmem_fh_to_dentry,
};
-static int shmem_parse_options(char *options, int *mode, uid_t *uid,
- gid_t *gid, unsigned long *blocks, unsigned long *inodes,
- int *policy, nodemask_t *policy_nodes)
+static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
+ bool remount)
{
char *this_char, *value, *rest;
}
if (*rest)
goto bad_val;
- *blocks = DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
+ sbinfo->max_blocks =
+ DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
} else if (!strcmp(this_char,"nr_blocks")) {
- *blocks = memparse(value,&rest);
+ sbinfo->max_blocks = memparse(value, &rest);
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"nr_inodes")) {
- *inodes = memparse(value,&rest);
+ sbinfo->max_inodes = memparse(value, &rest);
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mode")) {
- if (!mode)
+ if (remount)
continue;
- *mode = simple_strtoul(value,&rest,8);
+ sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"uid")) {
- if (!uid)
+ if (remount)
continue;
- *uid = simple_strtoul(value,&rest,0);
+ sbinfo->uid = simple_strtoul(value, &rest, 0);
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"gid")) {
- if (!gid)
+ if (remount)
continue;
- *gid = simple_strtoul(value,&rest,0);
+ sbinfo->gid = simple_strtoul(value, &rest, 0);
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
- if (shmem_parse_mpol(value,policy,policy_nodes))
+ if (mpol_parse_str(value, &sbinfo->mpol, 1))
goto bad_val;
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
- unsigned long max_blocks = sbinfo->max_blocks;
- unsigned long max_inodes = sbinfo->max_inodes;
- int policy = sbinfo->policy;
- nodemask_t policy_nodes = sbinfo->policy_nodes;
+ struct shmem_sb_info config = *sbinfo;
unsigned long blocks;
unsigned long inodes;
int error = -EINVAL;
- if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks,
- &max_inodes, &policy, &policy_nodes))
+ if (shmem_parse_options(data, &config, true))
return error;
spin_lock(&sbinfo->stat_lock);
blocks = sbinfo->max_blocks - sbinfo->free_blocks;
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
- if (max_blocks < blocks)
+ if (config.max_blocks < blocks)
goto out;
- if (max_inodes < inodes)
+ if (config.max_inodes < inodes)
goto out;
/*
* Those tests also disallow limited->unlimited while any are in
* but we must separately disallow unlimited->limited, because
* in that case we have no record of how much is already in use.
*/
- if (max_blocks && !sbinfo->max_blocks)
+ if (config.max_blocks && !sbinfo->max_blocks)
goto out;
- if (max_inodes && !sbinfo->max_inodes)
+ if (config.max_inodes && !sbinfo->max_inodes)
goto out;
error = 0;
- sbinfo->max_blocks = max_blocks;
- sbinfo->free_blocks = max_blocks - blocks;
- sbinfo->max_inodes = max_inodes;
- sbinfo->free_inodes = max_inodes - inodes;
- sbinfo->policy = policy;
- sbinfo->policy_nodes = policy_nodes;
+ sbinfo->max_blocks = config.max_blocks;
+ sbinfo->free_blocks = config.max_blocks - blocks;
+ sbinfo->max_inodes = config.max_inodes;
+ sbinfo->free_inodes = config.max_inodes - inodes;
+
+ mpol_put(sbinfo->mpol);
+ sbinfo->mpol = config.mpol; /* transfers initial ref */
out:
spin_unlock(&sbinfo->stat_lock);
return error;
}
-#endif
+
+static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+ struct shmem_sb_info *sbinfo = SHMEM_SB(vfs->mnt_sb);
+
+ if (sbinfo->max_blocks != shmem_default_max_blocks())
+ seq_printf(seq, ",size=%luk",
+ sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
+ if (sbinfo->max_inodes != shmem_default_max_inodes())
+ seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+ if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
+ seq_printf(seq, ",mode=%03o", sbinfo->mode);
+ if (sbinfo->uid != 0)
+ seq_printf(seq, ",uid=%u", sbinfo->uid);
+ if (sbinfo->gid != 0)
+ seq_printf(seq, ",gid=%u", sbinfo->gid);
+ shmem_show_mpol(seq, sbinfo->mpol);
+ return 0;
+}
+#endif /* CONFIG_TMPFS */
static void shmem_put_super(struct super_block *sb)
{
{
struct inode *inode;
struct dentry *root;
- int mode = S_IRWXUGO | S_ISVTX;
- uid_t uid = current->fsuid;
- gid_t gid = current->fsgid;
- int err = -ENOMEM;
struct shmem_sb_info *sbinfo;
- unsigned long blocks = 0;
- unsigned long inodes = 0;
- int policy = MPOL_DEFAULT;
- nodemask_t policy_nodes = node_states[N_HIGH_MEMORY];
+ int err = -ENOMEM;
+
+ /* Round up to L1_CACHE_BYTES to resist false sharing */
+ sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
+ L1_CACHE_BYTES), GFP_KERNEL);
+ if (!sbinfo)
+ return -ENOMEM;
+
+ sbinfo->max_blocks = 0;
+ sbinfo->max_inodes = 0;
+ sbinfo->mode = S_IRWXUGO | S_ISVTX;
+ sbinfo->uid = current->fsuid;
+ sbinfo->gid = current->fsgid;
+ sbinfo->mpol = NULL;
+ sb->s_fs_info = sbinfo;
#ifdef CONFIG_TMPFS
/*
* but the internal instance is left unlimited.
*/
if (!(sb->s_flags & MS_NOUSER)) {
- blocks = totalram_pages / 2;
- inodes = totalram_pages - totalhigh_pages;
- if (inodes > blocks)
- inodes = blocks;
- if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
- &inodes, &policy, &policy_nodes))
- return -EINVAL;
+ sbinfo->max_blocks = shmem_default_max_blocks();
+ sbinfo->max_inodes = shmem_default_max_inodes();
+ if (shmem_parse_options(data, sbinfo, false)) {
+ err = -EINVAL;
+ goto failed;
+ }
}
sb->s_export_op = &shmem_export_ops;
#else
sb->s_flags |= MS_NOUSER;
#endif
- /* Round up to L1_CACHE_BYTES to resist false sharing */
- sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
- L1_CACHE_BYTES), GFP_KERNEL);
- if (!sbinfo)
- return -ENOMEM;
-
spin_lock_init(&sbinfo->stat_lock);
- sbinfo->max_blocks = blocks;
- sbinfo->free_blocks = blocks;
- sbinfo->max_inodes = inodes;
- sbinfo->free_inodes = inodes;
- sbinfo->policy = policy;
- sbinfo->policy_nodes = policy_nodes;
+ sbinfo->free_blocks = sbinfo->max_blocks;
+ sbinfo->free_inodes = sbinfo->max_inodes;
- sb->s_fs_info = sbinfo;
sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_flags |= MS_POSIXACL;
#endif
- inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
+ inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0);
if (!inode)
goto failed;
- inode->i_uid = uid;
- inode->i_gid = gid;
+ inode->i_uid = sbinfo->uid;
+ inode->i_gid = sbinfo->gid;
root = d_alloc_root(inode);
if (!root)
goto failed_iput;
#ifdef CONFIG_TMPFS
.statfs = shmem_statfs,
.remount_fs = shmem_remount_fs,
+ .show_options = shmem_show_options,
#endif
.delete_inode = shmem_delete_inode,
.drop_inode = generic_delete_inode,
}
module_init(init_tmpfs)
-/*
+/**
* shmem_file_setup - get an unlinked file living in tmpfs
- *
* @name: name for dentry (to be seen in /proc/<pid>/maps
* @size: size to be set for the file
- *
+ * @flags: vm_flags
*/
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
{
return ERR_PTR(error);
}
-/*
+/**
* shmem_zero_setup - setup a shared anonymous mapping
- *
* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
*/
int shmem_zero_setup(struct vm_area_struct *vma)