struct list_head lru; /* per cgroup LRU list */
struct page *page;
struct mem_cgroup *mem_cgroup;
- atomic_t ref_cnt; /* Helpful when pages move b/w */
- /* mapped and cached states */
+ int ref_cnt; /* cached, mapped, migrating */
int flags;
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
-static inline int page_cgroup_nid(struct page_cgroup *pc)
+static int page_cgroup_nid(struct page_cgroup *pc)
{
return page_to_nid(pc->page);
}
-static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
{
return page_zonenum(pc->page);
}
__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
}
-static inline struct mem_cgroup_per_zone *
+static struct mem_cgroup_per_zone *
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
{
- BUG_ON(!mem->info.nodeinfo[nid]);
return &mem->info.nodeinfo[nid]->zoneinfo[zid];
}
-static inline struct mem_cgroup_per_zone *
+static struct mem_cgroup_per_zone *
page_cgroup_zoneinfo(struct page_cgroup *pc)
{
struct mem_cgroup *mem = pc->mem_cgroup;
return total;
}
-static inline
-struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
mem_cgroup_subsys_id), struct mem_cgroup,
css);
}
-static inline
-struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
+static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
struct mem_cgroup, css);
return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
}
-static void __always_inline lock_page_cgroup(struct page *page)
+static void lock_page_cgroup(struct page *page)
{
bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
- VM_BUG_ON(!page_cgroup_locked(page));
}
-static void __always_inline unlock_page_cgroup(struct page *page)
+static int try_lock_page_cgroup(struct page *page)
{
- bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
+ return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
-/*
- * Clear page->page_cgroup member under lock_page_cgroup().
- * If given "pc" value is different from one page->page_cgroup,
- * page->cgroup is not cleared.
- * Returns a value of page->page_cgroup at lock taken.
- * A can can detect failure of clearing by following
- * clear_page_cgroup(page, pc) == pc
- */
-static struct page_cgroup *clear_page_cgroup(struct page *page,
- struct page_cgroup *pc)
+static void unlock_page_cgroup(struct page *page)
{
- struct page_cgroup *ret;
- /* lock and clear */
- lock_page_cgroup(page);
- ret = page_get_page_cgroup(page);
- if (likely(ret == pc))
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
- return ret;
+ bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
static void __mem_cgroup_remove_list(struct page_cgroup *pc)
struct mem_cgroup_per_zone *mz;
unsigned long flags;
- pc = page_get_page_cgroup(page);
- if (!pc)
+ /*
+ * We cannot lock_page_cgroup while holding zone's lru_lock,
+ * because other holders of lock_page_cgroup can be interrupted
+ * with an attempt to rotate_reclaimable_page. But we cannot
+ * safely get to page_cgroup without it, so just try_lock it:
+ * mem_cgroup_isolate_pages allows for page left on wrong list.
+ */
+ if (!try_lock_page_cgroup(page))
return;
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_move_lists(pc, active);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ mz = page_cgroup_zoneinfo(pc);
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ __mem_cgroup_move_lists(pc, active);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
+ }
+ unlock_page_cgroup(page);
}
/*
* the page has already been accounted.
*/
if (pc) {
- if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
- /* this page is under being uncharged ? */
- unlock_page_cgroup(page);
- cpu_relax();
- goto retry;
- } else {
- unlock_page_cgroup(page);
- goto done;
- }
+ VM_BUG_ON(pc->page != page);
+ VM_BUG_ON(pc->ref_cnt <= 0);
+
+ pc->ref_cnt++;
+ unlock_page_cgroup(page);
+ goto done;
}
unlock_page_cgroup(page);
congestion_wait(WRITE, HZ/10);
}
- atomic_set(&pc->ref_cnt, 1);
+ pc->ref_cnt = 1;
pc->mem_cgroup = mem;
pc->page = page;
pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
goto retry;
}
page_assign_page_cgroup(page, pc);
- unlock_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_add_list(pc);
spin_unlock_irqrestore(&mz->lru_lock, flags);
+ unlock_page_cgroup(page);
done:
return 0;
out:
if (!pc)
goto unlock;
- if (atomic_dec_and_test(&pc->ref_cnt)) {
- page = pc->page;
+ VM_BUG_ON(pc->page != page);
+ VM_BUG_ON(pc->ref_cnt <= 0);
+
+ if (--(pc->ref_cnt) == 0) {
mz = page_cgroup_zoneinfo(pc);
- /*
- * get page->cgroup and clear it under lock.
- * force_empty can drop page->cgroup without checking refcnt.
- */
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ __mem_cgroup_remove_list(pc);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
+
+ page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
- if (clear_page_cgroup(page, pc) == pc) {
- mem = pc->mem_cgroup;
- css_put(&mem->css);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
- kfree(pc);
- }
- lock_page_cgroup(page);
+
+ mem = pc->mem_cgroup;
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ css_put(&mem->css);
+
+ kfree(pc);
+ return;
}
unlock:
int mem_cgroup_prepare_migration(struct page *page)
{
struct page_cgroup *pc;
- int ret = 0;
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (pc && atomic_inc_not_zero(&pc->ref_cnt))
- ret = 1;
+ if (pc)
+ pc->ref_cnt++;
unlock_page_cgroup(page);
- return ret;
+ return pc != NULL;
}
void mem_cgroup_end_migration(struct page *page)
void mem_cgroup_page_migration(struct page *page, struct page *newpage)
{
struct page_cgroup *pc;
- struct mem_cgroup *mem;
- unsigned long flags;
struct mem_cgroup_per_zone *mz;
+ unsigned long flags;
-retry:
+ lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (!pc)
+ if (!pc) {
+ unlock_page_cgroup(page);
return;
+ }
- mem = pc->mem_cgroup;
mz = page_cgroup_zoneinfo(pc);
- if (clear_page_cgroup(page, pc) != pc)
- goto retry;
-
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_remove_list(pc);
spin_unlock_irqrestore(&mz->lru_lock, flags);
+ page_assign_page_cgroup(page, NULL);
+ unlock_page_cgroup(page);
+
pc->page = newpage;
lock_page_cgroup(newpage);
page_assign_page_cgroup(newpage, pc);
- unlock_page_cgroup(newpage);
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_add_list(pc);
spin_unlock_irqrestore(&mz->lru_lock, flags);
+
+ unlock_page_cgroup(newpage);
}
/*
{
struct page_cgroup *pc;
struct page *page;
- int count;
+ int count = FORCE_UNCHARGE_BATCH;
unsigned long flags;
struct list_head *list;
else
list = &mz->inactive_list;
- if (list_empty(list))
- return;
-retry:
- count = FORCE_UNCHARGE_BATCH;
spin_lock_irqsave(&mz->lru_lock, flags);
-
- while (--count && !list_empty(list)) {
+ while (!list_empty(list)) {
pc = list_entry(list->prev, struct page_cgroup, lru);
page = pc->page;
- /* Avoid race with charge */
- atomic_set(&pc->ref_cnt, 0);
- if (clear_page_cgroup(page, pc) == pc) {
- css_put(&mem->css);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- __mem_cgroup_remove_list(pc);
- kfree(pc);
- } else /* being uncharged ? ...do relax */
- break;
+ get_page(page);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ spin_lock_irqsave(&mz->lru_lock, flags);
}
-
spin_unlock_irqrestore(&mz->lru_lock, flags);
- if (!list_empty(list)) {
- cond_resched();
- goto retry;
- }
}
/*
* make mem_cgroup's charge to be 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
-int mem_cgroup_force_empty(struct mem_cgroup *mem)
+static int mem_cgroup_force_empty(struct mem_cgroup *mem)
{
int ret = -EBUSY;
int node, zid;
return ret;
}
-int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
+static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
{
*tmp = memparse(buf, &buf);
if (*buf != '\0')