fmem: add qcache implementation
Part of this code is originally from the 3.1 version of the tmem backend zcache. The zcache code was copied to the qcache directory and modified, notably in memory allocation (pages now come from a block of pre-reserved contiguous memory rather than from system memory) and to allow tmem and qcache to be turned on and off, either from a /sys file or through an exported API. A higher-level driver (fmem) is provided as the interface to the other drivers (PMEM, ION, tmem/qcache).

Change-Id: Ieda939ed1ba7c5337dd4338b9d9caffde883e82b
Signed-off-by: Larry Bassel <lbassel@codeaurora.org>
Committed by: Stephen Boyd
parent: 2496d8c228
commit: fca8a06f35
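A minimal sketch (not part of this commit) of how another driver such as PMEM or ION might use the exported on/off API described above. The declarations come from include/linux/fmem.h at the end of this diff; the helper names and the retry comment are assumptions for illustration.

#include <linux/fmem.h>

/* hypothetical helper: take the fmem region away from tmem/qcache */
static int example_claim_fmem_region(void)
{
        struct fmem_data *fd = fmem_get_info();
        int ret;

        if (!fd || !fd->size)
                return -ENODEV;

        ret = fmem_set_state(FMEM_C_STATE);     /* "c" = client state */
        if (ret)
                return ret;     /* may be -EAGAIN during memory hotplug */

        /* the region fd->phys .. fd->phys + fd->size is now the client's */
        return 0;
}

/* hypothetical helper: hand the region back to tmem/qcache */
static void example_release_fmem_region(void)
{
        fmem_set_state(FMEM_T_STATE);
}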
drivers/staging/Kconfig
@@ -84,6 +84,8 @@ source "drivers/staging/zram/Kconfig"
 
 source "drivers/staging/zcache/Kconfig"
 
+source "drivers/staging/qcache/Kconfig"
+
 source "drivers/staging/zsmalloc/Kconfig"
 
 source "drivers/staging/wlags49_h2/Kconfig"

drivers/staging/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_DX_SEP) += sep/
 obj-$(CONFIG_IIO) += iio/
 obj-$(CONFIG_ZRAM) += zram/
 obj-$(CONFIG_ZCACHE) += zcache/
+obj-$(CONFIG_QCACHE) += qcache/
 obj-$(CONFIG_ZSMALLOC) += zsmalloc/
 obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/
 obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/
drivers/staging/qcache/Kconfig (new file, 8 lines)
@@ -0,0 +1,8 @@
config QCACHE
        tristate "Dynamic compression of clean pagecache pages"
        depends on CLEANCACHE
        select LZO_COMPRESS
        select LZO_DECOMPRESS
        default n
        help
          Qcache is the backend for fmem
drivers/staging/qcache/Makefile (new file, 3 lines)
@@ -0,0 +1,3 @@
qcache-y := qcache-main.o tmem.o fmem.o

obj-$(CONFIG_QCACHE) += qcache.o
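A hedged note on enabling this code (kernel configuration, not part of the diff): QCACHE depends on CLEANCACHE and pulls in the LZO codecs via select, so a working .config fragment would look roughly like:

CONFIG_CLEANCACHE=y
CONFIG_QCACHE=y
# LZO_COMPRESS and LZO_DECOMPRESS are selected automatically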
drivers/staging/qcache/fmem.c (new file, 344 lines)
@@ -0,0 +1,344 @@
/*
 *
 * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <linux/export.h>
#include <linux/fmem.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#ifdef CONFIG_MEMORY_HOTPLUG
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#endif
#include "tmem.h"
#include <asm/mach/map.h>

struct fmem_data fmem_data;
enum fmem_state fmem_state;
static spinlock_t fmem_state_lock;

#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned int section_powered_off[NR_MEM_SECTIONS];
static unsigned int fmem_section_start, fmem_section_end;
#endif

void *fmem_map_virtual_area(int cacheability)
{
        unsigned long addr;
        const struct mem_type *type;
        int ret;

        addr = (unsigned long) fmem_data.area->addr;
        type = get_mem_type(cacheability);
        ret = ioremap_pages(addr, fmem_data.phys, fmem_data.size, type);
        if (ret)
                return ERR_PTR(ret);

        fmem_data.virt = fmem_data.area->addr;

        return fmem_data.virt;
}

void fmem_unmap_virtual_area(void)
{
        unmap_kernel_range((unsigned long)fmem_data.virt, fmem_data.size);
        fmem_data.virt = NULL;
}

static int fmem_probe(struct platform_device *pdev)
{
        struct fmem_platform_data *pdata = pdev->dev.platform_data;

        if (!pdata->phys)
                pdata->phys = allocate_contiguous_ebi_nomap(pdata->size,
                                pdata->align);

#ifdef CONFIG_MEMORY_HOTPLUG
        fmem_section_start = pdata->phys >> PA_SECTION_SHIFT;
        fmem_section_end = (pdata->phys - 1 + pdata->size) >> PA_SECTION_SHIFT;
#endif
        fmem_data.phys = pdata->phys + pdata->reserved_size_low;
        fmem_data.size = pdata->size - pdata->reserved_size_low -
                                pdata->reserved_size_high;
        fmem_data.reserved_size_low = pdata->reserved_size_low;
        fmem_data.reserved_size_high = pdata->reserved_size_high;

        if (!fmem_data.size)
                return -ENODEV;

        fmem_data.area = get_vm_area(fmem_data.size, VM_IOREMAP);
        if (!fmem_data.area)
                return -ENOMEM;

        if (!fmem_map_virtual_area(MT_DEVICE_CACHED)) {
                remove_vm_area(fmem_data.area->addr);
                return -ENOMEM;
        }
        pr_info("fmem phys %lx virt %p size %lx\n",
                fmem_data.phys, fmem_data.virt, fmem_data.size);

        spin_lock_init(&fmem_state_lock);

        return 0;
}

static int fmem_remove(struct platform_device *pdev)
{
        return 0;
}

static struct platform_driver fmem_driver = {
        .probe = fmem_probe,
        .remove = fmem_remove,
        .driver = { .name = "fmem" }
};

#ifdef CONFIG_SYSFS
static ssize_t fmem_state_show(struct kobject *kobj,
                        struct kobj_attribute *attr,
                        char *buf)
{
        if (fmem_state == FMEM_T_STATE)
                return snprintf(buf, 3, "t\n");
        else if (fmem_state == FMEM_C_STATE)
                return snprintf(buf, 3, "c\n");
#ifdef CONFIG_MEMORY_HOTPLUG
        else if (fmem_state == FMEM_O_STATE)
                return snprintf(buf, 3, "o\n");
#endif
        else if (fmem_state == FMEM_UNINITIALIZED)
                return snprintf(buf, 15, "uninitialized\n");
        return snprintf(buf, 3, "?\n");
}

static ssize_t fmem_state_store(struct kobject *kobj,
                        struct kobj_attribute *attr,
                        const char *buf, size_t count)
{
        int ret = -EINVAL;

        if (!strncmp(buf, "t", 1))
                ret = fmem_set_state(FMEM_T_STATE);
        else if (!strncmp(buf, "c", 1))
                ret = fmem_set_state(FMEM_C_STATE);
#ifdef CONFIG_MEMORY_HOTPLUG
        else if (!strncmp(buf, "o", 1))
                ret = fmem_set_state(FMEM_O_STATE);
#endif
        if (ret)
                return ret;
        return 1;
}

static struct kobj_attribute fmem_state_attr = {
        .attr = { .name = "state", .mode = 0644 },
        .show = fmem_state_show,
        .store = fmem_state_store,
};

static struct attribute *fmem_attrs[] = {
        &fmem_state_attr.attr,
        NULL,
};

static struct attribute_group fmem_attr_group = {
        .attrs = fmem_attrs,
        .name = "fmem",
};

static int fmem_create_sysfs(void)
{
        int ret = 0;

        ret = sysfs_create_group(mm_kobj, &fmem_attr_group);
        if (ret)
                pr_err("fmem: can't create sysfs\n");
        return ret;
}

#endif

#ifdef CONFIG_MEMORY_HOTPLUG
bool fmem_is_disjoint(unsigned long start_pfn, unsigned long nr_pages)
{
        unsigned long fmem_start_pfn, fmem_end_pfn;
        unsigned long unstable_end_pfn;
        unsigned long highest_start_pfn, lowest_end_pfn;

        fmem_start_pfn = (fmem_data.phys - fmem_data.reserved_size_low)
                >> PAGE_SHIFT;
        fmem_end_pfn = (fmem_data.phys + fmem_data.size +
                fmem_data.reserved_size_high - 1) >> PAGE_SHIFT;
        unstable_end_pfn = start_pfn + nr_pages - 1;

        highest_start_pfn = max(fmem_start_pfn, start_pfn);
        lowest_end_pfn = min(fmem_end_pfn, unstable_end_pfn);

        return lowest_end_pfn < highest_start_pfn;
}

static int fmem_mem_going_offline_callback(void *arg)
{
        struct memory_notify *marg = arg;

        if (fmem_is_disjoint(marg->start_pfn, marg->nr_pages))
                return 0;
        return fmem_set_state(FMEM_O_STATE);
}

static void fmem_mem_online_callback(void *arg)
{
        struct memory_notify *marg = arg;
        int i;

        section_powered_off[marg->start_pfn >> PFN_SECTION_SHIFT] = 0;

        if (fmem_state != FMEM_O_STATE)
                return;

        for (i = fmem_section_start; i <= fmem_section_end; i++) {
                if (section_powered_off[i])
                        return;
        }

        fmem_set_state(FMEM_T_STATE);
}

static void fmem_mem_offline_callback(void *arg)
{
        struct memory_notify *marg = arg;

        section_powered_off[marg->start_pfn >> PFN_SECTION_SHIFT] = 1;
}

static int fmem_memory_callback(struct notifier_block *self,
                                unsigned long action, void *arg)
{
        int ret = 0;

        if (fmem_state == FMEM_UNINITIALIZED)
                return NOTIFY_OK;

        switch (action) {
        case MEM_ONLINE:
                fmem_mem_online_callback(arg);
                break;
        case MEM_GOING_OFFLINE:
                ret = fmem_mem_going_offline_callback(arg);
                break;
        case MEM_OFFLINE:
                fmem_mem_offline_callback(arg);
                break;
        case MEM_GOING_ONLINE:
        case MEM_CANCEL_ONLINE:
        case MEM_CANCEL_OFFLINE:
                break;
        }
        if (ret)
                ret = notifier_from_errno(ret);
        else
                ret = NOTIFY_OK;
        return ret;
}
#endif

static int __init fmem_init(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
        hotplug_memory_notifier(fmem_memory_callback, 0);
#endif
        return platform_driver_register(&fmem_driver);
}

static void __exit fmem_exit(void)
{
        platform_driver_unregister(&fmem_driver);
}

struct fmem_data *fmem_get_info(void)
{
        return &fmem_data;
}
EXPORT_SYMBOL(fmem_get_info);

void lock_fmem_state(void)
{
        spin_lock(&fmem_state_lock);
}

void unlock_fmem_state(void)
{
        spin_unlock(&fmem_state_lock);
}

int fmem_set_state(enum fmem_state new_state)
{
        int ret = 0;
        int create_sysfs = 0;

        lock_fmem_state();
        if (fmem_state == new_state)
                goto out;

        if (fmem_state == FMEM_UNINITIALIZED) {
                if (new_state == FMEM_T_STATE) {
                        tmem_enable();
                        create_sysfs = 1;
                        goto out_set;
                } else {
                        ret = -EINVAL;
                        goto out;
                }
        }

#ifdef CONFIG_MEMORY_HOTPLUG
        if (fmem_state == FMEM_C_STATE && new_state == FMEM_O_STATE) {
                ret = -EAGAIN;
                goto out;
        }

        if (fmem_state == FMEM_O_STATE && new_state == FMEM_C_STATE) {
                pr_warn("attempting to use powered off memory as fmem\n");
                ret = -EAGAIN;
                goto out;
        }
#endif

        if (new_state == FMEM_T_STATE) {
                void *v;
                v = fmem_map_virtual_area(MT_DEVICE_CACHED);
                if (IS_ERR_OR_NULL(v)) {
                        ret = PTR_ERR(v);
                        goto out;
                }
                tmem_enable();
        } else {
                tmem_disable();
                fmem_unmap_virtual_area();
        }

out_set:
        fmem_state = new_state;
out:
        unlock_fmem_state();
#ifdef CONFIG_SYSFS
        if (create_sysfs)
                fmem_create_sysfs();
#endif
        return ret;
}
EXPORT_SYMBOL(fmem_set_state);

arch_initcall(fmem_init);
module_exit(fmem_exit);
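Since the attribute group above is registered on mm_kobj, the control file should surface as /sys/kernel/mm/fmem/state once the driver first enters the tmem state; writing "t", "c", or (with memory hotplug) "o" drives fmem_set_state() exactly as the exported API does. A minimal userspace sketch (not part of this commit; the path is inferred from mm_kobj):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/mm/fmem/state", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* one-byte write: "t" gives the region to tmem/qcache */
        if (write(fd, "t", 1) < 0)
                perror("write");
        close(fd);
        return 0;
}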
drivers/staging/qcache/qcache-main.c (new file, 1358 lines)
(file diff suppressed because it is too large)
drivers/staging/qcache/tmem.c (new file, 833 lines)
@@ -0,0 +1,833 @@
/*
 * In-kernel transcendent memory (generic implementation)
 *
 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
 *
 * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
 * "handles" (triples containing a pool id, an object id, and an index) to
 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
 * set of functions (pamops). Each pampd contains some representation of
 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
 * pages and must be able to insert, find, and delete these pages at a
 * potential frequency of thousands per second concurrently across many CPUs
 * (and, if used with KVM, across many vcpus across many guests).
 * Tmem is tracked with a hierarchy of data structures, organized by
 * the elements in a handle-tuple: pool_id, object_id, and page index.
 * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
 * Each pool contains a hash table of rb_trees of tmem_objs. Each
 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
 * nodes called tmem_objnodes. Each leaf pointer in this tree points to
 * a pampd, which is accessible only through a small set of callbacks
 * registered by the PAM implementation (see tmem_register_pamops). Tmem
 * does all memory allocation via a set of callbacks registered by the tmem
 * host implementation (e.g. see tmem_register_hostops).
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

#include "tmem.h"

/* data structure sentinels used for debugging... see tmem.h */
#define POOL_SENTINEL 0x87658765
#define OBJ_SENTINEL 0x12345678
#define OBJNODE_SENTINEL 0xfedcba09

static bool tmem_enabled;

static void lock_tmem_state(void)
{
        lock_fmem_state();
}

static void unlock_tmem_state(void)
{
        unlock_fmem_state();
}

/*
 * A tmem host implementation must use this function to register callbacks
 * for memory allocation.
 */
static struct tmem_hostops tmem_hostops;

static void tmem_objnode_tree_init(void);

void tmem_register_hostops(struct tmem_hostops *m)
{
        tmem_objnode_tree_init();
        tmem_hostops = *m;
}

/*
 * A tmem host implementation must use this function to register
 * callbacks for a page-accessible memory (PAM) implementation.
 */
static struct tmem_pamops tmem_pamops;

void tmem_register_pamops(struct tmem_pamops *m)
{
        tmem_pamops = *m;
}

/*
 * Oids are potentially very sparse and tmem_objs may have an indeterminately
 * short life, being added and deleted at a relatively high frequency.
 * So an rb_tree is an ideal data structure to manage tmem_objs. But because
 * of the potentially huge number of tmem_objs, each pool manages a hashtable
 * of rb_trees to reduce search, insert, delete, and rebalancing time.
 * Each hashbucket also has a lock to manage concurrent access.
 *
 * The following routines manage tmem_objs. When any tmem_obj is accessed,
 * the hashbucket lock must be held.
 */

/* searches for object==oid in pool, returns locked object if found */
static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
                                        struct tmem_oid *oidp)
{
        struct rb_node *rbnode;
        struct tmem_obj *obj;

        rbnode = hb->obj_rb_root.rb_node;
        while (rbnode) {
                BUG_ON(RB_EMPTY_NODE(rbnode));
                obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
                switch (tmem_oid_compare(oidp, &obj->oid)) {
                case 0: /* equal */
                        goto out;
                case -1:
                        rbnode = rbnode->rb_left;
                        break;
                case 1:
                        rbnode = rbnode->rb_right;
                        break;
                }
        }
        obj = NULL;
out:
        return obj;
}

static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);

/* free an object that has no more pampds in it */
static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
{
        struct tmem_pool *pool;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pampd_count > 0);
        pool = obj->pool;
        BUG_ON(pool == NULL);
        if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
                tmem_pampd_destroy_all_in_obj(obj);
        BUG_ON(obj->objnode_tree_root != NULL);
        BUG_ON((long)obj->objnode_count != 0);
        atomic_dec(&pool->obj_count);
        BUG_ON(atomic_read(&pool->obj_count) < 0);
        INVERT_SENTINEL(obj, OBJ);
        obj->pool = NULL;
        tmem_oid_set_invalid(&obj->oid);
        rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
}

/*
 * initialize, and insert a tmem_object_root (called only if find failed)
 */
static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
                                        struct tmem_pool *pool,
                                        struct tmem_oid *oidp)
{
        struct rb_root *root = &hb->obj_rb_root;
        struct rb_node **new = &(root->rb_node), *parent = NULL;
        struct tmem_obj *this;

        BUG_ON(pool == NULL);
        atomic_inc(&pool->obj_count);
        obj->objnode_tree_height = 0;
        obj->objnode_tree_root = NULL;
        obj->pool = pool;
        obj->oid = *oidp;
        obj->objnode_count = 0;
        obj->pampd_count = 0;
        (*tmem_pamops.new_obj)(obj);
        SET_SENTINEL(obj, OBJ);
        while (*new) {
                BUG_ON(RB_EMPTY_NODE(*new));
                this = rb_entry(*new, struct tmem_obj, rb_tree_node);
                parent = *new;
                switch (tmem_oid_compare(oidp, &this->oid)) {
                case 0:
                        BUG(); /* already present; should never happen! */
                        break;
                case -1:
                        new = &(*new)->rb_left;
                        break;
                case 1:
                        new = &(*new)->rb_right;
                        break;
                }
        }
        rb_link_node(&obj->rb_tree_node, parent, new);
        rb_insert_color(&obj->rb_tree_node, root);
}

/*
 * Tmem is managed as a set of tmem_pools with certain attributes, such as
 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
 * and all pampds that belong to a tmem_pool. A tmem_pool is created
 * or deleted relatively rarely (for example, when a filesystem is
 * mounted or unmounted).
 */

/* flush all data from a pool and, optionally, free it */
static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
{
        struct rb_node *rbnode;
        struct tmem_obj *obj;
        struct tmem_hashbucket *hb = &pool->hashbucket[0];
        int i;

        BUG_ON(pool == NULL);
        for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
                spin_lock(&hb->lock);
                rbnode = rb_first(&hb->obj_rb_root);
                while (rbnode != NULL) {
                        obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
                        rbnode = rb_next(rbnode);
                        tmem_pampd_destroy_all_in_obj(obj);
                        tmem_obj_free(obj, hb);
                        (*tmem_hostops.obj_free)(obj, pool);
                }
                spin_unlock(&hb->lock);
        }
        if (destroy)
                list_del(&pool->pool_list);
}

/*
 * A tmem_obj contains a radix-tree-like tree in which the intermediate
 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
 * is very specialized and tuned for specific uses and is not particularly
 * suited for use from this code, though some code from the core algorithms has
 * been reused, thus the copyright notices below). Each tmem_objnode contains
 * a set of pointers which point to either a set of intermediate tmem_objnodes
 * or a set of pampds.
 *
 * Portions Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
 */

struct tmem_objnode_tree_path {
        struct tmem_objnode *objnode;
        int offset;
};

/* objnode height_to_maxindex translation */
static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];

static void tmem_objnode_tree_init(void)
{
        unsigned int ht, tmp;

        for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
                tmp = ht * OBJNODE_TREE_MAP_SHIFT;
                if (tmp >= OBJNODE_TREE_INDEX_BITS)
                        tmem_objnode_tree_h2max[ht] = ~0UL;
                else
                        tmem_objnode_tree_h2max[ht] =
                                (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
        }
}

static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
{
        struct tmem_objnode *objnode;

        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pool == NULL);
        ASSERT_SENTINEL(obj->pool, POOL);
        objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
        if (unlikely(objnode == NULL))
                goto out;
        objnode->obj = obj;
        SET_SENTINEL(objnode, OBJNODE);
        memset(&objnode->slots, 0, sizeof(objnode->slots));
        objnode->slots_in_use = 0;
        obj->objnode_count++;
out:
        return objnode;
}

static void tmem_objnode_free(struct tmem_objnode *objnode)
{
        struct tmem_pool *pool;
        int i;

        BUG_ON(objnode == NULL);
        for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
                BUG_ON(objnode->slots[i] != NULL);
        ASSERT_SENTINEL(objnode, OBJNODE);
        INVERT_SENTINEL(objnode, OBJNODE);
        BUG_ON(objnode->obj == NULL);
        ASSERT_SENTINEL(objnode->obj, OBJ);
        pool = objnode->obj->pool;
        BUG_ON(pool == NULL);
        ASSERT_SENTINEL(pool, POOL);
        objnode->obj->objnode_count--;
        objnode->obj = NULL;
        (*tmem_hostops.objnode_free)(objnode, pool);
}

/*
 * lookup index in object and return associated pampd (or NULL if not found)
 */
static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
        unsigned int height, shift;
        struct tmem_objnode **slot = NULL;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pool == NULL);
        ASSERT_SENTINEL(obj->pool, POOL);

        height = obj->objnode_tree_height;
        if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
                goto out;
        if (height == 0 && obj->objnode_tree_root) {
                slot = &obj->objnode_tree_root;
                goto out;
        }
        shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
        slot = &obj->objnode_tree_root;
        while (height > 0) {
                if (*slot == NULL)
                        goto out;
                slot = (struct tmem_objnode **)
                        ((*slot)->slots +
                         ((index >> shift) & OBJNODE_TREE_MAP_MASK));
                shift -= OBJNODE_TREE_MAP_SHIFT;
                height--;
        }
out:
        return slot != NULL ? (void **)slot : NULL;
}

static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
        struct tmem_objnode **slot;

        slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
        return slot != NULL ? *slot : NULL;
}

static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
                                        void *new_pampd)
{
        struct tmem_objnode **slot;
        void *ret = NULL;

        slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
        if ((slot != NULL) && (*slot != NULL)) {
                void *old_pampd = *(void **)slot;
                *(void **)slot = new_pampd;
                (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
                ret = new_pampd;
        }
        return ret;
}

static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
                                        void *pampd)
{
        int ret = 0;
        struct tmem_objnode *objnode = NULL, *newnode, *slot;
        unsigned int height, shift;
        int offset = 0;

        /* if necessary, extend the tree to be higher */
        if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
                height = obj->objnode_tree_height + 1;
                if (index > tmem_objnode_tree_h2max[height])
                        while (index > tmem_objnode_tree_h2max[height])
                                height++;
                if (obj->objnode_tree_root == NULL) {
                        obj->objnode_tree_height = height;
                        goto insert;
                }
                do {
                        newnode = tmem_objnode_alloc(obj);
                        if (!newnode) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        newnode->slots[0] = obj->objnode_tree_root;
                        newnode->slots_in_use = 1;
                        obj->objnode_tree_root = newnode;
                        obj->objnode_tree_height++;
                } while (height > obj->objnode_tree_height);
        }
insert:
        slot = obj->objnode_tree_root;
        height = obj->objnode_tree_height;
        shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
        while (height > 0) {
                if (slot == NULL) {
                        /* add a child objnode. */
                        slot = tmem_objnode_alloc(obj);
                        if (!slot) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        if (objnode) {
                                objnode->slots[offset] = slot;
                                objnode->slots_in_use++;
                        } else
                                obj->objnode_tree_root = slot;
                }
                /* go down a level */
                offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
                objnode = slot;
                slot = objnode->slots[offset];
                shift -= OBJNODE_TREE_MAP_SHIFT;
                height--;
        }
        BUG_ON(slot != NULL);
        if (objnode) {
                objnode->slots_in_use++;
                objnode->slots[offset] = pampd;
        } else
                obj->objnode_tree_root = pampd;
        obj->pampd_count++;
out:
        return ret;
}

static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
{
        struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
        struct tmem_objnode_tree_path *pathp = path;
        struct tmem_objnode *slot = NULL;
        unsigned int height, shift;
        int offset;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pool == NULL);
        ASSERT_SENTINEL(obj->pool, POOL);
        height = obj->objnode_tree_height;
        if (index > tmem_objnode_tree_h2max[height])
                goto out;
        slot = obj->objnode_tree_root;
        if (height == 0 && obj->objnode_tree_root) {
                obj->objnode_tree_root = NULL;
                goto out;
        }
        shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
        pathp->objnode = NULL;
        do {
                if (slot == NULL)
                        goto out;
                pathp++;
                offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
                pathp->offset = offset;
                pathp->objnode = slot;
                slot = slot->slots[offset];
                shift -= OBJNODE_TREE_MAP_SHIFT;
                height--;
        } while (height > 0);
        if (slot == NULL)
                goto out;
        while (pathp->objnode) {
                pathp->objnode->slots[pathp->offset] = NULL;
                pathp->objnode->slots_in_use--;
                if (pathp->objnode->slots_in_use) {
                        if (pathp->objnode == obj->objnode_tree_root) {
                                while (obj->objnode_tree_height > 0 &&
                                        obj->objnode_tree_root->slots_in_use == 1 &&
                                        obj->objnode_tree_root->slots[0]) {
                                        struct tmem_objnode *to_free =
                                                obj->objnode_tree_root;

                                        obj->objnode_tree_root =
                                                to_free->slots[0];
                                        obj->objnode_tree_height--;
                                        to_free->slots[0] = NULL;
                                        to_free->slots_in_use = 0;
                                        tmem_objnode_free(to_free);
                                }
                        }
                        goto out;
                }
                tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
                pathp--;
        }
        obj->objnode_tree_height = 0;
        obj->objnode_tree_root = NULL;

out:
        if (slot != NULL)
                obj->pampd_count--;
        BUG_ON(obj->pampd_count < 0);
        return slot;
}

/* recursively walk the objnode_tree destroying pampds and objnodes */
static void tmem_objnode_node_destroy(struct tmem_obj *obj,
                                        struct tmem_objnode *objnode,
                                        unsigned int ht)
{
        int i;

        if (ht == 0)
                return;
        for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
                if (objnode->slots[i]) {
                        if (ht == 1) {
                                obj->pampd_count--;
                                (*tmem_pamops.free)(objnode->slots[i],
                                                obj->pool, NULL, 0);
                                objnode->slots[i] = NULL;
                                continue;
                        }
                        tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
                        tmem_objnode_free(objnode->slots[i]);
                        objnode->slots[i] = NULL;
                }
        }
}

static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
{
        if (obj->objnode_tree_root == NULL)
                return;
        if (obj->objnode_tree_height == 0) {
                obj->pampd_count--;
                (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
        } else {
                tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
                                        obj->objnode_tree_height);
                tmem_objnode_free(obj->objnode_tree_root);
                obj->objnode_tree_height = 0;
        }
        obj->objnode_tree_root = NULL;
        (*tmem_pamops.free_obj)(obj->pool, obj);
}

/*
 * Tmem is operated on by a set of well-defined actions:
 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
 * (The tmem ABI allows for subpages and exchanges but these operations
 * are not included in this implementation.)
 *
 * These "tmem core" operations are implemented in the following functions.
 */

/*
 * "Put" a page, e.g. copy a page from the kernel into newly allocated
 * PAM space (if such space is available). Tmem_put is complicated by
 * a corner case: What if a page with matching handle already exists in
 * tmem? To guarantee coherency, one of two actions is necessary: Either
 * the data for the page must be overwritten, or the page must be
 * "flushed" so that the data is not accessible to a subsequent "get".
 * Since these "duplicate puts" are relatively rare, this implementation
 * always flushes for simplicity.
 */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
                char *data, size_t size, bool raw, bool ephemeral)
{
        struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
        void *pampd = NULL, *pampd_del = NULL;
        int ret = -ENOMEM;
        struct tmem_hashbucket *hb;

        lock_tmem_state();
        if (!tmem_enabled)
                goto disabled;
        hb = &pool->hashbucket[tmem_oid_hash(oidp)];
        spin_lock(&hb->lock);
        obj = objfound = tmem_obj_find(hb, oidp);
        if (obj != NULL) {
                pampd = tmem_pampd_lookup_in_obj(objfound, index);
                if (pampd != NULL) {
                        /* if found, is a dup put, flush the old one */
                        pampd_del = tmem_pampd_delete_from_obj(obj, index);
                        BUG_ON(pampd_del != pampd);
                        (*tmem_pamops.free)(pampd, pool, oidp, index);
                        if (obj->pampd_count == 0) {
                                objnew = obj;
                                objfound = NULL;
                        }
                        pampd = NULL;
                }
        } else {
                obj = objnew = (*tmem_hostops.obj_alloc)(pool);
                if (unlikely(obj == NULL)) {
                        ret = -ENOMEM;
                        goto out;
                }
                tmem_obj_init(obj, hb, pool, oidp);
        }
        BUG_ON(obj == NULL);
        BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
        pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
                                        obj->pool, &obj->oid, index);
        if (unlikely(pampd == NULL))
                goto free;
        ret = tmem_pampd_add_to_obj(obj, index, pampd);
        if (unlikely(ret == -ENOMEM))
                /* may have partially built objnode tree ("stump") */
                goto delete_and_free;
        goto out;

delete_and_free:
        (void)tmem_pampd_delete_from_obj(obj, index);
free:
        if (pampd)
                (*tmem_pamops.free)(pampd, pool, NULL, 0);
        if (objnew) {
                tmem_obj_free(objnew, hb);
                (*tmem_hostops.obj_free)(objnew, pool);
        }
out:
        spin_unlock(&hb->lock);
disabled:
        unlock_tmem_state();
        return ret;
}

/*
 * "Get" a page, e.g. if one can be found, copy the tmem page with the
 * matching handle from PAM space to the kernel. By tmem definition,
 * when a "get" is successful on an ephemeral page, the page is "flushed",
 * and when a "get" is successful on a persistent page, the page is retained
 * in tmem. Note that to preserve coherency, "get" can never be skipped if
 * tmem contains the data. That is, if a get is done with a certain handle
 * and fails, any subsequent "get" must also fail (unless of course there
 * is a "put" done with the same handle).
 */
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
                char *data, size_t *size, bool raw, int get_and_free)
{
        struct tmem_obj *obj;
        void *pampd;
        bool ephemeral = is_ephemeral(pool);
        int ret = -1;
        struct tmem_hashbucket *hb;
        bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
        bool lock_held = false;

        lock_tmem_state();
        if (!tmem_enabled)
                goto disabled;
        hb = &pool->hashbucket[tmem_oid_hash(oidp)];
        spin_lock(&hb->lock);
        lock_held = true;
        obj = tmem_obj_find(hb, oidp);
        if (obj == NULL)
                goto out;
        if (free)
                pampd = tmem_pampd_delete_from_obj(obj, index);
        else
                pampd = tmem_pampd_lookup_in_obj(obj, index);
        if (pampd == NULL)
                goto out;
        if (free) {
                if (obj->pampd_count == 0) {
                        tmem_obj_free(obj, hb);
                        (*tmem_hostops.obj_free)(obj, pool);
                        obj = NULL;
                }
        }
        if (tmem_pamops.is_remote(pampd)) {
                lock_held = false;
                spin_unlock(&hb->lock);
        }
        if (free)
                ret = (*tmem_pamops.get_data_and_free)(
                                data, size, raw, pampd, pool, oidp, index);
        else
                ret = (*tmem_pamops.get_data)(
                                data, size, raw, pampd, pool, oidp, index);
        if (ret < 0)
                goto out;
        ret = 0;
out:
        if (lock_held)
                spin_unlock(&hb->lock);
disabled:
        unlock_tmem_state();
        return ret;
}

/*
 * If a page in tmem matches the handle, "flush" this page from tmem such
 * that any subsequent "get" does not succeed (unless, of course, there
 * was another "put" with the same handle).
 */
int tmem_flush_page(struct tmem_pool *pool,
                        struct tmem_oid *oidp, uint32_t index)
{
        struct tmem_obj *obj;
        void *pampd;
        int ret = -1;
        struct tmem_hashbucket *hb;

        hb = &pool->hashbucket[tmem_oid_hash(oidp)];
        spin_lock(&hb->lock);
        obj = tmem_obj_find(hb, oidp);
        if (obj == NULL)
                goto out;
        pampd = tmem_pampd_delete_from_obj(obj, index);
        if (pampd == NULL)
                goto out;
        (*tmem_pamops.free)(pampd, pool, oidp, index);
        if (obj->pampd_count == 0) {
                tmem_obj_free(obj, hb);
                (*tmem_hostops.obj_free)(obj, pool);
        }
        ret = 0;

out:
        spin_unlock(&hb->lock);
        return ret;
}

/*
 * If a page in tmem matches the handle, replace the page so that any
 * subsequent "get" gets the new page. Returns 0 if
 * there was a page to replace, else returns -1.
 */
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
                        uint32_t index, void *new_pampd)
{
        struct tmem_obj *obj;
        int ret = -1;
        struct tmem_hashbucket *hb;

        lock_tmem_state();
        if (!tmem_enabled)
                goto disabled;
        hb = &pool->hashbucket[tmem_oid_hash(oidp)];
        spin_lock(&hb->lock);
        obj = tmem_obj_find(hb, oidp);
        if (obj == NULL)
                goto out;
        new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
        ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
out:
        spin_unlock(&hb->lock);
disabled:
        unlock_tmem_state();
        return ret;
}

/*
 * "Flush" all pages in tmem matching this oid.
 */
int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
{
        struct tmem_obj *obj;
        struct tmem_hashbucket *hb;
        int ret = -1;

        hb = &pool->hashbucket[tmem_oid_hash(oidp)];
        spin_lock(&hb->lock);
        obj = tmem_obj_find(hb, oidp);
        if (obj == NULL)
                goto out;
        tmem_pampd_destroy_all_in_obj(obj);
        tmem_obj_free(obj, hb);
        (*tmem_hostops.obj_free)(obj, pool);
        ret = 0;

out:
        spin_unlock(&hb->lock);
        return ret;
}

/*
 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
 * all subsequent access to this tmem_pool.
 */
int tmem_destroy_pool(struct tmem_pool *pool)
{
        int ret = -1;

        if (pool == NULL)
                goto out;
        tmem_pool_flush(pool, 1);
        ret = 0;
out:
        return ret;
}

int tmem_flush_pool(struct tmem_pool *pool)
{
        int ret = -1;

        if (pool == NULL)
                goto out;
        tmem_pool_flush(pool, 0);
        ret = 0;
out:
        return ret;
}

static LIST_HEAD(tmem_global_pool_list);

/*
 * Create a new tmem_pool with the provided flag and return
 * a pool id provided by the tmem host implementation.
 */
void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
{
        int persistent = flags & TMEM_POOL_PERSIST;
        int shared = flags & TMEM_POOL_SHARED;
        struct tmem_hashbucket *hb = &pool->hashbucket[0];
        int i;

        for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
                hb->obj_rb_root = RB_ROOT;
                spin_lock_init(&hb->lock);
        }
        INIT_LIST_HEAD(&pool->pool_list);
        atomic_set(&pool->obj_count, 0);
        SET_SENTINEL(pool, POOL);
        list_add_tail(&pool->pool_list, &tmem_global_pool_list);
        pool->persistent = persistent;
        pool->shared = shared;
}

/* The following must be called with tmem state locked */
static void tmem_cleanup(void)
{
        (*tmem_hostops.flush_all_obj)();
}

void tmem_enable(void)
{
        pr_info("turning tmem on\n");
        tmem_enabled = true;

        (*tmem_hostops.control)(false);
}

void tmem_disable(void)
{
        pr_info("turning tmem off\n");
        tmem_enabled = false;

        tmem_cleanup();
        (*tmem_hostops.control)(true);
}
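The comments above define the handle tuple (pool, object id, page index) and the duplicate-put policy. A sketch (not part of this commit) of how a cleancache-style frontend such as qcache-main.c might drive the two core entry points; the pool pointer and the inode-derived oid here are assumptions for illustration:

static int example_cache_page(struct tmem_pool *pool, uint64_t ino,
                uint32_t index, char *page_data)
{
        struct tmem_oid oid = { .oid = { ino, 0, 0 } };

        /* raw == false lets the pamops create() hook compress the data */
        return tmem_put(pool, &oid, index, page_data, PAGE_SIZE,
                        false, is_ephemeral(pool));
}

static int example_lookup_page(struct tmem_pool *pool, uint64_t ino,
                uint32_t index, char *page_data)
{
        struct tmem_oid oid = { .oid = { ino, 0, 0 } };
        size_t size = PAGE_SIZE;

        /* get_and_free == 0: ephemeral pools still flush on a hit */
        return tmem_get(pool, &oid, index, page_data, &size, false, 0);
}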
drivers/staging/qcache/tmem.h (new file, 212 lines)
@@ -0,0 +1,212 @@
/*
 * tmem.h
 *
 * Transcendent memory
 *
 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
 */

#ifndef _TMEM_H_
#define _TMEM_H_

#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/hash.h>
#include <linux/atomic.h>
#include <linux/fmem.h>

/*
 * These are pre-defined by the Xen<->Linux ABI
 */
#define TMEM_PUT_PAGE                   4
#define TMEM_GET_PAGE                   5
#define TMEM_FLUSH_PAGE                 6
#define TMEM_FLUSH_OBJECT               7
#define TMEM_POOL_PERSIST               1
#define TMEM_POOL_SHARED                2
#define TMEM_POOL_PRECOMPRESSED         4
#define TMEM_POOL_PAGESIZE_SHIFT        4
#define TMEM_POOL_PAGESIZE_MASK         0xf
#define TMEM_POOL_RESERVED_BITS         0x00ffff00

/*
 * sentinels have proven very useful for debugging but can be removed
 * or disabled before final merge.
 */
#define SENTINELS
#ifdef SENTINELS
#define DECL_SENTINEL uint32_t sentinel;
#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
#else
#define DECL_SENTINEL
#define SET_SENTINEL(_x, _y) do { } while (0)
#define INVERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
#endif

/*
 * A pool is the highest-level data structure managed by tmem and
 * usually corresponds to a large independent set of pages such as
 * a filesystem. Each pool has an id, and certain attributes and counters.
 * It also contains a set of hash buckets, each of which contains an rbtree
 * of objects and a lock to manage concurrency within the pool.
 */

#define TMEM_HASH_BUCKET_BITS 8
#define TMEM_HASH_BUCKETS (1<<TMEM_HASH_BUCKET_BITS)

struct tmem_hashbucket {
        struct rb_root obj_rb_root;
        spinlock_t lock;
};

struct tmem_pool {
        void *client; /* "up" for some clients, avoids table lookup */
        struct list_head pool_list;
        uint32_t pool_id;
        bool persistent;
        bool shared;
        atomic_t obj_count;
        atomic_t refcount;
        struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
        DECL_SENTINEL
};

#define is_persistent(_p) (_p->persistent)
#define is_ephemeral(_p) (!(_p->persistent))

/*
 * An object id ("oid") is large: 192 bits (to ensure, for example, files
 * in a modern filesystem can be uniquely identified).
 */

struct tmem_oid {
        uint64_t oid[3];
};

static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
{
        oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
}

static inline bool tmem_oid_valid(struct tmem_oid *oidp)
{
        return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
                oidp->oid[2] != -1UL;
}

static inline int tmem_oid_compare(struct tmem_oid *left,
                                        struct tmem_oid *right)
{
        int ret;

        if (left->oid[2] == right->oid[2]) {
                if (left->oid[1] == right->oid[1]) {
                        if (left->oid[0] == right->oid[0])
                                ret = 0;
                        else if (left->oid[0] < right->oid[0])
                                ret = -1;
                        else
                                return 1;
                } else if (left->oid[1] < right->oid[1])
                        ret = -1;
                else
                        ret = 1;
        } else if (left->oid[2] < right->oid[2])
                ret = -1;
        else
                ret = 1;
        return ret;
}

static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
{
        return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
                                TMEM_HASH_BUCKET_BITS);
}

/*
 * A tmem_obj contains an identifier (oid), pointers to the parent
 * pool and the rb_tree to which it belongs, counters, and an ordered
 * set of pampds, structured in a radix-tree-like tree. The intermediate
 * nodes of the tree are called tmem_objnodes.
 */

struct tmem_objnode;

struct tmem_obj {
        struct tmem_oid oid;
        struct tmem_pool *pool;
        struct rb_node rb_tree_node;
        struct tmem_objnode *objnode_tree_root;
        unsigned int objnode_tree_height;
        unsigned long objnode_count;
        long pampd_count;
        void *extra; /* for private use by pampd implementation */
        DECL_SENTINEL
};

#define OBJNODE_TREE_MAP_SHIFT 6
#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH \
                (OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)

struct tmem_objnode {
        struct tmem_obj *obj;
        DECL_SENTINEL
        void *slots[OBJNODE_TREE_MAP_SIZE];
        unsigned int slots_in_use;
};

/* pampd abstract datatype methods provided by the PAM implementation */
struct tmem_pamops {
        void *(*create)(char *, size_t, bool, int,
                        struct tmem_pool *, struct tmem_oid *, uint32_t);
        int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
                        struct tmem_oid *, uint32_t);
        int (*get_data_and_free)(char *, size_t *, bool, void *,
                                        struct tmem_pool *, struct tmem_oid *,
                                        uint32_t);
        void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
        void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
        bool (*is_remote)(void *);
        void (*new_obj)(struct tmem_obj *);
        int (*replace_in_obj)(void *, struct tmem_obj *);
};
extern void tmem_register_pamops(struct tmem_pamops *m);

/* memory allocation methods provided by the host implementation */
struct tmem_hostops {
        struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
        void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
        struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
        void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
        void (*flush_all_obj)(void);
        void (*control)(bool);
};
extern void tmem_register_hostops(struct tmem_hostops *m);

/* core tmem accessor functions */
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
                        char *, size_t, bool, bool);
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
                        char *, size_t *, bool, int);
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
                        void *);
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
                        uint32_t index);
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
extern int tmem_destroy_pool(struct tmem_pool *);
extern int tmem_flush_pool(struct tmem_pool *);
extern void tmem_new_pool(struct tmem_pool *, uint32_t);

extern void tmem_enable(void);
extern void tmem_disable(void);
#endif /* _TMEM_H */
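A worked example (not part of this commit) of the objnode tree geometry implied by the constants above: with OBJNODE_TREE_MAP_SHIFT = 6 each objnode has 64 slots, so tmem_objnode_tree_init() computes a height_to_maxindex table of 0, 63, 4095, 262143, ... on a 64-bit kernel. A standalone re-computation:

#include <stdio.h>

#define MAP_SHIFT   6
#define INDEX_BITS  (8 * sizeof(unsigned long))

int main(void)
{
        unsigned int ht;

        for (ht = 0; ht <= 4; ht++) {
                unsigned int tmp = ht * MAP_SHIFT;
                unsigned long max = (tmp >= INDEX_BITS) ? ~0UL :
                        (~0UL >> (INDEX_BITS - tmp - 1)) >> 1;

                /* height 0 holds one pampd; each level multiplies by 64 */
                printf("height %u covers indices 0..%lu\n", ht, max);
        }
        return 0;
}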
include/linux/fmem.h (new file, 62 lines)
@@ -0,0 +1,62 @@
/*
 *
 * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
#ifndef _FMEM_H_
#define _FMEM_H_

#include <linux/vmalloc.h>

struct fmem_platform_data {
        unsigned long phys;
        unsigned long size;
        unsigned long reserved_size_low;
        unsigned long reserved_size_high;
        unsigned long align;
};

struct fmem_data {
        unsigned long phys;
        void *virt;
        struct vm_struct *area;
        unsigned long size;
        unsigned long reserved_size_low;
        unsigned long reserved_size_high;
};

enum fmem_state {
        FMEM_UNINITIALIZED = 0,
        FMEM_C_STATE,
        FMEM_T_STATE,
        FMEM_O_STATE,
};

#ifdef CONFIG_QCACHE
struct fmem_data *fmem_get_info(void);
int fmem_set_state(enum fmem_state);
void lock_fmem_state(void);
void unlock_fmem_state(void);
void *fmem_map_virtual_area(int cacheability);
void fmem_unmap_virtual_area(void);
#else
static inline struct fmem_data *fmem_get_info(void) { return NULL; }
static inline int fmem_set_state(enum fmem_state f) { return -ENODEV; }
static inline void lock_fmem_state(void) { return; }
static inline void unlock_fmem_state(void) { return; }
static inline void *fmem_map_virtual_area(int cacheability) { return NULL; }
static inline void fmem_unmap_virtual_area(void) { return; }
#endif

int request_fmem_c_region(void *unused);
int release_fmem_c_region(void *unused);
#endif
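fmem_probe() above binds to a platform device named "fmem" and reads this fmem_platform_data, falling back to allocate_contiguous_ebi_nomap() when .phys is zero. A board-file sketch (not part of this commit; the sizes are made-up placeholders):

#include <linux/platform_device.h>
#include <linux/fmem.h>

static struct fmem_platform_data board_fmem_pdata = {
        .phys = 0,                      /* 0: let fmem_probe() allocate */
        .size = 8 * 1024 * 1024,        /* placeholder carveout size */
        .reserved_size_low = 0,
        .reserved_size_high = 0,
        .align = 4096,
};

static struct platform_device board_fmem_device = {
        .name = "fmem",                 /* must match fmem_driver.driver.name */
        .id = -1,
        .dev = { .platform_data = &board_fmem_pdata },
};

/* registered from board init: platform_device_register(&board_fmem_device); */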