Pulled in android-ready zram from davidmueller13/Vindicator-flo-aosp.
@@ -1,9 +1,6 @@
config ZRAM
tristate "Compressed RAM block device support"
# X86 dependency is because zsmalloc uses non-portable pte/tlb
# functions
depends on BLOCK && SYSFS && X86
select ZSMALLOC
depends on BLOCK && SYSFS && ZSMALLOC
select LZO_COMPRESS
select LZO_DECOMPRESS
default n
@@ -17,7 +14,7 @@ config ZRAM
disks and maybe many more.

See zram.txt for more information.
Project home: http://compcache.googlecode.com/
Project home: <https://compcache.googlecode.com/>

config ZRAM_DEBUG
bool "Compressed RAM block device debug support"

@@ -1,3 +1,3 @@
zram-y := zram_drv.o zram_sysfs.o
zram-y := zram_drv.o

obj-$(CONFIG_ZRAM) += zram.o

@@ -23,17 +23,17 @@ Following shows a typical sequence of steps for using zram.
This creates 4 devices: /dev/zram{0,1,2,3}
(num_devices parameter is optional. Default: 1)

2) Set Disksize (Optional):
Set disk size by writing the value to sysfs node 'disksize'
(in bytes). If disksize is not given, default value of 25%
of RAM is used.

2) Set Disksize
Set disk size by writing the value to sysfs node 'disksize'.
The value can be either in bytes or you can use mem suffixes.
Examples:
# Initialize /dev/zram0 with 50MB disksize
echo $((50*1024*1024)) > /sys/block/zram0/disksize

NOTE: disksize cannot be changed if the disk contains any
data. So, for such a disk, you need to issue 'reset' (see below)
before you can change its disksize.
# Using mem suffixes
echo 256K > /sys/block/zram0/disksize
echo 512M > /sys/block/zram0/disksize
echo 1G > /sys/block/zram0/disksize

3) Activate:
mkswap /dev/zram0
@@ -65,8 +65,9 @@ Following shows a typical sequence of steps for using zram.
echo 1 > /sys/block/zram0/reset
echo 1 > /sys/block/zram1/reset

(This frees all the memory allocated for the given device).

This frees all the memory allocated for the given device and
resets the disksize to zero. You must set the disksize again
before reusing the device.

Please report any problems at:
- Mailing list: linux-mm-cc at laptop dot org

File diff suppressed because it is too large
@@ -26,33 +26,18 @@
*/
static const unsigned max_num_devices = 32;

/*
* Stored at beginning of each compressed object.
*
* It stores back-reference to table entry which points to this
* object. This is required to support memory defragmentation.
*/
struct zobj_header {
#if 0
u32 table_idx;
#endif
};

/*-- Configurable parameters */

/* Default zram disk size: 25% of total RAM */
static const unsigned default_disksize_perc_ram = 25;

/*
* Pages that compress to size greater than this are stored
* uncompressed in memory.
*/
static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
static const size_t max_zpage_size = PAGE_SIZE / 10 * 9;

/*
* NOTE: max_zpage_size must be less than or equal to:
* ZS_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
* otherwise, xv_malloc() would always return failure.
* ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
* always return failure.
*/

/*-- End of configurable params */
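For context on the threshold above: when a page's compressed form exceeds max_zpage_size, zram stores that page uncompressed, so raising the limit from 3/4 to 9/10 of PAGE_SIZE lets more marginally compressible pages stay compressed. A minimal sketch of that size decision (illustrative only, not code from this commit; the helper name is invented):

static size_t zram_stored_size(size_t compressed_len)
{
	/* poorly compressible page: keep the original PAGE_SIZE bytes */
	if (compressed_len > max_zpage_size)
		return PAGE_SIZE;

	return compressed_len;
}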
@@ -68,9 +53,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;

/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
/* Page is stored uncompressed */
ZRAM_UNCOMPRESSED,

/* Page consists entirely of zeros */
ZRAM_ZERO,

@@ -81,34 +63,51 @@ enum zram_pageflags {

/* Allocated for each disk page */
struct table {
void *handle;
unsigned long handle;
u16 size; /* object size (excluding header) */
u8 count; /* object ref count (not yet used) */
u8 flags;
} __attribute__((aligned(4)));
} __aligned(4);

/*
* All 64bit fields should only be manipulated by 64bit atomic accessors.
* All modifications to 32bit counter should be protected by zram->lock.
*/
struct zram_stats {
u64 compr_size; /* compressed size of pages stored */
u64 num_reads; /* failed + successful */
u64 num_writes; /* --do-- */
u64 failed_reads; /* should NEVER! happen */
u64 failed_writes; /* can happen when memory is too low */
u64 invalid_io; /* non-page-aligned I/O requests */
u64 notify_free; /* no. of swap slot free notifications */
atomic64_t compr_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
atomic64_t num_writes; /* --do-- */
atomic64_t failed_reads; /* should NEVER! happen */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
u32 pages_expand; /* % of incompressible pages */
u32 bad_compress; /* % of pages with compression ratio>=75% */
};
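A hedged sketch of what the atomic64_t conversion above buys (helper names invented, not from this commit): 64-bit counters can be bumped and read directly with the atomic64 accessors instead of taking the old stat64_lock spinlock around every update.

static inline void zram_inc_reads(struct zram_stats *stats)
{
	/* previously: spin_lock(stat64_lock); num_reads++; spin_unlock(...) */
	atomic64_inc(&stats->num_reads);
}

static inline u64 zram_get_reads(struct zram_stats *stats)
{
	return atomic64_read(&stats->num_reads);
}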

struct zram {
struct zs_pool *mem_pool;
struct zram_meta {
void *compress_workmem;
void *compress_buffer;
struct table *table;
spinlock_t stat64_lock; /* protect 64-bit stats */
struct rw_semaphore lock; /* protect compression buffers and table
* against concurrent read and writes */
struct zs_pool *mem_pool;
};

struct zram_slot_free {
unsigned long index;
struct zram_slot_free *next;
};

struct zram {
struct zram_meta *meta;
struct rw_semaphore lock; /* protect compression buffers, table,
* 32bit stat counters against concurrent
* notifications, reads and writes */

struct work_struct free_work; /* handle pending free request */
struct zram_slot_free *slot_free_rq; /* list head of free request */

struct request_queue *queue;
struct gendisk *disk;
int init_done;
@@ -119,17 +118,8 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
spinlock_t slot_free_lock;

struct zram_stats stats;
};

extern struct zram *zram_devices;
unsigned int zram_get_num_devices(void);
#ifdef CONFIG_SYSFS
extern struct attribute_group zram_disk_attr_group;
#endif

extern int zram_init_device(struct zram *zram);
extern void __zram_reset_device(struct zram *zram);

#endif

@@ -1,227 +0,0 @@
/*
* Compressed RAM block device
*
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the licence that better fits your requirements.
*
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*
* Project home: http://compcache.googlecode.com/
*/

#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/mm.h>

#include "zram_drv.h"

static u64 zram_stat64_read(struct zram *zram, u64 *v)
{
u64 val;

spin_lock(&zram->stat64_lock);
val = *v;
spin_unlock(&zram->stat64_lock);

return val;
}

static struct zram *dev_to_zram(struct device *dev)
{
int i;
struct zram *zram = NULL;

for (i = 0; i < zram_get_num_devices(); i++) {
zram = &zram_devices[i];
if (disk_to_dev(zram->disk) == dev)
break;
}

return zram;
}

static ssize_t disksize_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n", zram->disksize);
}

static ssize_t disksize_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int ret;
u64 disksize;
struct zram *zram = dev_to_zram(dev);

ret = kstrtoull(buf, 10, &disksize);
if (ret)
return ret;

down_write(&zram->init_lock);
if (zram->init_done) {
up_write(&zram->init_lock);
pr_info("Cannot change disksize for initialized device\n");
return -EBUSY;
}

zram->disksize = PAGE_ALIGN(disksize);
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
up_write(&zram->init_lock);

return len;
}

static ssize_t initstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%u\n", zram->init_done);
}

static ssize_t reset_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int ret;
unsigned short do_reset;
struct zram *zram;
struct block_device *bdev;

zram = dev_to_zram(dev);
bdev = bdget_disk(zram->disk, 0);

/* Do not reset an active device! */
if (bdev->bd_holders)
return -EBUSY;

ret = kstrtou16(buf, 10, &do_reset);
if (ret)
return ret;

if (!do_reset)
return -EINVAL;

/* Make sure all pending I/O is finished */
if (bdev)
fsync_bdev(bdev);

down_write(&zram->init_lock);
if (zram->init_done)
__zram_reset_device(zram);
up_write(&zram->init_lock);

return len;
}

static ssize_t num_reads_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.num_reads));
}

static ssize_t num_writes_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.num_writes));
}

static ssize_t invalid_io_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.invalid_io));
}

static ssize_t notify_free_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.notify_free));
}

static ssize_t zero_pages_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%u\n", zram->stats.pages_zero);
}

static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
(u64)(zram->stats.pages_stored) << PAGE_SHIFT);
}

static ssize_t compr_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.compr_size));
}

static ssize_t mem_used_total_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
u64 val = 0;
struct zram *zram = dev_to_zram(dev);

if (zram->init_done) {
val = zs_get_total_size_bytes(zram->mem_pool) +
((u64)(zram->stats.pages_expand) << PAGE_SHIFT);
}

return sprintf(buf, "%llu\n", val);
}

static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);

static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
&dev_attr_initstate.attr,
&dev_attr_reset.attr,
&dev_attr_num_reads.attr,
&dev_attr_num_writes.attr,
&dev_attr_invalid_io.attr,
&dev_attr_notify_free.attr,
&dev_attr_zero_pages.attr,
&dev_attr_orig_data_size.attr,
&dev_attr_compr_data_size.attr,
&dev_attr_mem_used_total.attr,
NULL,
};

struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
@@ -1,9 +1,5 @@
config ZSMALLOC
tristate "Memory allocator for compressed pages"
# X86 dependency is because of the use of __flush_tlb_one and set_pte
# in zsmalloc-main.c.
# TODO: convert these to portable functions
depends on X86
bool "Memory allocator for compressed pages"
default n
help
zsmalloc is a slab-based memory allocator designed to store

@@ -1,3 +1,4 @@
CFLAGS_zsmalloc-main.o := -Wno-error=implicit-function-declaration -Wno-implicit-function-declaration
zsmalloc-y := zsmalloc-main.o

obj-$(CONFIG_ZSMALLOC) += zsmalloc.o

@@ -10,6 +10,54 @@
* Released under the terms of GNU General Public License Version 2.0
*/


/*
* This allocator is designed for use with zcache and zram. Thus, the
* allocator is supposed to work well under low memory conditions. In
* particular, it never attempts higher order page allocation which is
* very likely to fail under memory pressure. On the other hand, if we
* just use single (0-order) pages, it would suffer from very high
* fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
* an entire page. This was one of the major issues with its predecessor
* (xvmalloc).
*
* To overcome these issues, zsmalloc allocates a bunch of 0-order pages
* and links them together using various 'struct page' fields. These linked
* pages act as a single higher-order page i.e. an object can span 0-order
* page boundaries. The code refers to these linked pages as a single entity
* called zspage.
*
* Following is how we use various fields and flags of underlying
* struct page(s) to form a zspage.
*
* Usage of struct page fields:
* page->first_page: points to the first component (0-order) page
* page->index (union with page->freelist): offset of the first object
* starting in this page. For the first page, this is
* always 0, so we use this field (aka freelist) to point
* to the first free object in zspage.
* page->lru: links together all component pages (except the first page)
* of a zspage
*
* For _first_ page only:
*
* page->private (union with page->first_page): refers to the
* component page after the first page
* page->freelist: points to the first free object in zspage.
* Free objects are linked together using in-place
* metadata.
* page->objects: maximum number of objects we can store in this
* zspage (class->zspage_order * PAGE_SIZE / class->size)
* page->lru: links together first pages of various zspages.
* Basically forming list of zspages in a fullness group.
* page->mapping: class index and fullness group of the zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
* PG_private2: identifies the last component page
*
*/

#ifdef CONFIG_ZSMALLOC_DEBUG
#define DEBUG
#endif
@@ -27,9 +75,139 @@
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#include "zsmalloc.h"
#include "zsmalloc_int.h"

/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
* These two conditions ensure that any 'struct link_free' itself doesn't
* span more than 1 page which avoids complex case of mapping 2 pages simply
* to restore link_free pointer values.
*/
#define ZS_ALIGN 8

/*
* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
*/
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)

/*
* Object location (<PFN>, <obj_idx>) is encoded as
* as single (void *) handle value.
*
* Note that object index <obj_idx> is relative to system
* page <PFN> it is stored in, so for each sub-page belonging
* to a zspage, obj_idx starts with 0.
*
* This is made more complicated by various memory models and PAE.
*/

#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)

#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE

/*
* On systems with 4K page size, this gives 254 size classes! There is a
* trader-off here:
* - Large number of size classes is potentially wasteful as free page are
* spread across these classes
* - Small number of size classes causes large internal fragmentation
* - Probably its better to use specific size classes (empirically
* determined). NOTE: all those class sizes must be set as multiple of
* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
*
* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
* (reason above)
*/
#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8)
#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
ZS_SIZE_CLASS_DELTA + 1)

/*
* We do not maintain any list for completely empty or full pages
*/
enum fullness_group {
ZS_ALMOST_FULL,
ZS_ALMOST_EMPTY,
_ZS_NR_FULLNESS_GROUPS,

ZS_EMPTY,
ZS_FULL
};

/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
* n = number of allocated objects
* N = total number of objects zspage can store
* f = 1/fullness_threshold_frac
*
* Similarly, we assign zspage to:
* ZS_ALMOST_FULL when n > N / f
* ZS_EMPTY when n == 0
* ZS_FULL when n == N
*
* (see: fix_fullness_group())
*/
static const int fullness_threshold_frac = 4;
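To make the rule above concrete, a small sketch of the classification it describes (illustrative only; the driver's own helper is fix_fullness_group(), and the function name below is invented):

static enum fullness_group classify_zspage(int inuse, int max_objects)
{
	if (inuse == 0)
		return ZS_EMPTY;
	if (inuse == max_objects)
		return ZS_FULL;
	/* with fullness_threshold_frac == 4: at most a quarter full */
	if (inuse <= max_objects / fullness_threshold_frac)
		return ZS_ALMOST_EMPTY;
	return ZS_ALMOST_FULL;
}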

struct size_class {
/*
* Size of objects stored in this class. Must be multiple
* of ZS_ALIGN.
*/
int size;
unsigned int index;

/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int pages_per_zspage;

spinlock_t lock;

/* stats */
u64 pages_allocated;

struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
};

/*
* Placed within free objects to form a singly linked list.
* For every zspage, first_page->freelist gives head of this list.
*
* This must be power of 2 and less than or equal to ZS_ALIGN
*/
struct link_free {
/* Handle of next free chunk (encodes <PFN, obj_idx>) */
void *next;
};

struct zs_pool {
struct size_class size_class[ZS_SIZE_CLASSES];

gfp_t flags; /* allocation flags used when growing pool */
};

/*
* A zspage's class index and fullness group
@@ -40,17 +218,39 @@
#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)

/*
* By default, zsmalloc uses a copy-based object mapping method to access
* allocations that span two pages. However, if a particular architecture
* performs VM mapping faster than copying, then it should be added here
* so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use
* page table mapping rather than copying for object mapping.
*/
#if defined(CONFIG_ARM) && !defined(MODULE)
#define USE_PGTABLE_MAPPING
#endif

struct mapping_area {
#ifdef USE_PGTABLE_MAPPING
struct vm_struct *vm; /* vm area for mapping object that span pages */
#else
char *vm_buf; /* copy buffer for objects that span pages */
#endif
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
};


/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);

static int is_first_page(struct page *page)
{
return test_bit(PG_private, &page->flags);
return PagePrivate(page);
}

static int is_last_page(struct page *page)
{
return test_bit(PG_private_2, &page->flags);
return PagePrivate2(page);
}

static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
@@ -180,7 +380,7 @@ out:
* link together 3 PAGE_SIZE sized pages to form a zspage
* since then we can perfectly fit in 8 such objects.
*/
static int get_zspage_order(int class_size)
static int get_pages_per_zspage(int class_size)
{
int i, max_usedpc = 0;
/* zspage order which gives maximum used size per KB */
@@ -223,14 +423,19 @@ static struct page *get_next_page(struct page *page)
if (is_last_page(page))
next = NULL;
else if (is_first_page(page))
next = (struct page *)page->private;
next = (struct page *)page_private(page);
else
next = list_entry(page->lru.next, struct page, lru);

return next;
}

/* Encode <page, obj_idx> as a single handle value */
/*
* Encode <page, obj_idx> as a single handle value.
* On hardware platforms with physical memory starting at 0x0 the pfn
* could be 0 so we ensure that the handle will never be 0 by adjusting the
* encoded obj_idx value before encoding.
*/
static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
{
unsigned long handle;
@@ -241,19 +446,21 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
}

handle = page_to_pfn(page) << OBJ_INDEX_BITS;
handle |= (obj_idx & OBJ_INDEX_MASK);
handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);

return (void *)handle;
}

/* Decode <page, obj_idx> pair from the given object handle */
static void obj_handle_to_location(void *handle, struct page **page,
/*
* Decode <page, obj_idx> pair from the given object handle. We adjust the
* decoded obj_idx back to its original value since it was adjusted in
* obj_location_to_handle().
*/
static void obj_handle_to_location(unsigned long handle, struct page **page,
unsigned long *obj_idx)
{
unsigned long hval = (unsigned long)handle;

*page = pfn_to_page(hval >> OBJ_INDEX_BITS);
*obj_idx = hval & OBJ_INDEX_MASK;
*page = pfn_to_page(handle >> OBJ_INDEX_BITS);
*obj_idx = (handle & OBJ_INDEX_MASK) - 1;
}
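The +1 adjustment above can be seen in a self-contained round trip (a user-space sketch, not kernel code; OBJ_INDEX_BITS is given an arbitrary assumed value here):

#include <assert.h>

#define SKETCH_OBJ_INDEX_BITS 12UL
#define SKETCH_OBJ_INDEX_MASK ((1UL << SKETCH_OBJ_INDEX_BITS) - 1)

static unsigned long encode(unsigned long pfn, unsigned long obj_idx)
{
	/* store obj_idx + 1 so that <pfn 0, obj_idx 0> never encodes to 0 */
	return (pfn << SKETCH_OBJ_INDEX_BITS) |
	       ((obj_idx + 1) & SKETCH_OBJ_INDEX_MASK);
}

static void decode(unsigned long handle, unsigned long *pfn, unsigned long *obj_idx)
{
	*pfn = handle >> SKETCH_OBJ_INDEX_BITS;
	*obj_idx = (handle & SKETCH_OBJ_INDEX_MASK) - 1;	/* undo the +1 */
}

int main(void)
{
	unsigned long pfn, idx;

	assert(encode(0, 0) != 0);		/* handle is never 0 */
	decode(encode(0, 0), &pfn, &idx);
	assert(pfn == 0 && idx == 0);		/* round trip is lossless */
	return 0;
}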

static unsigned long obj_idx_to_offset(struct page *page,
@@ -274,7 +481,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page->mapping = NULL;
page->freelist = NULL;
reset_page_mapcount(page);
page_mapcount_reset(page);
}

static void free_zspage(struct page *first_page)
@@ -354,7 +561,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
{
int i, error;
struct page *first_page = NULL;
struct page *first_page = NULL, *uninitialized_var(prev_page);

/*
* Allocate individual pages and link them together as:
@@ -368,8 +575,8 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
* identify the last page.
*/
error = -ENOMEM;
for (i = 0; i < class->zspage_order; i++) {
struct page *page, *prev_page;
for (i = 0; i < class->pages_per_zspage; i++) {
struct page *page;

page = alloc_page(flags);
if (!page)
@@ -377,20 +584,19 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)

INIT_LIST_HEAD(&page->lru);
if (i == 0) { /* first page */
set_bit(PG_private, &page->flags);
SetPagePrivate(page);
set_page_private(page, 0);
first_page = page;
first_page->inuse = 0;
}
if (i == 1)
first_page->private = (unsigned long)page;
set_page_private(first_page, (unsigned long)page);
if (i >= 1)
page->first_page = first_page;
if (i >= 2)
list_add(&page->lru, &prev_page->lru);
if (i == class->zspage_order - 1) /* last page */
set_bit(PG_private_2, &page->flags);

if (i == class->pages_per_zspage - 1) /* last page */
SetPagePrivate2(page);
prev_page = page;
}

@@ -398,7 +604,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)

first_page->freelist = obj_location_to_handle(first_page, 0);
/* Maximum number of objects we can store in this zspage */
first_page->objects = class->zspage_order * PAGE_SIZE / class->size;
first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;

error = 0; /* Success */

@@ -425,34 +631,141 @@ static struct page *find_get_zspage(struct size_class *class)
return page;
}


#ifdef USE_PGTABLE_MAPPING
static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
* If this becomes a separate module, register zs_init() with
* module_init(), zs_exit with module_exit(), and remove zs_initialized
* Make sure we don't leak memory if a cpu UP notification
* and zs_init() race and both call zs_cpu_up() on the same cpu
*/
static int zs_initialized;
if (area->vm)
return 0;
area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
if (!area->vm)
return -ENOMEM;
return 0;
}

static inline void __zs_cpu_down(struct mapping_area *area)
{
if (area->vm)
free_vm_area(area->vm);
area->vm = NULL;
}

static inline void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}

static inline void __zs_unmap_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
unsigned long addr = (unsigned long)area->vm_addr;

unmap_kernel_range(addr, PAGE_SIZE * 2);
}

#else /* USE_PGTABLE_MAPPING */

static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
* Make sure we don't leak memory if a cpu UP notification
* and zs_init() race and both call zs_cpu_up() on the same cpu
*/
if (area->vm_buf)
return 0;
area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
if (!area->vm_buf)
return -ENOMEM;
return 0;
}

static inline void __zs_cpu_down(struct mapping_area *area)
{
if (area->vm_buf)
free_page((unsigned long)area->vm_buf);
area->vm_buf = NULL;
}

static void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
int sizes[2];
void *addr;
char *buf = area->vm_buf;

/* disable page faults to match kmap_atomic() return conditions */
pagefault_disable();

/* no read fastpath */
if (area->vm_mm == ZS_MM_WO)
goto out;

sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];

/* copy object to per-cpu buffer */
addr = kmap_atomic(pages[0]);
memcpy(buf, addr + off, sizes[0]);
kunmap_atomic(addr);
addr = kmap_atomic(pages[1]);
memcpy(buf + sizes[0], addr, sizes[1]);
kunmap_atomic(addr);
out:
return area->vm_buf;
}

static void __zs_unmap_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
int sizes[2];
void *addr;
char *buf = area->vm_buf;

/* no write fastpath */
if (area->vm_mm == ZS_MM_RO)
goto out;

sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];

/* copy per-cpu buffer to object */
addr = kmap_atomic(pages[0]);
memcpy(addr + off, buf, sizes[0]);
kunmap_atomic(addr);
addr = kmap_atomic(pages[1]);
memcpy(addr, buf + sizes[0], sizes[1]);
kunmap_atomic(addr);

out:
/* enable page faults to match kunmap_atomic() return conditions */
pagefault_enable();
}

#endif /* USE_PGTABLE_MAPPING */

static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu)
{
int cpu = (long)pcpu;
int ret, cpu = (long)pcpu;
struct mapping_area *area;

switch (action) {
case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu);
if (area->vm)
break;
area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes);
if (!area->vm)
return notifier_from_errno(-ENOMEM);
ret = __zs_cpu_up(area);
if (ret)
return notifier_from_errno(ret);
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu);
if (area->vm)
free_vm_area(area->vm);
area->vm = NULL;
__zs_cpu_down(area);
break;
}

@@ -488,14 +801,21 @@ fail:
return notifier_to_errno(ret);
}

struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
/**
* zs_create_pool - Creates an allocation pool to work from.
* @flags: allocation flags used to allocate pool metadata
*
* This function must be called before anything when using
* the zsmalloc allocator.
*
* On success, a pointer to the newly created pool is returned,
* otherwise NULL.
*/
struct zs_pool *zs_create_pool(gfp_t flags)
{
int i, error, ovhd_size;
int i, ovhd_size;
struct zs_pool *pool;

if (!name)
return NULL;

ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
pool = kzalloc(ovhd_size, GFP_KERNEL);
if (!pool)
@@ -513,31 +833,11 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
class->size = size;
class->index = i;
spin_lock_init(&class->lock);
class->zspage_order = get_zspage_order(size);
class->pages_per_zspage = get_pages_per_zspage(size);

}

/*
* If this becomes a separate module, register zs_init with
* module_init, and remove this block
*/
if (!zs_initialized) {
error = zs_init();
if (error)
goto cleanup;
zs_initialized = 1;
}

pool->flags = flags;
pool->name = name;

error = 0; /* Success */

cleanup:
if (error) {
zs_destroy_pool(pool);
pool = NULL;
}

return pool;
}
|
||||
|
||||
for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
|
||||
if (class->fullness_list[fg]) {
|
||||
pr_info("Freeing non-empty class with size "
|
||||
"%db, fullness group %d\n",
|
||||
pr_info("Freeing non-empty class with size %db, fullness group %d\n",
|
||||
class->size, fg);
|
||||
}
|
||||
}
|
||||
@@ -567,18 +866,14 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
|
||||
* zs_malloc - Allocate block of given size from pool.
|
||||
* @pool: pool to allocate from
|
||||
* @size: size of block to allocate
|
||||
* @page: page no. that holds the object
|
||||
* @offset: location of object within page
|
||||
*
|
||||
* On success, <page, offset> identifies block allocated
|
||||
* and 0 is returned. On failure, <page, offset> is set to
|
||||
* 0 and -ENOMEM is returned.
|
||||
*
|
||||
* On success, handle to the allocated object is returned,
|
||||
* otherwise 0.
|
||||
* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
|
||||
*/
|
||||
void *zs_malloc(struct zs_pool *pool, size_t size)
|
||||
unsigned long zs_malloc(struct zs_pool *pool, size_t size)
|
||||
{
|
||||
void *obj;
|
||||
unsigned long obj;
|
||||
struct link_free *link;
|
||||
int class_idx;
|
||||
struct size_class *class;
|
||||
@@ -587,7 +882,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
|
||||
unsigned long m_objidx, m_offset;
|
||||
|
||||
if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
|
||||
return NULL;
|
||||
return 0;
|
||||
|
||||
class_idx = get_size_class_index(size);
|
||||
class = &pool->size_class[class_idx];
|
||||
@@ -600,14 +895,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
|
||||
spin_unlock(&class->lock);
|
||||
first_page = alloc_zspage(class, pool->flags);
|
||||
if (unlikely(!first_page))
|
||||
return NULL;
|
||||
return 0;
|
||||
|
||||
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
|
||||
spin_lock(&class->lock);
|
||||
class->pages_allocated += class->zspage_order;
|
||||
class->pages_allocated += class->pages_per_zspage;
|
||||
}
|
||||
|
||||
obj = first_page->freelist;
|
||||
obj = (unsigned long)first_page->freelist;
|
||||
obj_handle_to_location(obj, &m_page, &m_objidx);
|
||||
m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
|
||||
|
||||
@@ -626,7 +921,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_malloc);
|
||||
|
||||
void zs_free(struct zs_pool *pool, void *obj)
|
||||
void zs_free(struct zs_pool *pool, unsigned long obj)
|
||||
{
|
||||
struct link_free *link;
|
||||
struct page *first_page, *f_page;
|
||||
@@ -653,13 +948,13 @@ void zs_free(struct zs_pool *pool, void *obj)
|
||||
+ f_offset);
|
||||
link->next = first_page->freelist;
|
||||
kunmap_atomic(link);
|
||||
first_page->freelist = obj;
|
||||
first_page->freelist = (void *)obj;
|
||||
|
||||
first_page->inuse--;
|
||||
fullness = fix_fullness_group(pool, first_page);
|
||||
|
||||
if (fullness == ZS_EMPTY)
|
||||
class->pages_allocated -= class->zspage_order;
|
||||
class->pages_allocated -= class->pages_per_zspage;
|
||||
|
||||
spin_unlock(&class->lock);
|
||||
|
||||
@@ -668,7 +963,22 @@ void zs_free(struct zs_pool *pool, void *obj)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_free);
|
||||
|
||||
void *zs_map_object(struct zs_pool *pool, void *handle)
|
||||
/**
|
||||
* zs_map_object - get address of allocated object from handle.
|
||||
* @pool: pool from which the object was allocated
|
||||
* @handle: handle returned from zs_malloc
|
||||
*
|
||||
* Before using an object allocated from zs_malloc, it must be mapped using
|
||||
* this function. When done with the object, it must be unmapped using
|
||||
* zs_unmap_object.
|
||||
*
|
||||
* Only one object can be mapped per cpu at a time. There is no protection
|
||||
* against nested mappings.
|
||||
*
|
||||
* This function returns with preemption and page faults disabled.
|
||||
*/
|
||||
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
|
||||
enum zs_mapmode mm)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned long obj_idx, off;
|
||||
@@ -677,38 +987,40 @@ void *zs_map_object(struct zs_pool *pool, void *handle)
|
||||
enum fullness_group fg;
|
||||
struct size_class *class;
|
||||
struct mapping_area *area;
|
||||
struct page *pages[2];
|
||||
|
||||
BUG_ON(!handle);
|
||||
|
||||
/*
|
||||
* Because we use per-cpu mapping areas shared among the
|
||||
* pools/users, we can't allow mapping in interrupt context
|
||||
* because it can corrupt another users mappings.
|
||||
*/
|
||||
BUG_ON(in_interrupt());
|
||||
|
||||
obj_handle_to_location(handle, &page, &obj_idx);
|
||||
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
|
||||
class = &pool->size_class[class_idx];
|
||||
off = obj_idx_to_offset(page, obj_idx, class->size);
|
||||
|
||||
area = &get_cpu_var(zs_map_area);
|
||||
area->vm_mm = mm;
|
||||
if (off + class->size <= PAGE_SIZE) {
|
||||
/* this object is contained entirely within a page */
|
||||
area->vm_addr = kmap_atomic(page);
|
||||
} else {
|
||||
/* this object spans two pages */
|
||||
struct page *nextp;
|
||||
|
||||
nextp = get_next_page(page);
|
||||
BUG_ON(!nextp);
|
||||
|
||||
|
||||
set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
|
||||
set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
|
||||
|
||||
/* We pre-allocated VM area so mapping can never fail */
|
||||
area->vm_addr = area->vm->addr;
|
||||
return area->vm_addr + off;
|
||||
}
|
||||
|
||||
return area->vm_addr + off;
|
||||
/* this object spans two pages */
|
||||
pages[0] = page;
|
||||
pages[1] = get_next_page(page);
|
||||
BUG_ON(!pages[1]);
|
||||
|
||||
return __zs_map_object(area, pages, off, class->size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_map_object);
|
||||
|
||||
void zs_unmap_object(struct zs_pool *pool, void *handle)
|
||||
void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned long obj_idx, off;
|
||||
@@ -726,13 +1038,16 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
|
||||
off = obj_idx_to_offset(page, obj_idx, class->size);
|
||||
|
||||
area = &__get_cpu_var(zs_map_area);
|
||||
if (off + class->size <= PAGE_SIZE) {
|
||||
if (off + class->size <= PAGE_SIZE)
|
||||
kunmap_atomic(area->vm_addr);
|
||||
} else {
|
||||
set_pte(area->vm_ptes[0], __pte(0));
|
||||
set_pte(area->vm_ptes[1], __pte(0));
|
||||
__flush_tlb_one((unsigned long)area->vm_addr);
|
||||
__flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
|
||||
else {
|
||||
struct page *pages[2];
|
||||
|
||||
pages[0] = page;
|
||||
pages[1] = get_next_page(page);
|
||||
BUG_ON(!pages[1]);
|
||||
|
||||
__zs_unmap_object(area, pages, off, class->size);
|
||||
}
|
||||
put_cpu_var(zs_map_area);
|
||||
}
|
||||
@@ -749,3 +1064,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool)
|
||||
return npages << PAGE_SHIFT;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
|
||||
|
||||
module_init(zs_init);
|
||||
module_exit(zs_exit);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
|
||||
|
||||
@@ -15,16 +15,28 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* zsmalloc mapping modes
|
||||
*
|
||||
* NOTE: These only make a difference when a mapped object spans pages
|
||||
*/
|
||||
enum zs_mapmode {
|
||||
ZS_MM_RW, /* normal read-write mapping */
|
||||
ZS_MM_RO, /* read-only (no copy-out at unmap time) */
|
||||
ZS_MM_WO /* write-only (no copy-in at map time) */
|
||||
};
|
||||
|
||||
struct zs_pool;
|
||||
|
||||
struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
|
||||
struct zs_pool *zs_create_pool(gfp_t flags);
|
||||
void zs_destroy_pool(struct zs_pool *pool);
|
||||
|
||||
void *zs_malloc(struct zs_pool *pool, size_t size);
|
||||
void zs_free(struct zs_pool *pool, void *obj);
|
||||
unsigned long zs_malloc(struct zs_pool *pool, size_t size);
|
||||
void zs_free(struct zs_pool *pool, unsigned long obj);
|
||||
|
||||
void *zs_map_object(struct zs_pool *pool, void *handle);
|
||||
void zs_unmap_object(struct zs_pool *pool, void *handle);
|
||||
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
|
||||
enum zs_mapmode mm);
|
||||
void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
|
||||
|
||||
u64 zs_get_total_size_bytes(struct zs_pool *pool);
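Taken together, the header changes above amount to a new calling convention: handles are unsigned long, zs_create_pool() no longer takes a name, zs_malloc() returns 0 on failure, and zs_map_object() takes a mapping mode. A hedged usage sketch (not code from this commit; error handling trimmed):

#include <linux/string.h>
#include "zsmalloc.h"

static int zsmalloc_demo(gfp_t gfp)
{
	struct zs_pool *pool;
	unsigned long handle;
	void *ptr;

	pool = zs_create_pool(gfp);		/* no name argument any more */
	if (!pool)
		return -ENOMEM;

	handle = zs_malloc(pool, 128);		/* 0 now signals failure */
	if (!handle) {
		zs_destroy_pool(pool);
		return -ENOMEM;
	}

	ptr = zs_map_object(pool, handle, ZS_MM_WO);	/* pick a mapping mode */
	memset(ptr, 0, 128);
	zs_unmap_object(pool, handle);

	zs_free(pool, handle);
	zs_destroy_pool(pool);
	return 0;
}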

@@ -1,155 +0,0 @@
/*
* zsmalloc memory allocator
*
* Copyright (C) 2011 Nitin Gupta
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the license that better fits your requirements.
*
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*/

#ifndef _ZS_MALLOC_INT_H_
#define _ZS_MALLOC_INT_H_

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
* These two conditions ensure that any 'struct link_free' itself doesn't
* span more than 1 page which avoids complex case of mapping 2 pages simply
* to restore link_free pointer values.
*/
#define ZS_ALIGN 8

/*
* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
*/
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)

/*
* Object location (<PFN>, <obj_idx>) is encoded as
* as single (void *) handle value.
*
* Note that object index <obj_idx> is relative to system
* page <PFN> it is stored in, so for each sub-page belonging
* to a zspage, obj_idx starts with 0.
*
* This is made more complicated by various memory models and PAE.
*/

#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)

#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE

/*
* On systems with 4K page size, this gives 254 size classes! There is a
* trader-off here:
* - Large number of size classes is potentially wasteful as free page are
* spread across these classes
* - Small number of size classes causes large internal fragmentation
* - Probably its better to use specific size classes (empirically
* determined). NOTE: all those class sizes must be set as multiple of
* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
*
* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
* (reason above)
*/
#define ZS_SIZE_CLASS_DELTA 16
#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
ZS_SIZE_CLASS_DELTA + 1)

/*
* We do not maintain any list for completely empty or full pages
*/
enum fullness_group {
ZS_ALMOST_FULL,
ZS_ALMOST_EMPTY,
_ZS_NR_FULLNESS_GROUPS,

ZS_EMPTY,
ZS_FULL
};

/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
* n = number of allocated objects
* N = total number of objects zspage can store
* f = 1/fullness_threshold_frac
*
* Similarly, we assign zspage to:
* ZS_ALMOST_FULL when n > N / f
* ZS_EMPTY when n == 0
* ZS_FULL when n == N
*
* (see: fix_fullness_group())
*/
static const int fullness_threshold_frac = 4;

struct mapping_area {
struct vm_struct *vm;
pte_t *vm_ptes[2];
char *vm_addr;
};

struct size_class {
/*
* Size of objects stored in this class. Must be multiple
* of ZS_ALIGN.
*/
int size;
unsigned int index;

/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int zspage_order;

spinlock_t lock;

/* stats */
u64 pages_allocated;

struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
};

/*
* Placed within free objects to form a singly linked list.
* For every zspage, first_page->freelist gives head of this list.
*
* This must be power of 2 and less than or equal to ZS_ALIGN
*/
struct link_free {
/* Handle of next free chunk (encodes <PFN, obj_idx>) */
void *next;
};

struct zs_pool {
struct size_class size_class[ZS_SIZE_CLASSES];

gfp_t flags; /* allocation flags used when growing pool */
const char *name;
};

#endif
@@ -1637,5 +1637,10 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */

static inline void page_mapcount_reset(struct page *page)
{
atomic_set(&(page)->_mapcount, -1);
}

#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */