Pulled in android-ready zram from davidmueller13/Vindicator-flo-aosp.

This commit is contained in:
Tonoxis
2021-04-04 16:21:55 -04:00
parent 820a7dfba0
commit 66b91c76c0
12 changed files with 1057 additions and 983 deletions

View File

@@ -1,9 +1,6 @@
config ZRAM
tristate "Compressed RAM block device support"
# X86 dependency is because zsmalloc uses non-portable pte/tlb
# functions
depends on BLOCK && SYSFS && X86
select ZSMALLOC
depends on BLOCK && SYSFS && ZSMALLOC
select LZO_COMPRESS
select LZO_DECOMPRESS
default n
@@ -17,7 +14,7 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
Project home: http://compcache.googlecode.com/
Project home: <https://compcache.googlecode.com/>
config ZRAM_DEBUG
bool "Compressed RAM block device debug support"

View File

@@ -1,3 +1,3 @@
zram-y := zram_drv.o zram_sysfs.o
zram-y := zram_drv.o
obj-$(CONFIG_ZRAM) += zram.o

View File

@@ -23,17 +23,17 @@ Following shows a typical sequence of steps for using zram.
This creates 4 devices: /dev/zram{0,1,2,3}
(num_devices parameter is optional. Default: 1)
2) Set Disksize (Optional):
Set disk size by writing the value to sysfs node 'disksize'
(in bytes). If disksize is not given, default value of 25%
of RAM is used.
2) Set Disksize
Set disk size by writing the value to sysfs node 'disksize'.
The value can be either in bytes or you can use mem suffixes.
Examples:
# Initialize /dev/zram0 with 50MB disksize
echo $((50*1024*1024)) > /sys/block/zram0/disksize
NOTE: disksize cannot be changed if the disk contains any
data. So, for such a disk, you need to issue 'reset' (see below)
before you can change its disksize.
# Using mem suffixes
echo 256K > /sys/block/zram0/disksize
echo 512M > /sys/block/zram0/disksize
echo 1G > /sys/block/zram0/disksize
3) Activate:
mkswap /dev/zram0
@@ -65,8 +65,9 @@ Following shows a typical sequence of steps for using zram.
echo 1 > /sys/block/zram0/reset
echo 1 > /sys/block/zram1/reset
(This frees all the memory allocated for the given device).
This frees all the memory allocated for the given device and
resets the disksize to zero. You must set the disksize again
before reusing the device.
Please report any problems at:
- Mailing list: linux-mm-cc at laptop dot org

File diff suppressed because it is too large Load Diff

View File

@@ -26,33 +26,18 @@
*/
static const unsigned max_num_devices = 32;
/*
* Stored at beginning of each compressed object.
*
* It stores back-reference to table entry which points to this
* object. This is required to support memory defragmentation.
*/
struct zobj_header {
#if 0
u32 table_idx;
#endif
};
/*-- Configurable parameters */
/* Default zram disk size: 25% of total RAM */
static const unsigned default_disksize_perc_ram = 25;
/*
* Pages that compress to size greater than this are stored
* uncompressed in memory.
*/
static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
static const size_t max_zpage_size = PAGE_SIZE / 10 * 9;
/*
* NOTE: max_zpage_size must be less than or equal to:
* ZS_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
* otherwise, xv_malloc() would always return failure.
* ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
* always return failure.
*/
/*-- End of configurable params */
@@ -68,9 +53,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
/* Page is stored uncompressed */
ZRAM_UNCOMPRESSED,
/* Page consists entirely of zeros */
ZRAM_ZERO,
@@ -81,34 +63,51 @@ enum zram_pageflags {
/* Allocated for each disk page */
struct table {
void *handle;
unsigned long handle;
u16 size; /* object size (excluding header) */
u8 count; /* object ref count (not yet used) */
u8 flags;
} __attribute__((aligned(4)));
} __aligned(4);
/*
* All 64bit fields should only be manipulated by 64bit atomic accessors.
* All modifications to 32bit counter should be protected by zram->lock.
*/
struct zram_stats {
u64 compr_size; /* compressed size of pages stored */
u64 num_reads; /* failed + successful */
u64 num_writes; /* --do-- */
u64 failed_reads; /* should NEVER! happen */
u64 failed_writes; /* can happen when memory is too low */
u64 invalid_io; /* non-page-aligned I/O requests */
u64 notify_free; /* no. of swap slot free notifications */
atomic64_t compr_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
atomic64_t num_writes; /* --do-- */
atomic64_t failed_reads; /* should NEVER! happen */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
u32 pages_expand; /* % of incompressible pages */
u32 bad_compress; /* % of pages with compression ratio>=75% */
};
struct zram {
struct zs_pool *mem_pool;
struct zram_meta {
void *compress_workmem;
void *compress_buffer;
struct table *table;
spinlock_t stat64_lock; /* protect 64-bit stats */
struct rw_semaphore lock; /* protect compression buffers and table
* against concurrent read and writes */
struct zs_pool *mem_pool;
};
struct zram_slot_free {
unsigned long index;
struct zram_slot_free *next;
};
struct zram {
struct zram_meta *meta;
struct rw_semaphore lock; /* protect compression buffers, table,
* 32bit stat counters against concurrent
* notifications, reads and writes */
struct work_struct free_work; /* handle pending free request */
struct zram_slot_free *slot_free_rq; /* list head of free request */
struct request_queue *queue;
struct gendisk *disk;
int init_done;
@@ -119,17 +118,8 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
spinlock_t slot_free_lock;
struct zram_stats stats;
};
extern struct zram *zram_devices;
unsigned int zram_get_num_devices(void);
#ifdef CONFIG_SYSFS
extern struct attribute_group zram_disk_attr_group;
#endif
extern int zram_init_device(struct zram *zram);
extern void __zram_reset_device(struct zram *zram);
#endif

View File

@@ -1,227 +0,0 @@
/*
* Compressed RAM block device
*
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the licence that better fits your requirements.
*
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*
* Project home: http://compcache.googlecode.com/
*/
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/mm.h>
#include "zram_drv.h"
static u64 zram_stat64_read(struct zram *zram, u64 *v)
{
u64 val;
spin_lock(&zram->stat64_lock);
val = *v;
spin_unlock(&zram->stat64_lock);
return val;
}
static struct zram *dev_to_zram(struct device *dev)
{
int i;
struct zram *zram = NULL;
for (i = 0; i < zram_get_num_devices(); i++) {
zram = &zram_devices[i];
if (disk_to_dev(zram->disk) == dev)
break;
}
return zram;
}
static ssize_t disksize_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n", zram->disksize);
}
static ssize_t disksize_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int ret;
u64 disksize;
struct zram *zram = dev_to_zram(dev);
ret = kstrtoull(buf, 10, &disksize);
if (ret)
return ret;
down_write(&zram->init_lock);
if (zram->init_done) {
up_write(&zram->init_lock);
pr_info("Cannot change disksize for initialized device\n");
return -EBUSY;
}
zram->disksize = PAGE_ALIGN(disksize);
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
up_write(&zram->init_lock);
return len;
}
static ssize_t initstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%u\n", zram->init_done);
}
static ssize_t reset_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
int ret;
unsigned short do_reset;
struct zram *zram;
struct block_device *bdev;
zram = dev_to_zram(dev);
bdev = bdget_disk(zram->disk, 0);
/* Do not reset an active device! */
if (bdev->bd_holders)
return -EBUSY;
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
return ret;
if (!do_reset)
return -EINVAL;
/* Make sure all pending I/O is finished */
if (bdev)
fsync_bdev(bdev);
down_write(&zram->init_lock);
if (zram->init_done)
__zram_reset_device(zram);
up_write(&zram->init_lock);
return len;
}
static ssize_t num_reads_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.num_reads));
}
static ssize_t num_writes_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.num_writes));
}
static ssize_t invalid_io_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.invalid_io));
}
static ssize_t notify_free_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.notify_free));
}
static ssize_t zero_pages_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%u\n", zram->stats.pages_zero);
}
static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
(u64)(zram->stats.pages_stored) << PAGE_SHIFT);
}
static ssize_t compr_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
zram_stat64_read(zram, &zram->stats.compr_size));
}
static ssize_t mem_used_total_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
u64 val = 0;
struct zram *zram = dev_to_zram(dev);
if (zram->init_done) {
val = zs_get_total_size_bytes(zram->mem_pool) +
((u64)(zram->stats.pages_expand) << PAGE_SHIFT);
}
return sprintf(buf, "%llu\n", val);
}
static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
&dev_attr_initstate.attr,
&dev_attr_reset.attr,
&dev_attr_num_reads.attr,
&dev_attr_num_writes.attr,
&dev_attr_invalid_io.attr,
&dev_attr_notify_free.attr,
&dev_attr_zero_pages.attr,
&dev_attr_orig_data_size.attr,
&dev_attr_compr_data_size.attr,
&dev_attr_mem_used_total.attr,
NULL,
};
struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};

View File

@@ -1,9 +1,5 @@
config ZSMALLOC
tristate "Memory allocator for compressed pages"
# X86 dependency is because of the use of __flush_tlb_one and set_pte
# in zsmalloc-main.c.
# TODO: convert these to portable functions
depends on X86
bool "Memory allocator for compressed pages"
default n
help
zsmalloc is a slab-based memory allocator designed to store

View File

@@ -1,3 +1,4 @@
CFLAGS_zsmalloc-main.o := -Wno-error=implicit-function-declaration -Wno-implicit-function-declaration
zsmalloc-y := zsmalloc-main.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o

View File

@@ -10,6 +10,54 @@
* Released under the terms of GNU General Public License Version 2.0
*/
/*
* This allocator is designed for use with zcache and zram. Thus, the
* allocator is supposed to work well under low memory conditions. In
* particular, it never attempts higher order page allocation which is
* very likely to fail under memory pressure. On the other hand, if we
* just use single (0-order) pages, it would suffer from very high
* fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
* an entire page. This was one of the major issues with its predecessor
* (xvmalloc).
*
* To overcome these issues, zsmalloc allocates a bunch of 0-order pages
* and links them together using various 'struct page' fields. These linked
* pages act as a single higher-order page i.e. an object can span 0-order
* page boundaries. The code refers to these linked pages as a single entity
* called zspage.
*
* Following is how we use various fields and flags of underlying
* struct page(s) to form a zspage.
*
* Usage of struct page fields:
* page->first_page: points to the first component (0-order) page
* page->index (union with page->freelist): offset of the first object
* starting in this page. For the first page, this is
* always 0, so we use this field (aka freelist) to point
* to the first free object in zspage.
* page->lru: links together all component pages (except the first page)
* of a zspage
*
* For _first_ page only:
*
* page->private (union with page->first_page): refers to the
* component page after the first page
* page->freelist: points to the first free object in zspage.
* Free objects are linked together using in-place
* metadata.
* page->objects: maximum number of objects we can store in this
* zspage (class->zspage_order * PAGE_SIZE / class->size)
* page->lru: links together first pages of various zspages.
* Basically forming list of zspages in a fullness group.
* page->mapping: class index and fullness group of the zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
* PG_private2: identifies the last component page
*
*/
#ifdef CONFIG_ZSMALLOC_DEBUG
#define DEBUG
#endif
@@ -27,9 +75,139 @@
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include "zsmalloc.h"
#include "zsmalloc_int.h"
/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
* These two conditions ensure that any 'struct link_free' itself doesn't
* span more than 1 page which avoids complex case of mapping 2 pages simply
* to restore link_free pointer values.
*/
#define ZS_ALIGN 8
/*
* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
*/
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
/*
* Object location (<PFN>, <obj_idx>) is encoded as
* as single (void *) handle value.
*
* Note that object index <obj_idx> is relative to system
* page <PFN> it is stored in, so for each sub-page belonging
* to a zspage, obj_idx starts with 0.
*
* This is made more complicated by various memory models and PAE.
*/
#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
/*
* On systems with 4K page size, this gives 254 size classes! There is a
* trader-off here:
* - Large number of size classes is potentially wasteful as free page are
* spread across these classes
* - Small number of size classes causes large internal fragmentation
* - Probably its better to use specific size classes (empirically
* determined). NOTE: all those class sizes must be set as multiple of
* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
*
* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
* (reason above)
*/
#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8)
#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
ZS_SIZE_CLASS_DELTA + 1)
/*
* We do not maintain any list for completely empty or full pages
*/
enum fullness_group {
ZS_ALMOST_FULL,
ZS_ALMOST_EMPTY,
_ZS_NR_FULLNESS_GROUPS,
ZS_EMPTY,
ZS_FULL
};
/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
* n = number of allocated objects
* N = total number of objects zspage can store
* f = 1/fullness_threshold_frac
*
* Similarly, we assign zspage to:
* ZS_ALMOST_FULL when n > N / f
* ZS_EMPTY when n == 0
* ZS_FULL when n == N
*
* (see: fix_fullness_group())
*/
static const int fullness_threshold_frac = 4;
struct size_class {
/*
* Size of objects stored in this class. Must be multiple
* of ZS_ALIGN.
*/
int size;
unsigned int index;
/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int pages_per_zspage;
spinlock_t lock;
/* stats */
u64 pages_allocated;
struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
};
/*
* Placed within free objects to form a singly linked list.
* For every zspage, first_page->freelist gives head of this list.
*
* This must be power of 2 and less than or equal to ZS_ALIGN
*/
struct link_free {
/* Handle of next free chunk (encodes <PFN, obj_idx>) */
void *next;
};
struct zs_pool {
struct size_class size_class[ZS_SIZE_CLASSES];
gfp_t flags; /* allocation flags used when growing pool */
};
/*
* A zspage's class index and fullness group
@@ -40,17 +218,39 @@
#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
/*
* By default, zsmalloc uses a copy-based object mapping method to access
* allocations that span two pages. However, if a particular architecture
* performs VM mapping faster than copying, then it should be added here
* so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use
* page table mapping rather than copying for object mapping.
*/
#if defined(CONFIG_ARM) && !defined(MODULE)
#define USE_PGTABLE_MAPPING
#endif
struct mapping_area {
#ifdef USE_PGTABLE_MAPPING
struct vm_struct *vm; /* vm area for mapping object that span pages */
#else
char *vm_buf; /* copy buffer for objects that span pages */
#endif
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
};
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
static int is_first_page(struct page *page)
{
return test_bit(PG_private, &page->flags);
return PagePrivate(page);
}
static int is_last_page(struct page *page)
{
return test_bit(PG_private_2, &page->flags);
return PagePrivate2(page);
}
static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
@@ -180,7 +380,7 @@ out:
* link together 3 PAGE_SIZE sized pages to form a zspage
* since then we can perfectly fit in 8 such objects.
*/
static int get_zspage_order(int class_size)
static int get_pages_per_zspage(int class_size)
{
int i, max_usedpc = 0;
/* zspage order which gives maximum used size per KB */
@@ -223,14 +423,19 @@ static struct page *get_next_page(struct page *page)
if (is_last_page(page))
next = NULL;
else if (is_first_page(page))
next = (struct page *)page->private;
next = (struct page *)page_private(page);
else
next = list_entry(page->lru.next, struct page, lru);
return next;
}
/* Encode <page, obj_idx> as a single handle value */
/*
* Encode <page, obj_idx> as a single handle value.
* On hardware platforms with physical memory starting at 0x0 the pfn
* could be 0 so we ensure that the handle will never be 0 by adjusting the
* encoded obj_idx value before encoding.
*/
static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
{
unsigned long handle;
@@ -241,19 +446,21 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
}
handle = page_to_pfn(page) << OBJ_INDEX_BITS;
handle |= (obj_idx & OBJ_INDEX_MASK);
handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);
return (void *)handle;
}
/* Decode <page, obj_idx> pair from the given object handle */
static void obj_handle_to_location(void *handle, struct page **page,
/*
* Decode <page, obj_idx> pair from the given object handle. We adjust the
* decoded obj_idx back to its original value since it was adjusted in
* obj_location_to_handle().
*/
static void obj_handle_to_location(unsigned long handle, struct page **page,
unsigned long *obj_idx)
{
unsigned long hval = (unsigned long)handle;
*page = pfn_to_page(hval >> OBJ_INDEX_BITS);
*obj_idx = hval & OBJ_INDEX_MASK;
*page = pfn_to_page(handle >> OBJ_INDEX_BITS);
*obj_idx = (handle & OBJ_INDEX_MASK) - 1;
}
static unsigned long obj_idx_to_offset(struct page *page,
@@ -274,7 +481,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page->mapping = NULL;
page->freelist = NULL;
reset_page_mapcount(page);
page_mapcount_reset(page);
}
static void free_zspage(struct page *first_page)
@@ -354,7 +561,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
{
int i, error;
struct page *first_page = NULL;
struct page *first_page = NULL, *uninitialized_var(prev_page);
/*
* Allocate individual pages and link them together as:
@@ -368,8 +575,8 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
* identify the last page.
*/
error = -ENOMEM;
for (i = 0; i < class->zspage_order; i++) {
struct page *page, *prev_page;
for (i = 0; i < class->pages_per_zspage; i++) {
struct page *page;
page = alloc_page(flags);
if (!page)
@@ -377,20 +584,19 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
INIT_LIST_HEAD(&page->lru);
if (i == 0) { /* first page */
set_bit(PG_private, &page->flags);
SetPagePrivate(page);
set_page_private(page, 0);
first_page = page;
first_page->inuse = 0;
}
if (i == 1)
first_page->private = (unsigned long)page;
set_page_private(first_page, (unsigned long)page);
if (i >= 1)
page->first_page = first_page;
if (i >= 2)
list_add(&page->lru, &prev_page->lru);
if (i == class->zspage_order - 1) /* last page */
set_bit(PG_private_2, &page->flags);
if (i == class->pages_per_zspage - 1) /* last page */
SetPagePrivate2(page);
prev_page = page;
}
@@ -398,7 +604,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
first_page->freelist = obj_location_to_handle(first_page, 0);
/* Maximum number of objects we can store in this zspage */
first_page->objects = class->zspage_order * PAGE_SIZE / class->size;
first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
error = 0; /* Success */
@@ -425,34 +631,141 @@ static struct page *find_get_zspage(struct size_class *class)
return page;
}
#ifdef USE_PGTABLE_MAPPING
static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
* If this becomes a separate module, register zs_init() with
* module_init(), zs_exit with module_exit(), and remove zs_initialized
* Make sure we don't leak memory if a cpu UP notification
* and zs_init() race and both call zs_cpu_up() on the same cpu
*/
static int zs_initialized;
if (area->vm)
return 0;
area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
if (!area->vm)
return -ENOMEM;
return 0;
}
static inline void __zs_cpu_down(struct mapping_area *area)
{
if (area->vm)
free_vm_area(area->vm);
area->vm = NULL;
}
static inline void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}
static inline void __zs_unmap_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
unsigned long addr = (unsigned long)area->vm_addr;
unmap_kernel_range(addr, PAGE_SIZE * 2);
}
#else /* USE_PGTABLE_MAPPING */
static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
* Make sure we don't leak memory if a cpu UP notification
* and zs_init() race and both call zs_cpu_up() on the same cpu
*/
if (area->vm_buf)
return 0;
area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
if (!area->vm_buf)
return -ENOMEM;
return 0;
}
static inline void __zs_cpu_down(struct mapping_area *area)
{
if (area->vm_buf)
free_page((unsigned long)area->vm_buf);
area->vm_buf = NULL;
}
static void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
int sizes[2];
void *addr;
char *buf = area->vm_buf;
/* disable page faults to match kmap_atomic() return conditions */
pagefault_disable();
/* no read fastpath */
if (area->vm_mm == ZS_MM_WO)
goto out;
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
/* copy object to per-cpu buffer */
addr = kmap_atomic(pages[0]);
memcpy(buf, addr + off, sizes[0]);
kunmap_atomic(addr);
addr = kmap_atomic(pages[1]);
memcpy(buf + sizes[0], addr, sizes[1]);
kunmap_atomic(addr);
out:
return area->vm_buf;
}
static void __zs_unmap_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
int sizes[2];
void *addr;
char *buf = area->vm_buf;
/* no write fastpath */
if (area->vm_mm == ZS_MM_RO)
goto out;
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
/* copy per-cpu buffer to object */
addr = kmap_atomic(pages[0]);
memcpy(addr + off, buf, sizes[0]);
kunmap_atomic(addr);
addr = kmap_atomic(pages[1]);
memcpy(addr, buf + sizes[0], sizes[1]);
kunmap_atomic(addr);
out:
/* enable page faults to match kunmap_atomic() return conditions */
pagefault_enable();
}
#endif /* USE_PGTABLE_MAPPING */
static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu)
{
int cpu = (long)pcpu;
int ret, cpu = (long)pcpu;
struct mapping_area *area;
switch (action) {
case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu);
if (area->vm)
break;
area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes);
if (!area->vm)
return notifier_from_errno(-ENOMEM);
ret = __zs_cpu_up(area);
if (ret)
return notifier_from_errno(ret);
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu);
if (area->vm)
free_vm_area(area->vm);
area->vm = NULL;
__zs_cpu_down(area);
break;
}
@@ -488,14 +801,21 @@ fail:
return notifier_to_errno(ret);
}
struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
/**
* zs_create_pool - Creates an allocation pool to work from.
* @flags: allocation flags used to allocate pool metadata
*
* This function must be called before anything when using
* the zsmalloc allocator.
*
* On success, a pointer to the newly created pool is returned,
* otherwise NULL.
*/
struct zs_pool *zs_create_pool(gfp_t flags)
{
int i, error, ovhd_size;
int i, ovhd_size;
struct zs_pool *pool;
if (!name)
return NULL;
ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
pool = kzalloc(ovhd_size, GFP_KERNEL);
if (!pool)
@@ -513,31 +833,11 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
class->size = size;
class->index = i;
spin_lock_init(&class->lock);
class->zspage_order = get_zspage_order(size);
class->pages_per_zspage = get_pages_per_zspage(size);
}
/*
* If this becomes a separate module, register zs_init with
* module_init, and remove this block
*/
if (!zs_initialized) {
error = zs_init();
if (error)
goto cleanup;
zs_initialized = 1;
}
pool->flags = flags;
pool->name = name;
error = 0; /* Success */
cleanup:
if (error) {
zs_destroy_pool(pool);
pool = NULL;
}
return pool;
}
@@ -553,8 +853,7 @@ void zs_destroy_pool(struct zs_pool *pool)
for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
if (class->fullness_list[fg]) {
pr_info("Freeing non-empty class with size "
"%db, fullness group %d\n",
pr_info("Freeing non-empty class with size %db, fullness group %d\n",
class->size, fg);
}
}
@@ -567,18 +866,14 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
* zs_malloc - Allocate block of given size from pool.
* @pool: pool to allocate from
* @size: size of block to allocate
* @page: page no. that holds the object
* @offset: location of object within page
*
* On success, <page, offset> identifies block allocated
* and 0 is returned. On failure, <page, offset> is set to
* 0 and -ENOMEM is returned.
*
* On success, handle to the allocated object is returned,
* otherwise 0.
* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
*/
void *zs_malloc(struct zs_pool *pool, size_t size)
unsigned long zs_malloc(struct zs_pool *pool, size_t size)
{
void *obj;
unsigned long obj;
struct link_free *link;
int class_idx;
struct size_class *class;
@@ -587,7 +882,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
unsigned long m_objidx, m_offset;
if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
return NULL;
return 0;
class_idx = get_size_class_index(size);
class = &pool->size_class[class_idx];
@@ -600,14 +895,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
spin_unlock(&class->lock);
first_page = alloc_zspage(class, pool->flags);
if (unlikely(!first_page))
return NULL;
return 0;
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
spin_lock(&class->lock);
class->pages_allocated += class->zspage_order;
class->pages_allocated += class->pages_per_zspage;
}
obj = first_page->freelist;
obj = (unsigned long)first_page->freelist;
obj_handle_to_location(obj, &m_page, &m_objidx);
m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
@@ -626,7 +921,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
}
EXPORT_SYMBOL_GPL(zs_malloc);
void zs_free(struct zs_pool *pool, void *obj)
void zs_free(struct zs_pool *pool, unsigned long obj)
{
struct link_free *link;
struct page *first_page, *f_page;
@@ -653,13 +948,13 @@ void zs_free(struct zs_pool *pool, void *obj)
+ f_offset);
link->next = first_page->freelist;
kunmap_atomic(link);
first_page->freelist = obj;
first_page->freelist = (void *)obj;
first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
if (fullness == ZS_EMPTY)
class->pages_allocated -= class->zspage_order;
class->pages_allocated -= class->pages_per_zspage;
spin_unlock(&class->lock);
@@ -668,7 +963,22 @@ void zs_free(struct zs_pool *pool, void *obj)
}
EXPORT_SYMBOL_GPL(zs_free);
void *zs_map_object(struct zs_pool *pool, void *handle)
/**
* zs_map_object - get address of allocated object from handle.
* @pool: pool from which the object was allocated
* @handle: handle returned from zs_malloc
*
* Before using an object allocated from zs_malloc, it must be mapped using
* this function. When done with the object, it must be unmapped using
* zs_unmap_object.
*
* Only one object can be mapped per cpu at a time. There is no protection
* against nested mappings.
*
* This function returns with preemption and page faults disabled.
*/
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm)
{
struct page *page;
unsigned long obj_idx, off;
@@ -677,38 +987,40 @@ void *zs_map_object(struct zs_pool *pool, void *handle)
enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
struct page *pages[2];
BUG_ON(!handle);
/*
* Because we use per-cpu mapping areas shared among the
* pools/users, we can't allow mapping in interrupt context
* because it can corrupt another users mappings.
*/
BUG_ON(in_interrupt());
obj_handle_to_location(handle, &page, &obj_idx);
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
area = &get_cpu_var(zs_map_area);
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
area->vm_addr = kmap_atomic(page);
} else {
/* this object spans two pages */
struct page *nextp;
nextp = get_next_page(page);
BUG_ON(!nextp);
set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
/* We pre-allocated VM area so mapping can never fail */
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}
return area->vm_addr + off;
/* this object spans two pages */
pages[0] = page;
pages[1] = get_next_page(page);
BUG_ON(!pages[1]);
return __zs_map_object(area, pages, off, class->size);
}
EXPORT_SYMBOL_GPL(zs_map_object);
void zs_unmap_object(struct zs_pool *pool, void *handle)
void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct page *page;
unsigned long obj_idx, off;
@@ -726,13 +1038,16 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
off = obj_idx_to_offset(page, obj_idx, class->size);
area = &__get_cpu_var(zs_map_area);
if (off + class->size <= PAGE_SIZE) {
if (off + class->size <= PAGE_SIZE)
kunmap_atomic(area->vm_addr);
} else {
set_pte(area->vm_ptes[0], __pte(0));
set_pte(area->vm_ptes[1], __pte(0));
__flush_tlb_one((unsigned long)area->vm_addr);
__flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
else {
struct page *pages[2];
pages[0] = page;
pages[1] = get_next_page(page);
BUG_ON(!pages[1]);
__zs_unmap_object(area, pages, off, class->size);
}
put_cpu_var(zs_map_area);
}
@@ -749,3 +1064,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool)
return npages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
module_init(zs_init);
module_exit(zs_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");

View File

@@ -15,16 +15,28 @@
#include <linux/types.h>
/*
* zsmalloc mapping modes
*
* NOTE: These only make a difference when a mapped object spans pages
*/
enum zs_mapmode {
ZS_MM_RW, /* normal read-write mapping */
ZS_MM_RO, /* read-only (no copy-out at unmap time) */
ZS_MM_WO /* write-only (no copy-in at map time) */
};
struct zs_pool;
struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
struct zs_pool *zs_create_pool(gfp_t flags);
void zs_destroy_pool(struct zs_pool *pool);
void *zs_malloc(struct zs_pool *pool, size_t size);
void zs_free(struct zs_pool *pool, void *obj);
unsigned long zs_malloc(struct zs_pool *pool, size_t size);
void zs_free(struct zs_pool *pool, unsigned long obj);
void *zs_map_object(struct zs_pool *pool, void *handle);
void zs_unmap_object(struct zs_pool *pool, void *handle);
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm);
void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
u64 zs_get_total_size_bytes(struct zs_pool *pool);

View File

@@ -1,155 +0,0 @@
/*
* zsmalloc memory allocator
*
* Copyright (C) 2011 Nitin Gupta
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the license that better fits your requirements.
*
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*/
#ifndef _ZS_MALLOC_INT_H_
#define _ZS_MALLOC_INT_H_
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/types.h>
/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
* These two conditions ensure that any 'struct link_free' itself doesn't
* span more than 1 page which avoids complex case of mapping 2 pages simply
* to restore link_free pointer values.
*/
#define ZS_ALIGN 8
/*
* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
*/
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
/*
* Object location (<PFN>, <obj_idx>) is encoded as
* as single (void *) handle value.
*
* Note that object index <obj_idx> is relative to system
* page <PFN> it is stored in, so for each sub-page belonging
* to a zspage, obj_idx starts with 0.
*
* This is made more complicated by various memory models and PAE.
*/
#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
/*
* On systems with 4K page size, this gives 254 size classes! There is a
* trader-off here:
* - Large number of size classes is potentially wasteful as free page are
* spread across these classes
* - Small number of size classes causes large internal fragmentation
* - Probably its better to use specific size classes (empirically
* determined). NOTE: all those class sizes must be set as multiple of
* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
*
* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
* (reason above)
*/
#define ZS_SIZE_CLASS_DELTA 16
#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
ZS_SIZE_CLASS_DELTA + 1)
/*
* We do not maintain any list for completely empty or full pages
*/
enum fullness_group {
ZS_ALMOST_FULL,
ZS_ALMOST_EMPTY,
_ZS_NR_FULLNESS_GROUPS,
ZS_EMPTY,
ZS_FULL
};
/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
* n = number of allocated objects
* N = total number of objects zspage can store
* f = 1/fullness_threshold_frac
*
* Similarly, we assign zspage to:
* ZS_ALMOST_FULL when n > N / f
* ZS_EMPTY when n == 0
* ZS_FULL when n == N
*
* (see: fix_fullness_group())
*/
static const int fullness_threshold_frac = 4;
struct mapping_area {
struct vm_struct *vm;
pte_t *vm_ptes[2];
char *vm_addr;
};
struct size_class {
/*
* Size of objects stored in this class. Must be multiple
* of ZS_ALIGN.
*/
int size;
unsigned int index;
/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int zspage_order;
spinlock_t lock;
/* stats */
u64 pages_allocated;
struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
};
/*
* Placed within free objects to form a singly linked list.
* For every zspage, first_page->freelist gives head of this list.
*
* This must be power of 2 and less than or equal to ZS_ALIGN
*/
struct link_free {
/* Handle of next free chunk (encodes <PFN, obj_idx>) */
void *next;
};
struct zs_pool {
struct size_class size_class[ZS_SIZE_CLASSES];
gfp_t flags; /* allocation flags used when growing pool */
const char *name;
};
#endif

View File

@@ -1637,5 +1637,10 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */
static inline void page_mapcount_reset(struct page *page)
{
atomic_set(&(page)->_mapcount, -1);
}
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */