diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig
index 9d11a4cb99b..983314c4134 100644
--- a/drivers/staging/zram/Kconfig
+++ b/drivers/staging/zram/Kconfig
@@ -1,9 +1,6 @@
config ZRAM
tristate "Compressed RAM block device support"
- # X86 dependency is because zsmalloc uses non-portable pte/tlb
- # functions
- depends on BLOCK && SYSFS && X86
- select ZSMALLOC
+ depends on BLOCK && SYSFS && ZSMALLOC
select LZO_COMPRESS
select LZO_DECOMPRESS
default n
@@ -17,7 +14,7 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
- Project home: http://compcache.googlecode.com/
+ Project home:
config ZRAM_DEBUG
bool "Compressed RAM block device debug support"
diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile
index 7f4a3019e9c..cb0f9ced6a9 100644
--- a/drivers/staging/zram/Makefile
+++ b/drivers/staging/zram/Makefile
@@ -1,3 +1,3 @@
-zram-y := zram_drv.o zram_sysfs.o
+zram-y := zram_drv.o
obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt
index 5f75d298756..765d790ae83 100644
--- a/drivers/staging/zram/zram.txt
+++ b/drivers/staging/zram/zram.txt
@@ -23,17 +23,17 @@ Following shows a typical sequence of steps for using zram.
This creates 4 devices: /dev/zram{0,1,2,3}
(num_devices parameter is optional. Default: 1)
-2) Set Disksize (Optional):
- Set disk size by writing the value to sysfs node 'disksize'
- (in bytes). If disksize is not given, default value of 25%
- of RAM is used.
+2) Set Disksize
+ Set disk size by writing the value to sysfs node 'disksize'.
+ The value can be either in bytes or you can use mem suffixes.
+ Examples:
+ # Initialize /dev/zram0 with 50MB disksize
+ echo $((50*1024*1024)) > /sys/block/zram0/disksize
- # Initialize /dev/zram0 with 50MB disksize
- echo $((50*1024*1024)) > /sys/block/zram0/disksize
-
- NOTE: disksize cannot be changed if the disk contains any
- data. So, for such a disk, you need to issue 'reset' (see below)
- before you can change its disksize.
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/disksize
+ echo 512M > /sys/block/zram0/disksize
+ echo 1G > /sys/block/zram0/disksize
3) Activate:
mkswap /dev/zram0
@@ -65,8 +65,9 @@ Following shows a typical sequence of steps for using zram.
echo 1 > /sys/block/zram0/reset
echo 1 > /sys/block/zram1/reset
- (This frees all the memory allocated for the given device).
-
+ This frees all the memory allocated for the given device and
+ resets the disksize to zero. You must set the disksize again
+ before reusing the device.
Please report any problems at:
- Mailing list: linux-mm-cc at laptop dot org
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 685d612a627..8853d524354 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -32,61 +32,229 @@
#include
#include
#include
+#include
#include "zram_drv.h"
/* Globals */
static int zram_major;
-struct zram *zram_devices;
+static struct zram *zram_devices;
+
+/*
+ * We don't need to see memory allocation errors more than once every 1
+ * second to know that a problem is occurring.
+ */
+#define ALLOC_ERROR_LOG_RATE_MS 1000
/* Module params (documentation at end) */
-static unsigned int num_devices;
+static unsigned int num_devices = 1;
-static void zram_stat_inc(u32 *v)
+static inline struct zram *dev_to_zram(struct device *dev)
{
- *v = *v + 1;
+ return (struct zram *)dev_to_disk(dev)->private_data;
}
-static void zram_stat_dec(u32 *v)
+static ssize_t disksize_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- *v = *v - 1;
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n", zram->disksize);
}
-static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc)
+static ssize_t initstate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- spin_lock(&zram->stat64_lock);
- *v = *v + inc;
- spin_unlock(&zram->stat64_lock);
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%u\n", zram->init_done);
}
-static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec)
+static ssize_t num_reads_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- spin_lock(&zram->stat64_lock);
- *v = *v - dec;
- spin_unlock(&zram->stat64_lock);
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.num_reads));
}
-static void zram_stat64_inc(struct zram *zram, u64 *v)
+static ssize_t num_writes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- zram_stat64_add(zram, v, 1);
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.num_writes));
}
-static int zram_test_flag(struct zram *zram, u32 index,
+static ssize_t invalid_io_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.invalid_io));
+}
+
+static ssize_t notify_free_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.notify_free));
+}
+
+static ssize_t zero_pages_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%u\n", zram->stats.pages_zero);
+}
+
+static ssize_t orig_data_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
+}
+
+static ssize_t compr_data_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.compr_size));
+}
+
+static ssize_t mem_used_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u64 val = 0;
+ struct zram *zram = dev_to_zram(dev);
+ struct zram_meta *meta = zram->meta;
+
+ down_read(&zram->init_lock);
+ if (zram->init_done)
+ val = zs_get_total_size_bytes(meta->mem_pool);
+ up_read(&zram->init_lock);
+
+ return sprintf(buf, "%llu\n", val);
+}
+
+static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- return zram->table[index].flags & BIT(flag);
+ return meta->table[index].flags & BIT(flag);
}
-static void zram_set_flag(struct zram *zram, u32 index,
+static void zram_set_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].flags |= BIT(flag);
+ meta->table[index].flags |= BIT(flag);
}
-static void zram_clear_flag(struct zram *zram, u32 index,
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].flags &= ~BIT(flag);
+ meta->table[index].flags &= ~BIT(flag);
+}
+
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline int valid_io_request(struct zram *zram, struct bio *bio)
+{
+ u64 start, end, bound;
+
+ /* unaligned request */
+ if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ return 0;
+ if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ return 0;
+
+ start = bio->bi_sector;
+ end = start + (bio->bi_size >> SECTOR_SHIFT);
+ bound = zram->disksize >> SECTOR_SHIFT;
+ /* out of range range */
+ if (unlikely(start >= bound || end > bound || start > end))
+ return 0;
+
+ /* I/O request is valid */
+ return 1;
+}
+
+static void zram_meta_free(struct zram_meta *meta)
+{
+ zs_destroy_pool(meta->mem_pool);
+ kfree(meta->compress_workmem);
+ free_pages((unsigned long)meta->compress_buffer, 1);
+ vfree(meta->table);
+ kfree(meta);
+}
+
+static struct zram_meta *zram_meta_alloc(u64 disksize)
+{
+ size_t num_pages;
+ struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ goto out;
+
+ meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ if (!meta->compress_workmem)
+ goto free_meta;
+
+ meta->compress_buffer =
+ (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!meta->compress_buffer) {
+ pr_err("Error allocating compressor buffer space\n");
+ goto free_workmem;
+ }
+
+ num_pages = disksize >> PAGE_SHIFT;
+ meta->table = vzalloc(num_pages * sizeof(*meta->table));
+ if (!meta->table) {
+ pr_err("Error allocating zram address table\n");
+ goto free_buffer;
+ }
+
+ meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_NOWARN);
+ if (!meta->mem_pool) {
+ pr_err("Error creating memory pool\n");
+ goto free_table;
+ }
+
+ return meta;
+
+free_table:
+ vfree(meta->table);
+free_buffer:
+ free_pages((unsigned long)meta->compress_buffer, 1);
+free_workmem:
+ kfree(meta->compress_workmem);
+free_meta:
+ kfree(meta);
+ meta = NULL;
+out:
+ return meta;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
static int page_zero_filled(void *ptr)
@@ -104,352 +272,272 @@ static int page_zero_filled(void *ptr)
return 1;
}
-static void zram_set_disksize(struct zram *zram, size_t totalram_bytes)
-{
- if (!zram->disksize) {
- pr_info(
- "disk size not provided. You can use disksize_kb module "
- "param to specify size.\nUsing default: (%u%% of RAM).\n",
- default_disksize_perc_ram
- );
- zram->disksize = default_disksize_perc_ram *
- (totalram_bytes / 100);
- }
-
- if (zram->disksize > 2 * (totalram_bytes)) {
- pr_info(
- "There is little point creating a zram of greater than "
- "twice the size of memory since we expect a 2:1 compression "
- "ratio. Note that zram uses about 0.1%% of the size of "
- "the disk when not in use so a huge zram is "
- "wasteful.\n"
- "\tMemory Size: %zu kB\n"
- "\tSize you selected: %llu kB\n"
- "Continuing anyway ...\n",
- totalram_bytes >> 10, zram->disksize
- );
- }
-
- zram->disksize &= PAGE_MASK;
-}
-
-static void zram_free_page(struct zram *zram, size_t index)
-{
- void *handle = zram->table[index].handle;
-
- if (unlikely(!handle)) {
- /*
- * No memory is allocated for zero filled pages.
- * Simply clear zero page flag.
- */
- if (zram_test_flag(zram, index, ZRAM_ZERO)) {
- zram_clear_flag(zram, index, ZRAM_ZERO);
- zram_stat_dec(&zram->stats.pages_zero);
- }
- return;
- }
-
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- __free_page(handle);
- zram_clear_flag(zram, index, ZRAM_UNCOMPRESSED);
- zram_stat_dec(&zram->stats.pages_expand);
- goto out;
- }
-
- zs_free(zram->mem_pool, handle);
-
- if (zram->table[index].size <= PAGE_SIZE / 2)
- zram_stat_dec(&zram->stats.good_compress);
-
-out:
- zram_stat64_sub(zram, &zram->stats.compr_size,
- zram->table[index].size);
- zram_stat_dec(&zram->stats.pages_stored);
-
- zram->table[index].handle = NULL;
- zram->table[index].size = 0;
-}
-
static void handle_zero_page(struct bio_vec *bvec)
{
struct page *page = bvec->bv_page;
void *user_mem;
user_mem = kmap_atomic(page);
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
kunmap_atomic(user_mem);
flush_dcache_page(page);
}
-static void handle_uncompressed_page(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+static void zram_free_page(struct zram *zram, size_t index)
{
- struct page *page = bvec->bv_page;
- unsigned char *user_mem, *cmem;
+ struct zram_meta *meta = zram->meta;
+ unsigned long handle = meta->table[index].handle;
+ u16 size = meta->table[index].size;
- user_mem = kmap_atomic(page);
- cmem = kmap_atomic(zram->table[index].handle);
+ if (unlikely(!handle)) {
+ /*
+ * No memory is allocated for zero filled pages.
+ * Simply clear zero page flag.
+ */
+ if (zram_test_flag(meta, index, ZRAM_ZERO)) {
+ zram_clear_flag(meta, index, ZRAM_ZERO);
+ zram->stats.pages_zero--;
+ }
+ return;
+ }
- memcpy(user_mem + bvec->bv_offset, cmem + offset, bvec->bv_len);
- kunmap_atomic(cmem);
- kunmap_atomic(user_mem);
+ if (unlikely(size > max_zpage_size))
+ zram->stats.bad_compress--;
- flush_dcache_page(page);
+ zs_free(meta->mem_pool, handle);
+
+ if (size <= PAGE_SIZE / 2)
+ zram->stats.good_compress--;
+
+ atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
+ zram->stats.pages_stored--;
+
+ meta->table[index].handle = 0;
+ meta->table[index].size = 0;
}
-static inline int is_partial_io(struct bio_vec *bvec)
+static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
- return bvec->bv_len != PAGE_SIZE;
+ int ret = LZO_E_OK;
+ size_t clen = PAGE_SIZE;
+ unsigned char *cmem;
+ struct zram_meta *meta = zram->meta;
+ unsigned long handle = meta->table[index].handle;
+
+ if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+ clear_page(mem);
+ return 0;
+ }
+
+ cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
+ if (meta->table[index].size == PAGE_SIZE)
+ copy_page(mem, cmem);
+ else
+ ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
+ mem, &clen);
+ zs_unmap_object(meta->mem_pool, handle);
+
+ /* Should NEVER happen. Return bio error if it does. */
+ if (unlikely(ret != LZO_E_OK)) {
+ pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
+ atomic64_inc(&zram->stats.failed_reads);
+ return ret;
+ }
+
+ return 0;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio)
{
int ret;
- size_t clen;
struct page *page;
- struct zobj_header *zheader;
- unsigned char *user_mem, *cmem, *uncmem = NULL;
-
+ unsigned char *user_mem, *uncmem = NULL;
+ struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
- if (zram_test_flag(zram, index, ZRAM_ZERO)) {
+ if (unlikely(!meta->table[index].handle) ||
+ zram_test_flag(meta, index, ZRAM_ZERO)) {
handle_zero_page(bvec);
return 0;
}
- /* Requested page is not present in compressed area */
- if (unlikely(!zram->table[index].handle)) {
- pr_debug("Read before write: sector=%lu, size=%u",
- (ulong)(bio->bi_sector), bio->bi_size);
- handle_zero_page(bvec);
- return 0;
- }
-
- /* Page is stored uncompressed since it's incompressible */
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- handle_uncompressed_page(zram, bvec, index, offset);
- return 0;
- }
-
- if (is_partial_io(bvec)) {
+ if (is_partial_io(bvec))
/* Use a temporary buffer to decompress the page */
- uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!uncmem) {
- pr_info("Error allocating temp memory!\n");
- return -ENOMEM;
- }
- }
+ uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
user_mem = kmap_atomic(page);
if (!is_partial_io(bvec))
uncmem = user_mem;
- clen = PAGE_SIZE;
- cmem = zs_map_object(zram->mem_pool, zram->table[index].handle);
-
- ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
- zram->table[index].size,
- uncmem, &clen);
-
- if (is_partial_io(bvec)) {
- memcpy(user_mem + bvec->bv_offset, uncmem + offset,
- bvec->bv_len);
- kfree(uncmem);
+ if (!uncmem) {
+ pr_info("Unable to allocate temp memory\n");
+ ret = -ENOMEM;
+ goto out_cleanup;
}
- zs_unmap_object(zram->mem_pool, zram->table[index].handle);
- kunmap_atomic(user_mem);
-
+ ret = zram_decompress_page(zram, uncmem, index);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
- pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- zram_stat64_inc(zram, &zram->stats.failed_reads);
- return ret;
- }
+ if (unlikely(ret != LZO_E_OK))
+ goto out_cleanup;
+
+ if (is_partial_io(bvec))
+ memcpy(user_mem + bvec->bv_offset, uncmem + offset,
+ bvec->bv_len);
flush_dcache_page(page);
-
- return 0;
-}
-
-static int zram_read_before_write(struct zram *zram, char *mem, u32 index)
-{
- int ret;
- size_t clen = PAGE_SIZE;
- struct zobj_header *zheader;
- unsigned char *cmem;
-
- if (zram_test_flag(zram, index, ZRAM_ZERO) ||
- !zram->table[index].handle) {
- memset(mem, 0, PAGE_SIZE);
- return 0;
- }
-
- cmem = zs_map_object(zram->mem_pool, zram->table[index].handle);
-
- /* Page is stored uncompressed since it's incompressible */
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- memcpy(mem, cmem, PAGE_SIZE);
- kunmap_atomic(cmem);
- return 0;
- }
-
- ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
- zram->table[index].size,
- mem, &clen);
- zs_unmap_object(zram->mem_pool, zram->table[index].handle);
-
- /* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
- pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- zram_stat64_inc(zram, &zram->stats.failed_reads);
- return ret;
- }
-
- return 0;
+ ret = 0;
+out_cleanup:
+ kunmap_atomic(user_mem);
+ if (is_partial_io(bvec))
+ kfree(uncmem);
+ return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset)
{
- int ret;
- u32 store_offset;
+ int ret = 0;
size_t clen;
- void *handle;
- struct zobj_header *zheader;
- struct page *page, *page_store;
+ unsigned long handle;
+ struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
+ struct zram_meta *meta = zram->meta;
+ static unsigned long zram_rs_time;
page = bvec->bv_page;
- src = zram->compress_buffer;
+ src = meta->compress_buffer;
if (is_partial_io(bvec)) {
/*
* This is a partial IO. We need to read the full page
* before to write the changes.
*/
- uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
if (!uncmem) {
- pr_info("Error allocating temp memory!\n");
ret = -ENOMEM;
goto out;
}
- ret = zram_read_before_write(zram, uncmem, index);
- if (ret) {
- kfree(uncmem);
+ ret = zram_decompress_page(zram, uncmem, index);
+ if (ret)
goto out;
- }
}
- /*
- * System overwrites unused sectors. Free memory associated
- * with this sector now.
- */
- if (zram->table[index].handle ||
- zram_test_flag(zram, index, ZRAM_ZERO))
- zram_free_page(zram, index);
-
user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
+ if (is_partial_io(bvec)) {
memcpy(uncmem + offset, user_mem + bvec->bv_offset,
bvec->bv_len);
- else
+ kunmap_atomic(user_mem);
+ user_mem = NULL;
+ } else {
uncmem = user_mem;
+ }
if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
- zram_stat_inc(&zram->stats.pages_zero);
- zram_set_flag(zram, index, ZRAM_ZERO);
+ /* Free memory associated with this sector now. */
+ zram_free_page(zram, index);
+
+ zram->stats.pages_zero++;
+ zram_set_flag(meta, index, ZRAM_ZERO);
ret = 0;
goto out;
}
- ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
- zram->compress_workmem);
+ /*
+ * zram_slot_free_notify could miss free so that let's
+ * double check.
+ */
+ if (unlikely(meta->table[index].handle ||
+ zram_test_flag(meta, index, ZRAM_ZERO)))
+ zram_free_page(zram, index);
- kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
+ ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
+ meta->compress_workmem);
+
+ if (!is_partial_io(bvec)) {
+ kunmap_atomic(user_mem);
+ user_mem = NULL;
+ uncmem = NULL;
+ }
if (unlikely(ret != LZO_E_OK)) {
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
- /*
- * Page is incompressible. Store it as-is (uncompressed)
- * since we do not want to return too many disk write
- * errors which has side effect of hanging the system.
- */
if (unlikely(clen > max_zpage_size)) {
+ zram->stats.bad_compress++;
clen = PAGE_SIZE;
- page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
- if (unlikely(!page_store)) {
- pr_info("Error allocating memory for "
- "incompressible page: %u\n", index);
- ret = -ENOMEM;
- goto out;
- }
-
- store_offset = 0;
- zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
- zram_stat_inc(&zram->stats.pages_expand);
- handle = page_store;
- src = kmap_atomic(page);
- cmem = kmap_atomic(page_store);
- goto memstore;
+ src = NULL;
+ if (is_partial_io(bvec))
+ src = uncmem;
}
- handle = zs_malloc(zram->mem_pool, clen + sizeof(*zheader));
+ handle = zs_malloc(meta->mem_pool, clen);
if (!handle) {
- pr_info("Error allocating memory for compressed "
- "page: %u, size=%zu\n", index, clen);
+ if (printk_timed_ratelimit(&zram_rs_time,
+ ALLOC_ERROR_LOG_RATE_MS))
+ pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+ index, clen);
ret = -ENOMEM;
goto out;
}
- cmem = zs_map_object(zram->mem_pool, handle);
+ cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
-memstore:
-#if 0
- /* Back-reference needed for memory defragmentation */
- if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) {
- zheader = (struct zobj_header *)cmem;
- zheader->table_idx = index;
- cmem += sizeof(*zheader);
- }
-#endif
-
- memcpy(cmem, src, clen);
-
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- kunmap_atomic(cmem);
+ if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
+ src = kmap_atomic(page);
+ copy_page(cmem, src);
kunmap_atomic(src);
} else {
- zs_unmap_object(zram->mem_pool, handle);
+ memcpy(cmem, src, clen);
}
- zram->table[index].handle = handle;
- zram->table[index].size = clen;
+ zs_unmap_object(meta->mem_pool, handle);
+
+ /*
+ * Free memory associated with this sector
+ * before overwriting unused sectors.
+ */
+ zram_free_page(zram, index);
+
+ meta->table[index].handle = handle;
+ meta->table[index].size = clen;
/* Update stats */
- zram_stat64_add(zram, &zram->stats.compr_size, clen);
- zram_stat_inc(&zram->stats.pages_stored);
+ atomic64_add(clen, &zram->stats.compr_size);
+ zram->stats.pages_stored++;
if (clen <= PAGE_SIZE / 2)
- zram_stat_inc(&zram->stats.good_compress);
-
- return 0;
+ zram->stats.good_compress++;
out:
+ if (is_partial_io(bvec))
+ kfree(uncmem);
+
if (ret)
- zram_stat64_inc(zram, &zram->stats.failed_writes);
+ atomic64_inc(&zram->stats.failed_writes);
return ret;
}
+static void handle_pending_slot_free(struct zram *zram)
+{
+ struct zram_slot_free *free_rq;
+
+ spin_lock(&zram->slot_free_lock);
+ while (zram->slot_free_rq) {
+ free_rq = zram->slot_free_rq;
+ zram->slot_free_rq = free_rq->next;
+ zram_free_page(zram, free_rq->index);
+ kfree(free_rq);
+ }
+ spin_unlock(&zram->slot_free_lock);
+}
+
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, struct bio *bio, int rw)
{
@@ -457,10 +545,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (rw == READ) {
down_read(&zram->lock);
+ handle_pending_slot_free(zram);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
up_read(&zram->lock);
} else {
down_write(&zram->lock);
+ handle_pending_slot_free(zram);
ret = zram_bvec_write(zram, bvec, index, offset);
up_write(&zram->lock);
}
@@ -468,11 +558,124 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+ size_t index;
+ struct zram_meta *meta;
+
+ flush_work(&zram->free_work);
+
+ down_write(&zram->init_lock);
+ if (!zram->init_done) {
+ up_write(&zram->init_lock);
+ return;
+ }
+
+ meta = zram->meta;
+ zram->init_done = 0;
+
+ /* Free all pages that are still in this zram device */
+ for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
+ unsigned long handle = meta->table[index].handle;
+ if (!handle)
+ continue;
+
+ zs_free(meta->mem_pool, handle);
+ }
+
+ zram_meta_free(zram->meta);
+ zram->meta = NULL;
+ /* Reset stats */
+ memset(&zram->stats, 0, sizeof(zram->stats));
+
+ zram->disksize = 0;
+ if (reset_capacity)
+ set_capacity(zram->disk, 0);
+ up_write(&zram->init_lock);
+}
+
+static void zram_init_device(struct zram *zram, struct zram_meta *meta)
+{
+ if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
+ pr_info(
+ "There is little point creating a zram of greater than "
+ "twice the size of memory since we expect a 2:1 compression "
+ "ratio. Note that zram uses about 0.1%% of the size of "
+ "the disk when not in use so a huge zram is "
+ "wasteful.\n"
+ "\tMemory Size: %lu kB\n"
+ "\tSize you selected: %llu kB\n"
+ "Continuing anyway ...\n",
+ (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
+ );
+ }
+
+ /* zram devices sort of resembles non-rotational disks */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
+
+ zram->meta = meta;
+ zram->init_done = 1;
+
+ pr_debug("Initialization done!\n");
+}
+
+static ssize_t disksize_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ u64 disksize;
+ struct zram_meta *meta;
+ struct zram *zram = dev_to_zram(dev);
+
+ disksize = memparse(buf, NULL);
+ if (!disksize)
+ return -EINVAL;
+
+ disksize = PAGE_ALIGN(disksize);
+ meta = zram_meta_alloc(disksize);
+ down_write(&zram->init_lock);
+ if (zram->init_done) {
+ up_write(&zram->init_lock);
+ zram_meta_free(meta);
+ pr_info("Cannot change disksize for initialized device\n");
+ return -EBUSY;
+ }
+
+ zram->disksize = disksize;
+ set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+ zram_init_device(zram, meta);
+ up_write(&zram->init_lock);
+
+ return len;
+}
+
+static ssize_t reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ int ret;
+ unsigned short do_reset;
+ struct zram *zram;
+ struct block_device *bdev;
+
+ zram = dev_to_zram(dev);
+ bdev = bdget_disk(zram->disk, 0);
+
+ /* Do not reset an active device! */
+ if (bdev->bd_holders)
+ return -EBUSY;
+
+ ret = kstrtou16(buf, 10, &do_reset);
+ if (ret)
+ return ret;
+
+ if (!do_reset)
+ return -EINVAL;
+
+ /* Make sure all pending I/O is finished */
+ if (bdev)
+ fsync_bdev(bdev);
+
+ zram_reset_device(zram, true);
+ return len;
}
static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
@@ -483,10 +686,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
switch (rw) {
case READ:
- zram_stat64_inc(zram, &zram->stats.num_reads);
+ atomic64_inc(&zram->stats.num_reads);
break;
case WRITE:
- zram_stat64_inc(zram, &zram->stats.num_writes);
+ atomic64_inc(&zram->stats.num_writes);
break;
}
@@ -530,23 +733,6 @@ out:
bio_io_error(bio);
}
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram, struct bio *bio)
-{
- if (unlikely(
- (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) ||
- (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) ||
- (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) {
-
- return 0;
- }
-
- /* I/O request is valid */
- return 1;
-}
-
/*
* Handler function for all zram I/O requests.
*/
@@ -554,16 +740,13 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
- if (unlikely(!zram->init_done) && zram_init_device(zram))
- goto error;
-
down_read(&zram->init_lock);
if (unlikely(!zram->init_done))
- goto error_unlock;
+ goto error;
if (!valid_io_request(zram, bio)) {
- zram_stat64_inc(zram, &zram->stats.invalid_io);
- goto error_unlock;
+ atomic64_inc(&zram->stats.invalid_io);
+ goto error;
}
__zram_make_request(zram, bio, bio_data_dir(bio));
@@ -571,129 +754,45 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
return;
-error_unlock:
- up_read(&zram->init_lock);
error:
+ up_read(&zram->init_lock);
bio_io_error(bio);
}
-void __zram_reset_device(struct zram *zram)
+static void zram_slot_free(struct work_struct *work)
{
- size_t index;
+ struct zram *zram;
- zram->init_done = 0;
-
- /* Free various per-device buffers */
- kfree(zram->compress_workmem);
- free_pages((unsigned long)zram->compress_buffer, 1);
-
- zram->compress_workmem = NULL;
- zram->compress_buffer = NULL;
-
- /* Free all pages that are still in this zram device */
- for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
- void *handle = zram->table[index].handle;
- if (!handle)
- continue;
-
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)))
- __free_page(handle);
- else
- zs_free(zram->mem_pool, handle);
- }
-
- vfree(zram->table);
- zram->table = NULL;
-
- zs_destroy_pool(zram->mem_pool);
- zram->mem_pool = NULL;
-
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
-
- zram->disksize = 0;
+ zram = container_of(work, struct zram, free_work);
+ down_write(&zram->lock);
+ handle_pending_slot_free(zram);
+ up_write(&zram->lock);
}
-void zram_reset_device(struct zram *zram)
+static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq)
{
- down_write(&zram->init_lock);
- __zram_reset_device(zram);
- up_write(&zram->init_lock);
-}
-
-int zram_init_device(struct zram *zram)
-{
- int ret;
- size_t num_pages;
-
- down_write(&zram->init_lock);
-
- if (zram->init_done) {
- up_write(&zram->init_lock);
- return 0;
- }
-
- zram_set_disksize(zram, totalram_pages << PAGE_SHIFT);
-
- zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!zram->compress_workmem) {
- pr_err("Error allocating compressor working memory!\n");
- ret = -ENOMEM;
- goto fail_no_table;
- }
-
- zram->compress_buffer =
- (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
- if (!zram->compress_buffer) {
- pr_err("Error allocating compressor buffer space\n");
- ret = -ENOMEM;
- goto fail_no_table;
- }
-
- num_pages = zram->disksize >> PAGE_SHIFT;
- zram->table = vzalloc(num_pages * sizeof(*zram->table));
- if (!zram->table) {
- pr_err("Error allocating zram address table\n");
- ret = -ENOMEM;
- goto fail_no_table;
- }
-
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
-
- /* zram devices sort of resembles non-rotational disks */
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
-
- zram->mem_pool = zs_create_pool("zram", GFP_NOIO | __GFP_HIGHMEM);
- if (!zram->mem_pool) {
- pr_err("Error creating memory pool\n");
- ret = -ENOMEM;
- goto fail;
- }
-
- zram->init_done = 1;
- up_write(&zram->init_lock);
-
- pr_debug("Initialization done!\n");
- return 0;
-
-fail_no_table:
- /* To prevent accessing table entries during cleanup */
- zram->disksize = 0;
-fail:
- __zram_reset_device(zram);
- up_write(&zram->init_lock);
- pr_err("Initialization failed: err=%d\n", ret);
- return ret;
+ spin_lock(&zram->slot_free_lock);
+ free_rq->next = zram->slot_free_rq;
+ zram->slot_free_rq = free_rq;
+ spin_unlock(&zram->slot_free_lock);
}
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
+ struct zram_slot_free *free_rq;
zram = bdev->bd_disk->private_data;
- zram_free_page(zram, index);
- zram_stat64_inc(zram, &zram->stats.notify_free);
+ atomic64_inc(&zram->stats.notify_free);
+
+ free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC);
+ if (!free_rq)
+ return;
+
+ free_rq->index = index;
+ add_slot_free(zram, free_rq);
+ schedule_work(&zram->free_work);
}
static const struct block_device_operations zram_devops = {
@@ -701,19 +800,53 @@ static const struct block_device_operations zram_devops = {
.owner = THIS_MODULE
};
+static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
+ disksize_show, disksize_store);
+static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
+static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
+static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
+static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
+static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
+static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
+static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
+static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
+static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
+static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
+
+static struct attribute *zram_disk_attrs[] = {
+ &dev_attr_disksize.attr,
+ &dev_attr_initstate.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_num_reads.attr,
+ &dev_attr_num_writes.attr,
+ &dev_attr_invalid_io.attr,
+ &dev_attr_notify_free.attr,
+ &dev_attr_zero_pages.attr,
+ &dev_attr_orig_data_size.attr,
+ &dev_attr_compr_data_size.attr,
+ &dev_attr_mem_used_total.attr,
+ NULL,
+};
+
+static struct attribute_group zram_disk_attr_group = {
+ .attrs = zram_disk_attrs,
+};
+
static int create_device(struct zram *zram, int device_id)
{
- int ret = 0;
+ int ret = -ENOMEM;
init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
- spin_lock_init(&zram->stat64_lock);
+
+ INIT_WORK(&zram->free_work, zram_slot_free);
+ spin_lock_init(&zram->slot_free_lock);
+ zram->slot_free_rq = NULL;
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- ret = -ENOMEM;
goto out;
}
@@ -723,11 +856,9 @@ static int create_device(struct zram *zram, int device_id)
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
- blk_cleanup_queue(zram->queue);
- pr_warning("Error allocating disk structure for device %d\n",
+ pr_warn("Error allocating disk structure for device %d\n",
device_id);
- ret = -ENOMEM;
- goto out;
+ goto out_free_queue;
}
zram->disk->major = zram_major;
@@ -755,12 +886,18 @@ static int create_device(struct zram *zram, int device_id)
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
&zram_disk_attr_group);
if (ret < 0) {
- pr_warning("Error creating sysfs group");
- goto out;
+ pr_warn("Error creating sysfs group");
+ goto out_free_disk;
}
zram->init_done = 0;
+ return 0;
+out_free_disk:
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+out_free_queue:
+ blk_cleanup_queue(zram->queue);
out:
return ret;
}
@@ -779,17 +916,12 @@ static void destroy_device(struct zram *zram)
blk_cleanup_queue(zram->queue);
}
-unsigned int zram_get_num_devices(void)
-{
- return num_devices;
-}
-
static int __init zram_init(void)
{
int ret, dev_id;
if (num_devices > max_num_devices) {
- pr_warning("Invalid value for num_devices: %u\n",
+ pr_warn("Invalid value for num_devices: %u\n",
num_devices);
ret = -EINVAL;
goto out;
@@ -797,18 +929,12 @@ static int __init zram_init(void)
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
- pr_warning("Unable to get major number\n");
+ pr_warn("Unable to get major number\n");
ret = -EBUSY;
goto out;
}
- if (!num_devices) {
- pr_info("num_devices not specified. Using default: 1\n");
- num_devices = 1;
- }
-
/* Allocate the device array and initialize each one */
- pr_info("Creating %u devices ...\n", num_devices);
zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
if (!zram_devices) {
ret = -ENOMEM;
@@ -821,6 +947,8 @@ static int __init zram_init(void)
goto free_devices;
}
+ pr_info("Created %u device(s) ...\n", num_devices);
+
return 0;
free_devices:
@@ -841,9 +969,13 @@ static void __exit zram_exit(void)
for (i = 0; i < num_devices; i++) {
zram = &zram_devices[i];
+ get_disk(zram->disk);
destroy_device(zram);
- if (zram->init_done)
- zram_reset_device(zram);
+ /*
+ * Shouldn't access zram->disk after destroy_device
+ * because destroy_device already released zram->disk.
+ */
+ zram_reset_device(zram, false);
}
unregister_blkdev(zram_major, "zram");
@@ -852,12 +984,13 @@ static void __exit zram_exit(void)
pr_debug("Cleanup done!\n");
}
-module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
-
module_init(zram_init);
module_exit(zram_exit);
+module_param(num_devices, uint, 0);
+MODULE_PARM_DESC(num_devices, "Number of zram devices");
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta ");
MODULE_DESCRIPTION("Compressed RAM Block Device");
+MODULE_ALIAS("devname:zram");
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index fbe8ac98704..508a19f444f 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -26,33 +26,18 @@
*/
static const unsigned max_num_devices = 32;
-/*
- * Stored at beginning of each compressed object.
- *
- * It stores back-reference to table entry which points to this
- * object. This is required to support memory defragmentation.
- */
-struct zobj_header {
-#if 0
- u32 table_idx;
-#endif
-};
-
/*-- Configurable parameters */
-/* Default zram disk size: 25% of total RAM */
-static const unsigned default_disksize_perc_ram = 25;
-
/*
* Pages that compress to size greater than this are stored
* uncompressed in memory.
*/
-static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
+static const size_t max_zpage_size = PAGE_SIZE / 10 * 9;
/*
* NOTE: max_zpage_size must be less than or equal to:
- * ZS_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
- * otherwise, xv_malloc() would always return failure.
+ * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
+ * always return failure.
*/
/*-- End of configurable params */
@@ -68,9 +53,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
- /* Page is stored uncompressed */
- ZRAM_UNCOMPRESSED,
-
/* Page consists entirely of zeros */
ZRAM_ZERO,
@@ -81,34 +63,51 @@ enum zram_pageflags {
/* Allocated for each disk page */
struct table {
- void *handle;
+ unsigned long handle;
u16 size; /* object size (excluding header) */
u8 count; /* object ref count (not yet used) */
u8 flags;
-} __attribute__((aligned(4)));
+} __aligned(4);
+/*
+ * All 64bit fields should only be manipulated by 64bit atomic accessors.
+ * All modifications to 32bit counter should be protected by zram->lock.
+ */
struct zram_stats {
- u64 compr_size; /* compressed size of pages stored */
- u64 num_reads; /* failed + successful */
- u64 num_writes; /* --do-- */
- u64 failed_reads; /* should NEVER! happen */
- u64 failed_writes; /* can happen when memory is too low */
- u64 invalid_io; /* non-page-aligned I/O requests */
- u64 notify_free; /* no. of swap slot free notifications */
+ atomic64_t compr_size; /* compressed size of pages stored */
+ atomic64_t num_reads; /* failed + successful */
+ atomic64_t num_writes; /* --do-- */
+ atomic64_t failed_reads; /* should NEVER! happen */
+ atomic64_t failed_writes; /* can happen when memory is too low */
+ atomic64_t invalid_io; /* non-page-aligned I/O requests */
+ atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
- u32 pages_expand; /* % of incompressible pages */
+ u32 bad_compress; /* % of pages with compression ratio>=75% */
};
-struct zram {
- struct zs_pool *mem_pool;
+struct zram_meta {
void *compress_workmem;
void *compress_buffer;
struct table *table;
- spinlock_t stat64_lock; /* protect 64-bit stats */
- struct rw_semaphore lock; /* protect compression buffers and table
- * against concurrent read and writes */
+ struct zs_pool *mem_pool;
+};
+
+struct zram_slot_free {
+ unsigned long index;
+ struct zram_slot_free *next;
+};
+
+struct zram {
+ struct zram_meta *meta;
+ struct rw_semaphore lock; /* protect compression buffers, table,
+ * 32bit stat counters against concurrent
+ * notifications, reads and writes */
+
+ struct work_struct free_work; /* handle pending free request */
+ struct zram_slot_free *slot_free_rq; /* list head of free request */
+
struct request_queue *queue;
struct gendisk *disk;
int init_done;
@@ -119,17 +118,8 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
+ spinlock_t slot_free_lock;
struct zram_stats stats;
};
-
-extern struct zram *zram_devices;
-unsigned int zram_get_num_devices(void);
-#ifdef CONFIG_SYSFS
-extern struct attribute_group zram_disk_attr_group;
-#endif
-
-extern int zram_init_device(struct zram *zram);
-extern void __zram_reset_device(struct zram *zram);
-
#endif
diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
deleted file mode 100644
index a7f37717552..00000000000
--- a/drivers/staging/zram/zram_sysfs.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Compressed RAM block device
- *
- * Copyright (C) 2008, 2009, 2010 Nitin Gupta
- *
- * This code is released using a dual license strategy: BSD/GPL
- * You can choose the licence that better fits your requirements.
- *
- * Released under the terms of 3-clause BSD License
- * Released under the terms of GNU General Public License Version 2.0
- *
- * Project home: http://compcache.googlecode.com/
- */
-
-#include
-#include
-#include
-
-#include "zram_drv.h"
-
-static u64 zram_stat64_read(struct zram *zram, u64 *v)
-{
- u64 val;
-
- spin_lock(&zram->stat64_lock);
- val = *v;
- spin_unlock(&zram->stat64_lock);
-
- return val;
-}
-
-static struct zram *dev_to_zram(struct device *dev)
-{
- int i;
- struct zram *zram = NULL;
-
- for (i = 0; i < zram_get_num_devices(); i++) {
- zram = &zram_devices[i];
- if (disk_to_dev(zram->disk) == dev)
- break;
- }
-
- return zram;
-}
-
-static ssize_t disksize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n", zram->disksize);
-}
-
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- int ret;
- u64 disksize;
- struct zram *zram = dev_to_zram(dev);
-
- ret = kstrtoull(buf, 10, &disksize);
- if (ret)
- return ret;
-
- down_write(&zram->init_lock);
- if (zram->init_done) {
- up_write(&zram->init_lock);
- pr_info("Cannot change disksize for initialized device\n");
- return -EBUSY;
- }
-
- zram->disksize = PAGE_ALIGN(disksize);
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
-
- return len;
-}
-
-static ssize_t initstate_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", zram->init_done);
-}
-
-static ssize_t reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- int ret;
- unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
-
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- /* Do not reset an active device! */
- if (bdev->bd_holders)
- return -EBUSY;
-
- ret = kstrtou16(buf, 10, &do_reset);
- if (ret)
- return ret;
-
- if (!do_reset)
- return -EINVAL;
-
- /* Make sure all pending I/O is finished */
- if (bdev)
- fsync_bdev(bdev);
-
- down_write(&zram->init_lock);
- if (zram->init_done)
- __zram_reset_device(zram);
- up_write(&zram->init_lock);
-
- return len;
-}
-
-static ssize_t num_reads_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_reads));
-}
-
-static ssize_t num_writes_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_writes));
-}
-
-static ssize_t invalid_io_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.invalid_io));
-}
-
-static ssize_t notify_free_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.notify_free));
-}
-
-static ssize_t zero_pages_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", zram->stats.pages_zero);
-}
-
-static ssize_t orig_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
-}
-
-static ssize_t compr_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.compr_size));
-}
-
-static ssize_t mem_used_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u64 val = 0;
- struct zram *zram = dev_to_zram(dev);
-
- if (zram->init_done) {
- val = zs_get_total_size_bytes(zram->mem_pool) +
- ((u64)(zram->stats.pages_expand) << PAGE_SHIFT);
- }
-
- return sprintf(buf, "%llu\n", val);
-}
-
-static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
- disksize_show, disksize_store);
-static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
-static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
-static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
-static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
-static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
-static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
-static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
-static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
-static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
-
-static struct attribute *zram_disk_attrs[] = {
- &dev_attr_disksize.attr,
- &dev_attr_initstate.attr,
- &dev_attr_reset.attr,
- &dev_attr_num_reads.attr,
- &dev_attr_num_writes.attr,
- &dev_attr_invalid_io.attr,
- &dev_attr_notify_free.attr,
- &dev_attr_zero_pages.attr,
- &dev_attr_orig_data_size.attr,
- &dev_attr_compr_data_size.attr,
- &dev_attr_mem_used_total.attr,
- NULL,
-};
-
-struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
-};
diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig
index a5ab7200626..7fab032298f 100644
--- a/drivers/staging/zsmalloc/Kconfig
+++ b/drivers/staging/zsmalloc/Kconfig
@@ -1,9 +1,5 @@
config ZSMALLOC
- tristate "Memory allocator for compressed pages"
- # X86 dependency is because of the use of __flush_tlb_one and set_pte
- # in zsmalloc-main.c.
- # TODO: convert these to portable functions
- depends on X86
+ bool "Memory allocator for compressed pages"
default n
help
zsmalloc is a slab-based memory allocator designed to store
diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile
index b134848a590..ceadfa41654 100644
--- a/drivers/staging/zsmalloc/Makefile
+++ b/drivers/staging/zsmalloc/Makefile
@@ -1,3 +1,4 @@
+CFLAGS_zsmalloc-main.o := -Wno-error=implicit-function-declaration -Wno-implicit-function-declaration
zsmalloc-y := zsmalloc-main.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 917461c6601..3b950e5a918 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -10,6 +10,54 @@
* Released under the terms of GNU General Public License Version 2.0
*/
+
+/*
+ * This allocator is designed for use with zcache and zram. Thus, the
+ * allocator is supposed to work well under low memory conditions. In
+ * particular, it never attempts higher order page allocation which is
+ * very likely to fail under memory pressure. On the other hand, if we
+ * just use single (0-order) pages, it would suffer from very high
+ * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
+ * an entire page. This was one of the major issues with its predecessor
+ * (xvmalloc).
+ *
+ * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+ * and links them together using various 'struct page' fields. These linked
+ * pages act as a single higher-order page i.e. an object can span 0-order
+ * page boundaries. The code refers to these linked pages as a single entity
+ * called zspage.
+ *
+ * Following is how we use various fields and flags of underlying
+ * struct page(s) to form a zspage.
+ *
+ * Usage of struct page fields:
+ * page->first_page: points to the first component (0-order) page
+ * page->index (union with page->freelist): offset of the first object
+ * starting in this page. For the first page, this is
+ * always 0, so we use this field (aka freelist) to point
+ * to the first free object in zspage.
+ * page->lru: links together all component pages (except the first page)
+ * of a zspage
+ *
+ * For _first_ page only:
+ *
+ * page->private (union with page->first_page): refers to the
+ * component page after the first page
+ * page->freelist: points to the first free object in zspage.
+ * Free objects are linked together using in-place
+ * metadata.
+ * page->objects: maximum number of objects we can store in this
+ * zspage (class->zspage_order * PAGE_SIZE / class->size)
+ * page->lru: links together first pages of various zspages.
+ * Basically forming list of zspages in a fullness group.
+ * page->mapping: class index and fullness group of the zspage
+ *
+ * Usage of struct page flags:
+ * PG_private: identifies the first component page
+ * PG_private2: identifies the last component page
+ *
+ */
+
#ifdef CONFIG_ZSMALLOC_DEBUG
#define DEBUG
#endif
@@ -27,9 +75,139 @@
#include
#include
#include
+#include
+#include
+#include
#include "zsmalloc.h"
-#include "zsmalloc_int.h"
+
+/*
+ * This must be power of 2 and greater than of equal to sizeof(link_free).
+ * These two conditions ensure that any 'struct link_free' itself doesn't
+ * span more than 1 page which avoids complex case of mapping 2 pages simply
+ * to restore link_free pointer values.
+ */
+#define ZS_ALIGN 8
+
+/*
+ * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
+ * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
+ */
+#define ZS_MAX_ZSPAGE_ORDER 2
+#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
+
+/*
+ * Object location (, ) is encoded as
+ * as single (void *) handle value.
+ *
+ * Note that object index is relative to system
+ * page it is stored in, so for each sub-page belonging
+ * to a zspage, obj_idx starts with 0.
+ *
+ * This is made more complicated by various memory models and PAE.
+ */
+
+#ifndef MAX_PHYSMEM_BITS
+#ifdef CONFIG_HIGHMEM64G
+#define MAX_PHYSMEM_BITS 36
+#else /* !CONFIG_HIGHMEM64G */
+/*
+ * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
+ * be PAGE_SHIFT
+ */
+#define MAX_PHYSMEM_BITS BITS_PER_LONG
+#endif
+#endif
+#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
+#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))
+/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
+#define ZS_MIN_ALLOC_SIZE \
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
+
+/*
+ * On systems with 4K page size, this gives 254 size classes! There is a
+ * trader-off here:
+ * - Large number of size classes is potentially wasteful as free page are
+ * spread across these classes
+ * - Small number of size classes causes large internal fragmentation
+ * - Probably its better to use specific size classes (empirically
+ * determined). NOTE: all those class sizes must be set as multiple of
+ * ZS_ALIGN to make sure link_free itself never has to span 2 pages.
+ *
+ * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
+ * (reason above)
+ */
+#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8)
+#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
+ ZS_SIZE_CLASS_DELTA + 1)
+
+/*
+ * We do not maintain any list for completely empty or full pages
+ */
+enum fullness_group {
+ ZS_ALMOST_FULL,
+ ZS_ALMOST_EMPTY,
+ _ZS_NR_FULLNESS_GROUPS,
+
+ ZS_EMPTY,
+ ZS_FULL
+};
+
+/*
+ * We assign a page to ZS_ALMOST_EMPTY fullness group when:
+ * n <= N / f, where
+ * n = number of allocated objects
+ * N = total number of objects zspage can store
+ * f = 1/fullness_threshold_frac
+ *
+ * Similarly, we assign zspage to:
+ * ZS_ALMOST_FULL when n > N / f
+ * ZS_EMPTY when n == 0
+ * ZS_FULL when n == N
+ *
+ * (see: fix_fullness_group())
+ */
+static const int fullness_threshold_frac = 4;
+
+struct size_class {
+ /*
+ * Size of objects stored in this class. Must be multiple
+ * of ZS_ALIGN.
+ */
+ int size;
+ unsigned int index;
+
+ /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
+ int pages_per_zspage;
+
+ spinlock_t lock;
+
+ /* stats */
+ u64 pages_allocated;
+
+ struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
+};
+
+/*
+ * Placed within free objects to form a singly linked list.
+ * For every zspage, first_page->freelist gives head of this list.
+ *
+ * This must be power of 2 and less than or equal to ZS_ALIGN
+ */
+struct link_free {
+ /* Handle of next free chunk (encodes ) */
+ void *next;
+};
+
+struct zs_pool {
+ struct size_class size_class[ZS_SIZE_CLASSES];
+
+ gfp_t flags; /* allocation flags used when growing pool */
+};
/*
* A zspage's class index and fullness group
@@ -40,17 +218,39 @@
#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
+/*
+ * By default, zsmalloc uses a copy-based object mapping method to access
+ * allocations that span two pages. However, if a particular architecture
+ * performs VM mapping faster than copying, then it should be added here
+ * so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use
+ * page table mapping rather than copying for object mapping.
+ */
+#if defined(CONFIG_ARM) && !defined(MODULE)
+#define USE_PGTABLE_MAPPING
+#endif
+
+struct mapping_area {
+#ifdef USE_PGTABLE_MAPPING
+ struct vm_struct *vm; /* vm area for mapping object that span pages */
+#else
+ char *vm_buf; /* copy buffer for objects that span pages */
+#endif
+ char *vm_addr; /* address of kmap_atomic()'ed pages */
+ enum zs_mapmode vm_mm; /* mapping mode */
+};
+
+
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
static int is_first_page(struct page *page)
{
- return test_bit(PG_private, &page->flags);
+ return PagePrivate(page);
}
static int is_last_page(struct page *page)
{
- return test_bit(PG_private_2, &page->flags);
+ return PagePrivate2(page);
}
static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
@@ -180,7 +380,7 @@ out:
* link together 3 PAGE_SIZE sized pages to form a zspage
* since then we can perfectly fit in 8 such objects.
*/
-static int get_zspage_order(int class_size)
+static int get_pages_per_zspage(int class_size)
{
int i, max_usedpc = 0;
/* zspage order which gives maximum used size per KB */
@@ -223,14 +423,19 @@ static struct page *get_next_page(struct page *page)
if (is_last_page(page))
next = NULL;
else if (is_first_page(page))
- next = (struct page *)page->private;
+ next = (struct page *)page_private(page);
else
next = list_entry(page->lru.next, struct page, lru);
return next;
}
-/* Encode as a single handle value */
+/*
+ * Encode as a single handle value.
+ * On hardware platforms with physical memory starting at 0x0 the pfn
+ * could be 0 so we ensure that the handle will never be 0 by adjusting the
+ * encoded obj_idx value before encoding.
+ */
static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
{
unsigned long handle;
@@ -241,19 +446,21 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
}
handle = page_to_pfn(page) << OBJ_INDEX_BITS;
- handle |= (obj_idx & OBJ_INDEX_MASK);
+ handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);
return (void *)handle;
}
-/* Decode pair from the given object handle */
-static void obj_handle_to_location(void *handle, struct page **page,
+/*
+ * Decode pair from the given object handle. We adjust the
+ * decoded obj_idx back to its original value since it was adjusted in
+ * obj_location_to_handle().
+ */
+static void obj_handle_to_location(unsigned long handle, struct page **page,
unsigned long *obj_idx)
{
- unsigned long hval = (unsigned long)handle;
-
- *page = pfn_to_page(hval >> OBJ_INDEX_BITS);
- *obj_idx = hval & OBJ_INDEX_MASK;
+ *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
+ *obj_idx = (handle & OBJ_INDEX_MASK) - 1;
}
static unsigned long obj_idx_to_offset(struct page *page,
@@ -274,7 +481,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page->mapping = NULL;
page->freelist = NULL;
- reset_page_mapcount(page);
+ page_mapcount_reset(page);
}
static void free_zspage(struct page *first_page)
@@ -354,7 +561,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
{
int i, error;
- struct page *first_page = NULL;
+ struct page *first_page = NULL, *uninitialized_var(prev_page);
/*
* Allocate individual pages and link them together as:
@@ -368,8 +575,8 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
* identify the last page.
*/
error = -ENOMEM;
- for (i = 0; i < class->zspage_order; i++) {
- struct page *page, *prev_page;
+ for (i = 0; i < class->pages_per_zspage; i++) {
+ struct page *page;
page = alloc_page(flags);
if (!page)
@@ -377,20 +584,19 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
INIT_LIST_HEAD(&page->lru);
if (i == 0) { /* first page */
- set_bit(PG_private, &page->flags);
+ SetPagePrivate(page);
set_page_private(page, 0);
first_page = page;
first_page->inuse = 0;
}
if (i == 1)
- first_page->private = (unsigned long)page;
+ set_page_private(first_page, (unsigned long)page);
if (i >= 1)
page->first_page = first_page;
if (i >= 2)
list_add(&page->lru, &prev_page->lru);
- if (i == class->zspage_order - 1) /* last page */
- set_bit(PG_private_2, &page->flags);
-
+ if (i == class->pages_per_zspage - 1) /* last page */
+ SetPagePrivate2(page);
prev_page = page;
}
@@ -398,7 +604,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
first_page->freelist = obj_location_to_handle(first_page, 0);
/* Maximum number of objects we can store in this zspage */
- first_page->objects = class->zspage_order * PAGE_SIZE / class->size;
+ first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
error = 0; /* Success */
@@ -425,34 +631,141 @@ static struct page *find_get_zspage(struct size_class *class)
return page;
}
+#ifdef USE_PGTABLE_MAPPING
+static inline int __zs_cpu_up(struct mapping_area *area)
+{
+ /*
+ * Make sure we don't leak memory if a cpu UP notification
+ * and zs_init() race and both call zs_cpu_up() on the same cpu
+ */
+ if (area->vm)
+ return 0;
+ area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+ if (!area->vm)
+ return -ENOMEM;
+ return 0;
+}
-/*
- * If this becomes a separate module, register zs_init() with
- * module_init(), zs_exit with module_exit(), and remove zs_initialized
-*/
-static int zs_initialized;
+static inline void __zs_cpu_down(struct mapping_area *area)
+{
+ if (area->vm)
+ free_vm_area(area->vm);
+ area->vm = NULL;
+}
+
+static inline void *__zs_map_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+ area->vm_addr = area->vm->addr;
+ return area->vm_addr + off;
+}
+
+static inline void __zs_unmap_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ unsigned long addr = (unsigned long)area->vm_addr;
+
+ unmap_kernel_range(addr, PAGE_SIZE * 2);
+}
+
+#else /* USE_PGTABLE_MAPPING */
+
+static inline int __zs_cpu_up(struct mapping_area *area)
+{
+ /*
+ * Make sure we don't leak memory if a cpu UP notification
+ * and zs_init() race and both call zs_cpu_up() on the same cpu
+ */
+ if (area->vm_buf)
+ return 0;
+ area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!area->vm_buf)
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void __zs_cpu_down(struct mapping_area *area)
+{
+ if (area->vm_buf)
+ free_page((unsigned long)area->vm_buf);
+ area->vm_buf = NULL;
+}
+
+static void *__zs_map_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ int sizes[2];
+ void *addr;
+ char *buf = area->vm_buf;
+
+ /* disable page faults to match kmap_atomic() return conditions */
+ pagefault_disable();
+
+ /* no read fastpath */
+ if (area->vm_mm == ZS_MM_WO)
+ goto out;
+
+ sizes[0] = PAGE_SIZE - off;
+ sizes[1] = size - sizes[0];
+
+ /* copy object to per-cpu buffer */
+ addr = kmap_atomic(pages[0]);
+ memcpy(buf, addr + off, sizes[0]);
+ kunmap_atomic(addr);
+ addr = kmap_atomic(pages[1]);
+ memcpy(buf + sizes[0], addr, sizes[1]);
+ kunmap_atomic(addr);
+out:
+ return area->vm_buf;
+}
+
+static void __zs_unmap_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ int sizes[2];
+ void *addr;
+ char *buf = area->vm_buf;
+
+ /* no write fastpath */
+ if (area->vm_mm == ZS_MM_RO)
+ goto out;
+
+ sizes[0] = PAGE_SIZE - off;
+ sizes[1] = size - sizes[0];
+
+ /* copy per-cpu buffer to object */
+ addr = kmap_atomic(pages[0]);
+ memcpy(addr + off, buf, sizes[0]);
+ kunmap_atomic(addr);
+ addr = kmap_atomic(pages[1]);
+ memcpy(addr, buf + sizes[0], sizes[1]);
+ kunmap_atomic(addr);
+
+out:
+ /* enable page faults to match kunmap_atomic() return conditions */
+ pagefault_enable();
+}
+
+#endif /* USE_PGTABLE_MAPPING */
static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu)
{
- int cpu = (long)pcpu;
+ int ret, cpu = (long)pcpu;
struct mapping_area *area;
switch (action) {
case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu);
- if (area->vm)
- break;
- area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes);
- if (!area->vm)
- return notifier_from_errno(-ENOMEM);
+ ret = __zs_cpu_up(area);
+ if (ret)
+ return notifier_from_errno(ret);
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu);
- if (area->vm)
- free_vm_area(area->vm);
- area->vm = NULL;
+ __zs_cpu_down(area);
break;
}
@@ -488,14 +801,21 @@ fail:
return notifier_to_errno(ret);
}
-struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
+/**
+ * zs_create_pool - Creates an allocation pool to work from.
+ * @flags: allocation flags used to allocate pool metadata
+ *
+ * This function must be called before anything when using
+ * the zsmalloc allocator.
+ *
+ * On success, a pointer to the newly created pool is returned,
+ * otherwise NULL.
+ */
+struct zs_pool *zs_create_pool(gfp_t flags)
{
- int i, error, ovhd_size;
+ int i, ovhd_size;
struct zs_pool *pool;
- if (!name)
- return NULL;
-
ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
pool = kzalloc(ovhd_size, GFP_KERNEL);
if (!pool)
@@ -513,31 +833,11 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
class->size = size;
class->index = i;
spin_lock_init(&class->lock);
- class->zspage_order = get_zspage_order(size);
+ class->pages_per_zspage = get_pages_per_zspage(size);
}
- /*
- * If this becomes a separate module, register zs_init with
- * module_init, and remove this block
- */
- if (!zs_initialized) {
- error = zs_init();
- if (error)
- goto cleanup;
- zs_initialized = 1;
- }
-
pool->flags = flags;
- pool->name = name;
-
- error = 0; /* Success */
-
-cleanup:
- if (error) {
- zs_destroy_pool(pool);
- pool = NULL;
- }
return pool;
}
@@ -553,8 +853,7 @@ void zs_destroy_pool(struct zs_pool *pool)
for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
if (class->fullness_list[fg]) {
- pr_info("Freeing non-empty class with size "
- "%db, fullness group %d\n",
+ pr_info("Freeing non-empty class with size %db, fullness group %d\n",
class->size, fg);
}
}
@@ -567,18 +866,14 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
* zs_malloc - Allocate block of given size from pool.
* @pool: pool to allocate from
* @size: size of block to allocate
- * @page: page no. that holds the object
- * @offset: location of object within page
- *
- * On success, identifies block allocated
- * and 0 is returned. On failure, is set to
- * 0 and -ENOMEM is returned.
*
+ * On success, handle to the allocated object is returned,
+ * otherwise 0.
* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
*/
-void *zs_malloc(struct zs_pool *pool, size_t size)
+unsigned long zs_malloc(struct zs_pool *pool, size_t size)
{
- void *obj;
+ unsigned long obj;
struct link_free *link;
int class_idx;
struct size_class *class;
@@ -587,7 +882,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
unsigned long m_objidx, m_offset;
if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
- return NULL;
+ return 0;
class_idx = get_size_class_index(size);
class = &pool->size_class[class_idx];
@@ -600,14 +895,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
spin_unlock(&class->lock);
first_page = alloc_zspage(class, pool->flags);
if (unlikely(!first_page))
- return NULL;
+ return 0;
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
spin_lock(&class->lock);
- class->pages_allocated += class->zspage_order;
+ class->pages_allocated += class->pages_per_zspage;
}
- obj = first_page->freelist;
+ obj = (unsigned long)first_page->freelist;
obj_handle_to_location(obj, &m_page, &m_objidx);
m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
@@ -626,7 +921,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
}
EXPORT_SYMBOL_GPL(zs_malloc);
-void zs_free(struct zs_pool *pool, void *obj)
+void zs_free(struct zs_pool *pool, unsigned long obj)
{
struct link_free *link;
struct page *first_page, *f_page;
@@ -653,13 +948,13 @@ void zs_free(struct zs_pool *pool, void *obj)
+ f_offset);
link->next = first_page->freelist;
kunmap_atomic(link);
- first_page->freelist = obj;
+ first_page->freelist = (void *)obj;
first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
if (fullness == ZS_EMPTY)
- class->pages_allocated -= class->zspage_order;
+ class->pages_allocated -= class->pages_per_zspage;
spin_unlock(&class->lock);
@@ -668,7 +963,22 @@ void zs_free(struct zs_pool *pool, void *obj)
}
EXPORT_SYMBOL_GPL(zs_free);
-void *zs_map_object(struct zs_pool *pool, void *handle)
+/**
+ * zs_map_object - get address of allocated object from handle.
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
+ *
+ * Before using an object allocated from zs_malloc, it must be mapped using
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
+ *
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
+ */
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ enum zs_mapmode mm)
{
struct page *page;
unsigned long obj_idx, off;
@@ -677,38 +987,40 @@ void *zs_map_object(struct zs_pool *pool, void *handle)
enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
+ struct page *pages[2];
BUG_ON(!handle);
+ /*
+ * Because we use per-cpu mapping areas shared among the
+ * pools/users, we can't allow mapping in interrupt context
+ * because it can corrupt another users mappings.
+ */
+ BUG_ON(in_interrupt());
+
obj_handle_to_location(handle, &page, &obj_idx);
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
area = &get_cpu_var(zs_map_area);
+ area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
area->vm_addr = kmap_atomic(page);
- } else {
- /* this object spans two pages */
- struct page *nextp;
-
- nextp = get_next_page(page);
- BUG_ON(!nextp);
-
-
- set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
- set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
-
- /* We pre-allocated VM area so mapping can never fail */
- area->vm_addr = area->vm->addr;
+ return area->vm_addr + off;
}
- return area->vm_addr + off;
+ /* this object spans two pages */
+ pages[0] = page;
+ pages[1] = get_next_page(page);
+ BUG_ON(!pages[1]);
+
+ return __zs_map_object(area, pages, off, class->size);
}
EXPORT_SYMBOL_GPL(zs_map_object);
-void zs_unmap_object(struct zs_pool *pool, void *handle)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct page *page;
unsigned long obj_idx, off;
@@ -726,13 +1038,16 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
off = obj_idx_to_offset(page, obj_idx, class->size);
area = &__get_cpu_var(zs_map_area);
- if (off + class->size <= PAGE_SIZE) {
+ if (off + class->size <= PAGE_SIZE)
kunmap_atomic(area->vm_addr);
- } else {
- set_pte(area->vm_ptes[0], __pte(0));
- set_pte(area->vm_ptes[1], __pte(0));
- __flush_tlb_one((unsigned long)area->vm_addr);
- __flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
+ else {
+ struct page *pages[2];
+
+ pages[0] = page;
+ pages[1] = get_next_page(page);
+ BUG_ON(!pages[1]);
+
+ __zs_unmap_object(area, pages, off, class->size);
}
put_cpu_var(zs_map_area);
}
@@ -749,3 +1064,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool)
return npages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
+
+module_init(zs_init);
+module_exit(zs_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Nitin Gupta ");
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h
index 949384ee749..fbe6bec421a 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/drivers/staging/zsmalloc/zsmalloc.h
@@ -15,16 +15,28 @@
#include
+/*
+ * zsmalloc mapping modes
+ *
+ * NOTE: These only make a difference when a mapped object spans pages
+ */
+enum zs_mapmode {
+ ZS_MM_RW, /* normal read-write mapping */
+ ZS_MM_RO, /* read-only (no copy-out at unmap time) */
+ ZS_MM_WO /* write-only (no copy-in at map time) */
+};
+
struct zs_pool;
-struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
+struct zs_pool *zs_create_pool(gfp_t flags);
void zs_destroy_pool(struct zs_pool *pool);
-void *zs_malloc(struct zs_pool *pool, size_t size);
-void zs_free(struct zs_pool *pool, void *obj);
+unsigned long zs_malloc(struct zs_pool *pool, size_t size);
+void zs_free(struct zs_pool *pool, unsigned long obj);
-void *zs_map_object(struct zs_pool *pool, void *handle);
-void zs_unmap_object(struct zs_pool *pool, void *handle);
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ enum zs_mapmode mm);
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
u64 zs_get_total_size_bytes(struct zs_pool *pool);
diff --git a/drivers/staging/zsmalloc/zsmalloc_int.h b/drivers/staging/zsmalloc/zsmalloc_int.h
deleted file mode 100644
index 92eefc663af..00000000000
--- a/drivers/staging/zsmalloc/zsmalloc_int.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * zsmalloc memory allocator
- *
- * Copyright (C) 2011 Nitin Gupta
- *
- * This code is released using a dual license strategy: BSD/GPL
- * You can choose the license that better fits your requirements.
- *
- * Released under the terms of 3-clause BSD License
- * Released under the terms of GNU General Public License Version 2.0
- */
-
-#ifndef _ZS_MALLOC_INT_H_
-#define _ZS_MALLOC_INT_H_
-
-#include
-#include
-#include
-
-/*
- * This must be power of 2 and greater than of equal to sizeof(link_free).
- * These two conditions ensure that any 'struct link_free' itself doesn't
- * span more than 1 page which avoids complex case of mapping 2 pages simply
- * to restore link_free pointer values.
- */
-#define ZS_ALIGN 8
-
-/*
- * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
- * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
- */
-#define ZS_MAX_ZSPAGE_ORDER 2
-#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
-
-/*
- * Object location (, ) is encoded as
- * as single (void *) handle value.
- *
- * Note that object index is relative to system
- * page it is stored in, so for each sub-page belonging
- * to a zspage, obj_idx starts with 0.
- *
- * This is made more complicated by various memory models and PAE.
- */
-
-#ifndef MAX_PHYSMEM_BITS
-#ifdef CONFIG_HIGHMEM64G
-#define MAX_PHYSMEM_BITS 36
-#else /* !CONFIG_HIGHMEM64G */
-/*
- * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
- * be PAGE_SHIFT
- */
-#define MAX_PHYSMEM_BITS BITS_PER_LONG
-#endif
-#endif
-#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
-#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
-#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
-
-#define MAX(a, b) ((a) >= (b) ? (a) : (b))
-/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
-#define ZS_MIN_ALLOC_SIZE \
- MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
-#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
-
-/*
- * On systems with 4K page size, this gives 254 size classes! There is a
- * trader-off here:
- * - Large number of size classes is potentially wasteful as free page are
- * spread across these classes
- * - Small number of size classes causes large internal fragmentation
- * - Probably its better to use specific size classes (empirically
- * determined). NOTE: all those class sizes must be set as multiple of
- * ZS_ALIGN to make sure link_free itself never has to span 2 pages.
- *
- * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
- * (reason above)
- */
-#define ZS_SIZE_CLASS_DELTA 16
-#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
- ZS_SIZE_CLASS_DELTA + 1)
-
-/*
- * We do not maintain any list for completely empty or full pages
- */
-enum fullness_group {
- ZS_ALMOST_FULL,
- ZS_ALMOST_EMPTY,
- _ZS_NR_FULLNESS_GROUPS,
-
- ZS_EMPTY,
- ZS_FULL
-};
-
-/*
- * We assign a page to ZS_ALMOST_EMPTY fullness group when:
- * n <= N / f, where
- * n = number of allocated objects
- * N = total number of objects zspage can store
- * f = 1/fullness_threshold_frac
- *
- * Similarly, we assign zspage to:
- * ZS_ALMOST_FULL when n > N / f
- * ZS_EMPTY when n == 0
- * ZS_FULL when n == N
- *
- * (see: fix_fullness_group())
- */
-static const int fullness_threshold_frac = 4;
-
-struct mapping_area {
- struct vm_struct *vm;
- pte_t *vm_ptes[2];
- char *vm_addr;
-};
-
-struct size_class {
- /*
- * Size of objects stored in this class. Must be multiple
- * of ZS_ALIGN.
- */
- int size;
- unsigned int index;
-
- /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
- int zspage_order;
-
- spinlock_t lock;
-
- /* stats */
- u64 pages_allocated;
-
- struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
-};
-
-/*
- * Placed within free objects to form a singly linked list.
- * For every zspage, first_page->freelist gives head of this list.
- *
- * This must be power of 2 and less than or equal to ZS_ALIGN
- */
-struct link_free {
- /* Handle of next free chunk (encodes ) */
- void *next;
-};
-
-struct zs_pool {
- struct size_class size_class[ZS_SIZE_CLASSES];
-
- gfp_t flags; /* allocation flags used when growing pool */
- const char *name;
-};
-
-#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fd1baead0d8..dd6302ae715 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1637,5 +1637,10 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */
+static inline void page_mapcount_reset(struct page *page)
+{
+ atomic_set(&(page)->_mapcount, -1);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */