From 66b91c76c04dc505275568c20759fe8efa9baf09 Mon Sep 17 00:00:00 2001
From: Tonoxis
Date: Sun, 4 Apr 2021 16:21:55 -0400
Subject: [PATCH] Pulled in android-ready zram from davidmueller13/Vindicator-flo-aosp.

---
 drivers/staging/zram/Kconfig             |   7 +-
 drivers/staging/zram/Makefile            |   2 +-
 drivers/staging/zram/zram.txt            |  25 +-
 drivers/staging/zram/zram_drv.c          | 983 +++++++++++++----------
 drivers/staging/zram/zram_drv.h          |  82 +-
 drivers/staging/zram/zram_sysfs.c        | 227 ------
 drivers/staging/zsmalloc/Kconfig         |   6 +-
 drivers/staging/zsmalloc/Makefile        |   1 +
 drivers/staging/zsmalloc/zsmalloc-main.c | 525 +++++++++---
 drivers/staging/zsmalloc/zsmalloc.h      |  22 +-
 drivers/staging/zsmalloc/zsmalloc_int.h  | 155 ----
 include/linux/mm.h                       |   5 +
 12 files changed, 1057 insertions(+), 983 deletions(-)
 delete mode 100644 drivers/staging/zram/zram_sysfs.c
 delete mode 100644 drivers/staging/zsmalloc/zsmalloc_int.h

diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig
index 9d11a4cb99b..983314c4134 100644
--- a/drivers/staging/zram/Kconfig
+++ b/drivers/staging/zram/Kconfig
@@ -1,9 +1,6 @@
 config ZRAM
 	tristate "Compressed RAM block device support"
-	# X86 dependency is because zsmalloc uses non-portable pte/tlb
-	# functions
-	depends on BLOCK && SYSFS && X86
-	select ZSMALLOC
+	depends on BLOCK && SYSFS && ZSMALLOC
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
 	default n
@@ -17,7 +14,7 @@ config ZRAM
 	  disks and maybe many more.
 
 	  See zram.txt for more information.
-	  Project home: http://compcache.googlecode.com/
+	  Project home:
 
 config ZRAM_DEBUG
 	bool "Compressed RAM block device debug support"
diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile
index 7f4a3019e9c..cb0f9ced6a9 100644
--- a/drivers/staging/zram/Makefile
+++ b/drivers/staging/zram/Makefile
@@ -1,3 +1,3 @@
-zram-y	:=	zram_drv.o zram_sysfs.o
+zram-y	:=	zram_drv.o
 
 obj-$(CONFIG_ZRAM)	+=	zram.o
diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt
index 5f75d298756..765d790ae83 100644
--- a/drivers/staging/zram/zram.txt
+++ b/drivers/staging/zram/zram.txt
@@ -23,17 +23,17 @@ Following shows a typical sequence of steps for using zram.
 	This creates 4 devices: /dev/zram{0,1,2,3}
 	(num_devices parameter is optional. Default: 1)
 
-2) Set Disksize (Optional):
-	Set disk size by writing the value to sysfs node 'disksize'
-	(in bytes). If disksize is not given, default value of 25%
-	of RAM is used.
+2) Set Disksize
+	Set disk size by writing the value to sysfs node 'disksize'.
+	The value can be either in bytes or you can use mem suffixes.
+	Examples:
+	    # Initialize /dev/zram0 with 50MB disksize
+	    echo $((50*1024*1024)) > /sys/block/zram0/disksize
 
-	# Initialize /dev/zram0 with 50MB disksize
-	echo $((50*1024*1024)) > /sys/block/zram0/disksize
-
-	NOTE: disksize cannot be changed if the disk contains any
-	data. So, for such a disk, you need to issue 'reset' (see below)
-	before you can change its disksize.
+	    # Using mem suffixes
+	    echo 256K > /sys/block/zram0/disksize
+	    echo 512M > /sys/block/zram0/disksize
+	    echo 1G > /sys/block/zram0/disksize
 
 3) Activate:
 	mkswap /dev/zram0
@@ -65,8 +65,9 @@ Following shows a typical sequence of steps for using zram.
 	echo 1 > /sys/block/zram0/reset
 	echo 1 > /sys/block/zram1/reset
 
-	(This frees all the memory allocated for the given device).
-
+	This frees all the memory allocated for the given device and
+	resets the disksize to zero. You must set the disksize again
+	before reusing the device.
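The mem suffixes documented above are handled by the new disksize_store() later in this patch, which parses the written string with the kernel's memparse(). A rough userspace approximation of that suffix handling, for illustration only (parse_size and the sample values are not part of the patch):

#include <stdio.h>
#include <stdlib.h>

/* Rough userspace stand-in for the kernel's memparse():
 * a number followed by an optional K/M/G suffix. */
static unsigned long long parse_size(const char *s)
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': v <<= 10; /* fall through */
	case 'M': case 'm': v <<= 10; /* fall through */
	case 'K': case 'k': v <<= 10; break;
	default: break;
	}
	return v;
}

int main(void)
{
	/* Same values as the zram.txt examples above. */
	printf("%llu\n", parse_size("256K"));  /* 262144 */
	printf("%llu\n", parse_size("512M"));  /* 536870912 */
	printf("%llu\n", parse_size("1G"));    /* 1073741824 */
	return 0;
}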
Please report any problems at: - Mailing list: linux-mm-cc at laptop dot org diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 685d612a627..8853d524354 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -32,61 +32,229 @@ #include #include #include +#include #include "zram_drv.h" /* Globals */ static int zram_major; -struct zram *zram_devices; +static struct zram *zram_devices; + +/* + * We don't need to see memory allocation errors more than once every 1 + * second to know that a problem is occurring. + */ +#define ALLOC_ERROR_LOG_RATE_MS 1000 /* Module params (documentation at end) */ -static unsigned int num_devices; +static unsigned int num_devices = 1; -static void zram_stat_inc(u32 *v) +static inline struct zram *dev_to_zram(struct device *dev) { - *v = *v + 1; + return (struct zram *)dev_to_disk(dev)->private_data; } -static void zram_stat_dec(u32 *v) +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) { - *v = *v - 1; + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", zram->disksize); } -static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc) +static ssize_t initstate_show(struct device *dev, + struct device_attribute *attr, char *buf) { - spin_lock(&zram->stat64_lock); - *v = *v + inc; - spin_unlock(&zram->stat64_lock); + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->init_done); } -static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec) +static ssize_t num_reads_show(struct device *dev, + struct device_attribute *attr, char *buf) { - spin_lock(&zram->stat64_lock); - *v = *v - dec; - spin_unlock(&zram->stat64_lock); + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_reads)); } -static void zram_stat64_inc(struct zram *zram, u64 *v) +static ssize_t num_writes_show(struct device *dev, + struct device_attribute *attr, char *buf) { - zram_stat64_add(zram, v, 1); + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_writes)); } -static int zram_test_flag(struct zram *zram, u32 index, +static ssize_t invalid_io_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.invalid_io)); +} + +static ssize_t notify_free_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.notify_free)); +} + +static ssize_t zero_pages_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->stats.pages_zero); +} + +static ssize_t orig_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)(zram->stats.pages_stored) << PAGE_SHIFT); +} + +static ssize_t compr_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.compr_size)); +} + +static ssize_t mem_used_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + 
down_read(&zram->init_lock); + if (zram->init_done) + val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); + + return sprintf(buf, "%llu\n", val); +} + +static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - return zram->table[index].flags & BIT(flag); + return meta->table[index].flags & BIT(flag); } -static void zram_set_flag(struct zram *zram, u32 index, +static void zram_set_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - zram->table[index].flags |= BIT(flag); + meta->table[index].flags |= BIT(flag); } -static void zram_clear_flag(struct zram *zram, u32 index, +static void zram_clear_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - zram->table[index].flags &= ~BIT(flag); + meta->table[index].flags &= ~BIT(flag); +} + +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. + */ +static inline int valid_io_request(struct zram *zram, struct bio *bio) +{ + u64 start, end, bound; + + /* unaligned request */ + if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + start = bio->bi_sector; + end = start + (bio->bi_size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end > bound || start > end)) + return 0; + + /* I/O request is valid */ + return 1; +} + +static void zram_meta_free(struct zram_meta *meta) +{ + zs_destroy_pool(meta->mem_pool); + kfree(meta->compress_workmem); + free_pages((unsigned long)meta->compress_buffer, 1); + vfree(meta->table); + kfree(meta); +} + +static struct zram_meta *zram_meta_alloc(u64 disksize) +{ + size_t num_pages; + struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto out; + + meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!meta->compress_workmem) + goto free_meta; + + meta->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!meta->compress_buffer) { + pr_err("Error allocating compressor buffer space\n"); + goto free_workmem; + } + + num_pages = disksize >> PAGE_SHIFT; + meta->table = vzalloc(num_pages * sizeof(*meta->table)); + if (!meta->table) { + pr_err("Error allocating zram address table\n"); + goto free_buffer; + } + + meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM | + __GFP_NOWARN); + if (!meta->mem_pool) { + pr_err("Error creating memory pool\n"); + goto free_table; + } + + return meta; + +free_table: + vfree(meta->table); +free_buffer: + free_pages((unsigned long)meta->compress_buffer, 1); +free_workmem: + kfree(meta->compress_workmem); +free_meta: + kfree(meta); + meta = NULL; +out: + return meta; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; } static int page_zero_filled(void *ptr) @@ -104,352 +272,272 @@ static int page_zero_filled(void *ptr) return 1; } -static void zram_set_disksize(struct zram *zram, size_t totalram_bytes) -{ - if (!zram->disksize) { - pr_info( - "disk size not provided. 
You can use disksize_kb module " - "param to specify size.\nUsing default: (%u%% of RAM).\n", - default_disksize_perc_ram - ); - zram->disksize = default_disksize_perc_ram * - (totalram_bytes / 100); - } - - if (zram->disksize > 2 * (totalram_bytes)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %zu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - totalram_bytes >> 10, zram->disksize - ); - } - - zram->disksize &= PAGE_MASK; -} - -static void zram_free_page(struct zram *zram, size_t index) -{ - void *handle = zram->table[index].handle; - - if (unlikely(!handle)) { - /* - * No memory is allocated for zero filled pages. - * Simply clear zero page flag. - */ - if (zram_test_flag(zram, index, ZRAM_ZERO)) { - zram_clear_flag(zram, index, ZRAM_ZERO); - zram_stat_dec(&zram->stats.pages_zero); - } - return; - } - - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - __free_page(handle); - zram_clear_flag(zram, index, ZRAM_UNCOMPRESSED); - zram_stat_dec(&zram->stats.pages_expand); - goto out; - } - - zs_free(zram->mem_pool, handle); - - if (zram->table[index].size <= PAGE_SIZE / 2) - zram_stat_dec(&zram->stats.good_compress); - -out: - zram_stat64_sub(zram, &zram->stats.compr_size, - zram->table[index].size); - zram_stat_dec(&zram->stats.pages_stored); - - zram->table[index].handle = NULL; - zram->table[index].size = 0; -} - static void handle_zero_page(struct bio_vec *bvec) { struct page *page = bvec->bv_page; void *user_mem; user_mem = kmap_atomic(page); - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); kunmap_atomic(user_mem); flush_dcache_page(page); } -static void handle_uncompressed_page(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) +static void zram_free_page(struct zram *zram, size_t index) { - struct page *page = bvec->bv_page; - unsigned char *user_mem, *cmem; + struct zram_meta *meta = zram->meta; + unsigned long handle = meta->table[index].handle; + u16 size = meta->table[index].size; - user_mem = kmap_atomic(page); - cmem = kmap_atomic(zram->table[index].handle); + if (unlikely(!handle)) { + /* + * No memory is allocated for zero filled pages. + * Simply clear zero page flag. 
+ */ + if (zram_test_flag(meta, index, ZRAM_ZERO)) { + zram_clear_flag(meta, index, ZRAM_ZERO); + zram->stats.pages_zero--; + } + return; + } - memcpy(user_mem + bvec->bv_offset, cmem + offset, bvec->bv_len); - kunmap_atomic(cmem); - kunmap_atomic(user_mem); + if (unlikely(size > max_zpage_size)) + zram->stats.bad_compress--; - flush_dcache_page(page); + zs_free(meta->mem_pool, handle); + + if (size <= PAGE_SIZE / 2) + zram->stats.good_compress--; + + atomic64_sub(meta->table[index].size, &zram->stats.compr_size); + zram->stats.pages_stored--; + + meta->table[index].handle = 0; + meta->table[index].size = 0; } -static inline int is_partial_io(struct bio_vec *bvec) +static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { - return bvec->bv_len != PAGE_SIZE; + int ret = LZO_E_OK; + size_t clen = PAGE_SIZE; + unsigned char *cmem; + struct zram_meta *meta = zram->meta; + unsigned long handle = meta->table[index].handle; + + if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + clear_page(mem); + return 0; + } + + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); + if (meta->table[index].size == PAGE_SIZE) + copy_page(mem, cmem); + else + ret = lzo1x_decompress_safe(cmem, meta->table[index].size, + mem, &clen); + zs_unmap_object(meta->mem_pool, handle); + + /* Should NEVER happen. Return bio error if it does. */ + if (unlikely(ret != LZO_E_OK)) { + pr_err("Decompression failed! err=%d, page=%u\n", ret, index); + atomic64_inc(&zram->stats.failed_reads); + return ret; + } + + return 0; } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio) { int ret; - size_t clen; struct page *page; - struct zobj_header *zheader; - unsigned char *user_mem, *cmem, *uncmem = NULL; - + unsigned char *user_mem, *uncmem = NULL; + struct zram_meta *meta = zram->meta; page = bvec->bv_page; - if (zram_test_flag(zram, index, ZRAM_ZERO)) { + if (unlikely(!meta->table[index].handle) || + zram_test_flag(meta, index, ZRAM_ZERO)) { handle_zero_page(bvec); return 0; } - /* Requested page is not present in compressed area */ - if (unlikely(!zram->table[index].handle)) { - pr_debug("Read before write: sector=%lu, size=%u", - (ulong)(bio->bi_sector), bio->bi_size); - handle_zero_page(bvec); - return 0; - } - - /* Page is stored uncompressed since it's incompressible */ - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - handle_uncompressed_page(zram, bvec, index, offset); - return 0; - } - - if (is_partial_io(bvec)) { + if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ - uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!uncmem) { - pr_info("Error allocating temp memory!\n"); - return -ENOMEM; - } - } + uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); user_mem = kmap_atomic(page); if (!is_partial_io(bvec)) uncmem = user_mem; - clen = PAGE_SIZE; - cmem = zs_map_object(zram->mem_pool, zram->table[index].handle); - - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), - zram->table[index].size, - uncmem, &clen); - - if (is_partial_io(bvec)) { - memcpy(user_mem + bvec->bv_offset, uncmem + offset, - bvec->bv_len); - kfree(uncmem); + if (!uncmem) { + pr_info("Unable to allocate temp memory\n"); + ret = -ENOMEM; + goto out_cleanup; } - zs_unmap_object(zram->mem_pool, zram->table[index].handle); - kunmap_atomic(user_mem); - + ret = zram_decompress_page(zram, uncmem, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { - pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); - zram_stat64_inc(zram, &zram->stats.failed_reads); - return ret; - } + if (unlikely(ret != LZO_E_OK)) + goto out_cleanup; + + if (is_partial_io(bvec)) + memcpy(user_mem + bvec->bv_offset, uncmem + offset, + bvec->bv_len); flush_dcache_page(page); - - return 0; -} - -static int zram_read_before_write(struct zram *zram, char *mem, u32 index) -{ - int ret; - size_t clen = PAGE_SIZE; - struct zobj_header *zheader; - unsigned char *cmem; - - if (zram_test_flag(zram, index, ZRAM_ZERO) || - !zram->table[index].handle) { - memset(mem, 0, PAGE_SIZE); - return 0; - } - - cmem = zs_map_object(zram->mem_pool, zram->table[index].handle); - - /* Page is stored uncompressed since it's incompressible */ - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - memcpy(mem, cmem, PAGE_SIZE); - kunmap_atomic(cmem); - return 0; - } - - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), - zram->table[index].size, - mem, &clen); - zs_unmap_object(zram->mem_pool, zram->table[index].handle); - - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { - pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - zram_stat64_inc(zram, &zram->stats.failed_reads); - return ret; - } - - return 0; + ret = 0; +out_cleanup: + kunmap_atomic(user_mem); + if (is_partial_io(bvec)) + kfree(uncmem); + return ret; } static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { - int ret; - u32 store_offset; + int ret = 0; size_t clen; - void *handle; - struct zobj_header *zheader; - struct page *page, *page_store; + unsigned long handle; + struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; + struct zram_meta *meta = zram->meta; + static unsigned long zram_rs_time; page = bvec->bv_page; - src = zram->compress_buffer; + src = meta->compress_buffer; if (is_partial_io(bvec)) { /* * This is a partial IO. We need to read the full page * before to write the changes. */ - uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL); + uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); if (!uncmem) { - pr_info("Error allocating temp memory!\n"); ret = -ENOMEM; goto out; } - ret = zram_read_before_write(zram, uncmem, index); - if (ret) { - kfree(uncmem); + ret = zram_decompress_page(zram, uncmem, index); + if (ret) goto out; - } } - /* - * System overwrites unused sectors. Free memory associated - * with this sector now. - */ - if (zram->table[index].handle || - zram_test_flag(zram, index, ZRAM_ZERO)) - zram_free_page(zram, index); - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) + if (is_partial_io(bvec)) { memcpy(uncmem + offset, user_mem + bvec->bv_offset, bvec->bv_len); - else + kunmap_atomic(user_mem); + user_mem = NULL; + } else { uncmem = user_mem; + } if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); - if (is_partial_io(bvec)) - kfree(uncmem); - zram_stat_inc(&zram->stats.pages_zero); - zram_set_flag(zram, index, ZRAM_ZERO); + /* Free memory associated with this sector now. */ + zram_free_page(zram, index); + + zram->stats.pages_zero++; + zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, - zram->compress_workmem); + /* + * zram_slot_free_notify could miss free so that let's + * double check. 
+ */ + if (unlikely(meta->table[index].handle || + zram_test_flag(meta, index, ZRAM_ZERO))) + zram_free_page(zram, index); - kunmap_atomic(user_mem); - if (is_partial_io(bvec)) - kfree(uncmem); + ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + meta->compress_workmem); + + if (!is_partial_io(bvec)) { + kunmap_atomic(user_mem); + user_mem = NULL; + uncmem = NULL; + } if (unlikely(ret != LZO_E_OK)) { pr_err("Compression failed! err=%d\n", ret); goto out; } - /* - * Page is incompressible. Store it as-is (uncompressed) - * since we do not want to return too many disk write - * errors which has side effect of hanging the system. - */ if (unlikely(clen > max_zpage_size)) { + zram->stats.bad_compress++; clen = PAGE_SIZE; - page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page_store)) { - pr_info("Error allocating memory for " - "incompressible page: %u\n", index); - ret = -ENOMEM; - goto out; - } - - store_offset = 0; - zram_set_flag(zram, index, ZRAM_UNCOMPRESSED); - zram_stat_inc(&zram->stats.pages_expand); - handle = page_store; - src = kmap_atomic(page); - cmem = kmap_atomic(page_store); - goto memstore; + src = NULL; + if (is_partial_io(bvec)) + src = uncmem; } - handle = zs_malloc(zram->mem_pool, clen + sizeof(*zheader)); + handle = zs_malloc(meta->mem_pool, clen); if (!handle) { - pr_info("Error allocating memory for compressed " - "page: %u, size=%zu\n", index, clen); + if (printk_timed_ratelimit(&zram_rs_time, + ALLOC_ERROR_LOG_RATE_MS)) + pr_info("Error allocating memory for compressed page: %u, size=%zu\n", + index, clen); ret = -ENOMEM; goto out; } - cmem = zs_map_object(zram->mem_pool, handle); + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); -memstore: -#if 0 - /* Back-reference needed for memory defragmentation */ - if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) { - zheader = (struct zobj_header *)cmem; - zheader->table_idx = index; - cmem += sizeof(*zheader); - } -#endif - - memcpy(cmem, src, clen); - - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - kunmap_atomic(cmem); + if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { + src = kmap_atomic(page); + copy_page(cmem, src); kunmap_atomic(src); } else { - zs_unmap_object(zram->mem_pool, handle); + memcpy(cmem, src, clen); } - zram->table[index].handle = handle; - zram->table[index].size = clen; + zs_unmap_object(meta->mem_pool, handle); + + /* + * Free memory associated with this sector + * before overwriting unused sectors. 
+ */ + zram_free_page(zram, index); + + meta->table[index].handle = handle; + meta->table[index].size = clen; /* Update stats */ - zram_stat64_add(zram, &zram->stats.compr_size, clen); - zram_stat_inc(&zram->stats.pages_stored); + atomic64_add(clen, &zram->stats.compr_size); + zram->stats.pages_stored++; if (clen <= PAGE_SIZE / 2) - zram_stat_inc(&zram->stats.good_compress); - - return 0; + zram->stats.good_compress++; out: + if (is_partial_io(bvec)) + kfree(uncmem); + if (ret) - zram_stat64_inc(zram, &zram->stats.failed_writes); + atomic64_inc(&zram->stats.failed_writes); return ret; } +static void handle_pending_slot_free(struct zram *zram) +{ + struct zram_slot_free *free_rq; + + spin_lock(&zram->slot_free_lock); + while (zram->slot_free_rq) { + free_rq = zram->slot_free_rq; + zram->slot_free_rq = free_rq->next; + zram_free_page(zram, free_rq->index); + kfree(free_rq); + } + spin_unlock(&zram->slot_free_lock); +} + static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { @@ -457,10 +545,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (rw == READ) { down_read(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_read(zram, bvec, index, offset, bio); up_read(&zram->lock); } else { down_write(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_write(zram, bvec, index, offset); up_write(&zram->lock); } @@ -468,11 +558,124 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +static void zram_reset_device(struct zram *zram, bool reset_capacity) { - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; + size_t index; + struct zram_meta *meta; + + flush_work(&zram->free_work); + + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); + return; + } + + meta = zram->meta; + zram->init_done = 0; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { + unsigned long handle = meta->table[index].handle; + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + + zram_meta_free(zram->meta); + zram->meta = NULL; + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + + zram->disksize = 0; + if (reset_capacity) + set_capacity(zram->disk, 0); + up_write(&zram->init_lock); +} + +static void zram_init_device(struct zram *zram, struct zram_meta *meta) +{ + if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. 
Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %lu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 + ); + } + + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + + zram->meta = meta; + zram->init_done = 1; + + pr_debug("Initialization done!\n"); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 disksize; + struct zram_meta *meta; + struct zram *zram = dev_to_zram(dev); + + disksize = memparse(buf, NULL); + if (!disksize) + return -EINVAL; + + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(disksize); + down_write(&zram->init_lock); + if (zram->init_done) { + up_write(&zram->init_lock); + zram_meta_free(meta); + pr_info("Cannot change disksize for initialized device\n"); + return -EBUSY; + } + + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + zram_init_device(zram, meta); + up_write(&zram->init_lock); + + return len; +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned short do_reset; + struct zram *zram; + struct block_device *bdev; + + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); + + /* Do not reset an active device! */ + if (bdev->bd_holders) + return -EBUSY; + + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + return ret; + + if (!do_reset) + return -EINVAL; + + /* Make sure all pending I/O is finished */ + if (bdev) + fsync_bdev(bdev); + + zram_reset_device(zram, true); + return len; } static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) @@ -483,10 +686,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) switch (rw) { case READ: - zram_stat64_inc(zram, &zram->stats.num_reads); + atomic64_inc(&zram->stats.num_reads); break; case WRITE: - zram_stat64_inc(zram, &zram->stats.num_writes); + atomic64_inc(&zram->stats.num_writes); break; } @@ -530,23 +733,6 @@ out: bio_io_error(bio); } -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline int valid_io_request(struct zram *zram, struct bio *bio) -{ - if (unlikely( - (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) || - (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) || - (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) { - - return 0; - } - - /* I/O request is valid */ - return 1; -} - /* * Handler function for all zram I/O requests. 
*/ @@ -554,16 +740,13 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; - if (unlikely(!zram->init_done) && zram_init_device(zram)) - goto error; - down_read(&zram->init_lock); if (unlikely(!zram->init_done)) - goto error_unlock; + goto error; if (!valid_io_request(zram, bio)) { - zram_stat64_inc(zram, &zram->stats.invalid_io); - goto error_unlock; + atomic64_inc(&zram->stats.invalid_io); + goto error; } __zram_make_request(zram, bio, bio_data_dir(bio)); @@ -571,129 +754,45 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) return; -error_unlock: - up_read(&zram->init_lock); error: + up_read(&zram->init_lock); bio_io_error(bio); } -void __zram_reset_device(struct zram *zram) +static void zram_slot_free(struct work_struct *work) { - size_t index; + struct zram *zram; - zram->init_done = 0; - - /* Free various per-device buffers */ - kfree(zram->compress_workmem); - free_pages((unsigned long)zram->compress_buffer, 1); - - zram->compress_workmem = NULL; - zram->compress_buffer = NULL; - - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - void *handle = zram->table[index].handle; - if (!handle) - continue; - - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) - __free_page(handle); - else - zs_free(zram->mem_pool, handle); - } - - vfree(zram->table); - zram->table = NULL; - - zs_destroy_pool(zram->mem_pool); - zram->mem_pool = NULL; - - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - - zram->disksize = 0; + zram = container_of(work, struct zram, free_work); + down_write(&zram->lock); + handle_pending_slot_free(zram); + up_write(&zram->lock); } -void zram_reset_device(struct zram *zram) +static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) { - down_write(&zram->init_lock); - __zram_reset_device(zram); - up_write(&zram->init_lock); -} - -int zram_init_device(struct zram *zram) -{ - int ret; - size_t num_pages; - - down_write(&zram->init_lock); - - if (zram->init_done) { - up_write(&zram->init_lock); - return 0; - } - - zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); - - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!zram->compress_workmem) { - pr_err("Error allocating compressor working memory!\n"); - ret = -ENOMEM; - goto fail_no_table; - } - - zram->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!zram->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - ret = -ENOMEM; - goto fail_no_table; - } - - num_pages = zram->disksize >> PAGE_SHIFT; - zram->table = vzalloc(num_pages * sizeof(*zram->table)); - if (!zram->table) { - pr_err("Error allocating zram address table\n"); - ret = -ENOMEM; - goto fail_no_table; - } - - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->mem_pool = zs_create_pool("zram", GFP_NOIO | __GFP_HIGHMEM); - if (!zram->mem_pool) { - pr_err("Error creating memory pool\n"); - ret = -ENOMEM; - goto fail; - } - - zram->init_done = 1; - up_write(&zram->init_lock); - - pr_debug("Initialization done!\n"); - return 0; - -fail_no_table: - /* To prevent accessing table entries during cleanup */ - zram->disksize = 0; -fail: - __zram_reset_device(zram); - up_write(&zram->init_lock); - pr_err("Initialization failed: err=%d\n", ret); - 
return ret; + spin_lock(&zram->slot_free_lock); + free_rq->next = zram->slot_free_rq; + zram->slot_free_rq = free_rq; + spin_unlock(&zram->slot_free_lock); } static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; + struct zram_slot_free *free_rq; zram = bdev->bd_disk->private_data; - zram_free_page(zram, index); - zram_stat64_inc(zram, &zram->stats.notify_free); + atomic64_inc(&zram->stats.notify_free); + + free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); + if (!free_rq) + return; + + free_rq->index = index; + add_slot_free(zram, free_rq); + schedule_work(&zram->free_work); } static const struct block_device_operations zram_devops = { @@ -701,19 +800,53 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, + disksize_show, disksize_store); +static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); +static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); +static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); +static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); +static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); +static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); +static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); +static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); +static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); + +static struct attribute *zram_disk_attrs[] = { + &dev_attr_disksize.attr, + &dev_attr_initstate.attr, + &dev_attr_reset.attr, + &dev_attr_num_reads.attr, + &dev_attr_num_writes.attr, + &dev_attr_invalid_io.attr, + &dev_attr_notify_free.attr, + &dev_attr_zero_pages.attr, + &dev_attr_orig_data_size.attr, + &dev_attr_compr_data_size.attr, + &dev_attr_mem_used_total.attr, + NULL, +}; + +static struct attribute_group zram_disk_attr_group = { + .attrs = zram_disk_attrs, +}; + static int create_device(struct zram *zram, int device_id) { - int ret = 0; + int ret = -ENOMEM; init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - spin_lock_init(&zram->stat64_lock); + + INIT_WORK(&zram->free_work, zram_slot_free); + spin_lock_init(&zram->slot_free_lock); + zram->slot_free_rq = NULL; zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", device_id); - ret = -ENOMEM; goto out; } @@ -723,11 +856,9 @@ static int create_device(struct zram *zram, int device_id) /* gendisk structure */ zram->disk = alloc_disk(1); if (!zram->disk) { - blk_cleanup_queue(zram->queue); - pr_warning("Error allocating disk structure for device %d\n", + pr_warn("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; - goto out; + goto out_free_queue; } zram->disk->major = zram_major; @@ -755,12 +886,18 @@ static int create_device(struct zram *zram, int device_id) ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, &zram_disk_attr_group); if (ret < 0) { - pr_warning("Error creating sysfs group"); - goto out; + pr_warn("Error creating sysfs group"); + goto out_free_disk; } zram->init_done = 0; + return 0; +out_free_disk: + del_gendisk(zram->disk); + put_disk(zram->disk); +out_free_queue: + blk_cleanup_queue(zram->queue); out: return ret; } @@ -779,17 +916,12 @@ static void destroy_device(struct zram *zram) blk_cleanup_queue(zram->queue); } -unsigned int zram_get_num_devices(void) -{ - return num_devices; -} - 
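zram_slot_free_notify() above defers the actual free: it pushes the slot index onto a spinlock-protected singly linked list, and zram_slot_free() (scheduled work) or the next read/write drains that list under zram->lock via handle_pending_slot_free(). A minimal userspace sketch of that push/drain pattern, assuming a pthread mutex in place of the kernel spinlock and printf in place of zram_free_page() (all names below are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct free_req {
	unsigned long index;
	struct free_req *next;
};

static struct free_req *free_head;
static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;

/* Producer side, mirroring add_slot_free(): push a request onto the list. */
static void add_free_req(unsigned long index)
{
	struct free_req *rq = malloc(sizeof(*rq));

	if (!rq)
		return;	/* like the GFP_ATOMIC failure path: the request is dropped */
	rq->index = index;
	pthread_mutex_lock(&free_lock);
	rq->next = free_head;
	free_head = rq;
	pthread_mutex_unlock(&free_lock);
}

/* Consumer side, mirroring handle_pending_slot_free(): drain and free. */
static void drain_free_reqs(void)
{
	pthread_mutex_lock(&free_lock);
	while (free_head) {
		struct free_req *rq = free_head;

		free_head = rq->next;
		printf("freeing slot %lu\n", rq->index);  /* zram_free_page() here */
		free(rq);
	}
	pthread_mutex_unlock(&free_lock);
}

int main(void)
{
	add_free_req(1);
	add_free_req(42);
	drain_free_reqs();
	return 0;
}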
static int __init zram_init(void) { int ret, dev_id; if (num_devices > max_num_devices) { - pr_warning("Invalid value for num_devices: %u\n", + pr_warn("Invalid value for num_devices: %u\n", num_devices); ret = -EINVAL; goto out; @@ -797,18 +929,12 @@ static int __init zram_init(void) zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { - pr_warning("Unable to get major number\n"); + pr_warn("Unable to get major number\n"); ret = -EBUSY; goto out; } - if (!num_devices) { - pr_info("num_devices not specified. Using default: 1\n"); - num_devices = 1; - } - /* Allocate the device array and initialize each one */ - pr_info("Creating %u devices ...\n", num_devices); zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL); if (!zram_devices) { ret = -ENOMEM; @@ -821,6 +947,8 @@ static int __init zram_init(void) goto free_devices; } + pr_info("Created %u device(s) ...\n", num_devices); + return 0; free_devices: @@ -841,9 +969,13 @@ static void __exit zram_exit(void) for (i = 0; i < num_devices; i++) { zram = &zram_devices[i]; + get_disk(zram->disk); destroy_device(zram); - if (zram->init_done) - zram_reset_device(zram); + /* + * Shouldn't access zram->disk after destroy_device + * because destroy_device already released zram->disk. + */ + zram_reset_device(zram, false); } unregister_blkdev(zram_major, "zram"); @@ -852,12 +984,13 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); - module_init(zram_init); module_exit(zram_exit); +module_param(num_devices, uint, 0); +MODULE_PARM_DESC(num_devices, "Number of zram devices"); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); +MODULE_ALIAS("devname:zram"); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index fbe8ac98704..508a19f444f 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -26,33 +26,18 @@ */ static const unsigned max_num_devices = 32; -/* - * Stored at beginning of each compressed object. - * - * It stores back-reference to table entry which points to this - * object. This is required to support memory defragmentation. - */ -struct zobj_header { -#if 0 - u32 table_idx; -#endif -}; - /*-- Configurable parameters */ -/* Default zram disk size: 25% of total RAM */ -static const unsigned default_disksize_perc_ram = 25; - /* * Pages that compress to size greater than this are stored * uncompressed in memory. */ -static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; +static const size_t max_zpage_size = PAGE_SIZE / 10 * 9; /* * NOTE: max_zpage_size must be less than or equal to: - * ZS_MAX_ALLOC_SIZE - sizeof(struct zobj_header) - * otherwise, xv_malloc() would always return failure. + * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would + * always return failure. 
*/ /*-- End of configurable params */ @@ -68,9 +53,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { - /* Page is stored uncompressed */ - ZRAM_UNCOMPRESSED, - /* Page consists entirely of zeros */ ZRAM_ZERO, @@ -81,34 +63,51 @@ enum zram_pageflags { /* Allocated for each disk page */ struct table { - void *handle; + unsigned long handle; u16 size; /* object size (excluding header) */ u8 count; /* object ref count (not yet used) */ u8 flags; -} __attribute__((aligned(4))); +} __aligned(4); +/* + * All 64bit fields should only be manipulated by 64bit atomic accessors. + * All modifications to 32bit counter should be protected by zram->lock. + */ struct zram_stats { - u64 compr_size; /* compressed size of pages stored */ - u64 num_reads; /* failed + successful */ - u64 num_writes; /* --do-- */ - u64 failed_reads; /* should NEVER! happen */ - u64 failed_writes; /* can happen when memory is too low */ - u64 invalid_io; /* non-page-aligned I/O requests */ - u64 notify_free; /* no. of swap slot free notifications */ + atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t num_reads; /* failed + successful */ + atomic64_t num_writes; /* --do-- */ + atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_writes; /* can happen when memory is too low */ + atomic64_t invalid_io; /* non-page-aligned I/O requests */ + atomic64_t notify_free; /* no. of swap slot free notifications */ u32 pages_zero; /* no. of zero filled pages */ u32 pages_stored; /* no. of pages currently stored */ u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 pages_expand; /* % of incompressible pages */ + u32 bad_compress; /* % of pages with compression ratio>=75% */ }; -struct zram { - struct zs_pool *mem_pool; +struct zram_meta { void *compress_workmem; void *compress_buffer; struct table *table; - spinlock_t stat64_lock; /* protect 64-bit stats */ - struct rw_semaphore lock; /* protect compression buffers and table - * against concurrent read and writes */ + struct zs_pool *mem_pool; +}; + +struct zram_slot_free { + unsigned long index; + struct zram_slot_free *next; +}; + +struct zram { + struct zram_meta *meta; + struct rw_semaphore lock; /* protect compression buffers, table, + * 32bit stat counters against concurrent + * notifications, reads and writes */ + + struct work_struct free_work; /* handle pending free request */ + struct zram_slot_free *slot_free_rq; /* list head of free request */ + struct request_queue *queue; struct gendisk *disk; int init_done; @@ -119,17 +118,8 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ + spinlock_t slot_free_lock; struct zram_stats stats; }; - -extern struct zram *zram_devices; -unsigned int zram_get_num_devices(void); -#ifdef CONFIG_SYSFS -extern struct attribute_group zram_disk_attr_group; -#endif - -extern int zram_init_device(struct zram *zram); -extern void __zram_reset_device(struct zram *zram); - #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c deleted file mode 100644 index a7f37717552..00000000000 --- a/drivers/staging/zram/zram_sysfs.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. 
- * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com/ - */ - -#include -#include -#include - -#include "zram_drv.h" - -static u64 zram_stat64_read(struct zram *zram, u64 *v) -{ - u64 val; - - spin_lock(&zram->stat64_lock); - val = *v; - spin_unlock(&zram->stat64_lock); - - return val; -} - -static struct zram *dev_to_zram(struct device *dev) -{ - int i; - struct zram *zram = NULL; - - for (i = 0; i < zram_get_num_devices(); i++) { - zram = &zram_devices[i]; - if (disk_to_dev(zram->disk) == dev) - break; - } - - return zram; -} - -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", zram->disksize); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - u64 disksize; - struct zram *zram = dev_to_zram(dev); - - ret = kstrtoull(buf, 10, &disksize); - if (ret) - return ret; - - down_write(&zram->init_lock); - if (zram->init_done) { - up_write(&zram->init_lock); - pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; - } - - zram->disksize = PAGE_ALIGN(disksize); - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - up_write(&zram->init_lock); - - return len; -} - -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->init_done); -} - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - unsigned short do_reset; - struct zram *zram; - struct block_device *bdev; - - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - /* Do not reset an active device! 
*/ - if (bdev->bd_holders) - return -EBUSY; - - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - return ret; - - if (!do_reset) - return -EINVAL; - - /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); - - down_write(&zram->init_lock); - if (zram->init_done) - __zram_reset_device(zram); - up_write(&zram->init_lock); - - return len; -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->stats.pages_zero); -} - -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); -} - -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.compr_size)); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - if (zram->init_done) { - val = zs_get_total_size_bytes(zram->mem_pool) + - ((u64)(zram->stats.pages_expand) << PAGE_SHIFT); - } - - return sprintf(buf, "%llu\n", val); -} - -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, - disksize_show, disksize_store); -static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); -static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); -static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); - -static struct attribute *zram_disk_attrs[] = { - &dev_attr_disksize.attr, - &dev_attr_initstate.attr, - &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, - NULL, -}; - -struct attribute_group zram_disk_attr_group = { - .attrs = zram_disk_attrs, -}; diff --git a/drivers/staging/zsmalloc/Kconfig 
b/drivers/staging/zsmalloc/Kconfig index a5ab7200626..7fab032298f 100644 --- a/drivers/staging/zsmalloc/Kconfig +++ b/drivers/staging/zsmalloc/Kconfig @@ -1,9 +1,5 @@ config ZSMALLOC - tristate "Memory allocator for compressed pages" - # X86 dependency is because of the use of __flush_tlb_one and set_pte - # in zsmalloc-main.c. - # TODO: convert these to portable functions - depends on X86 + bool "Memory allocator for compressed pages" default n help zsmalloc is a slab-based memory allocator designed to store diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile index b134848a590..ceadfa41654 100644 --- a/drivers/staging/zsmalloc/Makefile +++ b/drivers/staging/zsmalloc/Makefile @@ -1,3 +1,4 @@ +CFLAGS_zsmalloc-main.o := -Wno-error=implicit-function-declaration -Wno-implicit-function-declaration zsmalloc-y := zsmalloc-main.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c index 917461c6601..3b950e5a918 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/drivers/staging/zsmalloc/zsmalloc-main.c @@ -10,6 +10,54 @@ * Released under the terms of GNU General Public License Version 2.0 */ + +/* + * This allocator is designed for use with zcache and zram. Thus, the + * allocator is supposed to work well under low memory conditions. In + * particular, it never attempts higher order page allocation which is + * very likely to fail under memory pressure. On the other hand, if we + * just use single (0-order) pages, it would suffer from very high + * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy + * an entire page. This was one of the major issues with its predecessor + * (xvmalloc). + * + * To overcome these issues, zsmalloc allocates a bunch of 0-order pages + * and links them together using various 'struct page' fields. These linked + * pages act as a single higher-order page i.e. an object can span 0-order + * page boundaries. The code refers to these linked pages as a single entity + * called zspage. + * + * Following is how we use various fields and flags of underlying + * struct page(s) to form a zspage. + * + * Usage of struct page fields: + * page->first_page: points to the first component (0-order) page + * page->index (union with page->freelist): offset of the first object + * starting in this page. For the first page, this is + * always 0, so we use this field (aka freelist) to point + * to the first free object in zspage. + * page->lru: links together all component pages (except the first page) + * of a zspage + * + * For _first_ page only: + * + * page->private (union with page->first_page): refers to the + * component page after the first page + * page->freelist: points to the first free object in zspage. + * Free objects are linked together using in-place + * metadata. + * page->objects: maximum number of objects we can store in this + * zspage (class->zspage_order * PAGE_SIZE / class->size) + * page->lru: links together first pages of various zspages. + * Basically forming list of zspages in a fullness group. 
+ * page->mapping: class index and fullness group of the zspage + * + * Usage of struct page flags: + * PG_private: identifies the first component page + * PG_private2: identifies the last component page + * + */ + #ifdef CONFIG_ZSMALLOC_DEBUG #define DEBUG #endif @@ -27,9 +75,139 @@ #include #include #include +#include +#include +#include #include "zsmalloc.h" -#include "zsmalloc_int.h" + +/* + * This must be power of 2 and greater than of equal to sizeof(link_free). + * These two conditions ensure that any 'struct link_free' itself doesn't + * span more than 1 page which avoids complex case of mapping 2 pages simply + * to restore link_free pointer values. + */ +#define ZS_ALIGN 8 + +/* + * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single) + * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N. + */ +#define ZS_MAX_ZSPAGE_ORDER 2 +#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) + +/* + * Object location (, ) is encoded as + * as single (void *) handle value. + * + * Note that object index is relative to system + * page it is stored in, so for each sub-page belonging + * to a zspage, obj_idx starts with 0. + * + * This is made more complicated by various memory models and PAE. + */ + +#ifndef MAX_PHYSMEM_BITS +#ifdef CONFIG_HIGHMEM64G +#define MAX_PHYSMEM_BITS 36 +#else /* !CONFIG_HIGHMEM64G */ +/* + * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just + * be PAGE_SHIFT + */ +#define MAX_PHYSMEM_BITS BITS_PER_LONG +#endif +#endif +#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) +#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) + +#define MAX(a, b) ((a) >= (b) ? (a) : (b)) +/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ +#define ZS_MIN_ALLOC_SIZE \ + MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) +#define ZS_MAX_ALLOC_SIZE PAGE_SIZE + +/* + * On systems with 4K page size, this gives 254 size classes! There is a + * trader-off here: + * - Large number of size classes is potentially wasteful as free page are + * spread across these classes + * - Small number of size classes causes large internal fragmentation + * - Probably its better to use specific size classes (empirically + * determined). NOTE: all those class sizes must be set as multiple of + * ZS_ALIGN to make sure link_free itself never has to span 2 pages. + * + * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN + * (reason above) + */ +#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8) +#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \ + ZS_SIZE_CLASS_DELTA + 1) + +/* + * We do not maintain any list for completely empty or full pages + */ +enum fullness_group { + ZS_ALMOST_FULL, + ZS_ALMOST_EMPTY, + _ZS_NR_FULLNESS_GROUPS, + + ZS_EMPTY, + ZS_FULL +}; + +/* + * We assign a page to ZS_ALMOST_EMPTY fullness group when: + * n <= N / f, where + * n = number of allocated objects + * N = total number of objects zspage can store + * f = 1/fullness_threshold_frac + * + * Similarly, we assign zspage to: + * ZS_ALMOST_FULL when n > N / f + * ZS_EMPTY when n == 0 + * ZS_FULL when n == N + * + * (see: fix_fullness_group()) + */ +static const int fullness_threshold_frac = 4; + +struct size_class { + /* + * Size of objects stored in this class. Must be multiple + * of ZS_ALIGN. 
+ */ + int size; + unsigned int index; + + /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ + int pages_per_zspage; + + spinlock_t lock; + + /* stats */ + u64 pages_allocated; + + struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; +}; + +/* + * Placed within free objects to form a singly linked list. + * For every zspage, first_page->freelist gives head of this list. + * + * This must be power of 2 and less than or equal to ZS_ALIGN + */ +struct link_free { + /* Handle of next free chunk (encodes ) */ + void *next; +}; + +struct zs_pool { + struct size_class size_class[ZS_SIZE_CLASSES]; + + gfp_t flags; /* allocation flags used when growing pool */ +}; /* * A zspage's class index and fullness group @@ -40,17 +218,39 @@ #define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1) #define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1) +/* + * By default, zsmalloc uses a copy-based object mapping method to access + * allocations that span two pages. However, if a particular architecture + * performs VM mapping faster than copying, then it should be added here + * so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use + * page table mapping rather than copying for object mapping. + */ +#if defined(CONFIG_ARM) && !defined(MODULE) +#define USE_PGTABLE_MAPPING +#endif + +struct mapping_area { +#ifdef USE_PGTABLE_MAPPING + struct vm_struct *vm; /* vm area for mapping object that span pages */ +#else + char *vm_buf; /* copy buffer for objects that span pages */ +#endif + char *vm_addr; /* address of kmap_atomic()'ed pages */ + enum zs_mapmode vm_mm; /* mapping mode */ +}; + + /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); static int is_first_page(struct page *page) { - return test_bit(PG_private, &page->flags); + return PagePrivate(page); } static int is_last_page(struct page *page) { - return test_bit(PG_private_2, &page->flags); + return PagePrivate2(page); } static void get_zspage_mapping(struct page *page, unsigned int *class_idx, @@ -180,7 +380,7 @@ out: * link together 3 PAGE_SIZE sized pages to form a zspage * since then we can perfectly fit in 8 such objects. */ -static int get_zspage_order(int class_size) +static int get_pages_per_zspage(int class_size) { int i, max_usedpc = 0; /* zspage order which gives maximum used size per KB */ @@ -223,14 +423,19 @@ static struct page *get_next_page(struct page *page) if (is_last_page(page)) next = NULL; else if (is_first_page(page)) - next = (struct page *)page->private; + next = (struct page *)page_private(page); else next = list_entry(page->lru.next, struct page, lru); return next; } -/* Encode as a single handle value */ +/* + * Encode as a single handle value. + * On hardware platforms with physical memory starting at 0x0 the pfn + * could be 0 so we ensure that the handle will never be 0 by adjusting the + * encoded obj_idx value before encoding. + */ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) { unsigned long handle; @@ -241,19 +446,21 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) } handle = page_to_pfn(page) << OBJ_INDEX_BITS; - handle |= (obj_idx & OBJ_INDEX_MASK); + handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); return (void *)handle; } -/* Decode pair from the given object handle */ -static void obj_handle_to_location(void *handle, struct page **page, +/* + * Decode pair from the given object handle. 
We adjust the + * decoded obj_idx back to its original value since it was adjusted in + * obj_location_to_handle(). + */ +static void obj_handle_to_location(unsigned long handle, struct page **page, unsigned long *obj_idx) { - unsigned long hval = (unsigned long)handle; - - *page = pfn_to_page(hval >> OBJ_INDEX_BITS); - *obj_idx = hval & OBJ_INDEX_MASK; + *page = pfn_to_page(handle >> OBJ_INDEX_BITS); + *obj_idx = (handle & OBJ_INDEX_MASK) - 1; } static unsigned long obj_idx_to_offset(struct page *page, @@ -274,7 +481,7 @@ static void reset_page(struct page *page) set_page_private(page, 0); page->mapping = NULL; page->freelist = NULL; - reset_page_mapcount(page); + page_mapcount_reset(page); } static void free_zspage(struct page *first_page) @@ -354,7 +561,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) static struct page *alloc_zspage(struct size_class *class, gfp_t flags) { int i, error; - struct page *first_page = NULL; + struct page *first_page = NULL, *uninitialized_var(prev_page); /* * Allocate individual pages and link them together as: @@ -368,8 +575,8 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) * identify the last page. */ error = -ENOMEM; - for (i = 0; i < class->zspage_order; i++) { - struct page *page, *prev_page; + for (i = 0; i < class->pages_per_zspage; i++) { + struct page *page; page = alloc_page(flags); if (!page) @@ -377,20 +584,19 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) INIT_LIST_HEAD(&page->lru); if (i == 0) { /* first page */ - set_bit(PG_private, &page->flags); + SetPagePrivate(page); set_page_private(page, 0); first_page = page; first_page->inuse = 0; } if (i == 1) - first_page->private = (unsigned long)page; + set_page_private(first_page, (unsigned long)page); if (i >= 1) page->first_page = first_page; if (i >= 2) list_add(&page->lru, &prev_page->lru); - if (i == class->zspage_order - 1) /* last page */ - set_bit(PG_private_2, &page->flags); - + if (i == class->pages_per_zspage - 1) /* last page */ + SetPagePrivate2(page); prev_page = page; } @@ -398,7 +604,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) first_page->freelist = obj_location_to_handle(first_page, 0); /* Maximum number of objects we can store in this zspage */ - first_page->objects = class->zspage_order * PAGE_SIZE / class->size; + first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; error = 0; /* Success */ @@ -425,34 +631,141 @@ static struct page *find_get_zspage(struct size_class *class) return page; } +#ifdef USE_PGTABLE_MAPPING +static inline int __zs_cpu_up(struct mapping_area *area) +{ + /* + * Make sure we don't leak memory if a cpu UP notification + * and zs_init() race and both call zs_cpu_up() on the same cpu + */ + if (area->vm) + return 0; + area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); + if (!area->vm) + return -ENOMEM; + return 0; +} -/* - * If this becomes a separate module, register zs_init() with - * module_init(), zs_exit with module_exit(), and remove zs_initialized -*/ -static int zs_initialized; +static inline void __zs_cpu_down(struct mapping_area *area) +{ + if (area->vm) + free_vm_area(area->vm); + area->vm = NULL; +} + +static inline void *__zs_map_object(struct mapping_area *area, + struct page *pages[2], int off, int size) +{ + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); + area->vm_addr = area->vm->addr; + return area->vm_addr + off; +} + +static inline void __zs_unmap_object(struct mapping_area *area, + struct page 
*pages[2], int off, int size) +{ + unsigned long addr = (unsigned long)area->vm_addr; + + unmap_kernel_range(addr, PAGE_SIZE * 2); +} + +#else /* USE_PGTABLE_MAPPING */ + +static inline int __zs_cpu_up(struct mapping_area *area) +{ + /* + * Make sure we don't leak memory if a cpu UP notification + * and zs_init() race and both call zs_cpu_up() on the same cpu + */ + if (area->vm_buf) + return 0; + area->vm_buf = (char *)__get_free_page(GFP_KERNEL); + if (!area->vm_buf) + return -ENOMEM; + return 0; +} + +static inline void __zs_cpu_down(struct mapping_area *area) +{ + if (area->vm_buf) + free_page((unsigned long)area->vm_buf); + area->vm_buf = NULL; +} + +static void *__zs_map_object(struct mapping_area *area, + struct page *pages[2], int off, int size) +{ + int sizes[2]; + void *addr; + char *buf = area->vm_buf; + + /* disable page faults to match kmap_atomic() return conditions */ + pagefault_disable(); + + /* no read fastpath */ + if (area->vm_mm == ZS_MM_WO) + goto out; + + sizes[0] = PAGE_SIZE - off; + sizes[1] = size - sizes[0]; + + /* copy object to per-cpu buffer */ + addr = kmap_atomic(pages[0]); + memcpy(buf, addr + off, sizes[0]); + kunmap_atomic(addr); + addr = kmap_atomic(pages[1]); + memcpy(buf + sizes[0], addr, sizes[1]); + kunmap_atomic(addr); +out: + return area->vm_buf; +} + +static void __zs_unmap_object(struct mapping_area *area, + struct page *pages[2], int off, int size) +{ + int sizes[2]; + void *addr; + char *buf = area->vm_buf; + + /* no write fastpath */ + if (area->vm_mm == ZS_MM_RO) + goto out; + + sizes[0] = PAGE_SIZE - off; + sizes[1] = size - sizes[0]; + + /* copy per-cpu buffer to object */ + addr = kmap_atomic(pages[0]); + memcpy(addr + off, buf, sizes[0]); + kunmap_atomic(addr); + addr = kmap_atomic(pages[1]); + memcpy(addr, buf + sizes[0], sizes[1]); + kunmap_atomic(addr); + +out: + /* enable page faults to match kunmap_atomic() return conditions */ + pagefault_enable(); +} + +#endif /* USE_PGTABLE_MAPPING */ static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action, void *pcpu) { - int cpu = (long)pcpu; + int ret, cpu = (long)pcpu; struct mapping_area *area; switch (action) { case CPU_UP_PREPARE: area = &per_cpu(zs_map_area, cpu); - if (area->vm) - break; - area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes); - if (!area->vm) - return notifier_from_errno(-ENOMEM); + ret = __zs_cpu_up(area); + if (ret) + return notifier_from_errno(ret); break; case CPU_DEAD: case CPU_UP_CANCELED: area = &per_cpu(zs_map_area, cpu); - if (area->vm) - free_vm_area(area->vm); - area->vm = NULL; + __zs_cpu_down(area); break; } @@ -488,14 +801,21 @@ fail: return notifier_to_errno(ret); } -struct zs_pool *zs_create_pool(const char *name, gfp_t flags) +/** + * zs_create_pool - Creates an allocation pool to work from. + * @flags: allocation flags used to allocate pool metadata + * + * This function must be called before anything when using + * the zsmalloc allocator. + * + * On success, a pointer to the newly created pool is returned, + * otherwise NULL. 
+ */ +struct zs_pool *zs_create_pool(gfp_t flags) { - int i, error, ovhd_size; + int i, ovhd_size; struct zs_pool *pool; - if (!name) - return NULL; - ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); pool = kzalloc(ovhd_size, GFP_KERNEL); if (!pool) @@ -513,31 +833,11 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags) class->size = size; class->index = i; spin_lock_init(&class->lock); - class->zspage_order = get_zspage_order(size); + class->pages_per_zspage = get_pages_per_zspage(size); } - /* - * If this becomes a separate module, register zs_init with - * module_init, and remove this block - */ - if (!zs_initialized) { - error = zs_init(); - if (error) - goto cleanup; - zs_initialized = 1; - } - pool->flags = flags; - pool->name = name; - - error = 0; /* Success */ - -cleanup: - if (error) { - zs_destroy_pool(pool); - pool = NULL; - } return pool; } @@ -553,8 +853,7 @@ void zs_destroy_pool(struct zs_pool *pool) for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) { if (class->fullness_list[fg]) { - pr_info("Freeing non-empty class with size " - "%db, fullness group %d\n", + pr_info("Freeing non-empty class with size %db, fullness group %d\n", class->size, fg); } } @@ -567,18 +866,14 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); * zs_malloc - Allocate block of given size from pool. * @pool: pool to allocate from * @size: size of block to allocate - * @page: page no. that holds the object - * @offset: location of object within page - * - * On success, identifies block allocated - * and 0 is returned. On failure, is set to - * 0 and -ENOMEM is returned. * + * On success, handle to the allocated object is returned, + * otherwise 0. * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail. */ -void *zs_malloc(struct zs_pool *pool, size_t size) +unsigned long zs_malloc(struct zs_pool *pool, size_t size) { - void *obj; + unsigned long obj; struct link_free *link; int class_idx; struct size_class *class; @@ -587,7 +882,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size) unsigned long m_objidx, m_offset; if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) - return NULL; + return 0; class_idx = get_size_class_index(size); class = &pool->size_class[class_idx]; @@ -600,14 +895,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size) spin_unlock(&class->lock); first_page = alloc_zspage(class, pool->flags); if (unlikely(!first_page)) - return NULL; + return 0; set_zspage_mapping(first_page, class->index, ZS_EMPTY); spin_lock(&class->lock); - class->pages_allocated += class->zspage_order; + class->pages_allocated += class->pages_per_zspage; } - obj = first_page->freelist; + obj = (unsigned long)first_page->freelist; obj_handle_to_location(obj, &m_page, &m_objidx); m_offset = obj_idx_to_offset(m_page, m_objidx, class->size); @@ -626,7 +921,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size) } EXPORT_SYMBOL_GPL(zs_malloc); -void zs_free(struct zs_pool *pool, void *obj) +void zs_free(struct zs_pool *pool, unsigned long obj) { struct link_free *link; struct page *first_page, *f_page; @@ -653,13 +948,13 @@ void zs_free(struct zs_pool *pool, void *obj) + f_offset); link->next = first_page->freelist; kunmap_atomic(link); - first_page->freelist = obj; + first_page->freelist = (void *)obj; first_page->inuse--; fullness = fix_fullness_group(pool, first_page); if (fullness == ZS_EMPTY) - class->pages_allocated -= class->zspage_order; + class->pages_allocated -= class->pages_per_zspage; spin_unlock(&class->lock); @@ -668,7 +963,22 @@ void zs_free(struct zs_pool *pool, void *obj) } 
EXPORT_SYMBOL_GPL(zs_free); -void *zs_map_object(struct zs_pool *pool, void *handle) +/** + * zs_map_object - get address of allocated object from handle. + * @pool: pool from which the object was allocated + * @handle: handle returned from zs_malloc + * + * Before using an object allocated from zs_malloc, it must be mapped using + * this function. When done with the object, it must be unmapped using + * zs_unmap_object. + * + * Only one object can be mapped per cpu at a time. There is no protection + * against nested mappings. + * + * This function returns with preemption and page faults disabled. + */ +void *zs_map_object(struct zs_pool *pool, unsigned long handle, + enum zs_mapmode mm) { struct page *page; unsigned long obj_idx, off; @@ -677,38 +987,40 @@ void *zs_map_object(struct zs_pool *pool, void *handle) enum fullness_group fg; struct size_class *class; struct mapping_area *area; + struct page *pages[2]; BUG_ON(!handle); + /* + * Because we use per-cpu mapping areas shared among the + * pools/users, we can't allow mapping in interrupt context + * because it can corrupt another users mappings. + */ + BUG_ON(in_interrupt()); + obj_handle_to_location(handle, &page, &obj_idx); get_zspage_mapping(get_first_page(page), &class_idx, &fg); class = &pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); area = &get_cpu_var(zs_map_area); + area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ area->vm_addr = kmap_atomic(page); - } else { - /* this object spans two pages */ - struct page *nextp; - - nextp = get_next_page(page); - BUG_ON(!nextp); - - - set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL)); - set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL)); - - /* We pre-allocated VM area so mapping can never fail */ - area->vm_addr = area->vm->addr; + return area->vm_addr + off; } - return area->vm_addr + off; + /* this object spans two pages */ + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); + + return __zs_map_object(area, pages, off, class->size); } EXPORT_SYMBOL_GPL(zs_map_object); -void zs_unmap_object(struct zs_pool *pool, void *handle) +void zs_unmap_object(struct zs_pool *pool, unsigned long handle) { struct page *page; unsigned long obj_idx, off; @@ -726,13 +1038,16 @@ void zs_unmap_object(struct zs_pool *pool, void *handle) off = obj_idx_to_offset(page, obj_idx, class->size); area = &__get_cpu_var(zs_map_area); - if (off + class->size <= PAGE_SIZE) { + if (off + class->size <= PAGE_SIZE) kunmap_atomic(area->vm_addr); - } else { - set_pte(area->vm_ptes[0], __pte(0)); - set_pte(area->vm_ptes[1], __pte(0)); - __flush_tlb_one((unsigned long)area->vm_addr); - __flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE); + else { + struct page *pages[2]; + + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); + + __zs_unmap_object(area, pages, off, class->size); } put_cpu_var(zs_map_area); } @@ -749,3 +1064,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool) return npages << PAGE_SHIFT; } EXPORT_SYMBOL_GPL(zs_get_total_size_bytes); + +module_init(zs_init); +module_exit(zs_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Nitin Gupta "); diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h index 949384ee749..fbe6bec421a 100644 --- a/drivers/staging/zsmalloc/zsmalloc.h +++ b/drivers/staging/zsmalloc/zsmalloc.h @@ -15,16 +15,28 @@ #include +/* + * zsmalloc mapping modes + * + * NOTE: These only make a difference when a 
mapped object spans pages + */ +enum zs_mapmode { + ZS_MM_RW, /* normal read-write mapping */ + ZS_MM_RO, /* read-only (no copy-out at unmap time) */ + ZS_MM_WO /* write-only (no copy-in at map time) */ +}; + struct zs_pool; -struct zs_pool *zs_create_pool(const char *name, gfp_t flags); +struct zs_pool *zs_create_pool(gfp_t flags); void zs_destroy_pool(struct zs_pool *pool); -void *zs_malloc(struct zs_pool *pool, size_t size); -void zs_free(struct zs_pool *pool, void *obj); +unsigned long zs_malloc(struct zs_pool *pool, size_t size); +void zs_free(struct zs_pool *pool, unsigned long obj); -void *zs_map_object(struct zs_pool *pool, void *handle); -void zs_unmap_object(struct zs_pool *pool, void *handle); +void *zs_map_object(struct zs_pool *pool, unsigned long handle, + enum zs_mapmode mm); +void zs_unmap_object(struct zs_pool *pool, unsigned long handle); u64 zs_get_total_size_bytes(struct zs_pool *pool); diff --git a/drivers/staging/zsmalloc/zsmalloc_int.h b/drivers/staging/zsmalloc/zsmalloc_int.h deleted file mode 100644 index 92eefc663af..00000000000 --- a/drivers/staging/zsmalloc/zsmalloc_int.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * zsmalloc memory allocator - * - * Copyright (C) 2011 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the license that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -#ifndef _ZS_MALLOC_INT_H_ -#define _ZS_MALLOC_INT_H_ - -#include -#include -#include - -/* - * This must be power of 2 and greater than of equal to sizeof(link_free). - * These two conditions ensure that any 'struct link_free' itself doesn't - * span more than 1 page which avoids complex case of mapping 2 pages simply - * to restore link_free pointer values. - */ -#define ZS_ALIGN 8 - -/* - * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single) - * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N. - */ -#define ZS_MAX_ZSPAGE_ORDER 2 -#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) - -/* - * Object location (, ) is encoded as - * as single (void *) handle value. - * - * Note that object index is relative to system - * page it is stored in, so for each sub-page belonging - * to a zspage, obj_idx starts with 0. - * - * This is made more complicated by various memory models and PAE. - */ - -#ifndef MAX_PHYSMEM_BITS -#ifdef CONFIG_HIGHMEM64G -#define MAX_PHYSMEM_BITS 36 -#else /* !CONFIG_HIGHMEM64G */ -/* - * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just - * be PAGE_SHIFT - */ -#define MAX_PHYSMEM_BITS BITS_PER_LONG -#endif -#endif -#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) -#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) -#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) - -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) -/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ -#define ZS_MIN_ALLOC_SIZE \ - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) -#define ZS_MAX_ALLOC_SIZE PAGE_SIZE - -/* - * On systems with 4K page size, this gives 254 size classes! There is a - * trader-off here: - * - Large number of size classes is potentially wasteful as free page are - * spread across these classes - * - Small number of size classes causes large internal fragmentation - * - Probably its better to use specific size classes (empirically - * determined). 
NOTE: all those class sizes must be set as multiple of - * ZS_ALIGN to make sure link_free itself never has to span 2 pages. - * - * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN - * (reason above) - */ -#define ZS_SIZE_CLASS_DELTA 16 -#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \ - ZS_SIZE_CLASS_DELTA + 1) - -/* - * We do not maintain any list for completely empty or full pages - */ -enum fullness_group { - ZS_ALMOST_FULL, - ZS_ALMOST_EMPTY, - _ZS_NR_FULLNESS_GROUPS, - - ZS_EMPTY, - ZS_FULL -}; - -/* - * We assign a page to ZS_ALMOST_EMPTY fullness group when: - * n <= N / f, where - * n = number of allocated objects - * N = total number of objects zspage can store - * f = 1/fullness_threshold_frac - * - * Similarly, we assign zspage to: - * ZS_ALMOST_FULL when n > N / f - * ZS_EMPTY when n == 0 - * ZS_FULL when n == N - * - * (see: fix_fullness_group()) - */ -static const int fullness_threshold_frac = 4; - -struct mapping_area { - struct vm_struct *vm; - pte_t *vm_ptes[2]; - char *vm_addr; -}; - -struct size_class { - /* - * Size of objects stored in this class. Must be multiple - * of ZS_ALIGN. - */ - int size; - unsigned int index; - - /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ - int zspage_order; - - spinlock_t lock; - - /* stats */ - u64 pages_allocated; - - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; -}; - -/* - * Placed within free objects to form a singly linked list. - * For every zspage, first_page->freelist gives head of this list. - * - * This must be power of 2 and less than or equal to ZS_ALIGN - */ -struct link_free { - /* Handle of next free chunk (encodes ) */ - void *next; -}; - -struct zs_pool { - struct size_class size_class[ZS_SIZE_CLASSES]; - - gfp_t flags; /* allocation flags used when growing pool */ - const char *name; -}; - -#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index fd1baead0d8..dd6302ae715 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1637,5 +1637,10 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool page_is_guard(struct page *page) { return false; } #endif /* CONFIG_DEBUG_PAGEALLOC */ +static inline void page_mapcount_reset(struct page *page) +{ + atomic_set(&(page)->_mapcount, -1); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */
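
obj_location_to_handle() now stores obj_idx + 1 so that a handle can never be 0, even on
hardware where physical memory (and hence the first PFN) starts at 0; obj_handle_to_location()
undoes the adjustment. Below is a standalone sketch of the same arithmetic, not code from this
patch; OBJ_INDEX_BITS is a stand-in value (12, i.e. PAGE_SHIFT, which is what the deleted
zsmalloc_int.h says it reduces to for the default MAX_PHYSMEM_BITS).

    #include <assert.h>
    #include <stdio.h>

    /* Stand-in constant; the kernel derives this from MAX_PHYSMEM_BITS. */
    #define OBJ_INDEX_BITS 12UL
    #define OBJ_INDEX_MASK ((1UL << OBJ_INDEX_BITS) - 1)

    static unsigned long encode(unsigned long pfn, unsigned long obj_idx)
    {
            /* obj_idx is stored as obj_idx + 1 so the handle is never 0 */
            return (pfn << OBJ_INDEX_BITS) | ((obj_idx + 1) & OBJ_INDEX_MASK);
    }

    static void decode(unsigned long handle, unsigned long *pfn,
                       unsigned long *obj_idx)
    {
            *pfn = handle >> OBJ_INDEX_BITS;
            *obj_idx = (handle & OBJ_INDEX_MASK) - 1;
    }

    int main(void)
    {
            unsigned long pfn, idx;

            /* even pfn 0, obj_idx 0 yields a non-zero handle */
            unsigned long h = encode(0, 0);
            assert(h != 0);

            decode(h, &pfn, &idx);
            assert(pfn == 0 && idx == 0);

            printf("handle=%#lx pfn=%lu obj_idx=%lu\n", h, pfn, idx);
            return 0;
    }

With the adjustment in place, zs_malloc() can keep using 0 as its failure value without ever
colliding with a valid handle.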
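
get_pages_per_zspage() (the renamed get_zspage_order()) chooses, per size class, how many
0-order pages to chain into one zspage so that wasted space is minimized, capped at
ZS_MAX_PAGES_PER_ZSPAGE (4 in the deleted zsmalloc_int.h). The following is a rough userspace
sketch of that selection rule, assuming 4 KiB pages; the kernel's exact tie-breaking may differ.

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define ZS_MAX_PAGES_PER_ZSPAGE 4    /* 1 << ZS_MAX_ZSPAGE_ORDER */

    /* Pick the page count whose zspage has the highest used percentage. */
    static int pages_per_zspage(int class_size)
    {
            int i, best = 1, max_usedpc = 0;

            for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
                    int zspage_size = i * PAGE_SIZE;
                    int waste = zspage_size % class_size;
                    int usedpc = (zspage_size - waste) * 100 / zspage_size;

                    if (usedpc > max_usedpc) {
                            max_usedpc = usedpc;
                            best = i;
                    }
            }
            return best;
    }

    int main(void)
    {
            /* a 1536-byte class fits 8 objects exactly in 3 pages */
            int sizes[] = { 32, 1536, 2448, 3264 };
            unsigned int i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                    printf("class %4d -> %d page(s) per zspage\n",
                           sizes[i], pages_per_zspage(sizes[i]));
            return 0;
    }

For the 1536-byte case mentioned in the zsmalloc-main.c comment above, three pages hold exactly
eight objects with no waste, which is why the allocator prefers a 3-page zspage there.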
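
When USE_PGTABLE_MAPPING is not defined, an object that straddles two pages is gathered into a
per-cpu bounce buffer by two memcpy() calls in __zs_map_object() and scattered back in
__zs_unmap_object(); the ZS_MM_WO and ZS_MM_RO modes skip whichever copy is not needed. Below is
a minimal userspace model of the gather step only, with the two pages faked by static buffers.

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /*
     * An object of 'size' bytes starting at 'off' in pages[0] spills into
     * pages[1]; gather it into a contiguous bounce buffer the way the
     * copy-based __zs_map_object() path does.
     */
    static void map_by_copy(char *buf, const char *pages[2], int off, int size)
    {
            int sizes[2];

            sizes[0] = PAGE_SIZE - off;     /* tail of the first page */
            sizes[1] = size - sizes[0];     /* head of the second page */

            memcpy(buf, pages[0] + off, sizes[0]);
            memcpy(buf + sizes[0], pages[1], sizes[1]);
    }

    int main(void)
    {
            static char p0[PAGE_SIZE], p1[PAGE_SIZE], buf[256];
            const char *pages[2] = { p0, p1 };
            int off = PAGE_SIZE - 100, size = 256;

            memset(p0 + off, 'A', 100);     /* first 100 bytes of the object */
            memset(p1, 'B', 156);           /* remaining 156 bytes */

            map_by_copy(buf, pages, off, size);
            printf("buf[99]=%c buf[100]=%c\n", buf[99], buf[100]);  /* A B */
            return 0;
    }

The write-back direction in __zs_unmap_object() simply swaps the source and destination of each
memcpy().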
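
Putting the interface changes together: zs_create_pool() no longer takes a name, zs_malloc() and
zs_free() trade void * handles for unsigned long, and zs_map_object() takes an explicit
zs_mapmode. Below is a hedged sketch of how a client driver might drive the reworked API; the
GFP flags, sizes, and function name are illustrative assumptions, not code from this patch.

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include "zsmalloc.h"

    /*
     * Illustrative only: store one compressed buffer and read it back.
     * 'clen' must not exceed ZS_MAX_ALLOC_SIZE (one page).
     */
    static int zs_roundtrip_example(const void *cbuf, size_t clen, void *out)
    {
            struct zs_pool *pool;
            unsigned long handle;
            void *dst, *src;

            /* GFP flags here are an assumption, not mandated by the API */
            pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
            if (!pool)
                    return -ENOMEM;

            handle = zs_malloc(pool, clen);
            if (!handle) {
                    zs_destroy_pool(pool);
                    return -ENOMEM;
            }

            /* ZS_MM_WO: nothing is copied in at map time */
            dst = zs_map_object(pool, handle, ZS_MM_WO);
            memcpy(dst, cbuf, clen);
            zs_unmap_object(pool, handle);

            /* ZS_MM_RO: nothing is copied back at unmap time */
            src = zs_map_object(pool, handle, ZS_MM_RO);
            memcpy(out, src, clen);
            zs_unmap_object(pool, handle);

            zs_free(pool, handle);
            zs_destroy_pool(pool);
            return 0;
    }

Because zs_map_object() returns with preemption and page faults disabled, the mapped window
should only be used for short copies and must be released with zs_unmap_object() before doing
anything that can sleep; ZS_MM_RW is the mode to use when the mapping is both read and written.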