From 8f8d09ec70b26d82b42dcc58779bbbdb460bc8a3 Mon Sep 17 00:00:00 2001 From: Rajeev Kulkarni Date: Thu, 22 Nov 2012 00:22:32 -0800 Subject: [PATCH] msm: kgsl: Enable "big page" memory allocations Allow users to specify "big page" mode when allocating GPU memory. This will attempt to allocate contiguous units of 64k bytes instead of the usual PAGE_SIZE units. If the allocation fails, then automatically fall back to allocating the rest of the region with 4K pages. This means that potentially any allocation could be comprised of a mixture of 64k chunks and 4k chunks. Change-Id: Ib92c0c099d3b10ba54dbb864c5f977f8df4912f2 Signed-off-by: Jordan Crouse Signed-off-by: Jeremy Gebben Signed-off-by: Rajeev Kulkarni --- drivers/gpu/msm/kgsl.c | 7 ++ drivers/gpu/msm/kgsl.h | 3 +- drivers/gpu/msm/kgsl_debugfs.c | 14 ++- drivers/gpu/msm/kgsl_mmu.c | 13 ++- drivers/gpu/msm/kgsl_sharedmem.c | 159 +++++++++++++++++++++---------- include/linux/msm_kgsl.h | 7 ++ 6 files changed, 148 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 9785e69aca3..4f99239b6b9 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1615,6 +1615,8 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, return -ENOMEM; memdesc->sglen = sglen; + memdesc->sglen_alloc = sglen; + sg_init_table(memdesc->sg, sglen); spin_lock(&current->mm->page_table_lock); @@ -1901,6 +1903,11 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, entry->memdesc.priv |= param->flags & KGSL_MEMTYPE_MASK; + if (entry->memdesc.size >= SZ_1M) + entry->memdesc.priv |= ilog2(SZ_1M) << KGSL_MEMALIGN_SHIFT; + else if (entry->memdesc.size >= SZ_64K) + entry->memdesc.priv |= ilog2(SZ_64K) << KGSL_MEMALIGN_SHIFT; + result = kgsl_mmu_map(private->pagetable, &entry->memdesc, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 472474bf6f7..2861117bb1a 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h 
@@ -135,7 +135,8 @@ struct kgsl_memdesc { unsigned int size; unsigned int priv; struct scatterlist *sg; - unsigned int sglen; + unsigned int sglen; /* Active entries in the sglist */ + unsigned int sglen_alloc; /* Allocated entries in the sglist */ struct kgsl_memdesc_ops *ops; int flags; }; diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c index 40ed7cab874..b49c2602078 100644 --- a/drivers/gpu/msm/kgsl_debugfs.c +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -168,8 +168,9 @@ static int process_mem_print(struct seq_file *s, void *unused) struct kgsl_mem_entry *entry; struct rb_node *node; struct kgsl_process_private *private = s->private; - char flags[3]; + char flags[4]; char usage[16]; + unsigned int align; spin_lock(&private->mem_lock); seq_printf(s, "%8s %8s %5s %10s %16s %5s\n", @@ -182,7 +183,16 @@ static int process_mem_print(struct seq_file *s, void *unused) flags[0] = m->priv & KGSL_MEMFLAGS_GLOBAL ? 'g' : '-'; flags[1] = m->priv & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-'; - flags[2] = '\0'; + + align = (m->priv & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT; + if (align >= ilog2(SZ_1M)) + flags[2] = 'L'; + else if (align >= ilog2(SZ_64K)) + flags[2] = 'l'; + else + flags[2] = '-'; + + flags[3] = '\0'; kgsl_get_memory_usage(usage, sizeof(usage), m->priv); diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c index 54ba5adabd9..dbb88ee847c 100644 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -606,6 +606,7 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, int ret; struct gen_pool *pool; int size; + int page_align = ilog2(PAGE_SIZE); if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { if (memdesc->sglen == 1) { @@ -630,7 +631,17 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, /* Allocate from kgsl pool if it exists for global mappings */ pool = _get_pool(pagetable, memdesc->priv); - memdesc->gpuaddr = gen_pool_alloc(pool, size); + /* Allocate aligned virtual addresses for iommu. 
This allows + * more efficient pagetable entries if the physical memory + * is also aligned. Don't do this for GPUMMU, because + * the address space is so small. + */ + if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype() && + (memdesc->priv & KGSL_MEMALIGN_MASK)) { + page_align = (memdesc->priv & KGSL_MEMALIGN_MASK) + >> KGSL_MEMALIGN_SHIFT; + } + memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size, page_align); if (memdesc->gpuaddr == 0) { KGSL_CORE_ERR("gen_pool_alloc(%d) failed from pool: %s\n", size, diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 585127e37e8..77617ba0d2f 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -317,21 +317,42 @@ static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - unsigned long offset; - struct page *page; - int i; + int i, pgoff; + struct scatterlist *s = memdesc->sg; + unsigned int offset; - offset = (unsigned long) vmf->virtual_address - vma->vm_start; + offset = ((unsigned long) vmf->virtual_address - vma->vm_start); - i = offset >> PAGE_SHIFT; - page = sg_page(&memdesc->sg[i]); - if (page == NULL) + if (offset >= memdesc->size) return VM_FAULT_SIGBUS; - get_page(page); + pgoff = offset >> PAGE_SHIFT; - vmf->page = page; - return 0; + /* + * The sglist might be comprised of mixed blocks of memory depending + * on how many 64K pages were allocated. 
This means we have to do math + * to find the actual 4K page to map in user space + */ + + for (i = 0; i < memdesc->sglen; i++) { + int npages = s->length >> PAGE_SHIFT; + + if (pgoff < npages) { + struct page *page = sg_page(s); + + page = nth_page(page, pgoff); + + get_page(page); + vmf->page = page; + + return 0; + } + + pgoff -= npages; + s = sg_next(s); + } + + return VM_FAULT_SIGBUS; } static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc) @@ -357,7 +378,7 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) } if (memdesc->sg) for_each_sg(memdesc->sg, sg, sglen, i) - __free_page(sg_page(sg)); + __free_pages(sg_page(sg), get_order(sg->length)); } static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) @@ -379,27 +400,36 @@ static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); struct page **pages = NULL; struct scatterlist *sg; + int npages = PAGE_ALIGN(memdesc->size) >> PAGE_SHIFT; int sglen = memdesc->sglen; - int i; + int i, count = 0; /* Don't map the guard page if it exists */ if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) sglen--; /* create a list of pages to call vmap */ - pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL); + pages = vmalloc(npages * sizeof(struct page *)); if (!pages) { - KGSL_CORE_ERR("kmalloc(%d) failed\n", - sglen * sizeof(struct page *)); + KGSL_CORE_ERR("vmalloc(%d) failed\n", + npages * sizeof(struct page *)); return -ENOMEM; } - for_each_sg(memdesc->sg, sg, sglen, i) - pages[i] = sg_page(sg); - memdesc->hostptr = vmap(pages, sglen, + + for_each_sg(memdesc->sg, sg, sglen, i) { + struct page *page = sg_page(sg); + int j; + + for (j = 0; j < sg->length >> PAGE_SHIFT; j++) + pages[count++] = page++; + } + + + memdesc->hostptr = vmap(pages, count, VM_IOREMAP, page_prot); KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, kgsl_driver.stats.vmalloc_max); - kfree(pages); + vfree(pages); } if (!memdesc->hostptr) return 
-ENOMEM; @@ -503,14 +533,15 @@ EXPORT_SYMBOL(kgsl_cache_range_op); static int _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, - size_t size, unsigned int protflags) + size_t size, unsigned int flags, unsigned int protflags) { - int i, order, ret = 0; - int sglen = PAGE_ALIGN(size) / PAGE_SIZE; + int pcount = 0, order, ret = 0; + int j, len, page_size, sglen_alloc, sglen = 0; struct page **pages = NULL; pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); void *ptr; struct sysinfo si; + unsigned int align; /* * Get the current memory information to be used in deciding if we @@ -530,23 +561,36 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, if (size >= ((si.freeram << PAGE_SHIFT) - SZ_32M)) return -ENOMEM; + align = (flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT; + + page_size = (align >= ilog2(SZ_64K) && size >= SZ_64K) + ? SZ_64K : PAGE_SIZE; + + /* + * There needs to be enough room in the sg structure to be able to + * service the allocation entirely with PAGE_SIZE sized chunks + */ + + sglen_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT; + /* * Add guard page to the end of the allocation when the * IOMMU is in use. */ if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) - sglen++; + sglen_alloc++; memdesc->size = size; memdesc->pagetable = pagetable; + memdesc->priv |= (flags & KGSL_MEMALIGN_MASK); memdesc->ops = &kgsl_page_alloc_ops; - memdesc->sg = kgsl_sg_alloc(sglen); + memdesc->sg = kgsl_sg_alloc(sglen_alloc); if (memdesc->sg == NULL) { KGSL_CORE_ERR("vmalloc(%d) failed\n", - sglen * sizeof(struct scatterlist)); + sglen_alloc * sizeof(struct scatterlist)); ret = -ENOMEM; goto done; } @@ -558,38 +602,52 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, * two pages; well within the acceptable limits for using kmalloc. 
*/ - pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL); + pages = kmalloc(sglen_alloc * sizeof(struct page *), GFP_KERNEL); if (pages == NULL) { KGSL_CORE_ERR("kmalloc (%d) failed\n", - sglen * sizeof(struct page *)); + sglen_alloc * sizeof(struct page *)); ret = -ENOMEM; goto done; } kmemleak_not_leak(memdesc->sg); - memdesc->sglen = sglen; - sg_init_table(memdesc->sg, sglen); + memdesc->sglen_alloc = sglen_alloc; + sg_init_table(memdesc->sg, sglen_alloc); - for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) { + len = size; - /* - * Don't use GFP_ZERO here because it is faster to memset the - * range ourselves (see below) - */ + while (len > 0) { + struct page *page; + unsigned int gfp_mask = GFP_KERNEL | __GFP_HIGHMEM | + __GFP_NOWARN; + int j; - pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (pages[i] == NULL) { - ret = -ENOMEM; - memdesc->sglen = i; - goto done; + /* don't waste space at the end of the allocation*/ + if (len < page_size) + page_size = PAGE_SIZE; + + if (page_size != PAGE_SIZE) + gfp_mask |= __GFP_COMP; + + page = alloc_pages(gfp_mask, get_order(page_size)); + + if (page == NULL) { + if (page_size != PAGE_SIZE) { + page_size = PAGE_SIZE; + continue; + } } - sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0); + for (j = 0; j < page_size >> PAGE_SHIFT; j++) + pages[pcount++] = nth_page(page, j); + + sg_set_page(&memdesc->sg[sglen++], page, page_size, 0); + len -= page_size; } - /* ADd the guard page to the end of the sglist */ + /* Add the guard page to the end of the sglist */ if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { /* @@ -603,13 +661,14 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, __GFP_HIGHMEM); if (kgsl_guard_page != NULL) { - sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page, + sg_set_page(&memdesc->sg[sglen++], kgsl_guard_page, PAGE_SIZE, 0); memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE; - } else - memdesc->sglen--; + } } + memdesc->sglen = sglen; + /* * All memory that goes to the user has to 
be zeroed out before it gets * exposed to userspace. This means that the memory has to be mapped in @@ -629,18 +688,16 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, * path */ - ptr = vmap(pages, i, VM_IOREMAP, page_prot); + ptr = vmap(pages, pcount, VM_IOREMAP, page_prot); if (ptr != NULL) { memset(ptr, 0, memdesc->size); dmac_flush_range(ptr, ptr + memdesc->size); vunmap(ptr); } else { - int j; - /* Very, very, very slow path */ - for (j = 0; j < i; j++) { + for (j = 0; j < pcount; j++) { ptr = kmap_atomic(pages[j]); memset(ptr, 0, PAGE_SIZE); dmac_flush_range(ptr, ptr + PAGE_SIZE); @@ -683,7 +740,7 @@ kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, size = ALIGN(size, PAGE_SIZE * 2); ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + 0, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); if (!ret) ret = kgsl_page_alloc_map_kernel(memdesc); if (ret) @@ -707,7 +764,7 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, protflags |= GSL_PT_PAGE_WV; return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, - protflags); + flags, protflags); } EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); @@ -757,7 +814,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - kgsl_sg_free(memdesc->sg, memdesc->sglen); + kgsl_sg_free(memdesc->sg, memdesc->sglen_alloc); memset(memdesc, 0, sizeof(*memdesc)); } diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index b3b4f22279d..271e7b3234d 100644 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -45,6 +45,13 @@ #define KGSL_MEMTYPE_MULTISAMPLE 20 #define KGSL_MEMTYPE_KERNEL 255 +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e. 4k (2^12) would be 12, 64k (2^16) would be 16. + */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + /* generic flag values */ #define KGSL_FLAGS_NORMALMODE 0x00000000 #define KGSL_FLAGS_SAFEMODE 0x00000001