From e89a2fd9960ea72bbd7d8ecc22a1a0e6716601d8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 12 Aug 2010 13:33:12 -0700 Subject: [PATCH] arm: Translate delay.S into (mostly) C In the next patch we're going to allow machines to override the __delay() implementation at runtime so they can implement a timer based __delay() routine. It's easier to do this using C, so lets write udelay and friends in C. We lose the #if 0 code, which according to Russell is used "to make the delay loop more stable and predictable on older CPUs" (see http://article.gmane.org/gmane.linux.kernel/888867 for more info). We shouldn't be too worried though, since the next patch adds functionality to allow a machine to set the __delay() loop themselves, therefore allowing machines to resurrect the commented out code if they need it. bloat-o-meter shows an increase of 12 bytes. Further inspection of the assembly shows GCC copying the loops_per_jiffy pointer and the magic HZ value to the ends of __const_udelay() and _delay() thus contributing an extra 4 and 8 bytes of data to each function. These two values weren't taken into account in the delay.S version since they weren't part of the function in nm's eyes. This means we only really gained an extra 4 bytes due to GCC's decision to duplicate the loops_per_jiffy pointer in __const_udelay. $ scripts/bloat-o-meter vmlinux.orig vmlinux.new add/remove: 0/0 grow/shrink: 2/0 up/down: 12/0 (12) function old new delta __udelay 48 56 +8 __const_udelay 40 44 +4 Change-Id: Ibfaab52d0f5e09471571be082232db04726d5532 Signed-off-by: Stephen Boyd Reviewed-by: Saravana Kannan (cherry picked from commit 8d5868d8205d10a0a8e423f53e9cc9bb3e9d1a34) Conflicts: arch/arm/kernel/armksyms.c arch/arm/lib/delay.S --- arch/arm/include/asm/delay.h | 2 +- arch/arm/kernel/armksyms.c | 4 --- arch/arm/lib/delay.S | 69 ------------------------------------ arch/arm/lib/delay.c | 57 +++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 74 deletions(-) delete mode 100644 arch/arm/lib/delay.S create mode 100644 arch/arm/lib/delay.c diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index b2deda18154..ccc5ed573fa 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -8,7 +8,7 @@ #include /* HZ */ -extern void __delay(int loops); +extern void __delay(unsigned long loops); /* * This function intentionally does not exist; if you see references to diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01..f1a50f37efd 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -48,10 +48,6 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); - /* platform dependent support */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__const_udelay); - /* networking */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_from_user); diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S deleted file mode 100644 index 3c9a05c8d20..00000000000 --- a/arch/arm/lib/delay.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - .text - -.LC0: .word loops_per_jiffy -.LC1: .word (2199023*HZ)>>11 - -/* - * r0 <= 2000 - * lpj <= 0x01ffffff (max. 3355 bogomips) - * HZ <= 1000 - */ - -ENTRY(__udelay) - ldr r2, .LC1 - mul r0, r2, r0 -ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 - mov r1, #-1 - ldr r2, .LC0 - ldr r2, [r2] @ max = 0x01ffffff - add r0, r0, r1, lsr #32-14 - mov r0, r0, lsr #14 @ max = 0x0001ffff - add r2, r2, r1, lsr #32-10 - mov r2, r2, lsr #10 @ max = 0x00007fff - mul r0, r2, r0 @ max = 2^32-1 - add r0, r0, r1, lsr #32-6 - movs r0, r0, lsr #6 - moveq pc, lr - -/* - * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * Oh, if only we had a cycle counter... - */ - -@ Delay routine -ENTRY(__delay) - subs r0, r0, #1 -#if 0 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 -#endif - bhi __delay - mov pc, lr -ENDPROC(__udelay) -ENDPROC(__const_udelay) -ENDPROC(__delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 00000000000..5ee0adc7b4d --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,57 @@ +/* + * Originally from linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * Copyright (c) 2010, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +/* + * loops = usecs * HZ * loops_per_jiffy / 1000000 + * + * Oh, if only we had a cycle counter... + */ +void __delay(unsigned long loops) +{ + asm volatile( + "1: subs %0, %0, #1 \n" + " bhi 1b \n" + : /* No output */ + : "r" (loops) + ); +} +EXPORT_SYMBOL(__delay); + +/* + * 0 <= xloops <= 0x7fffff06 + * loops_per_jiffy <= 0x01ffffff (max. 3355 bogomips) + */ +void __const_udelay(unsigned long xloops) +{ + unsigned long lpj; + unsigned long loops; + + xloops >>= 14; /* max = 0x01ffffff */ + lpj = loops_per_jiffy >> 10; /* max = 0x0001ffff */ + loops = lpj * xloops; /* max = 0x00007fff */ + loops >>= 6; /* max = 2^32-1 */ + + if (loops) + __delay(loops); +} +EXPORT_SYMBOL(__const_udelay); + +/* + * usecs <= 2000 + * HZ <= 1000 + */ +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * ((2199023*HZ)>>11)); +} +EXPORT_SYMBOL(__udelay);