From 821376bf15e692941f9235f13a14987009fd0b10 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Thu, 21 Apr 2005 11:07:59 -0700 Subject: [IA64] fix fls() The ia64-version of fls() never worked as intended (the bitnumbering was off by 1 and fls(0) was undefined). This patch fixes the problem by using a popcnt-based fls(), which on McKinley-derived cores is slightly faster than both ia64_fls() and generic_fls(). The resulting code, however, is bigger (7-8 bundles instead of about 3 bundles). Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or newer since the compiler can predicate that and schedule it better. Thanks to Simon Derr and Matt Mackall for tracking down this bug. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- include/asm-ia64/gcc_intrin.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/asm-ia64/gcc_intrin.h') diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index 7c357dfbae5..4fb4e439b05 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h @@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__; ia64_intri_res; \ }) -#define ia64_popcnt(x) \ -({ \ +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define ia64_popcnt(x) __builtin_popcountl(x) +#else +# define ia64_popcnt(x) \ + ({ \ __u64 ia64_intri_res; \ asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ \ ia64_intri_res; \ -}) + }) +#endif #define ia64_getf_exp(x) \ ({ \ -- cgit v1.2.3