From 64970b68d2b3ed32b964b0b30b1b98518fde388e Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum
Date: Tue, 11 Mar 2008 16:17:19 +0100
Subject: x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps

This moves an optimization for searching constant-sized small bitmaps
from x86_64-specific to generic code.

On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:

In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.

On x86_64, 52 locations are optimized with a minimal increase in
code size:

Current #testing defconfig:
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
5392637  846592  724424 6963653  6a41c5 vmlinux

After removing the x86_64 specific optimization for find_next_*bit:
	94 x bsf, 79 x find_next_*bit
   text    data     bss     dec     hex filename
5392358  846592  724424 6963374  6a40ae vmlinux

After this patch (making the optimization generic):
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
5392396  846592  724424 6963412  6a40d4 vmlinux

[ tglx@linutronix.de: build fixes ]

Signed-off-by: Ingo Molnar
---
 include/linux/bitops.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 40d54731de7..3865f2c93bd 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -112,4 +112,81 @@ static inline unsigned fls_long(unsigned long l)
 	return fls64(l);
 }
 
+#ifdef __KERNEL__
+#ifdef CONFIG_GENERIC_FIND_NEXT_BIT
+extern unsigned long __find_next_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+static __always_inline unsigned long
+find_next_bit(const unsigned long *addr, unsigned long size,
+		unsigned long offset)
+{
+	unsigned long value;
+
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		value = (*addr) & ((~0ul) << offset);
+		value |= (1ul << size);
+		return __ffs(value);
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
+		value = (*addr) & ((~0ul) << offset);
+		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+	}
+
+	/* size is not constant or too big */
+	return __find_next_bit(addr, size, offset);
+}
+
+extern unsigned long __find_next_zero_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+static __always_inline unsigned long
+find_next_zero_bit(const unsigned long *addr, unsigned long size,
+		unsigned long offset)
+{
+	unsigned long value;
+
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		value = (~(*addr)) & ((~0ul) << offset);
+		value |= (1ul << size);
+		return __ffs(value);
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
+		value = (~(*addr)) & ((~0ul) << offset);
+		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+	}
+
+	/* size is not constant or too big */
+	return __find_next_zero_bit(addr, size, offset);
+}
+#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
+#endif /* __KERNEL__ */
 #endif
--
cgit v1.2.3

From 77b9bd9c49442407804c37bcc82021a35277f83c Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum
Date: Tue, 1 Apr 2008 11:46:19 +0200
Subject: x86: generic versions of find_first_(zero_)bit, convert i386

Generic versions of __find_first_bit and __find_first_zero_bit
are introduced as simplified versions of __find_next_bit and
__find_next_zero_bit. Their compilation and use are guarded by
a new config variable GENERIC_FIND_FIRST_BIT.

The generic versions of find_first_bit and find_first_zero_bit
are implemented in terms of the newly introduced __find_first_bit
and __find_first_zero_bit.

This patch does not remove the i386-specific implementation,
but it does switch i386 to use the generic functions by setting
GENERIC_FIND_FIRST_BIT=y for X86_32.

Signed-off-by: Alexander van Heukelum
Signed-off-by: Ingo Molnar
---
 include/linux/bitops.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 3865f2c93bd..355d67ba3bd 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -113,6 +113,40 @@ static inline unsigned fls_long(unsigned long l)
 }
 
 #ifdef __KERNEL__
+#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+extern unsigned long __find_first_bit(const unsigned long *addr,
+		unsigned long size);
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit.
+ */
+static __always_inline unsigned long
+find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	return __find_first_bit(addr, size);
+}
+
+extern unsigned long __find_first_zero_bit(const unsigned long *addr,
+		unsigned long size);
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first cleared bit.
+ */
+static __always_inline unsigned long
+find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	return __find_first_zero_bit(addr, size);
+}
+#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
 #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
 extern unsigned long __find_next_bit(const unsigned long *addr,
 		unsigned long size, unsigned long offset);
--
cgit v1.2.3

From 3a48305028aa38afba93fc05066c71a6ee668ad8 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum
Date: Tue, 1 Apr 2008 17:42:21 +0200
Subject: x86: optimize find_first_bit for small bitmaps

Avoid a call to find_first_bit if the bitmap size is known at
compile time and small enough to fit in a single long integer.
Modeled after an optimization in the original x86_64-specific code.
Signed-off-by: Alexander van Heukelum
Signed-off-by: Ingo Molnar
---
 include/linux/bitops.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 355d67ba3bd..48bde600a2d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -127,6 +127,20 @@ extern unsigned long __find_first_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_bit(const unsigned long *addr, unsigned long size)
 {
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
+		return __ffs((*addr) | (1ul << size));
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
+		return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
+
+	/* size is not constant or too big */
 	return __find_first_bit(addr, size);
 }
 
@@ -143,6 +157,21 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		return __ffs(~(*addr) | (1ul << size));
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
+		return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
+
+	/* size is not constant or too big */
 	return __find_first_zero_bit(addr, size);
 }
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
--
cgit v1.2.3
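
A minimal sketch of the sentinel trick that the inlined fast paths above rely
on, written as standalone userspace C rather than kernel code (assumptions:
__builtin_ctzl() stands in for the kernel's __ffs(), and demo_find_next_bit()
plus the sample bitmap are illustrative only, not part of the patches). It
covers only the size < BITS_PER_LONG branch: setting bit 'size' guarantees the
scanned word is never zero, so the undefined __ffs(0) case cannot be hit, and
a return value equal to 'size' means "no bit found", matching the convention
of find_next_bit().

	#include <stdio.h>

	#define BITS_PER_LONG (8 * sizeof(unsigned long))

	/* Fast-path sketch: bitmap fits in one long, size < BITS_PER_LONG. */
	static unsigned long demo_find_next_bit(unsigned long word,
						unsigned long size,
						unsigned long offset)
	{
		/* mask off bits below 'offset' */
		unsigned long value = word & (~0ul << offset);

		/* sentinel: bit 'size' is always set, so the scan returns
		 * 'size' when no real bit is set at or after 'offset' */
		value |= 1ul << size;
		return (unsigned long)__builtin_ctzl(value);
	}

	int main(void)
	{
		unsigned long bitmap = 0x48;	/* bits 3 and 6 set */

		printf("%lu\n", demo_find_next_bit(bitmap, 16, 0));	/* 3 */
		printf("%lu\n", demo_find_next_bit(bitmap, 16, 4));	/* 6 */
		printf("%lu\n", demo_find_next_bit(bitmap, 16, 7));	/* 16: none found */
		return 0;
	}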