From 1f98757776eafe31065be9118db6051afcf8643c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 1 Nov 2008 10:17:22 -0700 Subject: x86: Clean up late e820 resource allocation This makes the late e820 resources use 'insert_resource_expand_to_fit()' instead of doing a 'reserve_region_with_split()', and also avoids marking them as IORESOURCE_BUSY. This results in us being perfectly happy to use pre-existing PCI resources even if they were marked as being in a reserved region, while still avoiding any _new_ allocations in the reserved regions. It also makes for a simpler and more accurate resource tree. Example resource allocation from Jonathan Corbet, who has firmware that has an e820 reserved entry that covered a big range (e0000000-fed003ff), and that had various PCI resources in it set up by firmware. With old kernels, the reserved range would force us to re-allocate all pre-existing PCI resources, and his reserved range would end up looking like this: e0000000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 where only the pre-allocated special regions (IOAPIC and HPET) were kept around. 
With 2.6.28-rc2, which uses 'reserve_region_with_split()', Jonathan's resource tree looked like this: e0000000-fe7fffff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe800000-fe8fffff : reserved fe900000-fe9d9aff : reserved fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9b00-fe9d9bff : reserved fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : reserved fe9da000-fe9dafff : 0000:00:03.3 fe9da000-fe9dafff : reserved fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : reserved fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : reserved fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : reserved fea00000-fea7ffff : 0000:00:02.0 fea00000-fea7ffff : reserved fea80000-feafffff : 0000:00:02.1 fea80000-feafffff : reserved feb00000-febfffff : 0000:00:02.0 feb00000-febfffff : reserved fec00000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 and because the reserved entry had been split and moved into the individual resources, and because it used the IORESOURCE_BUSY flag, the drivers that actually wanted to _use_ those resources couldn't actually attach to them: e1000e 0000:00:19.0: BAR 0: can't reserve mem region [0xfe9e0000-0xfe9fffff] HDA Intel 0000:00:1b.0: BAR 0: can't reserve mem region [0xfe9dc000-0xfe9dffff] With this patch, the resource tree instead becomes: e0000000-fed003ff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : ehci_hcd fe9da000-fe9dafff : 0000:00:03.3 fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : e1000e fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : ICH HD audio fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : e1000e fea00000-fea7ffff : 0000:00:02.0 fea80000-feafffff : 0000:00:02.1 feb00000-febfffff : 0000:00:02.0 fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 i.e., the one reserved region now ends up surrounding all the PCI resources that were allocated inside of it by firmware, and because it is not marked BUSY, drivers 
have no problem attaching to the pre-allocated resources. Reported-and-tested-by: Jonathan Corbet Cc: Yinghai Lu Cc: Ingo Molnar Cc: Robert Hancock Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ce97bf3bed1..7aafeb5263e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM; /* * don't register the region that could be conflicted with * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); + } res++; } @@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); + insert_resource_expand_to_fit(&iomem_resource, res); res++; } } -- cgit v1.2.3