From 9ac023989e6dd1b97140b47fb942a7940d0b2af2 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 27 Jun 2006 02:53:27 -0700 Subject: [PATCH] acpi memory hotplug cannot manage _CRS with plural resources Current acpi memory hotplug just looks into the first entry of resources in _CRS. But, _CRS can contain plural resources. So, if _CRS contains plural resources, acpi memory hot add cannot add all memory. With this patch, acpi memory hotplug can deal with Memory Device, whose _CRS contains plural resources. Tested on ia64 memory hotplug test environment (not emulation, uses alpha version firmware which supports dynamic reconfiguration of NUMA.) Note: Microsoft's Windows Server 2003 requires big (>4G) resources to be divided into small (<4G) resources. This looks crazy, but is not invalid. (See http://www.microsoft.com/whdc/system/pnppwr/hotadd/hotaddmem.mspx) For this reason, a firmware vendor who supports Windows writes plural resources in a _CRS even if they are contiguous. Signed-off-by: Kenji Kaneshige Signed-off-by: KAMEZAWA Hiroyuki Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 112 ++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 35 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index e0a95ba7237..1486e03bb41 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -68,45 +68,75 @@ static struct acpi_driver acpi_memory_device_driver = { }, }; +struct acpi_memory_info { + struct list_head list; + u64 start_addr; /* Memory Range start physical addr */ + u64 length; /* Memory Range length */ + unsigned short caching; /* memory cache attribute */ + unsigned short write_protect; /* memory read/write attribute */ + unsigned int enabled:1; +}; + struct acpi_memory_device { acpi_handle handle; unsigned int state; /* State of the memory device */ - unsigned short caching; /* 
memory cache attribute */ - unsigned short write_protect; /* memory read/write attribute */ - u64 start_addr; /* Memory Range start physical addr */ - u64 length; /* Memory Range length */ + struct list_head res_list; }; +static acpi_status +acpi_memory_get_resource(struct acpi_resource *resource, void *context) +{ + struct acpi_memory_device *mem_device = context; + struct acpi_resource_address64 address64; + struct acpi_memory_info *info, *new; + acpi_status status; + + status = acpi_resource_to_address64(resource, &address64); + if (ACPI_FAILURE(status) || + (address64.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + list_for_each_entry(info, &mem_device->res_list, list) { + /* Can we combine the resource range information? */ + if ((info->caching == address64.info.mem.caching) && + (info->write_protect == address64.info.mem.write_protect) && + (info->start_addr + info->length == address64.minimum)) { + info->length += address64.address_length; + return AE_OK; + } + } + + new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL); + if (!new) + return AE_ERROR; + + INIT_LIST_HEAD(&new->list); + new->caching = address64.info.mem.caching; + new->write_protect = address64.info.mem.write_protect; + new->start_addr = address64.minimum; + new->length = address64.address_length; + list_add_tail(&new->list, &mem_device->res_list); + + return AE_OK; +} + static int acpi_memory_get_device_resources(struct acpi_memory_device *mem_device) { acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_resource *resource = NULL; - struct acpi_resource_address64 address64; + struct acpi_memory_info *info, *n; ACPI_FUNCTION_TRACE("acpi_memory_get_device_resources"); - /* Get the range from the _CRS */ - status = acpi_get_current_resources(mem_device->handle, &buffer); - if (ACPI_FAILURE(status)) - return_VALUE(-EINVAL); - - resource = (struct acpi_resource *)buffer.pointer; - status = acpi_resource_to_address64(resource, &address64); - 
if (ACPI_SUCCESS(status)) { - if (address64.resource_type == ACPI_MEMORY_RANGE) { - /* Populate the structure */ - mem_device->caching = address64.info.mem.caching; - mem_device->write_protect = - address64.info.mem.write_protect; - mem_device->start_addr = address64.minimum; - mem_device->length = address64.address_length; - } + status = acpi_walk_resources(mem_device->handle, METHOD_NAME__CRS, + acpi_memory_get_resource, mem_device); + if (ACPI_FAILURE(status)) { + list_for_each_entry_safe(info, n, &mem_device->res_list, list) + kfree(info); + return -EINVAL; } - acpi_os_free(buffer.pointer); - return_VALUE(0); + return 0; } static int @@ -181,7 +211,8 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device) static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { - int result; + int result, num_enabled = 0; + struct acpi_memory_info *info; ACPI_FUNCTION_TRACE("acpi_memory_enable_device"); @@ -197,12 +228,20 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) /* * Tell the VM there is more memory here... * Note: Assume that this function returns zero on success + * We don't have memory-hot-add rollback function,now. + * (i.e. 
memory-hot-remove function) */ - result = add_memory(mem_device->start_addr, mem_device->length); - if (result) { + list_for_each_entry(info, &mem_device->res_list, list) { + result = add_memory(info->start_addr, info->length); + if (result) + continue; + info->enabled = 1; + num_enabled++; + } + if (!num_enabled) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "\nadd_memory failed\n")); mem_device->state = MEMORY_INVALID_STATE; - return result; + return -EINVAL; } return result; @@ -246,8 +285,7 @@ static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) static int acpi_memory_disable_device(struct acpi_memory_device *mem_device) { int result; - u64 start = mem_device->start_addr; - u64 len = mem_device->length; + struct acpi_memory_info *info, *n; ACPI_FUNCTION_TRACE("acpi_memory_disable_device"); @@ -255,10 +293,13 @@ static int acpi_memory_disable_device(struct acpi_memory_device *mem_device) * Ask the VM to offline this memory range. * Note: Assume that this function returns zero on success */ - result = remove_memory(start, len); - if (result) { - ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Hot-Remove failed.\n")); - return_VALUE(result); + list_for_each_entry_safe(info, n, &mem_device->res_list, list) { + if (info->enabled) { + result = remove_memory(info->start_addr, info->length); + if (result) + return result; + } + kfree(info); } /* Power-off and eject the device */ @@ -356,6 +397,7 @@ static int acpi_memory_device_add(struct acpi_device *device) return_VALUE(-ENOMEM); memset(mem_device, 0, sizeof(struct acpi_memory_device)); + INIT_LIST_HEAD(&mem_device->res_list); mem_device->handle = device->handle; sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS); -- cgit v1.2.3 From 1f425994f96d85540d47eee98daabc1e211b454e Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:28 -0700 Subject: [PATCH] Catch notification of memory add event of ACPI via container 
driver. (register start func for memory device) This is a patch to call add_memory() when notify reaches for new node's add event. When new node is added, notify of ACPI reaches container device which means the node. Container device driver calls acpi_bus_scan() to find and add belonging devices (which means cpu, memory and so on). Its function calls add and start function of belonging devices' driver. However, current memory hotplug driver just registers add function to create sysfs file for its memory. But, acpi_memory_enable_device() is not called because it is considered just the case that notify reaches memory device directly. So, if notify reaches container device nothing can call add_memory(). This is a patch to create start function which calls add_memory(). add_memory() can be called by this when notify reaches container device. [akpm@osdl.org: coding cleanups] Signed-off-by: Yasunori Goto Cc: "Brown, Len" Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/acpi') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 1486e03bb41..3721f8dd0de 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -57,6 +57,7 @@ MODULE_LICENSE("GPL"); static int acpi_memory_device_add(struct acpi_device *device); static int acpi_memory_device_remove(struct acpi_device *device, int type); +static int acpi_memory_device_start(struct acpi_device *device); static struct acpi_driver acpi_memory_device_driver = { .name = ACPI_MEMORY_DEVICE_DRIVER_NAME, @@ -65,6 +66,7 @@ static struct acpi_driver acpi_memory_device_driver = { .ops = { .add = acpi_memory_device_add, .remove = acpi_memory_device_remove, + .start = acpi_memory_device_start, }, }; @@ -433,6 +435,25 @@ static int acpi_memory_device_remove(struct acpi_device *device, int type) return_VALUE(0); } +static int 
acpi_memory_device_start (struct acpi_device *device) +{ + struct acpi_memory_device *mem_device; + int result = 0; + + ACPI_FUNCTION_TRACE("acpi_memory_device_start"); + + mem_device = acpi_driver_data(device); + + if (!acpi_memory_check_device(mem_device)) { + /* call add_memory func */ + result = acpi_memory_enable_device(mem_device); + if (result) + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, + "Error in acpi_memory_enable_device\n")); + } + return_VALUE(result); +} + /* * Helper function to check for memory device */ -- cgit v1.2.3 From dd56a8e36f91f63c0a31e8a118d87b7cf01526b8 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:29 -0700 Subject: [PATCH] Catch notification of memory add event of ACPI via container driver. (avoid redundant call add_memory) When acpi_memory_device_init() is called at boottime to register struct memory acpi_memory_device, acpi_bus_add() is called via acpi_driver_attach(). But it also calls ops->start() function. It is called even if the memory blocks are initialized at early boottime. In this case add_memory() returns -EEXIST, and the memory blocks become INVALID state even if they are normal. This is a patch to avoid calling add_memory() for already available memory. [akpm@osdl.org: coding cleanups] Signed-off-by: Yasunori Goto Cc: "Brown, Len" Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/acpi') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 3721f8dd0de..5652569b376 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -234,6 +234,17 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) * (i.e. 
memory-hot-remove function) */ list_for_each_entry(info, &mem_device->res_list, list) { + u64 start_pfn, end_pfn; + + start_pfn = info->start_addr >> PAGE_SHIFT; + end_pfn = (info->start_addr + info->length - 1) >> PAGE_SHIFT; + + if (pfn_valid(start_pfn) || pfn_valid(end_pfn)) { + /* already enabled. try next area */ + num_enabled++; + continue; + } + result = add_memory(info->start_addr, info->length); if (result) continue; -- cgit v1.2.3 From bc02af93dd2bbddce1b55e0a493f833a1b7cf140 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:30 -0700 Subject: [PATCH] pgdat allocation for new node add (specify node id) Change the name of old add_memory() to arch_add_memory. And use node id to get pgdat for the node at NODE_DATA(). Note: Powerpc's old add_memory() is defined as __devinit. However, add_memory() is usually called only after bootup. I suppose it may be redundant. But, I'm not well known about powerpc. So, I keep it. (But, __meminit is better at least.) Signed-off-by: Yasunori Goto Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/Kconfig | 2 +- drivers/acpi/acpi_memhotplug.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 94b8d820c51..610d2cc02cf 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -328,7 +328,7 @@ config ACPI_CONTAINER config ACPI_HOTPLUG_MEMORY tristate "Memory Hotplug" depends on ACPI - depends on MEMORY_HOTPLUG || X86_64 + depends on MEMORY_HOTPLUG default n help This driver adds supports for ACPI Memory Hotplug. 
This driver diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 5652569b376..0424326eae1 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -215,6 +215,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { int result, num_enabled = 0; struct acpi_memory_info *info; + int node = 0; ACPI_FUNCTION_TRACE("acpi_memory_enable_device"); @@ -245,7 +246,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) continue; } - result = add_memory(info->start_addr, info->length); + result = add_memory(node, info->start_addr, info->length); if (result) continue; info->enabled = 1; -- cgit v1.2.3 From 1e3590e2e4a38e8390fdac5bda23330bf2801838 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:31 -0700 Subject: [PATCH] pgdat allocation for new node add (get node id by acpi) This is to find node id from acpi's handle of memory_device in DSDT. _PXM for the new node can be found by acpi_get_pxm() by using new memory's handle. So, node id can be found by pxm_to_nid_map[]. This patch is simpler than v2 of the node hot-add patch. In v2, the old add_memory() function didn't have a node id parameter, so the kernel had to find its handle by physical address via DSDT again. But v3 just gives the node id to add_memory() now. 
Signed-off-by: Yasunori Goto Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 3 ++- drivers/acpi/numa.c | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 0424326eae1..1012284ff4f 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -215,7 +215,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { int result, num_enabled = 0; struct acpi_memory_info *info; - int node = 0; + int node; ACPI_FUNCTION_TRACE("acpi_memory_enable_device"); @@ -228,6 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) return result; } + node = acpi_get_node(mem_device->handle); /* * Tell the VM there is more memory here... * Note: Assume that this function returns zero on success diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index e2c1a16078c..13d6d5bdea2 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -254,5 +254,18 @@ int acpi_get_pxm(acpi_handle h) } while (ACPI_SUCCESS(status)); return -1; } - EXPORT_SYMBOL(acpi_get_pxm); + +int acpi_get_node(acpi_handle *handle) +{ + int pxm, node = -1; + + ACPI_FUNCTION_TRACE("acpi_get_node"); + + pxm = acpi_get_pxm(handle); + if (pxm >= 0) + node = acpi_map_pxm_to_node(pxm); + + return_VALUE(node); +} +EXPORT_SYMBOL(acpi_get_node); -- cgit v1.2.3