From 1300124f69cafc54331bc06e968a8dd67863f989 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 28 Mar 2006 17:04:00 -0500 Subject: ACPI: Kconfig: ACPI should depend on, not select PCI Otherwise, illegal configurations like X86_VOYAGER=y, PCI=y are possible. This patch also fixes the options select'ing ACPI to also select PCI. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/ia64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/ia64') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index edffe25a477..0046020e73b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -77,6 +77,7 @@ choice config IA64_GENERIC bool "generic" select ACPI + select PCI select NUMA select ACPI_NUMA help -- cgit v1.2.3 From 144c87b4e03759214c362d267e01c2905f1ab095 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 2 Apr 2006 00:15:39 -0500 Subject: ACPI: ia64 buildfix arch/ia64/hp/common/sba_iommu.c used ACPI_MEM_FREE instead of kfree() Signed-off-by: Len Brown Date: Sat, 13 May 2006 01:12:15 -0400 Subject: ACPI: silence ia64 build warning When building sim_defconfig, which does not define CONFIG_ACPI arch/ia64/kernel/acpi.c:71: warning: 'acpi_madt_rev' defined but not used really acpi.c should not be built when CONFIG_ACPI=n... Signed-off-by: Len Brown --- arch/ia64/kernel/acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 58c93a30348..f97cc6c7f13 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -68,8 +68,6 @@ EXPORT_SYMBOL(pm_power_off); unsigned char acpi_kbd_controller_present = 1; unsigned char acpi_legacy_devices; -static unsigned int __initdata acpi_madt_rev; - unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; @@ -243,6 +241,8 @@ acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end) return iosapic_init(iosapic->address, iosapic->global_irq_base); } +static unsigned int __initdata acpi_madt_rev; + static int __init acpi_parse_plat_int_src(acpi_table_entry_header * header, const unsigned long end) -- cgit v1.2.3 From 9c576ff1bc9ab42d06457e68e39c121481138562 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 27 Apr 2006 05:25:00 -0400 Subject: ACPI add ia64 exports to build acpi_memhotplug as a module Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/ia64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index cafa8776a53..11f08001f8c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -671,9 +671,11 @@ int add_memory(u64 start, u64 size) return ret; } +EXPORT_SYMBOL_GPL(add_memory); int remove_memory(u64 start, u64 size) { return -EINVAL; } +EXPORT_SYMBOL_GPL(remove_memory); #endif -- cgit v1.2.3 From 10083072bfabc40bc47306e512c158c57cf55c2e Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Fri, 14 Apr 2006 16:03:49 -0500 Subject: [PATCH] PCI: per-platform IA64_{FIRST,LAST}_DEVICE_VECTOR definitions Abstract IA64_FIRST_DEVICE_VECTOR/IA64_LAST_DEVICE_VECTOR since SN platforms use a subset of the IA64 range. Implement this by making the above macros global variables which the platform can override in it setup code. Also add a reserve_irq_vector() routine used by SN to mark a vector's as in-use when that weren't allocated through assign_irq_vector(). Signed-off-by: Mark Maule Signed-off-by: Greg Kroah-Hartman --- arch/ia64/kernel/irq_ia64.c | 19 ++++++++++++++++++- arch/ia64/sn/kernel/irq.c | 7 +++++++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 6c4d59fd036..ef9a2b49307 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -46,6 +46,10 @@ #define IRQ_DEBUG 0 +/* These can be overridden in platform_irq_init */ +int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; +int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; + /* default base addr of IPI table */ void __iomem *ipi_base_addr = ((void __iomem *) (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); @@ -60,7 +64,7 @@ __u8 isa_irq_to_vector_map[16] = { }; EXPORT_SYMBOL(isa_irq_to_vector_map); -static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)]; +static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)]; int assign_irq_vector (int irq) @@ -89,6 +93,19 @@ free_irq_vector (int vector) printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); } +int +reserve_irq_vector (int vector) +{ + int pos; + + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return -EINVAL; + + pos = vector - IA64_FIRST_DEVICE_VECTOR; + return test_and_set_bit(pos, ia64_vector_mask); +} + #ifdef CONFIG_SMP # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) #else diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index c265e02f503..db187f5cdae 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -202,6 +202,9 @@ void sn_irq_init(void) int i; irq_desc_t *base_desc = irq_desc; + ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR; + ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR; + for (i = 0; i < NR_IRQS; i++) { if (base_desc[i].handler == &no_irq_type) { base_desc[i].handler = &irq_type_sn; @@ -285,6 +288,7 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) /* link it into the sn_irq[irq] list */ spin_lock(&sn_irq_info_lock); list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + reserve_irq_vector(sn_irq_info->irq_irq); spin_unlock(&sn_irq_info_lock); register_intr_pda(sn_irq_info); @@ -310,8 +314,11 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) spin_lock(&sn_irq_info_lock); list_del_rcu(&sn_irq_info->list); spin_unlock(&sn_irq_info_lock); + if (list_empty(sn_irq_lh[sn_irq_info->irq_irq])) + free_irq_vector(sn_irq_info->irq_irq); call_rcu(&sn_irq_info->rcu, sn_irq_info_free); pci_dev_put(pci_dev); + } static inline void -- cgit v1.2.3 From 83821d3f558dc651e555d62182ed0c95651f41a6 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Fri, 14 Apr 2006 16:03:54 -0500 Subject: [PATCH] PCI: altix: msi support MSI callouts for altix. Involves a fair amount of code reorg in sn irq.c code as well as adding some extensions to the altix PCI provider abstaction. Signed-off-by: Mark Maule Signed-off-by: Greg Kroah-Hartman --- arch/ia64/sn/kernel/io_init.c | 9 +-- arch/ia64/sn/kernel/irq.c | 135 +++++++++++++++++++++---------------- arch/ia64/sn/pci/pci_dma.c | 10 +-- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 62 ++++++++++++----- arch/ia64/sn/pci/tioca_provider.c | 8 ++- arch/ia64/sn/pci/tioce_provider.c | 65 ++++++++++++------ 6 files changed, 178 insertions(+), 111 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 5101ac46264..dc09a6a28a3 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -58,7 +58,7 @@ static int max_pcibus_number = 255; /* Default highest pci bus number */ */ static dma_addr_t -sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size) +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) { return 0; } @@ -457,13 +457,6 @@ void sn_pci_fixup_slot(struct pci_dev *dev) pcidev_info->pdi_sn_irq_info = NULL; kfree(sn_irq_info); } - - /* - * MSI currently not supported on altix. Remove this when - * the MSI abstraction patches are integrated into the kernel - * (sometime after 2.6.16 releases) - */ - dev->no_msi = 1; } /* diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index db187f5cdae..dc8e2b69671 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -26,11 +26,11 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); int sn_force_interrupt_flag = 1; extern int sn_ioif_inited; -static struct list_head **sn_irq_lh; +struct list_head **sn_irq_lh; static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */ -static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, - u64 sn_irq_info, +u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, int req_irq, nasid_t req_nasid, int req_slice) { @@ -40,12 +40,13 @@ static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, (u64) SAL_INTR_ALLOC, (u64) local_nasid, - (u64) local_widget, (u64) sn_irq_info, (u64) req_irq, + (u64) local_widget, __pa(sn_irq_info), (u64) req_irq, (u64) req_nasid, (u64) req_slice); + return ret_stuff.status; } -static inline void sn_intr_free(nasid_t local_nasid, int local_widget, +void sn_intr_free(nasid_t local_nasid, int local_widget, struct sn_irq_info *sn_irq_info) { struct ia64_sal_retval ret_stuff; @@ -112,73 +113,91 @@ static void sn_end_irq(unsigned int irq) static void sn_irq_info_free(struct rcu_head *head); -static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, + nasid_t nasid, int slice) { - struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; - int cpuid, cpuphys; + int vector; + int cpuphys; + int64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; - cpuid = first_cpu(mask); - cpuphys = cpu_physical_id(cpuid); + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; - list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, - sn_irq_lh[irq], list) { - u64 bridge; - int local_widget, status; - nasid_t local_nasid; - struct sn_irq_info *new_irq_info; - struct sn_pcibus_provider *pci_provider; - - new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); - if (new_irq_info == NULL) - break; - memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - - bridge = (u64) new_irq_info->irq_bridge; - if (!bridge) { - kfree(new_irq_info); - break; /* irq is not a device interrupt */ - } + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - local_nasid = NASID_GET(bridge); + bridge = (u64) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + return NULL; /* irq is not a device interrupt */ + } - if (local_nasid & 1) - local_widget = TIO_SWIN_WIDGETNUM(bridge); - else - local_widget = SWIN_WIDGETNUM(bridge); + local_nasid = NASID_GET(bridge); - /* Free the old PROM new_irq_info structure */ - sn_intr_free(local_nasid, local_widget, new_irq_info); - /* Update kernels new_irq_info with new target info */ - unregister_intr_pda(new_irq_info); + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); - /* allocate a new PROM new_irq_info struct */ - status = sn_intr_alloc(local_nasid, local_widget, - __pa(new_irq_info), irq, - cpuid_to_nasid(cpuid), - cpuid_to_slice(cpuid)); + vector = sn_irq_info->irq_irq; + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); - /* SAL call failed */ - if (status) { - kfree(new_irq_info); - break; - } + /* allocate a new PROM new_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + new_irq_info, vector, + nasid, slice); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + return NULL; + } - new_irq_info->irq_cpuid = cpuid; - register_intr_pda(new_irq_info); + cpuphys = nasid_slice_to_cpuid(nasid, slice); + new_irq_info->irq_cpuid = cpuphys; + register_intr_pda(new_irq_info); - pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; - if (pci_provider && pci_provider->target_interrupt) - (pci_provider->target_interrupt)(new_irq_info); + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + + /* + * If this represents a line interrupt, target it. If it's + * an msi (irq_int_bit < 0), it's already targeted. + */ + if (new_irq_info->irq_int_bit >= 0 && + pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); - spin_lock(&sn_irq_info_lock); - list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); - spin_unlock(&sn_irq_info_lock); - call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP - set_irq_affinity_info((irq & 0xff), cpuphys, 0); + set_irq_affinity_info((vector & 0xff), cpuphys, 0); #endif - } + + return new_irq_info; +} + +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; + nasid_t nasid; + int slice; + + nasid = cpuid_to_nasid(first_cpu(mask)); + slice = cpuid_to_slice(first_cpu(mask)); + + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) + (void)sn_retarget_vector(sn_irq_info, nasid, slice); } struct hw_interrupt_type irq_type_sn = { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index b4b84c26921..7a291a27151 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -113,7 +113,8 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size, * resources. */ - *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size); + *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); if (!*dma_handle) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); free_pages((unsigned long)cpuaddr, get_order(size)); @@ -176,7 +177,7 @@ dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size, BUG_ON(dev->bus != &pci_bus_type); phys_addr = __pa(cpu_addr); - dma_addr = provider->dma_map(pdev, phys_addr, size); + dma_addr = provider->dma_map(pdev, phys_addr, size, SN_DMA_ADDR_PHYS); if (!dma_addr) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); return 0; @@ -260,7 +261,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, for (i = 0; i < nhwentries; i++, sg++) { phys_addr = SG_ENT_PHYS_ADDRESS(sg); sg->dma_address = provider->dma_map(pdev, - phys_addr, sg->length); + phys_addr, sg->length, + SN_DMA_ADDR_PHYS); if (!sg->dma_address) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 9f86bb6519a..a86c7b94596 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -41,7 +41,7 @@ extern int sn_ioif_inited; static dma_addr_t pcibr_dmamap_ate32(struct pcidev_info *info, - u64 paddr, size_t req_size, u64 flags) + u64 paddr, size_t req_size, u64 flags, int dma_flags) { struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; @@ -81,9 +81,12 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (IS_PCIX(pcibus_info)) ate_flags &= ~(PCI32_ATE_PREF); - xio_addr = - IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr); + if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS)) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + offset = IOPGOFF(xio_addr); ate = ate_flags | (xio_addr - offset); @@ -91,6 +94,13 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (IS_PIC_SOFT(pcibus_info)) { ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT); } + + /* + * If we're mapping for MSI, set the MSI bit in the ATE + */ + if (dma_flags & SN_DMA_MSI) + ate |= PCI32_ATE_MSI; + ate_write(pcibus_info, ate_index, ate_count, ate); /* @@ -105,20 +115,27 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR) ATE_SWAP_ON(pci_addr); + return pci_addr; } static dma_addr_t pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, - u64 dma_attributes) + u64 dma_attributes, int dma_flags) { struct pcibus_info *pcibus_info = (struct pcibus_info *) ((info->pdi_host_pcidev_info)->pdi_pcibus_info); u64 pci_addr; /* Translate to Crosstalk View of Physical Address */ - pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr)) | dma_attributes; + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + pci_addr = IS_PIC_SOFT(pcibus_info) ? + PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr) | dma_attributes; + else + pci_addr = IS_PIC_SOFT(pcibus_info) ? + paddr : + paddr | dma_attributes; /* Handle Bus mode */ if (IS_PCIX(pcibus_info)) @@ -130,7 +147,9 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, ((u64) pcibus_info-> pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT); } else - pci_addr |= TIOCP_PCI64_CMDTYPE_MEM; + pci_addr |= (dma_flags & SN_DMA_MSI) ? + TIOCP_PCI64_CMDTYPE_MSI : + TIOCP_PCI64_CMDTYPE_MEM; /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */ if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn)) @@ -141,7 +160,7 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, - u64 paddr, size_t req_size, u64 flags) + u64 paddr, size_t req_size, u64 flags, int dma_flags) { struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> @@ -156,8 +175,14 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, return 0; } - xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr); + if (dma_flags & SN_DMA_MSI) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; xio_base = pcibus_info->pbi_dir_xbase; offset = xio_addr - xio_base; @@ -327,7 +352,7 @@ void sn_dma_flush(u64 addr) */ dma_addr_t -pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) +pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags) { dma_addr_t dma_handle; struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); @@ -344,11 +369,11 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) */ dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_PREF); + PCI64_ATTR_PREF, dma_flags); } else { /* Handle 32-63 bit cards via direct mapping */ dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, - size, 0); + size, 0, dma_flags); if (!dma_handle) { /* * It is a 32 bit card and we cannot do direct mapping, @@ -356,7 +381,8 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) */ dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr, - size, PCI32_ATE_PREF); + size, PCI32_ATE_PREF, + dma_flags); } } @@ -365,18 +391,18 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) dma_addr_t pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr, - size_t size) + size_t size, int dma_flags) { dma_addr_t dma_handle; struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); if (hwdev->dev.coherent_dma_mask == ~0UL) { dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_BAR); + PCI64_ATTR_BAR, dma_flags); } else { dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, phys_addr, size, - PCI32_ATE_BAR); + PCI32_ATE_BAR, dma_flags); } return dma_handle; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index be017691296..20de72791b9 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -515,10 +515,16 @@ tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) * use the GART mapped mode. */ static u64 -tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { u64 mapaddr; + /* + * Not supported for now ... + */ + if (dma_flags & SN_DMA_MSI) + return 0; + /* * If card is 64 or 48 bit addresable, use a direct mapping. 32 * bit direct is so restrictive w.r.t. where the memory resides that diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 833295624e5..4cac7bdc7c7 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -170,7 +170,8 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) #define ATE_VALID(ate) ((ate) & (1UL << 63)) -#define ATE_MAKE(addr, ps) (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63)) +#define ATE_MAKE(addr, ps, msi) \ + (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0)) /* * Flavors of ate-based mapping supported by tioce_alloc_map() @@ -196,15 +197,17 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) * * 63 - must be 1 to indicate d64 mode to CE hardware * 62 - barrier bit ... controlled with tioce_dma_barrier() - * 61 - 0 since this is not an MSI transaction + * 61 - msi bit ... specified through dma_flags * 60:54 - reserved, MBZ */ static u64 -tioce_dma_d64(unsigned long ct_addr) +tioce_dma_d64(unsigned long ct_addr, int dma_flags) { u64 bus_addr; bus_addr = ct_addr | (1UL << 63); + if (dma_flags & SN_DMA_MSI) + bus_addr |= (1UL << 61); return bus_addr; } @@ -261,7 +264,7 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, */ static u64 tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, - u64 ct_addr, int len) + u64 ct_addr, int len, int dma_flags) { int i; int j; @@ -270,6 +273,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, int entries; int nates; u64 pagesize; + int msi_capable, msi_wanted; u64 *ate_shadow; u64 *ate_reg; u64 addr; @@ -291,6 +295,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate3240; pagesize = ce_kern->ce_ate3240_pagesize; bus_base = TIOCE_M32_MIN; + msi_capable = 1; break; case TIOCE_ATE_M40: first = 0; @@ -299,6 +304,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate40; pagesize = MB(64); bus_base = TIOCE_M40_MIN; + msi_capable = 0; break; case TIOCE_ATE_M40S: /* @@ -311,11 +317,16 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate3240; pagesize = GB(16); bus_base = TIOCE_M40S_MIN; + msi_capable = 0; break; default: return 0; } + msi_wanted = dma_flags & SN_DMA_MSI; + if (msi_wanted && !msi_capable) + return 0; + nates = ATE_NPAGES(ct_addr, len, pagesize); if (nates > entries) return 0; @@ -344,7 +355,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, for (j = 0; j < nates; j++) { u64 ate; - ate = ATE_MAKE(addr, pagesize); + ate = ATE_MAKE(addr, pagesize, msi_wanted); ate_shadow[i + j] = ate; tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); addr += pagesize; @@ -371,7 +382,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info. */ static u64 -tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) +tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags) { int dma_ok; int port; @@ -381,6 +392,9 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) u64 ct_lower; dma_addr_t bus_addr; + if (dma_flags & SN_DMA_MSI) + return 0; + ct_upper = ct_addr & ~0x3fffffffUL; ct_lower = ct_addr & 0x3fffffffUL; @@ -507,7 +521,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) */ static u64 tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, - int barrier) + int barrier, int dma_flags) { unsigned long flags; u64 ct_addr; @@ -523,15 +537,18 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (dma_mask < 0x7fffffffUL) return 0; - ct_addr = PHYS_TO_TIODMA(paddr); + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + ct_addr = PHYS_TO_TIODMA(paddr); + else + ct_addr = paddr; /* * If the device can generate 64 bit addresses, create a D64 map. - * Since this should never fail, bypass the rest of the checks. */ if (dma_mask == ~0UL) { - mapaddr = tioce_dma_d64(ct_addr); - goto dma_map_done; + mapaddr = tioce_dma_d64(ct_addr, dma_flags); + if (mapaddr) + goto dma_map_done; } pcidev_to_tioce(pdev, NULL, &ce_kern, &port); @@ -574,18 +591,22 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (byte_count > MB(64)) { mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, - port, ct_addr, byte_count); + port, ct_addr, byte_count, + dma_flags); if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, - ct_addr, byte_count); + ct_addr, byte_count, + dma_flags); } else { mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, - ct_addr, byte_count); + ct_addr, byte_count, + dma_flags); if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, - port, ct_addr, byte_count); + port, ct_addr, byte_count, + dma_flags); } } @@ -593,7 +614,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, * 32-bit direct is the next mode to try */ if (!mapaddr && dma_mask >= 0xffffffffUL) - mapaddr = tioce_dma_d32(pdev, ct_addr); + mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags); /* * Last resort, try 32-bit ATE-based map. @@ -601,7 +622,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, - byte_count); + byte_count, dma_flags); spin_unlock_irqrestore(&ce_kern->ce_lock, flags); @@ -622,9 +643,9 @@ dma_map_done: * in the address. */ static u64 -tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { - return tioce_do_dma_map(pdev, paddr, byte_count, 0); + return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags); } /** @@ -636,9 +657,9 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count) * Simply call tioce_do_dma_map() to create a map with the barrier bit set * in the address. */ static u64 -tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { - return tioce_do_dma_map(pdev, paddr, byte_count, 1); + return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); } /** @@ -696,7 +717,7 @@ tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) while (ate_index <= last_ate) { u64 ate; - ate = ATE_MAKE(0xdeadbeef, ps); + ate = ATE_MAKE(0xdeadbeef, ps, 0); ce_kern->ce_ate3240_shadow[ate_index] = ate; tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], ate); -- cgit v1.2.3 From 454e2398be9b9fa30433fccc548db34d19aa9958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:57 -0700 Subject: [PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 077f21216b6..2359e2809f5 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -595,10 +595,11 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static struct super_block * -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +static int +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); } static struct file_system_type pfm_fs_type = { -- cgit v1.2.3 From 762834e8bf46bf41ce9034d062a7c1f8563175f3 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Fri, 23 Jun 2006 02:03:19 -0700 Subject: [PATCH] Unify pxm_to_node() and node_to_pxm() Consolidate the various arch-specific implementations of pxm_to_node() and node_to_pxm() into a single generic version. Signed-off-by: Yasunori Goto Cc: "Luck, Tony" Cc: Andi Kleen Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/kernel/acpi.c | 24 ++++++++---------------- arch/ia64/pci/pci.c | 2 +- arch/ia64/sn/kernel/setup.c | 4 ++-- 4 files changed, 12 insertions(+), 20 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index bdccd0b1eb6..3ce443e6c01 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1958,7 +1958,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) if (pxm < 0) return; - node = pxm_to_nid_map[pxm]; + node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node)) return; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 58c93a30348..d1c52cf6788 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -415,9 +415,6 @@ static int __initdata srat_num_cpus; /* number of cpus */ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) -/* maps to convert between proximity domain and logical node ID */ -int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; -int __initdata nid_to_pxm_map[MAX_NUMNODES]; static struct acpi_table_slit __initdata *slit_table; static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) @@ -533,22 +530,17 @@ void __init acpi_numa_arch_fixup(void) * MCD - This can probably be dropped now. No need for pxm ID to node ID * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. */ - /* calculate total number of nodes in system from PXM bitmap */ - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); nodes_clear(node_online_map); for (i = 0; i < MAX_PXM_DOMAINS; i++) { if (pxm_bit_test(i)) { - int nid = num_online_nodes(); - pxm_to_nid_map[i] = nid; - nid_to_pxm_map[nid] = i; + int nid = acpi_map_pxm_to_node(i); node_set_online(nid); } } /* set logical node id in memory chunk structure */ for (i = 0; i < num_node_memblks; i++) - node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; + node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); /* assign memory bank numbers for each chunk on each node */ for_each_online_node(i) { @@ -562,7 +554,7 @@ void __init acpi_numa_arch_fixup(void) /* set logical node id in cpu structure */ for (i = 0; i < srat_num_cpus; i++) - node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; + node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes()); @@ -575,11 +567,11 @@ void __init acpi_numa_arch_fixup(void) for (i = 0; i < slit_table->localities; i++) { if (!pxm_bit_test(i)) continue; - node_from = pxm_to_nid_map[i]; + node_from = pxm_to_node(i); for (j = 0; j < slit_table->localities; j++) { if (!pxm_bit_test(j)) continue; - node_to = pxm_to_nid_map[j]; + node_to = pxm_to_node(j); node_distance(node_from, node_to) = slit_table->entry[i * slit_table->localities + j]; } @@ -785,9 +777,9 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) /* * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag + * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id]; + node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; #endif @@ -966,7 +958,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) if (pxm < 0) return AE_OK; - node = pxm_to_nid_map[pxm]; + node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || cpus_empty(node_to_cpumask(node))) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index ab829a22f8a..cf7751b99d1 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -352,7 +352,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) pxm = acpi_get_pxm(controller->acpi_handle); #ifdef CONFIG_NUMA if (pxm >= 0) - controller->node = pxm_to_nid_map[pxm]; + controller->node = pxm_to_node(pxm); #endif acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 30988dfbddf..93577abae36 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -139,7 +139,7 @@ static int __init pxm_to_nasid(int pxm) int i; int nid; - nid = pxm_to_nid_map[pxm]; + nid = pxm_to_node(pxm); for (i = 0; i < num_node_memblks; i++) { if (node_memblk[i].nid == nid) { return NASID_GET(node_memblk[i].start_paddr); @@ -704,7 +704,7 @@ void __init build_cnode_tables(void) * cnode == node for all C & M bricks. */ for_each_online_node(node) { - nasid = pxm_to_nasid(nid_to_pxm_map[node]); + nasid = pxm_to_nasid(node_to_pxm(node)); sn_cnodeid_to_nasid[node] = nasid; physical_node_map[nasid] = node; } -- cgit v1.2.3 From 929f97276bcf7f4a95272ed08a85339b98ba210d Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Fri, 23 Jun 2006 02:03:21 -0700 Subject: [PATCH] change gen_pool allocator to not touch managed memory Modify the gen_pool allocator (lib/genalloc.c) to utilize a bitmap scheme instead of the buddy scheme. The purpose of this change is to eliminate the touching of the actual memory being allocated. Since the change modifies the interface, a change to the uncached allocator (arch/ia64/kernel/uncached.c) is also required. Both Andrey Volkov and Jes Sorenson have expressed a desire that the gen_pool allocator not write to the memory being managed. See the following: http://marc.theaimsgroup.com/?l=linux-kernel&m=113518602713125&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113533568827916&w=2 Signed-off-by: Dean Nelson Cc: Andrey Volkov Acked-by: Jes Sorensen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/uncached.c | 200 +++++++++++++++++++++------------------- arch/ia64/sn/kernel/sn2/cache.c | 15 ++- 2 files changed, 116 insertions(+), 99 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index fcd2bad0286..5f03b9e524d 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -29,15 +29,8 @@ #include #include -#define DEBUG 0 -#if DEBUG -#define dprintk printk -#else -#define dprintk(x...) do { } while (0) -#endif - -void __init efi_memmap_walk_uc (efi_freemem_callback_t callback); +extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *); #define MAX_UNCACHED_GRANULES 5 static int allocated_granules; @@ -60,6 +53,7 @@ static void uncached_ipi_visibility(void *data) static void uncached_ipi_mc_drain(void *data) { int status; + status = ia64_pal_mc_drain(); if (status) printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " @@ -67,30 +61,35 @@ static void uncached_ipi_mc_drain(void *data) } -static unsigned long -uncached_get_new_chunk(struct gen_pool *poolp) +/* + * Add a new chunk of uncached memory pages to the specified pool. + * + * @pool: pool to add new chunk of uncached memory to + * @nid: node id of node to allocate memory from, or -1 + * + * This is accomplished by first allocating a granule of cached memory pages + * and then converting them to uncached memory pages. + */ +static int uncached_add_chunk(struct gen_pool *pool, int nid) { struct page *page; - void *tmp; int status, i; - unsigned long addr, node; + unsigned long c_addr, uc_addr; if (allocated_granules >= MAX_UNCACHED_GRANULES) - return 0; + return -1; + + /* attempt to allocate a granule's worth of cached memory pages */ - node = poolp->private; - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, IA64_GRANULE_SHIFT-PAGE_SHIFT); + if (!page) + return -1; - dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n", - page, (unsigned long)(page-vmem_map) << PAGE_SHIFT); + /* convert the memory pages from cached to uncached */ - /* - * Do magic if no mem on local node! XXX - */ - if (!page) - return 0; - tmp = page_address(page); + c_addr = (unsigned long)page_address(page); + uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; /* * There's a small race here where it's possible for someone to @@ -100,76 +99,90 @@ uncached_get_new_chunk(struct gen_pool *poolp) for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) SetPageUncached(&page[i]); - flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE); + flush_tlb_kernel_range(uc_addr, uc_adddr + IA64_GRANULE_SIZE); status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); - - dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n", - status, raw_smp_processor_id()); - if (!status) { status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); if (status) - printk(KERN_WARNING "smp_call_function failed for " - "uncached_ipi_visibility! (%i)\n", status); + goto failed; } + preempt_disable(); + if (ia64_platform_is("sn2")) - sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE); + sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE); else - flush_icache_range((unsigned long)tmp, - (unsigned long)tmp+IA64_GRANULE_SIZE); + flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); + + /* flush the just introduced uncached translation from the TLB */ + local_flush_tlb_all(); + + preempt_enable(); ia64_pal_mc_drain(); status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); if (status) - printk(KERN_WARNING "smp_call_function failed for " - "uncached_ipi_mc_drain! (%i)\n", status); + goto failed; - addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; + /* + * The chunk of memory pages has been converted to uncached so now we + * can add it to the pool. + */ + status = gen_pool_add(pool, uc_addr, IA64_GRANULE_SIZE, nid); + if (status) + goto failed; allocated_granules++; - return addr; + return 0; + + /* failed to convert or add the chunk so give it back to the kernel */ +failed: + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + ClearPageUncached(&page[i]); + + free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); + return -1; } /* * uncached_alloc_page * + * @starting_nid: node id of node to start with, or -1 + * * Allocate 1 uncached page. Allocates on the requested node. If no * uncached pages are available on the requested node, roundrobin starting - * with higher nodes. + * with the next higher node. */ -unsigned long -uncached_alloc_page(int nid) +unsigned long uncached_alloc_page(int starting_nid) { - unsigned long maddr; + unsigned long uc_addr; + struct gen_pool *pool; + int nid; - maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE); + if (unlikely(starting_nid >= MAX_NUMNODES)) + return 0; - dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n", - maddr, nid); + if (starting_nid < 0) + starting_nid = numa_node_id(); + nid = starting_nid; - /* - * If no memory is availble on our local node, try the - * remaining nodes in the system. - */ - if (!maddr) { - int i; - - for (i = MAX_NUMNODES - 1; i >= 0; i--) { - if (i == nid || !node_online(i)) - continue; - maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE); - dprintk(KERN_DEBUG "uncached_alloc_page alternate search " - "returns %lx on node %i\n", maddr, i); - if (maddr) { - break; - } - } - } + do { + if (!node_online(nid)) + continue; + pool = uncached_pool[nid]; + if (pool == NULL) + continue; + do { + uc_addr = gen_pool_alloc(pool, PAGE_SIZE); + if (uc_addr != 0) + return uc_addr; + } while (uncached_add_chunk(pool, nid) == 0); + + } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); - return maddr; + return 0; } EXPORT_SYMBOL(uncached_alloc_page); @@ -177,21 +190,22 @@ EXPORT_SYMBOL(uncached_alloc_page); /* * uncached_free_page * + * @uc_addr: uncached address of page to free + * * Free a single uncached page. */ -void -uncached_free_page(unsigned long maddr) +void uncached_free_page(unsigned long uc_addr) { - int node; - - node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET); + int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pool[nid]; - dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); + if (unlikely(pool == NULL)) + return; - if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) - panic("uncached_free_page invalid address %lx\n", maddr); + if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", uc_addr); - gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE); + gen_pool_free(pool, uc_addr, PAGE_SIZE); } EXPORT_SYMBOL(uncached_free_page); @@ -199,43 +213,39 @@ EXPORT_SYMBOL(uncached_free_page); /* * uncached_build_memmap, * + * @uc_start: uncached starting address of a chunk of uncached memory + * @uc_end: uncached ending address of a chunk of uncached memory + * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc()) + * * Called at boot time to build a map of pages that can be used for * memory special operations. */ -static int __init -uncached_build_memmap(unsigned long start, unsigned long end, void *arg) +static int __init uncached_build_memmap(unsigned long uc_start, + unsigned long uc_end, void *arg) { - long length = end - start; - int node; - - dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); + int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pool[nid]; + size_t size = uc_end - uc_start; touch_softlockup_watchdog(); - memset((char *)start, 0, length); - node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET); - - for (; start < end ; start += PAGE_SIZE) { - dprintk(KERN_INFO "sticking %lx into the pool!\n", start); - gen_pool_free(uncached_pool[node], start, PAGE_SIZE); + if (pool != NULL) { + memset((char *)uc_start, 0, size); + (void) gen_pool_add(pool, uc_start, size, nid); } - return 0; } -static int __init uncached_init(void) { - int i; +static int __init uncached_init(void) +{ + int nid; - for (i = 0; i < MAX_NUMNODES; i++) { - if (!node_online(i)) - continue; - uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT, - &uncached_get_new_chunk, i); + for_each_online_node(nid) { + uncached_pool[nid] = gen_pool_create(PAGE_SHIFT, nid); } - efi_memmap_walk_uc(uncached_build_memmap); - + efi_memmap_walk_uc(uncached_build_memmap, NULL); return 0; } diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c index bc3cfa17cd0..2862cb33026 100644 --- a/arch/ia64/sn/kernel/sn2/cache.c +++ b/arch/ia64/sn/kernel/sn2/cache.c @@ -3,11 +3,12 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved. * */ #include #include +#include /** * sn_flush_all_caches - flush a range of address from all caches (incl. L4) @@ -17,18 +18,24 @@ * Flush a range of addresses from all caches including L4. * All addresses fully or partially contained within * @flush_addr to @flush_addr + @bytes are flushed - * from the all caches. + * from all caches. */ void sn_flush_all_caches(long flush_addr, long bytes) { - flush_icache_range(flush_addr, flush_addr+bytes); + unsigned long addr = flush_addr; + + /* SHub1 requires a cached address */ + if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED)) + addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL); + + flush_icache_range(addr, addr + bytes); /* * The last call may have returned before the caches * were actually flushed, so we call it again to make * sure. */ - flush_icache_range(flush_addr, flush_addr+bytes); + flush_icache_range(addr, addr + bytes); mb(); } EXPORT_SYMBOL(sn_flush_all_caches); -- cgit v1.2.3 From 742755a1d8ce2b548428f7aacf1758b4bba50080 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 23 Jun 2006 02:03:55 -0700 Subject: [PATCH] page migration: sys_move_pages(): support moving of individual pages move_pages() is used to move individual pages of a process. The function can be used to determine the location of pages and to move them onto the desired node. move_pages() returns status information for each page. long move_pages(pid, number_of_pages_to_move, addresses_of_pages[], nodes[] or NULL, status[], flags); The addresses of pages is an array of void * pointing to the pages to be moved. The nodes array contains the node numbers that the pages should be moved to. If a NULL is passed instead of an array then no pages are moved but the status array is updated. The status request may be used to determine the page state before issuing another move_pages() to move pages. The status array will contain the state of all individual page migration attempts when the function terminates. The status array is only valid if move_pages() completed successfullly. Possible page states in status[]: 0..MAX_NUMNODES The page is now on the indicated node. -ENOENT Page is not present -EACCES Page is mapped by multiple processes and can only be moved if MPOL_MF_MOVE_ALL is specified. -EPERM The page has been mlocked by a process/driver and cannot be moved. -EBUSY Page is busy and cannot be moved. Try again later. -EFAULT Invalid address (no VMA or zero page). -ENOMEM Unable to allocate memory on target node. -EIO Unable to write back page. The page must be written back in order to move it since the page is dirty and the filesystem does not provide a migration function that would allow the moving of dirty pages. -EINVAL A dirty page cannot be moved. The filesystem does not provide a migration function and has no ability to write back pages. The flags parameter indicates what types of pages to move: MPOL_MF_MOVE Move pages that are only mapped by the process. MPOL_MF_MOVE_ALL Also move pages that are mapped by multiple processes. Requires sufficient capabilities. Possible return codes from move_pages() -ENOENT No pages found that would require moving. All pages are either already on the target node, not present, had an invalid address or could not be moved because they were mapped by multiple processes. -EINVAL Flags other than MPOL_MF_MOVE(_ALL) specified or an attempt to migrate pages in a kernel thread. -EPERM MPOL_MF_MOVE_ALL specified without sufficient priviledges. or an attempt to move a process belonging to another user. -EACCES One of the target nodes is not allowed by the current cpuset. -ENODEV One of the target nodes is not online. -ESRCH Process does not exist. -E2BIG Too many pages to move. -ENOMEM Not enough memory to allocate control array. -EFAULT Parameters could not be accessed. A test program for move_pages() may be found with the patches on ftp.kernel.org:/pub/linux/kernel/people/christoph/pmig/patches-2.6.17-rc4-mm3 From: Christoph Lameter Detailed results for sys_move_pages() Pass a pointer to an integer to get_new_page() that may be used to indicate where the completion status of a migration operation should be placed. This allows sys_move_pags() to report back exactly what happened to each page. Wish there would be a better way to do this. Looks a bit hacky. Signed-off-by: Christoph Lameter Cc: Hugh Dickins Cc: Jes Sorensen Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Andi Kleen Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index bcb80ca5cf4..32c999f58d1 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1584,7 +1584,7 @@ sys_call_table: data8 sys_keyctl data8 sys_ioprio_set data8 sys_ioprio_get // 1275 - data8 sys_ni_syscall + data8 sys_move_pages data8 sys_inotify_init data8 sys_inotify_add_watch data8 sys_inotify_rm_watch -- cgit v1.2.3 From 75e1fcc0b18df0a65ab113198e9dc0e98999a08c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 23 Jun 2006 02:05:12 -0700 Subject: [PATCH] vfs: add lock owner argument to flush operation Pass the POSIX lock owner ID to the flush operation. This is useful for filesystems which don't want to store any locking state in inode->i_flock but want to handle locking/unlocking POSIX locks internally. FUSE is one such filesystem but I think it possible that some network filesystems would need this also. Also add a flag to indicate that a POSIX locking request was generated by close(), so filesystems using the above feature won't send an extra locking request in this case. Signed-off-by: Miklos Szeredi Cc: Trond Myklebust Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 2359e2809f5..6d7bc8ff7b3 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -532,7 +532,6 @@ static ctl_table pfm_sysctl_root[] = { static struct ctl_table_header *pfm_sysctl_header; static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); -static int pfm_flush(struct file *filp); #define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) #define pfm_get_cpu_data(a,b) per_cpu(a, b) @@ -1774,7 +1773,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) * When caller is self-monitoring, the context is unloaded. */ static int -pfm_flush(struct file *filp) +pfm_flush(struct file *filp, fl_owner_t id) { pfm_context_t *ctx; struct task_struct *task; -- cgit v1.2.3 From 1e11d2782b2f8e86d22ad92c75b70ec8cad14dcf Mon Sep 17 00:00:00 2001 From: Jean-Luc Leger Date: Fri, 23 Jun 2006 02:05:19 -0700 Subject: [PATCH] clean up default value of SCHED_SMT Default values for boolean and tristate options can only be 'y', 'm' or 'n'. This patch removes wrong default for SCHED_SMT. Signed-off-by: Jean-Luc Leger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 0f3076a820c..c1c9b422408 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -273,7 +273,6 @@ config HOTPLUG_CPU config SCHED_SMT bool "SMT scheduler support" depends on SMP - default off help Improves the CPU scheduler's decision making when dealing with Intel IA64 chips with MultiThreading at a cost of slightly increased -- cgit v1.2.3