From 62fdd7678a26efadd6ac5c2869543caff77d2df0 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Fri, 17 Oct 2008 12:14:13 -0700
Subject: [IA64] Add Variable Page Size and IA64 Support in Intel IOMMU

This patch contains the Intel IOMMU IA64-specific code. It defines the new
machvec dig_vtd, hooks for the IOMMU, DMAR table detection, a cache line
flush function, etc.

For a generic kernel with CONFIG_DMAR=y, dig_vtd is used as the machine
vector if an Intel IOMMU is detected; otherwise the kernel falls back to
the dig machine vector. The kernel parameters "machvec=dig" or
"intel_iommu=off" can be used to force the kernel to boot with the dig
machine vector.

Signed-off-by: Fenghua Yu
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/Makefile      |   4 ++
 arch/ia64/kernel/acpi.c        |  17 ++++++
 arch/ia64/kernel/msi_ia64.c    |  80 +++++++++++++++++++++++++
 arch/ia64/kernel/pci-dma.c     | 129 +++++++++++++++++++++++++++++++++++++++++
 arch/ia64/kernel/pci-swiotlb.c |  46 +++++++++++++++
 arch/ia64/kernel/setup.c       |  42 +++++++++-----
 6 files changed, 305 insertions(+), 13 deletions(-)
 create mode 100644 arch/ia64/kernel/pci-dma.c
 create mode 100644 arch/ia64/kernel/pci-swiotlb.c

diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 87fea11aecb..af0e750705e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
 endif
+obj-$(CONFIG_DMAR)		+= pci-dma.o
+ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
+endif
 
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5d1eb7ee2bf..8cc2f8a610c 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -91,6 +91,9 @@ acpi_get_sysname(void)
 	struct acpi_table_rsdp *rsdp;
 	struct acpi_table_xsdt *xsdt;
 	struct acpi_table_header *hdr;
+#ifdef CONFIG_DMAR
+	u64 i, nentries;
+#endif
 
 	rsdp_phys = acpi_find_rsdp();
 	if (!rsdp_phys) {
@@ -123,6 +126,18 @@ acpi_get_sysname(void)
 		return "sn2";
 	}
 
+#ifdef CONFIG_DMAR
+	/* Look for Intel IOMMU */
+	nentries = (hdr->length - sizeof(*hdr)) /
+		sizeof(xsdt->table_offset_entry[0]);
+	for (i = 0; i < nentries; i++) {
+		hdr = __va(xsdt->table_offset_entry[i]);
+		if (strncmp(hdr->signature, ACPI_SIG_DMAR,
+			sizeof(ACPI_SIG_DMAR) - 1) == 0)
+			return "dig_vtd";
+	}
+#endif
+
 	return "dig";
 #else
 # if defined (CONFIG_IA64_HP_SIM)
@@ -137,6 +152,8 @@ acpi_get_sysname(void)
 	return "uv";
 # elif defined (CONFIG_IA64_DIG)
 	return "dig";
+# elif defined(CONFIG_IA64_DIG_VTD)
+	return "dig_vtd";
 # else
 #	error Unknown platform.  Fix acpi.c.
 # endif
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 60c6ef67ebb..702a09c1323 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /*
@@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 	return ia64_teardown_msi_irq(irq);
 }
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	int cpu = first_cpu(mask);
+
+
+	if (!cpu_online(cpu))
+		return;
+
+	if (irq_prepare_move(irq, cpu))
+		return;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
+	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+
+	dmar_msi_write(irq, &msg);
+	irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned dest;
+	cpumask_t mask;
+
+	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	dest = cpu_physical_id(first_cpu(mask));
+
+	msg->address_hi = 0;
+	msg->address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DESTID_CPU(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(cfg->vector);
+	return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 00000000000..10a75b55765
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,129 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#ifdef CONFIG_DMAR
+
+#include
+#include
+
+#include
+#include
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+/* Dummy device used for NULL arguments (normally ISA). Better would
+   be probably a smaller DMA mask, but this is bug-to-bug compatible
+   to i386.
+*/
+struct device fallback_dev = {
+	.bus_id = "fallback device",
+	.coherent_dma_mask = DMA_32BIT_MASK,
+	.dma_mask = &fallback_dev.coherent_dma_mask,
+};
+
+void __init pci_iommu_alloc(void)
+{
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+	if (iommu_detected)
+		intel_iommu_init();
+
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+	return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+	return;
+}
+
+struct dma_mapping_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+#ifdef CONFIG_PCI
+	if (mask > 0xffffffff && forbid_dac > 0) {
+		dev_info(dev, "Disallowing DAC for device\n");
+		return 0;
+	}
+#endif
+
+	if (ops->dma_supported_op)
+		return ops->dma_supported_op(dev, mask);
+
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_24BIT_MASK)
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+		dev_info(dev, "Force SAC with mask %lx\n", mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+#endif
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
new file mode 100644
index 00000000000..16c50516dbc
--- /dev/null
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -0,0 +1,46 @@
+/* Glue code to lib/swiotlb.c */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+struct dma_mapping_ops swiotlb_dma_ops = {
+	.mapping_error = swiotlb_dma_mapping_error,
+	.alloc_coherent = swiotlb_alloc_coherent,
+	.free_coherent = swiotlb_free_coherent,
+	.map_single = swiotlb_map_single,
+	.unmap_single = swiotlb_unmap_single,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
+	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.map_sg = swiotlb_map_sg,
+	.unmap_sg = swiotlb_unmap_sg,
+	.dma_supported_op = swiotlb_dma_supported,
+};
+
+void __init pci_swiotlb_init(void)
+{
+	if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+		swiotlb = 1;
+		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+		machvec_init("dig");
+		swiotlb_init();
+		dma_ops = &swiotlb_dma_ops;
+#else
+		panic("Unable to find Intel IOMMU");
+#endif
+	}
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index de636b21567..2a67a74a48f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -116,6 +116,13 @@ unsigned int num_io_spaces;
  */
 #define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
 unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define	CACHE_STRIDE_SHIFT	5
+unsigned long ia64_cache_stride_shift = ~0;
 
 /*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
@@ -847,13 +854,14 @@ setup_per_cpu_areas (void)
 }
 
 /*
- * Calculate the max. cache line size.
+ * Do the following calculations:
  *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
  */
 static void __cpuinit
-get_max_cacheline_size (void)
+get_cache_info(void)
 {
 	unsigned long line_size, max = 1;
 	u64 l, levels, unique_caches;
@@ -867,12 +875,14 @@ get_max_cacheline_size (void)
 		max = SMP_CACHE_BYTES;
 		/* Safest setup for "flush_icache_range()" */
 		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+		/* Safest setup for "clflush_cache_range()" */
+		ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 		goto out;
 	}
 
 	for (l = 0; l < levels; ++l) {
-		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
-						    &cci);
+		/* cache_type (data_or_unified)=2 */
+		status = ia64_pal_cache_config_info(l, 2, &cci);
 		if (status != 0) {
 			printk(KERN_ERR
 			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
@@ -880,15 +890,21 @@ get_max_cacheline_size (void)
 			max = SMP_CACHE_BYTES;
 			/* The safest setup for "flush_icache_range()" */
 			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			/* The safest setup for "clflush_cache_range()" */
+			ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 			cci.pcci_unified = 1;
+		} else {
+			if (cci.pcci_stride < ia64_cache_stride_shift)
+				ia64_cache_stride_shift = cci.pcci_stride;
+
+			line_size = 1 << cci.pcci_line_size;
+			if (line_size > max)
+				max = line_size;
 		}
-		line_size = 1 << cci.pcci_line_size;
-		if (line_size > max)
-			max = line_size;
+
 		if (!cci.pcci_unified) {
-			status = ia64_pal_cache_config_info(l,
-						/* cache_type (instruction)= */ 1,
-						&cci);
+			/* cache_type (instruction)=1*/
+			status = ia64_pal_cache_config_info(l, 1, &cci);
 			if (status != 0) {
 				printk(KERN_ERR
 				       "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
@@ -942,7 +958,7 @@ cpu_init (void)
 	}
 #endif
 
-	get_max_cacheline_size();
+	get_cache_info();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
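
For illustration only, here is a minimal standalone C sketch of the boot-time
machine-vector selection described in the commit message. It assumes the
hypothetical helpers dmar_table_present() and cmdline_has() in place of the
real ACPI XSDT scan in acpi_get_sysname() and kernel command-line parsing;
none of these names exist in the tree, and this is not kernel code.

/*
 * Hypothetical userspace sketch of the dig / dig_vtd selection logic.
 * dmar_table_present() and cmdline_has() are stand-ins for firmware
 * table scanning and kernel parameter parsing.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Pretend the firmware exposes a DMAR ACPI table (i.e. an Intel IOMMU). */
static bool dmar_table_present(void)
{
	return true;
}

/* Crude substring match standing in for kernel parameter parsing. */
static bool cmdline_has(const char *cmdline, const char *opt)
{
	return strstr(cmdline, opt) != NULL;
}

static const char *pick_machvec(const char *cmdline)
{
	/* Explicit overrides win: "machvec=dig" or "intel_iommu=off". */
	if (cmdline_has(cmdline, "machvec=dig") ||
	    cmdline_has(cmdline, "intel_iommu=off"))
		return "dig";

	/* Prefer dig_vtd when a DMAR table (Intel IOMMU) is detected. */
	if (dmar_table_present())
		return "dig_vtd";

	/* Otherwise fall back to dig, much as pci_swiotlb_init() does. */
	return "dig";
}

int main(void)
{
	printf("%s\n", pick_machvec("root=/dev/sda1"));                  /* dig_vtd */
	printf("%s\n", pick_machvec("root=/dev/sda1 intel_iommu=off"));  /* dig */
	return 0;
}

Built and run as an ordinary userspace program, the sketch prints "dig_vtd"
when a DMAR table is reported present and "dig" when the command line carries
one of the override parameters, mirroring the fallback order in the patch.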