From ffbbef5c0639dbf31a0511d8f65f574bb3e30758 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:26 +0900 Subject: intel-iommu: add map_page and unmap_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. This uses a temporary workaround, ifdef X86_64 to avoid IA64 build. The workaround will be removed after the unification. Well, changing x86's struct dma_mapping_ops could break IA64. It's just wrong. It's one of problems that this patchset fixes. We will remove map_single and unmap_single hooks in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 235fb7a5a8a..60258ecbcb4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2273,6 +2273,15 @@ error: return 0; } +static dma_addr_t intel_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return __intel_map_single(dev, page_to_phys(page) + offset, size, + dir, to_pci_dev(dev)->dma_mask); +} + dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) { @@ -2341,8 +2350,9 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, - int dir) +static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; @@ -2386,6 +2396,12 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, } } +void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, + int dir) +{ + intel_unmap_page(dev, dev_addr, size, dir, NULL); +} + void *intel_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { @@ -2570,6 +2586,10 @@ static struct dma_mapping_ops intel_dma_ops = { .unmap_single = intel_unmap_single, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, +#ifdef CONFIG_X86_64 + .map_page = intel_map_page, + .unmap_page = intel_unmap_page, +#endif }; static inline int iommu_domain_cache_init(void) -- cgit v1.2.3 From d7dff84053524186b139342ac66a4160ce6bb517 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:28 +0900 Subject: x86: remove map_single and unmap_single in struct dma_mapping_ops This patch converts dma_map_single and dma_unmap_single to use map_page and unmap_page respectively and removes unnecessary map_single and unmap_single in struct dma_mapping_ops. This leaves intel-iommu's dma_map_single and dma_unmap_single since IA64 uses them. They will be removed after the unification. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 60258ecbcb4..da273e4ef66 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2582,8 +2582,6 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, static struct dma_mapping_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, - .map_single = intel_map_single, - .unmap_single = intel_unmap_single, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 160c1d8e40866edfeae7d68816b7005d70acf391 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:59:02 +0900 Subject: x86, ia64: convert to use generic dma_map_ops struct This converts X86 and IA64 to use include/linux/dma-mapping.h. It's a bit large but pretty boring. The major change for X86 is converting 'int dir' to 'enum dma_data_direction dir' in DMA mapping operations. The major changes for IA64 is using map_page and unmap_page instead of map_single and unmap_single. Signed-off-by: FUJITA Tomonori Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index da273e4ef66..b9a56293390 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2441,7 +2441,8 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2499,7 +2500,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, } int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, - int dir) + enum dma_data_direction dir, struct dma_attrs *attrs) { void *addr; int i; @@ -2579,15 +2580,13 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, return nelems; } -static struct dma_mapping_ops intel_dma_ops = { +struct dma_map_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, -#ifdef CONFIG_X86_64 .map_page = intel_map_page, .unmap_page = intel_unmap_page, -#endif }; static inline int iommu_domain_cache_init(void) -- cgit v1.2.3 From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:38:59 +0530 Subject: x86: smp.h move boot_cpu_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b2252..5a57753ea9f 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "intr_remapping.h" -- cgit v1.2.3 From d7e51e66899f95dabc89b4d4c6674a6e50fa37fc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 7 Jan 2009 15:03:13 -0800 Subject: sparseirq: make some func to be used with genirq Impact: clean up sparseirq fallout on random.c Ingo suggested to change some ifdef from SPARSE_IRQ to GENERIC_HARDIRQS so we could some #ifdef later if all arch support genirq Signed-off-by: Yinghai Lu Acked-by: Matt Mackall Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b2252..3d604132a04 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -20,7 +20,7 @@ struct irq_2_iommu { u8 irte_mask; }; -#ifdef CONFIG_SPARSE_IRQ +#ifdef CONFIG_GENERIC_HARDIRQS static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) { struct irq_2_iommu *iommu; -- cgit v1.2.3 From dfb805e831cc5306b14eacd64e0b36d0d973ee0d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 28 Jan 2009 21:53:17 +0900 Subject: IA64: fix VT-d dma_mapping_error dma_mapping_error is used to see if dma_map_single and dma_map_page succeed. IA64 VT-d dma_mapping_error always says that dma_map_single is successful even though it could fail. Note that X86 VT-d works properly in this regard. This patch fixes IA64 VT-d dma_mapping_error by adding VT-d's own dma_mapping_error() that works for both X86_64 and IA64. VT-d uses zero as an error dma address so VT-d's dma_mapping_error returns 1 if a passed dma address is zero (as x86's VT-d dma_mapping_error does now). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c933980bf56..59de56304aa 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2581,6 +2581,11 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, return nelems; } +static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return !dma_addr; +} + struct dma_map_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, @@ -2588,6 +2593,7 @@ struct dma_map_ops intel_dma_ops = { .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, + .mapping_error = intel_mapping_error, }; static inline int iommu_domain_cache_init(void) -- cgit v1.2.3 From d7ab5c46ae2743079a40bb4060e510418c0842b4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 28 Jan 2009 21:53:18 +0900 Subject: intel-iommu: make dma mapping functions static The dma ops unification enables X86 and IA64 to share intel_dma_ops so we can make dma mapping functions static. This also remove unused intel_map_single(). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 59de56304aa..628f8b72e53 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2283,13 +2283,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, dir, to_pci_dev(dev)->dma_mask); } -dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, - size_t size, int dir) -{ - return __intel_map_single(hwdev, paddr, size, dir, - to_pci_dev(hwdev)->dma_mask); -} - static void flush_unmaps(void) { int i, j; @@ -2397,14 +2390,14 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } -void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, - int dir) +static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, + int dir) { intel_unmap_page(dev, dev_addr, size, dir, NULL); } -void *intel_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +static void *intel_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) { void *vaddr; int order; @@ -2427,8 +2420,8 @@ void *intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; } -void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) +static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) { int order; @@ -2441,9 +2434,9 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) -void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) +static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2500,8 +2493,8 @@ static int intel_nontranslate_map_sg(struct device *hddev, return nelems; } -int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, - enum dma_data_direction dir, struct dma_attrs *attrs) +static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, + enum dma_data_direction dir, struct dma_attrs *attrs) { void *addr; int i; -- cgit v1.2.3 From 8e1568f3500287d0b36c9776132cb53a42d5651d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Feb 2009 01:06:59 -0800 Subject: pci, x86, acpi: fix early_ioremap() leak Pawel reported: ------------[ cut here ]------------ WARNING: at arch/x86/mm/ioremap.c:616 check_early_ioremap_leak+0x52/0x67() Hardware name: Debug warning: early ioremap leak of 1 areas detected. Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.29-rc4-tip #2 ... Reported-by: Pawel Dziekonski Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f5a662a50ac..519f5f91e76 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -42,6 +42,7 @@ LIST_HEAD(dmar_drhd_units); static struct acpi_table_header * __initdata dmar_tbl; +static acpi_size dmar_tbl_size; static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { @@ -288,8 +289,9 @@ static int __init dmar_table_detect(void) acpi_status status = AE_OK; /* if we could find DMAR table, then there are DMAR devices */ - status = acpi_get_table(ACPI_SIG_DMAR, 0, - (struct acpi_table_header **)&dmar_tbl); + status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar_tbl, + &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); @@ -481,6 +483,7 @@ void __init detect_intel_iommu(void) iommu_detected = 1; #endif } + early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; } -- cgit v1.2.3 From 4c5502b1c5744b2090414e1b80ca6388d5c46e06 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:53 -0700 Subject: x86, x2apic: fix lock ordering during IRQ migration Impact: fix potential deadlock on x2apic fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock On x2apic enabled system: [ INFO: hard-safe -> hard-unsafe lock order detected ] 2.6.27-03151-g4480f15b #1 ------------------------------------------------------ swapper/1 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: (irq_2_ir_lock){--..}, at: [] get_irte+0x2f/0x95 and this task is already holding: (&irq_desc_lock_class){+...}, at: [] setup_irq+0x67/0x281 which would create a new lock dependency: (&irq_desc_lock_class){+...} -> (irq_2_ir_lock){--..} but this new dependency connects a hard-irq-safe lock: (&irq_desc_lock_class){+...} ... which became hard-irq-safe at: [] 0xffffffffffffffff to a hard-irq-unsafe lock: (irq_2_ir_lock){--..} ... which became hard-irq-unsafe at: ... [] __lock_acquire+0x571/0x706 [] lock_acquire+0x55/0x71 [] _spin_lock+0x2c/0x38 [] alloc_irte+0x8a/0x14b [] setup_IO_APIC_irq+0x119/0x30e [] setup_IO_APIC+0x146/0x6e5 [] native_smp_prepare_cpus+0x24e/0x2e9 [] kernel_init+0x5a/0x176 [] child_rip+0xa/0x11 [] 0xffffffffffffffff Fix this theoretical lock order issue by using spin_lock_irqsave() instead of spin_lock() Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/intr_remapping.c | 58 +++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 25 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 8e44db040db..5ffa65fffb6 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -117,21 +117,22 @@ int get_irte(int irq, struct irte *entry) { int index; struct irq_2_iommu *irq_iommu; + unsigned long flags; if (!entry) return -1; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } index = irq_iommu->irte_index + irq_iommu->sub_handle; *entry = *(irq_iommu->iommu->ir_table->base + index); - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return 0; } @@ -141,6 +142,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) struct irq_2_iommu *irq_iommu; u16 index, start_index; unsigned int mask = 0; + unsigned long flags; int i; if (!count) @@ -170,7 +172,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) return -1; } - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); do { for (i = index; i < index + count; i++) if (table->base[i].present) @@ -182,7 +184,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) index = (index + count) % INTR_REMAP_TABLE_ENTRIES; if (index == start_index) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); printk(KERN_ERR "can't allocate an IRTE\n"); return -1; } @@ -193,7 +195,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) irq_iommu = irq_2_iommu_alloc(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); printk(KERN_ERR "can't allocate irq_2_iommu\n"); return -1; } @@ -203,7 +205,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; } @@ -223,30 +225,32 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle) { int index; struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } *sub_handle = irq_iommu->sub_handle; index = irq_iommu->irte_index; - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; } int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = irq_2_iommu_alloc(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); printk(KERN_ERR "can't allocate irq_2_iommu\n"); return -1; } @@ -256,7 +260,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) irq_iommu->sub_handle = subhandle; irq_iommu->irte_mask = 0; - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return 0; } @@ -264,11 +268,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) { struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } @@ -277,7 +282,7 @@ int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) irq_iommu->sub_handle = 0; irq_2_iommu(irq)->irte_mask = 0; - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return 0; } @@ -289,11 +294,12 @@ int modify_irte(int irq, struct irte *irte_modified) struct irte *irte; struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } @@ -306,7 +312,7 @@ int modify_irte(int irq, struct irte *irte_modified) __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; } @@ -317,11 +323,12 @@ int flush_irte(int irq) int index; struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } @@ -330,7 +337,7 @@ int flush_irte(int irq) index = irq_iommu->irte_index + irq_iommu->sub_handle; rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; } @@ -363,11 +370,12 @@ int free_irte(int irq) struct irte *irte; struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; + unsigned long flags; - spin_lock(&irq_2_ir_lock); + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu = valid_irq_2_iommu(irq); if (!irq_iommu) { - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return -1; } @@ -387,7 +395,7 @@ int free_irte(int irq) irq_iommu->sub_handle = 0; irq_iommu->irte_mask = 0; - spin_unlock(&irq_2_ir_lock); + spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; } -- cgit v1.2.3 From 0ac2491f57af5644f88383d28809760902d6f4d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:54 -0700 Subject: x86, dmar: move page fault handling code to dmar.c Impact: code movement Move page fault handling code to dmar.c This will be shared both by DMA-remapping and Intr-remapping code. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intel-iommu.c | 188 --------------------------------------------- 2 files changed, 191 insertions(+), 188 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5f333403c2e..75d34bf2db5 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #undef PREFIX #define PREFIX "DMAR:" @@ -812,3 +814,192 @@ int dmar_enable_qi(struct intel_iommu *iommu) return 0; } + +/* iommu interrupt handling. Most stuff are MSI-like. */ + +static const char *fault_reason_strings[] = +{ + "Software", + "Present bit in root entry is clear", + "Present bit in context entry is clear", + "Invalid context entry", + "Access beyond MGAW", + "PTE Write access is not set", + "PTE Read access is not set", + "Next page table ptr is invalid", + "Root table address invalid", + "Context table ptr is invalid", + "non-zero reserved fields in RTP", + "non-zero reserved fields in CTP", + "non-zero reserved fields in PTE", +}; +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) + +const char *dmar_get_fault_reason(u8 fault_reason) +{ + if (fault_reason > MAX_FAULT_REASON_IDX) + return "Unknown"; + else + return fault_reason_strings[fault_reason]; +} + +void dmar_msi_unmask(unsigned int irq) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + /* unmask it */ + spin_lock_irqsave(&iommu->register_lock, flag); + writel(0, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_mask(unsigned int irq) +{ + unsigned long flag; + struct intel_iommu *iommu = get_irq_data(irq); + + /* mask it */ + spin_lock_irqsave(&iommu->register_lock, flag); + writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_write(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + writel(msg->data, iommu->reg + DMAR_FEDATA_REG); + writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); + writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_read(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + msg->data = readl(iommu->reg + DMAR_FEDATA_REG); + msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); + msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +static int dmar_fault_do_one(struct intel_iommu *iommu, int type, + u8 fault_reason, u16 source_id, unsigned long long addr) +{ + const char *reason; + + reason = dmar_get_fault_reason(fault_reason); + + printk(KERN_ERR + "DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + return 0; +} + +#define PRIMARY_FAULT_REG_LEN (16) +static irqreturn_t dmar_fault(int irq, void *dev_id) +{ + struct intel_iommu *iommu = dev_id; + int reg, fault_index; + u32 fault_status; + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + + /* TBD: ignore advanced fault log currently */ + if (!(fault_status & DMA_FSTS_PPF)) + goto clear_overflow; + + fault_index = dma_fsts_fault_record_index(fault_status); + reg = cap_fault_reg_offset(iommu->cap); + while (1) { + u8 fault_reason; + u16 source_id; + u64 guest_addr; + int type; + u32 data; + + /* highest 32 bits */ + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + if (!(data & DMA_FRCD_F)) + break; + + fault_reason = dma_frcd_fault_reason(data); + type = dma_frcd_type(data); + + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 8); + source_id = dma_frcd_source_id(data); + + guest_addr = dmar_readq(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN); + guest_addr = dma_frcd_page_addr(guest_addr); + /* clear the fault */ + writel(DMA_FRCD_F, iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + + dmar_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); + + fault_index++; + if (fault_index > cap_num_fault_regs(iommu->cap)) + fault_index = 0; + spin_lock_irqsave(&iommu->register_lock, flag); + } +clear_overflow: + /* clear primary fault overflow */ + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status & DMA_FSTS_PFO) + writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + return IRQ_HANDLED; +} + +int dmar_set_interrupt(struct intel_iommu *iommu) +{ + int irq, ret; + + irq = create_irq(); + if (!irq) { + printk(KERN_ERR "IOMMU: no free vectors\n"); + return -EINVAL; + } + + set_irq_data(irq, iommu); + iommu->irq = irq; + + ret = arch_setup_dmar_msi(irq); + if (ret) { + set_irq_data(irq, NULL); + iommu->irq = 0; + destroy_irq(irq); + return 0; + } + + /* Force fault register is cleared */ + dmar_fault(irq, iommu); + + ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); + if (ret) + printk(KERN_ERR "IOMMU: can't request irq\n"); + return ret; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f3f686581a9..4a4ab651b70 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1004,194 +1004,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu) return 0; } -/* iommu interrupt handling. Most stuff are MSI-like. */ - -static const char *fault_reason_strings[] = -{ - "Software", - "Present bit in root entry is clear", - "Present bit in context entry is clear", - "Invalid context entry", - "Access beyond MGAW", - "PTE Write access is not set", - "PTE Read access is not set", - "Next page table ptr is invalid", - "Root table address invalid", - "Context table ptr is invalid", - "non-zero reserved fields in RTP", - "non-zero reserved fields in CTP", - "non-zero reserved fields in PTE", -}; -#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) - -const char *dmar_get_fault_reason(u8 fault_reason) -{ - if (fault_reason > MAX_FAULT_REASON_IDX) - return "Unknown"; - else - return fault_reason_strings[fault_reason]; -} - -void dmar_msi_unmask(unsigned int irq) -{ - struct intel_iommu *iommu = get_irq_data(irq); - unsigned long flag; - - /* unmask it */ - spin_lock_irqsave(&iommu->register_lock, flag); - writel(0, iommu->reg + DMAR_FECTL_REG); - /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); -} - -void dmar_msi_mask(unsigned int irq) -{ - unsigned long flag; - struct intel_iommu *iommu = get_irq_data(irq); - - /* mask it */ - spin_lock_irqsave(&iommu->register_lock, flag); - writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); - /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); -} - -void dmar_msi_write(int irq, struct msi_msg *msg) -{ - struct intel_iommu *iommu = get_irq_data(irq); - unsigned long flag; - - spin_lock_irqsave(&iommu->register_lock, flag); - writel(msg->data, iommu->reg + DMAR_FEDATA_REG); - writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); - writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); -} - -void dmar_msi_read(int irq, struct msi_msg *msg) -{ - struct intel_iommu *iommu = get_irq_data(irq); - unsigned long flag; - - spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + DMAR_FEDATA_REG); - msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); - msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); -} - -static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, - u8 fault_reason, u16 source_id, unsigned long long addr) -{ - const char *reason; - - reason = dmar_get_fault_reason(fault_reason); - - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " - "fault addr %llx \n" - "DMAR:[fault reason %02d] %s\n", - (type ? "DMA Read" : "DMA Write"), - (source_id >> 8), PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); - return 0; -} - -#define PRIMARY_FAULT_REG_LEN (16) -static irqreturn_t iommu_page_fault(int irq, void *dev_id) -{ - struct intel_iommu *iommu = dev_id; - int reg, fault_index; - u32 fault_status; - unsigned long flag; - - spin_lock_irqsave(&iommu->register_lock, flag); - fault_status = readl(iommu->reg + DMAR_FSTS_REG); - - /* TBD: ignore advanced fault log currently */ - if (!(fault_status & DMA_FSTS_PPF)) - goto clear_overflow; - - fault_index = dma_fsts_fault_record_index(fault_status); - reg = cap_fault_reg_offset(iommu->cap); - while (1) { - u8 fault_reason; - u16 source_id; - u64 guest_addr; - int type; - u32 data; - - /* highest 32 bits */ - data = readl(iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN + 12); - if (!(data & DMA_FRCD_F)) - break; - - fault_reason = dma_frcd_fault_reason(data); - type = dma_frcd_type(data); - - data = readl(iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN + 8); - source_id = dma_frcd_source_id(data); - - guest_addr = dmar_readq(iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN); - guest_addr = dma_frcd_page_addr(guest_addr); - /* clear the fault */ - writel(DMA_FRCD_F, iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN + 12); - - spin_unlock_irqrestore(&iommu->register_lock, flag); - - iommu_page_fault_do_one(iommu, type, fault_reason, - source_id, guest_addr); - - fault_index++; - if (fault_index > cap_num_fault_regs(iommu->cap)) - fault_index = 0; - spin_lock_irqsave(&iommu->register_lock, flag); - } -clear_overflow: - /* clear primary fault overflow */ - fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status & DMA_FSTS_PFO) - writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); - - spin_unlock_irqrestore(&iommu->register_lock, flag); - return IRQ_HANDLED; -} - -int dmar_set_interrupt(struct intel_iommu *iommu) -{ - int irq, ret; - - irq = create_irq(); - if (!irq) { - printk(KERN_ERR "IOMMU: no free vectors\n"); - return -EINVAL; - } - - set_irq_data(irq, iommu); - iommu->irq = irq; - - ret = arch_setup_dmar_msi(irq); - if (ret) { - set_irq_data(irq, NULL); - iommu->irq = 0; - destroy_irq(irq); - return 0; - } - - /* Force fault register is cleared */ - iommu_page_fault(irq, iommu); - - ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu); - if (ret) - printk(KERN_ERR "IOMMU: can't request irq\n"); - return ret; -} static int iommu_init_domains(struct intel_iommu *iommu) { -- cgit v1.2.3 From 9d783ba042771284fb4ee5013c3d94220755ae7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:55 -0700 Subject: x86, x2apic: enable fault handling for intr-remapping Impact: interface augmentation (not yet used) Enable fault handling flow for intr-remapping aswell. Fault handling code now shared by both dma-remapping and intr-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 102 +++++++++++++++++++++++++++++++++++-------- drivers/pci/intel-iommu.c | 3 +- drivers/pci/intr_remapping.c | 2 +- 3 files changed, 86 insertions(+), 21 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 75d34bf2db5..bb4ed985f9c 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -511,6 +511,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) return -ENOMEM; iommu->seq_id = iommu_allocated++; + sprintf (iommu->name, "dmar%d", iommu->seq_id); iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { @@ -817,7 +818,13 @@ int dmar_enable_qi(struct intel_iommu *iommu) /* iommu interrupt handling. Most stuff are MSI-like. */ -static const char *fault_reason_strings[] = +enum faulttype { + DMA_REMAP, + INTR_REMAP, + UNKNOWN, +}; + +static const char *dma_remap_fault_reasons[] = { "Software", "Present bit in root entry is clear", @@ -833,14 +840,33 @@ static const char *fault_reason_strings[] = "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", }; + +static const char *intr_remap_fault_reasons[] = +{ + "Detected reserved fields in the decoded interrupt-remapped request", + "Interrupt index exceeded the interrupt-remapping table size", + "Present field in the IRTE entry is clear", + "Error accessing interrupt-remapping table pointed by IRTA_REG", + "Detected reserved fields in the IRTE entry", + "Blocked a compatibility format interrupt request", + "Blocked an interrupt request due to source-id verification failure", +}; + #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -const char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { - if (fault_reason > MAX_FAULT_REASON_IDX) + if (fault_reason >= 0x20 && (fault_reason <= 0x20 + + ARRAY_SIZE(intr_remap_fault_reasons))) { + *fault_type = INTR_REMAP; + return intr_remap_fault_reasons[fault_reason - 0x20]; + } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { + *fault_type = DMA_REMAP; + return dma_remap_fault_reasons[fault_reason]; + } else { + *fault_type = UNKNOWN; return "Unknown"; - else - return fault_reason_strings[fault_reason]; + } } void dmar_msi_unmask(unsigned int irq) @@ -897,16 +923,25 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, unsigned long long addr) { const char *reason; + int fault_type; - reason = dmar_get_fault_reason(fault_reason); + reason = dmar_get_fault_reason(fault_reason, &fault_type); - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " - "fault addr %llx \n" - "DMAR:[fault reason %02d] %s\n", - (type ? "DMA Read" : "DMA Write"), - (source_id >> 8), PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + if (fault_type == INTR_REMAP) + printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + "fault index %llx\n" + "INTR-REMAP:[fault reason %02d] %s\n", + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else + printk(KERN_ERR + "DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -920,10 +955,13 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status) + printk(KERN_ERR "DRHD: handling fault status reg %x\n", + fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_overflow; + goto clear_rest; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -964,11 +1002,10 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; spin_lock_irqsave(&iommu->register_lock, flag); } -clear_overflow: - /* clear primary fault overflow */ +clear_rest: + /* clear all the other faults */ fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status & DMA_FSTS_PFO) - writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; @@ -978,6 +1015,12 @@ int dmar_set_interrupt(struct intel_iommu *iommu) { int irq, ret; + /* + * Check if the fault interrupt is already initialized. + */ + if (iommu->irq) + return 0; + irq = create_irq(); if (!irq) { printk(KERN_ERR "IOMMU: no free vectors\n"); @@ -1003,3 +1046,26 @@ int dmar_set_interrupt(struct intel_iommu *iommu) printk(KERN_ERR "IOMMU: can't request irq\n"); return ret; } + +int __init enable_drhd_fault_handling(void) +{ + struct dmar_drhd_unit *drhd; + + /* + * Enable fault control interrupt. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_set_interrupt(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable fault, " + " interrupt, ret %d\n", + (unsigned long long)drhd->reg_base_addr, ret); + return -1; + } + } + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4a4ab651b70..25fc1df486b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1799,7 +1799,7 @@ static int __init init_dmars(void) struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int i, ret, unit = 0; + int i, ret; /* * for each drhd @@ -1921,7 +1921,6 @@ static int __init init_dmars(void) if (drhd->ignored) continue; iommu = drhd->iommu; - sprintf (iommu->name, "dmar%d", unit++); iommu_flush_write_buffer(iommu); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 5ffa65fffb6..c38e3f437a8 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -308,7 +308,7 @@ int modify_irte(int irq, struct irte *irte_modified) index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + set_64bit((unsigned long *)irte, irte_modified->low); __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); -- cgit v1.2.3 From eba67e5da6e971993b2899d2cdf459ce77d3dbc5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:56 -0700 Subject: x86, dmar: routines for disabling queued invalidation and intr remapping Impact: new interfaces (not yet used) Routines for disabling queued invalidation and interrupt remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.c | 27 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bb4ed985f9c..932e5e3930f 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -753,6 +753,42 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, return qi_submit_sync(&desc, iommu); } +/* + * Disable Queued Invalidation interface. + */ +void dmar_disable_qi(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + cycles_t start_time = get_cycles(); + + if (!ecap_qis(iommu->ecap)) + return; + + spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_QIES)) + goto end; + + /* + * Give a chance to HW to complete the pending invalidation requests. + */ + while ((readl(iommu->reg + DMAR_IQT_REG) != + readl(iommu->reg + DMAR_IQH_REG)) && + (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time))) + cpu_relax(); + + iommu->gcmd &= ~DMA_GCMD_QIE; + + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, + !(sts & DMA_GSTS_QIES), sts); +end: + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + /* * Enable Queued Invalidation interface. This is a must to support * interrupt-remapping. Also used by DMA-remapping, which replaces diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index c38e3f437a8..0d202d73a1a 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -467,6 +467,33 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) return 0; } +/* + * Disable Interrupt Remapping. + */ +static void disable_intr_remapping(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + + if (!ecap_ir_support(iommu->ecap)) + return; + + spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_IRES)) + goto end; + + iommu->gcmd &= ~DMA_GCMD_IRE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, !(sts & DMA_GSTS_IRES), sts); + +end: + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; -- cgit v1.2.3 From 1531a6a6b81a4e6f9eec9a5608758a6ea14b96e0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:57 -0700 Subject: x86, dmar: start with sane state while enabling dma and interrupt-remapping Impact: cleanup/sanitization Start from a sane state while enabling dma and interrupt-remapping, by clearing the previous recorded faults and disabling previously enabled queued invalidation and interrupt-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 5 +---- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++++++++++++ drivers/pci/intr_remapping.c | 17 +++++++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 932e5e3930f..f1805002e43 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -982,7 +982,7 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, } #define PRIMARY_FAULT_REG_LEN (16) -static irqreturn_t dmar_fault(int irq, void *dev_id) +irqreturn_t dmar_fault(int irq, void *dev_id) { struct intel_iommu *iommu = dev_id; int reg, fault_index; @@ -1074,9 +1074,6 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return 0; } - /* Force fault register is cleared */ - dmar_fault(irq, iommu); - ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); if (ret) printk(KERN_ERR "IOMMU: can't request irq\n"); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 25fc1df486b..ef167b8b047 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1855,11 +1855,40 @@ static int __init init_dmars(void) } } + /* + * Start from the sane iommu hardware state. + */ for_each_drhd_unit(drhd) { if (drhd->ignored) continue; iommu = drhd->iommu; + + /* + * If the queued invalidation is already initialized by us + * (for example, while enabling interrupt-remapping) then + * we got the things already rolling from a sane state. + */ + if (iommu->qi) + continue; + + /* + * Clear any previous faults. + */ + dmar_fault(-1, iommu); + /* + * Disable queued invalidation if supported and already enabled + * before OS handover. + */ + dmar_disable_qi(iommu); + } + + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (dmar_enable_qi(iommu)) { /* * Queued Invalidate not enabled, use Register Based diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 0d202d73a1a..a84686b2478 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -499,6 +499,23 @@ int __init enable_intr_remapping(int eim) struct dmar_drhd_unit *drhd; int setup = 0; + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + /* + * Clear previous faults. + */ + dmar_fault(-1, iommu); + + /* + * Disable intr remapping and queued invalidation, if already + * enabled prior to OS handover. + */ + disable_intr_remapping(iommu); + + dmar_disable_qi(iommu); + } + /* * check for the Interrupt-remapping support */ -- cgit v1.2.3 From 2e93456f5c069cf889c0c3acd1246ee88c49ae5c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:58 -0700 Subject: x86, intr-remapping: fix free_irte() to clear all the IRTE entries Impact: fix interrupt table entry leak Fix the typo which was not clearing all the interrupt remapping table entries corresponding to an irq. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/intr_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index a84686b2478..f7ecd85e210 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -386,7 +386,7 @@ int free_irte(int irq) if (!irq_iommu->sub_handle) { for (i = 0; i < (1 << irq_iommu->irte_mask); i++) - set_64bit((unsigned long *)irte, 0); + set_64bit((unsigned long *)(irte + i), 0); rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); } -- cgit v1.2.3 From fa4b57cc045d6134b9862b2873f9c8ba9ed53ffe Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:05 -0700 Subject: x86, dmar: use atomic allocations for QI and Intr-remapping init Impact: invalid use of GFP_KERNEL in interrupt context Queued invalidation and interrupt-remapping will get initialized with interrupts disabled (while enabling interrupt-remapping). So use GFP_ATOMIC instead of GFP_KERNEL for memory alloacations. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 6 +++--- drivers/pci/intr_remapping.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f1805002e43..d313039e2fd 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -809,20 +809,20 @@ int dmar_enable_qi(struct intel_iommu *iommu) if (iommu->qi) return 0; - iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); + iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC); if (!iommu->qi) return -ENOMEM; qi = iommu->qi; - qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); + qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC)); if (!qi->desc) { kfree(qi); iommu->qi = 0; return -ENOMEM; } - qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { free_page((unsigned long) qi->desc); kfree(qi); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f7ecd85e210..bc5b6976f91 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -447,12 +447,12 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) struct page *pages; ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), - GFP_KERNEL); + GFP_ATOMIC); if (!iommu->ir_table) return -ENOMEM; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); if (!pages) { printk(KERN_ERR "failed to allocate pages of order %d\n", -- cgit v1.2.3 From e496b617b40f2abf6d49803f56aa1344ce1b9177 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 7 Jan 2009 16:22:37 -0800 Subject: PCI: __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6d6120007af..5737b8a9a73 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -657,7 +657,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -700,7 +700,7 @@ static int pci_save_pcix_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } -- cgit v1.2.3 From 1bf83e558cb29d163f4bc6decbc3800ecf4db195 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:38:34 +0100 Subject: PCI: PCIe portdrv: Use driver data to simplify code PCI Express port driver extension, as defined by struct pcie_port_device_ext in portdrv.h, is allocated and initialized, but never used (it also is never freed). Extend it to hold the PCI Express port type as well as the port interrupt mode, change its name and use it to simplify the code in portdrv_core.c . Additionally, remove the redundant interrupt_mode member of struct pcie_device defined in include/linux/pcieport_if.h . Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv.h | 5 ++- drivers/pci/pcie/portdrv_core.c | 95 ++++++++++++++++------------------------- 2 files changed, 39 insertions(+), 61 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2529f3f2ea5..b0dcbc73415 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -28,8 +28,9 @@ #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_device_ext { - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; extern struct bus_type pcie_port_bus_type; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 8b3f8c18032..273e97619bc 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -15,10 +15,9 @@ #include #include +#include "../pci.h" #include "portdrv.h" -extern int pcie_mch_quirk; /* MSI-quirk Indicator */ - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -31,28 +30,6 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } -static int is_msi_quirked(struct pci_dev *dev) -{ - int port_type, quirk = 0; - u16 reg16; - - pci_read_config_word(dev, - pci_find_capability(dev, PCI_CAP_ID_EXP) + - PCIE_CAPABILITIES_REG, ®16); - port_type = (reg16 >> 4) & PORT_TYPE_MASK; - switch(port_type) { - case PCIE_RC_PORT: - if (pcie_mch_quirk == 1) - quirk = 1; - break; - case PCIE_SW_UPSTREAM_PORT: - case PCIE_SW_DOWNSTREAM_PORT: - default: - break; - } - return quirk; -} - /** * assign_interrupt_mode - choose interrupt mode for PCI Express port services * (INTx, MSI-X, MSI) and set up vectors @@ -64,6 +41,7 @@ static int is_msi_quirked(struct pci_dev *dev) */ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { + struct pcie_port_data *port_data = pci_get_drvdata(dev); int i, pos, nvec, status = -EINVAL; int interrupt_mode = PCIE_PORT_INTx_MODE; @@ -75,7 +53,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) } /* Check MSI quirk */ - if (is_msi_quirked(dev)) + if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) return interrupt_mode; /* Select MSI-X over MSI if supported */ @@ -132,13 +110,11 @@ static int get_port_device_capability(struct pci_dev *dev) pos + PCIE_SLOT_CAPABILITIES_REG, ®32); if (reg32 & SLOT_HP_CAPABLE_MASK) services |= PCIE_PORT_SERVICE_HP; - } - /* PME Capable - root port capability */ - if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT) - services |= PCIE_PORT_SERVICE_PME; - + } + /* AER capable */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) services |= PCIE_PORT_SERVICE_AER; + /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; @@ -152,15 +128,15 @@ static int get_port_device_capability(struct pci_dev *dev) * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { + struct pcie_port_data *port_data = pci_get_drvdata(parent); struct device *device; + int port_type = port_data->port_type; dev->port = parent; - dev->interrupt_mode = irq_mode; dev->irq = irq; dev->id.vendor = parent->vendor; dev->id.device = parent->device; @@ -185,10 +161,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { struct pcie_device *device; @@ -196,7 +171,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, if (!device) return NULL; - pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); + pcie_device_init(parent, device, service_type, irq); return device; } @@ -230,39 +205,36 @@ int pcie_port_device_probe(struct pci_dev *dev) */ int pcie_port_device_register(struct pci_dev *dev) { - struct pcie_port_device_ext *p_ext; - int status, type, capabilities, irq_mode, i; + struct pcie_port_data *port_data; + int status, capabilities, irq_mode, i; int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; u16 reg16; - /* Allocate port device extension */ - if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL))) + port_data = kzalloc(sizeof(*port_data), GFP_KERNEL); + if (!port_data) return -ENOMEM; - - pci_set_drvdata(dev, p_ext); + pci_set_drvdata(dev, port_data); /* Get port type */ pci_read_config_word(dev, pci_find_capability(dev, PCI_CAP_ID_EXP) + PCIE_CAPABILITIES_REG, ®16); - type = (reg16 >> 4) & PORT_TYPE_MASK; + port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK; - /* Now get port services */ capabilities = get_port_device_capability(dev); + /* Root ports are capable of generating PME too */ + if (port_data->port_type == PCIE_RC_PORT) + capabilities |= PCIE_PORT_SERVICE_PME; + irq_mode = assign_interrupt_mode(dev, vectors, capabilities); - p_ext->interrupt_mode = irq_mode; + port_data->port_irq_mode = irq_mode; /* Allocate child services if any */ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; if (capabilities & (1 << i)) { - child = alloc_pcie_device( - dev, /* parent */ - type, /* port type */ - i, /* service type */ - vectors[i], /* irq */ - irq_mode /* interrupt mode */); + child = alloc_pcie_device(dev, i, vectors[i]); if (child) { status = device_register(&child->device); if (status) { @@ -349,25 +321,30 @@ static int remove_iter(struct device *dev, void *data) */ void pcie_port_device_remove(struct pci_dev *dev) { - struct device *device; - unsigned long device_addr; - int interrupt_mode = PCIE_PORT_INTx_MODE; + struct pcie_port_data *port_data = pci_get_drvdata(dev); int status; do { + unsigned long device_addr; + status = device_for_each_child(&dev->dev, &device_addr, remove_iter); if (status) { - device = (struct device*)device_addr; - interrupt_mode = (to_pcie_device(device))->interrupt_mode; + struct device *device = (struct device*)device_addr; put_device(device); device_unregister(device); } } while (status); - /* Switch to INTx by default if MSI enabled */ - if (interrupt_mode == PCIE_PORT_MSIX_MODE) + + switch (port_data->port_irq_mode) { + case PCIE_PORT_MSIX_MODE: pci_disable_msix(dev); - else if (interrupt_mode == PCIE_PORT_MSI_MODE) + break; + case PCIE_PORT_MSI_MODE: pci_disable_msi(dev); + break; + } + + kfree(port_data); } /** -- cgit v1.2.3 From 90e9cd50f7feeddc911325c8a8c1b7e1fccc6599 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:39:39 +0100 Subject: PCI: PCIe portdrv: Aviod using service devices with wrong interrupts The PCI Express port driver should not attempt to register service devices that require the ability to generate interrupts if generating interrupts is not possible. Namely, if the port has no interrupt pin configured and we cannot set up MSI or MSI-X for it, there is no way it can generate interrupts and in such a case the port services that rely on interrupts (PME, PCIe HP, AER) should not be enabled for it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 273e97619bc..265eba033a4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -43,7 +43,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { struct pcie_port_data *port_data = pci_get_drvdata(dev); int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_INTx_MODE; + int interrupt_mode = PCIE_PORT_NO_IRQ; /* Set INTx as default */ for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { @@ -51,7 +51,9 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) nvec++; vectors[i] = dev->irq; } - + if (dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + /* Check MSI quirk */ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) return interrupt_mode; @@ -141,7 +143,7 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, dev->id.vendor = parent->vendor; dev->id.device = parent->device; dev->id.port_type = port_type; - dev->id.service_type = (1 << service_type); + dev->id.service_type = service_type; /* Initialize generic device interface */ device = &dev->device; @@ -232,19 +234,32 @@ int pcie_port_device_register(struct pci_dev *dev) /* Allocate child services if any */ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; + int service = 1 << i; - if (capabilities & (1 << i)) { - child = alloc_pcie_device(dev, i, vectors[i]); - if (child) { - status = device_register(&child->device); - if (status) { - kfree(child); - continue; - } - get_device(&child->device); - } + if (!(capabilities & service)) + continue; + + /* + * Don't use service devices that require interrupts if there is + * no way to generate them. + */ + if (irq_mode == PCIE_PORT_NO_IRQ + && service != PCIE_PORT_SERVICE_VC) + continue; + + child = alloc_pcie_device(dev, service, vectors[i]); + if (!child) + continue; + + status = device_register(&child->device); + if (status) { + kfree(child); + continue; } + + get_device(&child->device); } + return 0; } -- cgit v1.2.3 From f118c0c3cff4fed39bde1863f9d59850719645cc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:42:01 +0100 Subject: PCI: PCIe portdrv: Do not enable port device before setting up interrupts The PCI Express port driver calls pci_enable_device() before setting up interrupts, which is wrong, because if there is an interrupt pin configured for the port, pci_enable_device() will likely set up an interrupt link for it. However, this shouldn't be done if either MSI or MSI-X interrupt mode is chosen for the port. The solution is to call pci_enable_device() after setting up interrupts, because in that case the interrupt link won't be set up if MSI or MSI-X are enabled. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 38 ++++++++++++++++++++++++++++---------- drivers/pci/pcie/portdrv_pci.c | 11 +++-------- 2 files changed, 31 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 265eba033a4..91ecbc43155 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -208,7 +208,7 @@ int pcie_port_device_probe(struct pci_dev *dev) int pcie_port_device_register(struct pci_dev *dev) { struct pcie_port_data *port_data; - int status, capabilities, irq_mode, i; + int status, capabilities, irq_mode, i, nr_serv; int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; u16 reg16; @@ -229,24 +229,32 @@ int pcie_port_device_register(struct pci_dev *dev) capabilities |= PCIE_PORT_SERVICE_PME; irq_mode = assign_interrupt_mode(dev, vectors, capabilities); + if (irq_mode == PCIE_PORT_NO_IRQ) { + /* + * Don't use service devices that require interrupts if there is + * no way to generate them. + */ + if (!(capabilities & PCIE_PORT_SERVICE_VC)) { + status = -ENODEV; + goto Error; + } + capabilities = PCIE_PORT_SERVICE_VC; + } port_data->port_irq_mode = irq_mode; + status = pci_enable_device(dev); + if (status) + goto Error; + pci_set_master(dev); + /* Allocate child services if any */ - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { + for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; int service = 1 << i; if (!(capabilities & service)) continue; - /* - * Don't use service devices that require interrupts if there is - * no way to generate them. - */ - if (irq_mode == PCIE_PORT_NO_IRQ - && service != PCIE_PORT_SERVICE_VC) - continue; - child = alloc_pcie_device(dev, service, vectors[i]); if (!child) continue; @@ -258,9 +266,19 @@ int pcie_port_device_register(struct pci_dev *dev) } get_device(&child->device); + nr_serv++; + } + if (!nr_serv) { + pci_disable_device(dev); + status = -ENODEV; + goto Error; } return 0; + + Error: + kfree(port_data); + return status; } #ifdef CONFIG_PM diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 5ea566e20b3..d17611f390b 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -82,18 +82,13 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, if (status) return status; - if (pci_enable_device(dev) < 0) - return -ENODEV; - - pci_set_master(dev); if (!dev->irq && dev->pin) { dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; " "check vendor BIOS\n", dev->vendor, dev->device); } - if (pcie_port_device_register(dev)) { - pci_disable_device(dev); - return -ENOMEM; - } + status = pcie_port_device_register(dev); + if (status) + return status; pcie_portdrv_save_config(dev); -- cgit v1.2.3 From 87d2e2ecf6026efa64b01f7f71802b20da736d35 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:43:07 +0100 Subject: PCI: PCIe portdrv: Remove unnecessary function The function pcie_portdrv_save_config() in portdrv_pci.c is not necessary. Remove it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_pci.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index d17611f390b..94d0e2af9ba 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -32,11 +32,6 @@ MODULE_LICENSE("GPL"); /* global data */ static const char device_name[] = "pcieport-driver"; -static int pcie_portdrv_save_config(struct pci_dev *dev) -{ - return pci_save_state(dev); -} - static int pcie_portdrv_restore_config(struct pci_dev *dev) { int retval; @@ -90,7 +85,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, if (status) return status; - pcie_portdrv_save_config(dev); + pci_save_state(dev); return 0; } -- cgit v1.2.3 From 0516c8bcd25293f438573101c439ce25a18916ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:44:19 +0100 Subject: PCI: PCIe portdrv: Simplily probe callback of service drivers The second argument of the ->probe() callback in struct pcie_port_service_driver is unnecessary and never used. Remove it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 3 +-- drivers/pci/hotplug/pciehp_core.c | 2 +- drivers/pci/pcie/aer/aerdrv.c | 6 ++---- drivers/pci/pcie/portdrv_core.c | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 438d795f9fe..ad8835758a1 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -82,8 +82,7 @@ static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); /* Dummy driver for dumplicate name detection */ -static int __init dummy_probe(struct pcie_device *dev, - const struct pcie_port_service_id *id) +static int __init dummy_probe(struct pcie_device *dev) { int pos; u32 slot_cap; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 681e3912b82..3429b21dbb5 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe return 0; } -static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id) +static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e390707661d..57c41204c54 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -38,8 +38,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ); +static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); static int aer_suspend(struct pcie_device *dev, pm_message_t state) {return 0;} @@ -207,8 +206,7 @@ static void aer_remove(struct pcie_device *dev) * * Invoked when PCI Express bus loads AER service driver. **/ -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ) +static int __devinit aer_probe (struct pcie_device *dev) { int status; struct aer_rpc *rpc; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 91ecbc43155..682524b0c93 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -402,7 +402,7 @@ static int pcie_port_probe_service(struct device *dev) return -ENODEV; pciedev = to_pcie_device(dev); - status = driver->probe(pciedev, driver->id_table); + status = driver->probe(pciedev); if (!status) { dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", driver->name); -- cgit v1.2.3 From 22106368c999246c414610dcaacd485e741605b1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:46:46 +0100 Subject: PCI: PCIe portdrv: Remove struct pcie_port_service_id The PCI Express port driver uses 'struct pcie_port_service_id' for matching port service devices and drivers, but this structure contains fields that duplicate information from the port device itself (vendor, device, subvendor, subdevice) and fields that are not used by any existing port service driver (class, class_mask, drvier_data). Also, both existing port service drivers (AER and PCIe HP) don't even use the vendor and device fields for device matching. Therefore 'struct pcie_port_service_id' can be removed altogether and the only useful members of it (port_type, service) can be introduced directly into the port service device and port service driver structures. That simplifies the code quite a bit and reduces its size. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 13 ++----------- drivers/pci/hotplug/pciehp_core.c | 12 ++---------- drivers/pci/pcie/aer/aerdrv.c | 16 ++-------------- drivers/pci/pcie/aer/aerdrv_core.c | 10 +++++----- drivers/pci/pcie/portdrv.h | 5 ----- drivers/pci/pcie/portdrv_bus.c | 18 ++++++++++-------- drivers/pci/pcie/portdrv_core.c | 5 +---- 7 files changed, 22 insertions(+), 57 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index ad8835758a1..21734c31152 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -67,16 +67,6 @@ static int __init parse_detect_mode(void) return PCIEHP_DETECT_DEFAULT; } -static struct pcie_port_service_id __initdata port_pci_ids[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); @@ -110,7 +100,8 @@ static int __init dummy_probe(struct pcie_device *dev) static struct pcie_port_service_driver __initdata dummy_driver = { .name = "pciehp_dummy", - .id_table = port_pci_ids, + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = dummy_probe, }; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 3429b21dbb5..3d21bbba330 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -505,18 +505,10 @@ static int pciehp_resume (struct pcie_device *dev) } #endif -static struct pcie_port_service_id port_pci_ids[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, - .id_table = &port_pci_ids[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = pciehp_probe, .remove = pciehp_remove, diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 57c41204c54..e11c0319406 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -48,19 +48,6 @@ static pci_ers_result_t aer_error_detected(struct pci_dev *dev, static void aer_error_resume(struct pci_dev *dev); static pci_ers_result_t aer_root_reset(struct pci_dev *dev); -/* - * PCI Express bus's AER Root service driver data structure - */ -static struct pcie_port_service_id aer_id[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, - { /* end: all zeroes */ } -}; - static struct pci_error_handlers aer_error_handlers = { .error_detected = aer_error_detected, .resume = aer_error_resume, @@ -68,7 +55,8 @@ static struct pci_error_handlers aer_error_handlers = { static struct pcie_port_service_driver aerdriver = { .name = "aer", - .id_table = &aer_id[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, .remove = aer_remove, diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 38257500738..307452f3003 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data) { struct device_driver *driver; struct pcie_port_service_driver *service_driver; - struct pcie_device *pcie_dev; struct find_aer_service_data *result; result = (struct find_aer_service_data *) data; if (device->bus == &pcie_port_bus_type) { - pcie_dev = to_pcie_device(device); - if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) + struct pcie_port_data *port_data; + + port_data = pci_get_drvdata(to_pcie_device(device)->port); + if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT) result->is_downstream = 1; driver = device->driver; if (driver) { service_driver = to_service_driver(driver); - if (service_driver->id_table->service_type == - PCIE_PORT_SERVICE_AER) { + if (service_driver->service == PCIE_PORT_SERVICE_AER) { result->aer_driver = service_driver; return 1; } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index b0dcbc73415..ad4d082a034 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -28,11 +28,6 @@ #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_data { - int port_type; /* Type of the port */ - int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ -}; - extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index eec89b767f9..ef3a4eeaebb 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type); static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) { struct pcie_device *pciedev; + struct pcie_port_data *port_data; struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) return 0; - + pciedev = to_pcie_device(dev); driver = to_service_driver(drv); - if ( (driver->id_table->vendor != PCI_ANY_ID && - driver->id_table->vendor != pciedev->id.vendor) || - (driver->id_table->device != PCI_ANY_ID && - driver->id_table->device != pciedev->id.device) || - (driver->id_table->port_type != PCIE_ANY_PORT && - driver->id_table->port_type != pciedev->id.port_type) || - driver->id_table->service_type != pciedev->id.service_type ) + + if (driver->service != pciedev->service) + return 0; + + port_data = pci_get_drvdata(pciedev->port); + + if (driver->port_type != PCIE_ANY_PORT + && driver->port_type != port_data->port_type) return 0; return 1; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 682524b0c93..843d9e30dd3 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -140,10 +140,7 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, dev->port = parent; dev->irq = irq; - dev->id.vendor = parent->vendor; - dev->id.device = parent->device; - dev->id.port_type = port_type; - dev->id.service_type = service_type; + dev->service = service_type; /* Initialize generic device interface */ device = &dev->device; -- cgit v1.2.3 From a447b772826fde2a3abfd9bb943dee8750994c55 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 25 Jan 2009 23:53:56 +0100 Subject: PCI: struct device - replace bus_id with dev_name(), dev_set_name() More dev_set_name conversion. Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/cpqphp_sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index a13abf55d78..8450f4a6568 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -225,7 +225,8 @@ void cpqhp_shutdown_debugfs(void) void cpqhp_create_debugfs_files(struct controller *ctrl) { - ctrl->dentry = debugfs_create_file(ctrl->pci_dev->dev.bus_id, S_IRUGO, root, ctrl, &debug_ops); + ctrl->dentry = debugfs_create_file(dev_name(&ctrl->pci_dev->dev), + S_IRUGO, root, ctrl, &debug_ops); } void cpqhp_remove_debugfs_files(struct controller *ctrl) -- cgit v1.2.3 From a52e2e3513d4beafe8fe8699f1519b021c2d05ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jan 2009 00:21:14 +0100 Subject: PCI/MSI: Introduce pci_msix_table_size() Introduce new function pci_msix_table_size() returning the size of the MSI-X table of given PCI device or 0 if the device doesn't support MSI-X. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index baba2eb5367..08aedd5875b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -674,6 +674,23 @@ static int msi_free_irqs(struct pci_dev* dev) return 0; } +/** + * pci_msix_table_size - return the number of device's MSI-X table entries + * @dev: pointer to the pci_dev data structure of MSI-X device function + */ +int pci_msix_table_size(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (!pos) + return 0; + + pci_read_config_word(dev, msi_control_reg(pos), &control); + return multi_msix_capable(control); +} + /** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -691,9 +708,8 @@ static int msi_free_irqs(struct pci_dev* dev) **/ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) { - int status, pos, nr_entries; + int status, nr_entries; int i, j; - u16 control; if (!entries) return -EINVAL; @@ -702,9 +718,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) if (status) return status; - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - pci_read_config_word(dev, msi_control_reg(pos), &control); - nr_entries = multi_msix_capable(control); + nr_entries = pci_msix_table_size(dev); if (nvec > nr_entries) return -EINVAL; -- cgit v1.2.3 From b43d451385ef833e0696032aac2629da04d46c59 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jan 2009 00:23:22 +0100 Subject: PCI/PCIe portdrv: Fix allocation of interrupts If MSI-X interrupt mode is used by the PCI Express port driver, too many vectors are allocated and it is not ensured that the right vectors will be used for the right services. Namely, the PCI Express specification states that both PCI Express native PME and PCI Express hotplug will always use the same MSI or MSI-X message for signalling interrupts, which implies that the same vector will be used by both of them. Also, the VC service does not use interrupts at all. Moreover, is not clear which of the vectors allocated by pci_enable_msix() in the current code will be used for PME and hotplug and which of them will be used for AER if all of these services are configured. For these reasons, rework the allocation of interrupts for PCI Express ports so that if MSI-X are enabled, the right vectors will be used for the right purposes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv.h | 6 ++ drivers/pci/pcie/portdrv_core.c | 206 ++++++++++++++++++++++++++++++++-------- 2 files changed, 173 insertions(+), 39 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index ad4d082a034..5b818bd835e 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -25,6 +25,12 @@ #define PCIE_CAPABILITIES_REG 0x2 #define PCIE_SLOT_CAPABILITIES_REG 0x14 #define PCIE_PORT_DEVICE_MAXSERVICES 4 +#define PCIE_PORT_MSI_VECTOR_MASK 0x1f +/* + * According to the PCI Express Base Specification 2.0, the indices of the MSI-X + * table entires used by port services must not exceed 31 + */ +#define PCIE_PORT_MAX_MSIX_ENTRIES 32 #define get_descriptor_id(type, service) (((type - 4) << 4) | service) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 843d9e30dd3..3aea92a9292 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -30,6 +30,152 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } +/** + * pcie_port_msix_add_entry - add entry to given array of MSI-X entries + * @entries: Array of MSI-X entries + * @new_entry: Index of the entry to add to the array + * @nr_entries: Number of entries aleady in the array + * + * Return value: Position of the added entry in the array + */ +static int pcie_port_msix_add_entry( + struct msix_entry *entries, int new_entry, int nr_entries) +{ + int j; + + for (j = 0; j < nr_entries; j++) + if (entries[j].entry == new_entry) + return j; + + entries[j].entry = new_entry; + return j; +} + +/** + * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port + * @dev: PCI Express port to handle + * @vectors: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) +{ + struct msix_entry *msix_entries; + int idx[PCIE_PORT_DEVICE_MAXSERVICES]; + int nr_entries, status, pos, i, nvec; + u16 reg16; + u32 reg32; + + nr_entries = pci_msix_table_size(dev); + if (!nr_entries) + return -EINVAL; + if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES) + nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES; + + msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL); + if (!msix_entries) + return -ENOMEM; + + /* + * Allocate as many entries as the port wants, so that we can check + * which of them will be useful. Moreover, if nr_entries is correctly + * equal to the number of entries this port actually uses, we'll happily + * go through without any tricks. + */ + for (i = 0; i < nr_entries; i++) + msix_entries[i].entry = i; + + status = pci_enable_msix(dev, msix_entries, nr_entries); + if (status) + goto Exit; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + idx[i] = -1; + status = -EIO; + nvec = 0; + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + int entry; + + /* + * The code below follows the PCI Express Base Specification 2.0 + * stating in Section 6.1.6 that "PME and Hot-Plug Event + * interrupts (when both are implemented) always share the same + * MSI or MSI-X vector, as indicated by the Interrupt Message + * Number field in the PCI Express Capabilities register", where + * according to Section 7.8.2 of the specification "For MSI-X, + * the value in this field indicates which MSI-X Table entry is + * used to generate the interrupt message." + */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); + entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_PME_SHIFT] = i; + idx[PCIE_PORT_SERVICE_HP_SHIFT] = i; + } + + if (mask & PCIE_PORT_SERVICE_AER) { + int entry; + + /* + * The code below follows Section 7.10.10 of the PCI Express + * Base Specification 2.0 stating that bits 31-27 of the Root + * Error Status Register contain a value indicating which of the + * MSI/MSI-X vectors assigned to the port is going to be used + * for AER, where "For MSI-X, the value in this register + * indicates which MSI-X Table entry is used to generate the + * interrupt message." + */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + entry = reg32 >> 27; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_AER_SHIFT] = i; + } + + /* + * If nvec is equal to the allocated number of entries, we can just use + * what we have. Otherwise, the port has some extra entries not for the + * services we know and we need to work around that. + */ + if (nvec == nr_entries) { + status = 0; + } else { + /* Drop the temporary MSI-X setup */ + pci_disable_msix(dev); + + /* Now allocate the MSI-X vectors for real */ + status = pci_enable_msix(dev, msix_entries, nvec); + if (status) + goto Exit; + } + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1; + + Exit: + kfree(msix_entries); + return status; + + Error: + pci_disable_msix(dev); + goto Exit; +} + /** * assign_interrupt_mode - choose interrupt mode for PCI Express port services * (INTx, MSI-X, MSI) and set up vectors @@ -42,49 +188,31 @@ static void release_pcie_device(struct device *dev) static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { struct pcie_port_data *port_data = pci_get_drvdata(dev); - int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_NO_IRQ; - - /* Set INTx as default */ - for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - nvec++; - vectors[i] = dev->irq; - } - if (dev->pin) - interrupt_mode = PCIE_PORT_INTx_MODE; + int irq, interrupt_mode = PCIE_PORT_NO_IRQ; + int i; /* Check MSI quirk */ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) - return interrupt_mode; + goto Fallback; + + /* Try to use MSI-X if supported */ + if (!pcie_port_enable_msix(dev, vectors, mask)) + return PCIE_PORT_MSIX_MODE; + + /* We're not going to use MSI-X, so try MSI and fall back to INTx */ + if (!pci_enable_msi(dev)) + interrupt_mode = PCIE_PORT_MSI_MODE; + + Fallback: + if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + + irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = irq; + + vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1; - /* Select MSI-X over MSI if supported */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos) { - struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = - {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - status = pci_enable_msix(dev, msix_entries, nvec); - if (!status) { - int j = 0; - - interrupt_mode = PCIE_PORT_MSIX_MODE; - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - vectors[i] = msix_entries[j++].vector; - } - } - } - if (status) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (pos) { - status = pci_enable_msi(dev); - if (!status) { - interrupt_mode = PCIE_PORT_MSI_MODE; - for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++) - vectors[i] = dev->irq; - } - } - } return interrupt_mode; } -- cgit v1.2.3 From 11df1f05514beaf0269484191007dbc8d47e0e6f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Jan 2009 11:31:00 +1100 Subject: PCI/MSI: Use #ifdefs instead of weak functions Weak functions aren't all they're cracked up to be. They lead to incorrect binaries with some toolchains, they require us to have empty functions we otherwise wouldn't, and the unused code is not elided (as of gcc 4.3.2 anyway). So replace the weak MSI arch hooks with the #define foo foo idiom. We no longer need empty versions of arch_setup/teardown_msi_irq(). This is less source (by 1 line!), and results in smaller binaries too: text data bss dec hex filename 9354300 1693916 678424 11726640 b2ef30 build/powerpc/vmlinux-before 9354052 1693852 678424 11726328 b2edf8 build/powerpc/vmlinux-after Also smaller on x86_64 and arm (iop13xx). Signed-off-by: Michael Ellerman Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 08aedd5875b..33adf323f06 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -27,20 +27,15 @@ static int pci_msi_enable = 1; /* Arch hooks */ -int __attribute__ ((weak)) -arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +#ifndef arch_msi_check_device +int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) { return 0; } +#endif -int __attribute__ ((weak)) -arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) -{ - return 0; -} - -int __attribute__ ((weak)) -arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +#ifndef arch_setup_msi_irqs +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; @@ -53,14 +48,10 @@ arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 0; } +#endif -void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) -{ - return; -} - -void __attribute__ ((weak)) -arch_teardown_msi_irqs(struct pci_dev *dev) +#ifndef arch_teardown_msi_irqs +void arch_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; @@ -69,6 +60,7 @@ arch_teardown_msi_irqs(struct pci_dev *dev) arch_teardown_msi_irq(entry->irq); } } +#endif static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) { -- cgit v1.2.3 From 2b56313448bb8efad3af19f211d988c8352ac04d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 28 Jan 2009 18:27:21 +0800 Subject: PCI: check if a bus is added when removing it When removing a bus, 'is_added' should be checked to make sure the bus has been successfully added by pci_bus_add_child() who will sets 'is_added'. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/remove.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 042e0892442..caf8e1eae45 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus) down_write(&pci_bus_sem); list_del(&pci_bus->node); up_write(&pci_bus_sem); + if (!pci_bus->is_added) + return; + pci_remove_legacy_files(pci_bus); device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity); device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity); -- cgit v1.2.3 From 1c35b8e538cb6259accb215099cdb673310cad84 Mon Sep 17 00:00:00 2001 From: Frank Seidel Date: Fri, 6 Feb 2009 10:23:36 +0100 Subject: PCI: add missing KERN_* constants to printks According to kerneljanitors todo list all printk calls (beginning a new line) should have an according KERN_* constant. Those are the missing pieces here for the pci subsystem. Signed-off-by: Frank Seidel Reviewed-by: Kenji Kaneshige Tested-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp.h | 10 +++++----- drivers/pci/hotplug/shpchp.h | 10 +++++----- drivers/pci/intel-iommu.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 39ae37589fd..1f887f64e49 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -46,10 +46,10 @@ extern int pciehp_force; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ - do { \ - if (pciehp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (pciehp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (pciehp_debug) \ - dev_printk(, &ctrl->pcie->device, \ + dev_printk(KERN_DEBUG, &ctrl->pcie->device, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 6aba0b6cf2e..974e924ca96 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -48,10 +48,10 @@ extern int shpchp_debug; extern struct workqueue_struct *shpchp_wq; #define dbg(format, arg...) \ - do { \ - if (shpchp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (shpchp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (shpchp_debug) \ - dev_printk(, &ctrl->pci_dev->dev, \ + dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f3f686581a9..b548937d474 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1970,7 +1970,7 @@ static inline void iommu_prepare_isa(void) ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk("IOMMU: Failed to create 0-64M identity map, " + printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " "floppy might not work\n"); } -- cgit v1.2.3 From 0b3e7388e3b438500aaa0630879ce536747a47ca Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Feb 2009 22:45:24 +0100 Subject: PCI: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Since the subsequent code that could provoke an error does not use the allocated data, the allocation is just moved below it. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Reviewed-by: Matthew Wilcox Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 21734c31152..96048010e7d 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -79,14 +79,15 @@ static int __init dummy_probe(struct pcie_device *dev) struct slot *slot, *tmp; struct pci_dev *pdev = dev->port; struct pci_bus *pbus = pdev->subordinate; - if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL))) - return -ENOMEM; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) return -ENODEV; if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP))) return -ENODEV; pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap); + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) + return -ENOMEM; slot->number = slot_cap >> 19; list_for_each_entry(tmp, &dummy_slots, slot_list) { if (tmp->number == slot->number) -- cgit v1.2.3 From 81b840cd27e3ee9af67b6e05a4847868f74fce69 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 3 Feb 2009 15:06:13 +0900 Subject: PCI: pciehp: fix possible endless loop in pcie_isr Fix possible endless loop in pcie_isr. Currently, pcie_isr() (interrupt service routine of pciehp) can end up in an endless loop if the Slot Status register is set again immediately after being cleared. According to the past discussion (see below URL) this case can happen if the power fault detected bit is set during handling. http://sourceforge.net/mailarchive/message.php?msg_id=20051130135409.A14918%40unix-os.sc.intel.com Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 7a16c6897bb..c1a312f2406 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -672,10 +672,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC); + detected &= ~intr_loc; intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) { + if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) { ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; -- cgit v1.2.3 From 99f0169c17f334a11b0ace91188501c612f3e1e6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 3 Feb 2009 15:06:16 +0900 Subject: PCI: pciehp: enable software notification on empty slots Current pciehp disables software notification of adapter presence changed event and MRL changed event when slot is turned off. Because of this, there is no way to detect those events on empty slots in the current pciehp implementation. According to the past discussion(*), this behavior was introduced to prevent endless loop that could happen if pcie_isr() runs after power fault is detected on a certain platform whose stickey power-fault bit remains on till the slot is powered on again. (*) http://sourceforge.net/mailarchive/message.php?msg_id=20051130135409.A14918%40unix-os.sc.intel.com I think this endless loop can be avoided using one bit flag that indicates power fault had been detected, instead of disabling software notification of adapter present changed event and MRL changed event. With this patch, we can enable software notification mechanism of presence changed and MRL changed event on the empty slots again. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_hpc.c | 31 +++++++++++-------------------- 2 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 1f887f64e49..2bf8d28062e 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -112,6 +112,7 @@ struct controller { unsigned int no_cmd_complete:1; unsigned int link_active_reporting:1; unsigned int notification_enabled:1; + unsigned int power_fault_detected; }; #define INT_BUTTON_IGNORE 0 diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index c1a312f2406..07bd3215114 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot) slot_cmd = POWER_ON; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* Enable detection that we turned off at slot power-off time */ if (!pciehp_poll_mode) { - slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Enable power fault detection turned off at power off time */ + slot_cmd |= PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - if (retval) { ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd); - return -1; + return retval; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl->power_fault_detected = 0; return retval; } @@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot) slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* - * If we get MRL or presence detect interrupts now, the isr - * will notice the sticky power-fault bit too and issue power - * indicator change commands. This will lead to an endless loop - * of command completions, since the power-fault bit remains on - * till the slot is powered on again. - */ if (!pciehp_poll_mode) { - slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Disable power fault detection */ + slot_cmd &= ~PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); @@ -710,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) pciehp_handle_presence_change(p_slot); /* Check Power Fault Detected */ - if (intr_loc & PCI_EXP_SLTSTA_PFD) + if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { + ctrl->power_fault_detected = 1; pciehp_handle_power_fault(p_slot); - + } return IRQ_HANDLED; } -- cgit v1.2.3 From 6a82e21823058eea95325005b79f3b8c9492460f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 3 Feb 2009 15:06:18 +0900 Subject: PCI: pciehp: make cmd_busy flag one bit The cmd_busy field in struct controller takes only two values 0 or 1. So it should be one bit. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 2bf8d28062e..0a368547e63 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -108,7 +108,7 @@ struct controller { u32 slot_cap; u8 cap_base; struct timer_list poll_timer; - int cmd_busy; + unsigned int cmd_busy:1; unsigned int no_cmd_complete:1; unsigned int link_active_reporting:1; unsigned int notification_enabled:1; -- cgit v1.2.3 From 62795041418dd63cd9ff6ff7bbdf1d1c513c189b Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Feb 2009 11:25:22 -0700 Subject: PCI: enhance physical slot debug information Convert usages of pr_debug to dev_dbg and add physical slot name. Note that we use dev_dbg on the struct pci_bus and still manually print out the PCI slot number (instead of calling dev_dbg on a pci_dev) because a struct pci_bus with empty physical slots will not have any pci_devs. Reviewed-by: Andrew Patterson Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/slot.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 5a8ccb4f604..21189447e54 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -1,8 +1,8 @@ /* * drivers/pci/slot.c * Copyright (C) 2006 Matthew Wilcox - * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P. - * Alex Chiang + * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P. + * Alex Chiang */ #include @@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj) struct pci_dev *dev; struct pci_slot *slot = to_pci_slot(kobj); - pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, - slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n", + slot->number, pci_slot_name(slot)); list_for_each_entry(dev, &slot->bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->number) @@ -248,9 +248,8 @@ placeholder: if (PCI_SLOT(dev->devfn) == slot_nr) dev->slot = slot; - /* Don't care if debug printk has a -1 for slot_nr */ - pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", - __func__, pci_domain_nr(parent), parent->number, slot_nr); + dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n", + slot_nr, pci_slot_name(slot)); out: kfree(slot_name); @@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot); */ void pci_destroy_slot(struct pci_slot *slot) { - pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__, - atomic_read(&slot->kobj.kref.refcount) - 1, - pci_domain_nr(slot->bus), slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", + slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); down_write(&pci_bus_sem); kobject_put(&slot->kobj); -- cgit v1.2.3 From 4c9c16867e4980fbd7d1fcc9516c9269ecb4d06f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 8 Dec 2008 16:19:14 +0100 Subject: PCI quirk: don't mark one netmos as class other Let it stay as serial, since it doesn't have subdevice in the form of 0x00PS. Signed-off-by: Jiri Slaby Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 92b9efe9bca..5aa2afb23ef 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1664,9 +1664,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev) * of parallel ports and is the number of serial ports. */ switch (dev->device) { + case PCI_DEVICE_ID_NETMOS_9835: + /* Well, this rule doesn't hold for the following 9835 device */ + if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && + dev->subsystem_device == 0x0299) + return; case PCI_DEVICE_ID_NETMOS_9735: case PCI_DEVICE_ID_NETMOS_9745: - case PCI_DEVICE_ID_NETMOS_9835: case PCI_DEVICE_ID_NETMOS_9845: case PCI_DEVICE_ID_NETMOS_9855: if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL && -- cgit v1.2.3 From 5fe5db05f64d0d10b563b1c13b58e4a52b190686 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 9 Feb 2009 14:53:47 +0800 Subject: PCI: Speed up device reset function For all devices need to do function level reset, currently we need wait for at least 200ms, which can be too long if we have lots of devices... The patch checked pending bit before msleep() to skip some unnecessary sleeping interval. Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5737b8a9a73..0b3e20f1b6f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2028,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + msleep(100); pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) { - dev_info(&dev->dev, "Busy after 100ms while trying to reset; " + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to reset; " "sleeping for 1 second\n"); - ssleep(1); - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) - dev_info(&dev->dev, "Still busy after 1s; " + ssleep(1); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) + dev_info(&dev->dev, "Still busy after 1s; " "proceeding with reset anyway\n"); - } +transaction_done: pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); mdelay(100); @@ -2066,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + msleep(100); pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) { - dev_info(&dev->dev, "Busy after 100ms while trying to" - " reset; sleeping for 1 second\n"); - ssleep(1); - pci_read_config_byte(dev, - cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) - dev_info(&dev->dev, "Still busy after 1s; " - "proceeding with reset anyway\n"); - } + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to" + " reset; sleeping for 1 second\n"); + ssleep(1); + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (status & PCI_AF_STATUS_TP) + dev_info(&dev->dev, "Still busy after 1s; " + "proceeding with reset anyway\n"); + +transaction_done: pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); mdelay(100); -- cgit v1.2.3 From 63f10f0f6df4e4e860b790d64bebfde85b540b0a Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 9 Feb 2009 15:59:29 +0900 Subject: PCI/ACPI: move _OSC code to pci_root.c Move PCI _OSC management code from drivers/pci/pci-acpi.c to drivers/acpi/pci_root.c. The benefits are - We no longer need struct osc_data and its management code (contents are moved to struct acpi_pci_root). This simplify the code, and we no longer care about kmalloc() failure. - We can make pci_acpi_osc_support() be a static function, which is called only from drivers/acpi/pci_root.c. Signed-off-by: Kenji Kaneshige Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Acked-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 215 ------------------------------------------------- 1 file changed, 215 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index deea8a187eb..fac5eddcefd 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -18,221 +18,6 @@ #include #include "pci.h" -struct acpi_osc_data { - acpi_handle handle; - u32 support_set; - u32 control_set; - u32 control_query; - int is_queried; - struct list_head sibiling; -}; -static LIST_HEAD(acpi_osc_data_list); - -struct acpi_osc_args { - u32 capbuf[3]; -}; - -static DEFINE_MUTEX(pci_acpi_lock); - -static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) -{ - struct acpi_osc_data *data; - - list_for_each_entry(data, &acpi_osc_data_list, sibiling) { - if (data->handle == handle) - return data; - } - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return NULL; - INIT_LIST_HEAD(&data->sibiling); - data->handle = handle; - list_add_tail(&data->sibiling, &acpi_osc_data_list); - return data; -} - -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, - 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; - -static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_args *osc_args, u32 *retval) -{ - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE]; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; - - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return status; - - if (!output.length) - return AE_NULL_OBJECT; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto out_kfree; - } - /* Need to ignore the bit0 in result code */ - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - if (errors & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (errors & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (errors & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (errors & OSC_CAPABILITIES_MASK_ERROR) { - if (flags & OSC_QUERY_ENABLE) - goto out_success; - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto out_kfree; - } - status = AE_ERROR; - goto out_kfree; - } -out_success: - *retval = *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - -out_kfree: - kfree(output.pointer); - return status; -} - -static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) -{ - acpi_status status; - u32 support_set, result; - struct acpi_osc_args osc_args; - - /* do _OSC query for all possible controls */ - support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); - osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - - status = acpi_run_osc(osc_data->handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) { - osc_data->support_set = support_set; - osc_data->control_query = result; - osc_data->is_queried = 1; - } - - return status; -} - -/* - * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature - * @flags: Bitmask of flags to support - * - * See the ACPI spec for the definition of the flags - */ -int pci_acpi_osc_support(acpi_handle handle, u32 flags) -{ - acpi_status status; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - int rc = 0; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return -ENOTTY; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - rc = -ENOMEM; - goto out; - } - - __acpi_query_osc(flags, osc_data); -out: - mutex_unlock(&pci_acpi_lock); - return rc; -} - -/** - * pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits - * - * Attempt to take control from Firmware on requested control bits. - **/ -acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{ - acpi_status status; - u32 control_req, control_set, result; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - struct acpi_osc_args osc_args; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - status = AE_ERROR; - goto out; - } - - control_req = (flags & OSC_CONTROL_MASKS); - if (!control_req) { - status = AE_TYPE; - goto out; - } - - /* No need to evaluate _OSC if the control was already granted. */ - if ((osc_data->control_set & control_req) == control_req) - goto out; - - if (!osc_data->is_queried) { - status = __acpi_query_osc(osc_data->support_set, osc_data); - if (ACPI_FAILURE(status)) - goto out; - } - - if ((osc_data->control_query & control_req) != control_req) { - status = AE_SUPPORT; - goto out; - } - - control_set = osc_data->control_set | control_req; - osc_args.capbuf[OSC_QUERY_TYPE] = 0; - osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; - status = acpi_run_osc(handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) - osc_data->control_set = result; -out: - mutex_unlock(&pci_acpi_lock); - return status; -} -EXPORT_SYMBOL(pci_osc_control_set); - /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". -- cgit v1.2.3 From 9f5404d8ea90bfa4d58a3936e5a3d0d28cecf60f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 9 Feb 2009 16:00:04 +0900 Subject: PCI/ACPI: rename pci_osc_control_set() - Rename pci_osc_control_set() to acpi_pci_osc_control_set() according to the other API names in drivers/acpi/pci_root.c. - Move _OSC related definitions to include/linux/acpi.h because _OSC related API is implemented in drivers/acpi/pci_root.c now. Signed-off-by: Kenji Kaneshige Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpi_pcihp.c | 5 ++--- drivers/pci/pcie/aer/aerdrv_acpi.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 1c114180106..f47bc74be56 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,9 +30,8 @@ #include #include #include +#include #include -#include -#include #define MY_NAME "acpi_pcihp" @@ -408,7 +407,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, flags); if (ACPI_SUCCESS(status)) goto got_one; kfree(string.pointer); diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index ebce26c3704..8edb2f300e8 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = pci_osc_control_set(handle, + status = acpi_pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); } -- cgit v1.2.3 From 35e1801ea637810830e653ffe7ff62c7048ae03a Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 11 Feb 2009 21:13:45 +0100 Subject: PCI hotplug: shpchp: fix bus number check to avoid false positive With for (busnr = 0; busnr <= end; busnr++) { ... } busnr reaches end + 1 after the loop. So fix the "no busses available" check to look for just busnr > end rather than >=. Signed-off-by: Roel Kluin Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/shpchp_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c index 138f161becc..aa315e52529 100644 --- a/drivers/pci/hotplug/shpchp_pci.c +++ b/drivers/pci/hotplug/shpchp_pci.c @@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot) busnr)) break; } - if (busnr >= end) { + if (busnr > end) { ctrl_err(ctrl, "No free bus for hot-added bridge\n"); pci_dev_put(dev); -- cgit v1.2.3 From b5fbf53324f65646154e172af350674d5a2a1629 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Feb 2009 22:27:02 +1100 Subject: PCI/MSI: Allow arch code to return the number of MSI-X available There is code in msix_capability_init() which, when the requested number of MSI-X couldn't be allocated, calculates how many MSI-X /could/ be allocated and returns that to the driver. That allows the driver to then make a second request, with a number of MSIs that should succeed. The current code requires the arch code to setup as many msi_descs as it can, and then return to the generic code. On some platforms the arch code may already know how many MSI-X it can allocate, before it sets up any of the msi_descs. So change the logic such that if the arch code returns a positive error code, that is taken to be the number of MSI-X that could be allocated. If the error code is negative we still calculate the number available using the old method. Because it's a little subtle, make sure the error return code from arch_setup_msi_irq() is always negative. That way only implementations of arch_setup_msi_irqs() need to be careful about returning a positive error code. Signed-off-by: Michael Ellerman Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 33adf323f06..dceea56f734 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -42,8 +42,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) list_for_each_entry(entry, &dev->msi_list, list) { ret = arch_setup_msi_irq(dev, entry); - if (ret) + if (ret < 0) return ret; + if (ret > 0) + return -ENOSPC; } return 0; @@ -487,7 +489,9 @@ static int msix_capability_init(struct pci_dev *dev, } ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); - if (ret) { + if (ret < 0) { + /* If we had some success report the number of irqs + * we succeeded in setting up. */ int avail = 0; list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq != 0) { @@ -495,14 +499,13 @@ static int msix_capability_init(struct pci_dev *dev, } } - msi_free_irqs(dev); + if (avail != 0) + ret = avail; + } - /* If we had some success report the number of irqs - * we succeeded in setting up. - */ - if (avail == 0) - avail = ret; - return avail; + if (ret) { + msi_free_irqs(dev); + return ret; } i = 0; -- cgit v1.2.3 From c48f1670f42b71f39f4a3bfba01ffb691cc9206c Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Tue, 3 Feb 2009 15:45:26 -0800 Subject: PCI: constify pci_bus_add_devices() drivers/pci/hotplug/fakephp.c:283: warning: passing argument 1 of 'pci_bus_add_devices' discards qualifiers from pointer target type Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 52b54f053be..118c77778d2 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus) * * Call hotplug for each new devices. */ -void pci_bus_add_devices(struct pci_bus *bus) +void pci_bus_add_devices(const struct pci_bus *bus) { struct pci_dev *dev; struct pci_bus *child; -- cgit v1.2.3 From ea7415512a07add2b09c070c9a5d1950833cf9b3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 18 Feb 2009 10:44:29 -0800 Subject: PCI: constify pci_bus_assign_resources() drivers/pci/hotplug/fakephp.c: In function 'pci_rescan_bus': drivers/pci/hotplug/fakephp.c:271: warning: passing argument 1 of 'pci_bus_assign_resources' discards qualifiers from pointer target type Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 70460894578..170a3eda9dd 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,7 +27,7 @@ #include -static void pbus_assign_resources_sorted(struct pci_bus *bus) +static void pbus_assign_resources_sorted(const struct pci_bus *bus) { struct pci_dev *dev; struct resource *res; @@ -495,7 +495,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_size_bridges); -void __ref pci_bus_assign_resources(struct pci_bus *bus) +void __ref pci_bus_assign_resources(const struct pci_bus *bus) { struct pci_bus *b; struct pci_dev *dev; -- cgit v1.2.3 From 10a0ef39fbd1d484c2bbc1ffd83d57ecef209140 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 17 Feb 2009 13:46:53 +0300 Subject: PCI/alpha: pci sysfs resources This closes http://bugzilla.kernel.org/show_bug.cgi?id=10893 which is a showstopper for X development on alpha. The generic HAVE_PCI_MMAP code (drivers/pci-sysfs.c) is not very useful since we have to deal with three different types of MMIO address spaces: sparse and dense mappings for old ev4/ev5 machines and "normal" 1:1 MMIO space (bwx) for ev56 and later. Also "write combine" mappings are meaningless on alpha - roughly speaking, alpha does write combining, IO reordering and other optimizations by default, unless user splits IO accesses with memory barriers. I think the cleanest way to deal with resource files on alpha is to convert the default no-op pci_create_resource_files() and pci_remove_resource_files() for !HAVE_PCI_MMAP case into __weak functions and override them with alpha specific ones. Another alpha hook is needed for "legacy_" resource files to handle sparse addressing (pci_adjust_legacy_attr). With the "standard" resourceN files on ev56/ev6 libpciaccess works "out of the box". Handling of resourceN_sparse/resourceN_dense files on older machines obviously requires some userland work. Sparse/dense stuff has been tested on sx164 (pca56/pyxis, normally uses bwx IO) with the kernel hacked into "cia compatible" mode. Signed-off-by: Ivan Kokshaysky Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index dfc4e0ddf24..1c892980140 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -492,6 +492,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr, return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io); } +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Stub implementation. Can be overridden by arch if necessary. + */ +void __weak +pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type) +{ + return; +} + /** * pci_create_legacy_files - create legacy I/O port and memory files * @b: bus to create files under @@ -518,6 +531,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_io->read = pci_read_legacy_io; b->legacy_io->write = pci_write_legacy_io; b->legacy_io->mmap = pci_mmap_legacy_io; + pci_adjust_legacy_attr(b, pci_mmap_io); error = device_create_bin_file(&b->dev, b->legacy_io); if (error) goto legacy_io_err; @@ -528,6 +542,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; b->legacy_mem->mmap = pci_mmap_legacy_mem; + pci_adjust_legacy_attr(b, pci_mmap_mem); error = device_create_bin_file(&b->dev, b->legacy_mem); if (error) goto legacy_mem_err; @@ -719,8 +734,8 @@ static int pci_create_resource_files(struct pci_dev *pdev) return 0; } #else /* !HAVE_PCI_MMAP */ -static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; } -static inline void pci_remove_resource_files(struct pci_dev *dev) { return; } +int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; } +void __weak pci_remove_resource_files(struct pci_dev *dev) { return; } #endif /* HAVE_PCI_MMAP */ /** -- cgit v1.2.3 From ae40582e9959cdb7bfe4b918be8e3d19f9511798 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Feb 2009 20:16:07 -0800 Subject: PCI: pcie_portdriver: fix pcie_port_device_remove pcie_port_device_remove currently calls the remove method of port drivers twice. Ouch! We are calling device_for_each_child multiple times for no apparent reason. So make it simple. Place put_device and device_unregister into remove_iter, and throw out the rest. Only call device_for_each_child once. The code is simpler and actually works! Signed-off-by: Eric W. Biederman Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 3aea92a9292..569af0015fc 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -456,16 +456,9 @@ int pcie_port_device_resume(struct pci_dev *dev) static int remove_iter(struct device *dev, void *data) { - struct pcie_port_service_driver *service_driver; - if (dev->bus == &pcie_port_bus_type) { - if (dev->driver) { - service_driver = to_service_driver(dev->driver); - if (service_driver->remove) - service_driver->remove(to_pcie_device(dev)); - } - *(unsigned long*)data = (unsigned long)dev; - return 1; + put_device(dev); + device_unregister(dev); } return 0; } @@ -480,18 +473,8 @@ static int remove_iter(struct device *dev, void *data) void pcie_port_device_remove(struct pci_dev *dev) { struct pcie_port_data *port_data = pci_get_drvdata(dev); - int status; - - do { - unsigned long device_addr; - status = device_for_each_child(&dev->dev, &device_addr, remove_iter); - if (status) { - struct device *device = (struct device*)device_addr; - put_device(device); - device_unregister(device); - } - } while (status); + device_for_each_child(&dev->dev, NULL, remove_iter); switch (port_data->port_irq_mode) { case PCIE_PORT_MSIX_MODE: -- cgit v1.2.3 From 3a3c244c9a355105bc193fde873c73727bf87192 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 15 Feb 2009 22:32:48 +0100 Subject: PCI: PCIe portdrv: Implement pm object Implement pm object for the PCI Express port driver in order to use the new power management framework and reduce the code size. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_core.c | 4 ++-- drivers/pci/pcie/aer/aerdrv.c | 6 ------ drivers/pci/pcie/portdrv.h | 4 ++-- drivers/pci/pcie/portdrv_core.c | 14 ++++++-------- drivers/pci/pcie/portdrv_pci.c | 31 +++++++++++++++---------------- 5 files changed, 25 insertions(+), 34 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 3d21bbba330..fb254b2454d 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev) } #ifdef CONFIG_PM -static int pciehp_suspend (struct pcie_device *dev, pm_message_t state) +static int pciehp_suspend (struct pcie_device *dev) { dev_info(&dev->device, "%s ENTRY\n", __func__); return 0; @@ -503,7 +503,7 @@ static int pciehp_resume (struct pcie_device *dev) } return 0; } -#endif +#endif /* PM */ static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e11c0319406..32ade5af927 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -40,9 +40,6 @@ MODULE_LICENSE("GPL"); static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); -static int aer_suspend(struct pcie_device *dev, pm_message_t state) -{return 0;} -static int aer_resume(struct pcie_device *dev) {return 0;} static pci_ers_result_t aer_error_detected(struct pci_dev *dev, enum pci_channel_state error); static void aer_error_resume(struct pci_dev *dev); @@ -61,9 +58,6 @@ static struct pcie_port_service_driver aerdriver = { .probe = aer_probe, .remove = aer_remove, - .suspend = aer_suspend, - .resume = aer_resume, - .err_handler = &aer_error_handlers, .reset_link = aer_root_reset, diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 5b818bd835e..17ad53868f9 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -38,8 +38,8 @@ extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM -extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state); -extern int pcie_port_device_resume(struct pci_dev *dev); +extern int pcie_port_device_suspend(struct device *dev); +extern int pcie_port_device_resume(struct device *dev); #endif extern void pcie_port_device_remove(struct pci_dev *dev); extern int __must_check pcie_port_bus_register(void); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 569af0015fc..5a5bfe7cdf5 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -410,13 +410,12 @@ int pcie_port_device_register(struct pci_dev *dev) static int suspend_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; - pm_message_t state = * (pm_message_t *) data; if ((dev->bus == &pcie_port_bus_type) && (dev->driver)) { service_driver = to_service_driver(dev->driver); if (service_driver->suspend) - service_driver->suspend(to_pcie_device(dev), state); + service_driver->suspend(to_pcie_device(dev)); } return 0; } @@ -424,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data) /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle - * @state: Representation of system power management transition in progress */ -int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) +int pcie_port_device_suspend(struct device *dev) { - return device_for_each_child(&dev->dev, &state, suspend_iter); + return device_for_each_child(dev, NULL, suspend_iter); } static int resume_iter(struct device *dev, void *data) @@ -448,11 +446,11 @@ static int resume_iter(struct device *dev, void *data) * pcie_port_device_suspend - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct pci_dev *dev) +int pcie_port_device_resume(struct device *dev) { - return device_for_each_child(&dev->dev, NULL, resume_iter); + return device_for_each_child(dev, NULL, resume_iter); } -#endif +#endif /* PM */ static int remove_iter(struct device *dev, void *data) { diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 94d0e2af9ba..a61f4930d67 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -44,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) -{ - return pcie_port_device_suspend(dev, state); +static struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore = pcie_port_device_resume, +}; -} +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) -static int pcie_portdrv_resume(struct pci_dev *dev) -{ - pci_set_master(dev); - return pcie_port_device_resume(dev); -} -#else -#define pcie_portdrv_suspend NULL -#define pcie_portdrv_resume NULL -#endif +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ /* * pcie_portdrv_probe - Probe PCI-Express port devices @@ -268,10 +268,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .suspend = pcie_portdrv_suspend, - .resume = pcie_portdrv_resume, - .err_handler = &pcie_portdrv_err_handler, + + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init pcie_portdrv_init(void) -- cgit v1.2.3 From 267efd7eec5eca62f32f8c9bc1721b578d5da963 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:13:20 +0900 Subject: PCI hotplug: fix wrong assumption in acpi_get_hp_params_from_firmware Current acpi_get_hp_params_from_firmware() has a assumption that pci_bus->self is NULL on the root pci bus. But it might not true on some platforms. Because of this wrong assumption, current acpi_get_hp_params_from_firmware() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpi_pcihp.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index f47bc74be56..09a84402986 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -332,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, { acpi_status status = AE_NOT_FOUND; acpi_handle handle, phandle; - struct pci_bus *pbus = bus; - struct pci_dev *pdev; + struct pci_bus *pbus; - do { - pdev = pbus->self; - if (!pdev) { - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(pbus), pbus->number); + handle = NULL; + for (pbus = bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) break; - } - handle = DEVICE_ACPI_HANDLE(&(pdev->dev)); - pbus = pbus->parent; - } while (!handle); + } /* * _HPP settings apply to all child buses, until another _HPP is -- cgit v1.2.3 From d391f00f0e7fb6d883c6724b31a1799e19a584c5 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:13:59 +0900 Subject: PCI hotplug: fix wrong assumption in acpi_get_hp_hw_control_from_firmware Current acpi_get_hp_hw_control_from_firmware() has a assumption that pci_bus->self is NULL on a PCI root bus. But it might not be true on some platforms. Because of this wrong assumption, current acpi_get_hp_hw_control_from_firmware() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpi_pcihp.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 09a84402986..fbc63d5e459 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -372,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware); * * Attempt to take hotplug control from firmware. */ -int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) { acpi_status status; acpi_handle chandle, handle; - struct pci_dev *pdev = dev; - struct pci_bus *parent; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | @@ -409,26 +407,18 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL }; } - pdev = dev; - handle = DEVICE_ACPI_HANDLE(&dev->dev); - while (!handle) { + handle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!handle) { /* * This hotplug controller was not listed in the ACPI name * space at all. Try to get acpi handle of parent pci bus. */ - if (!pdev || !pdev->bus->parent) - break; - parent = pdev->bus->parent; - dbg("Could not find %s in acpi namespace, trying parent\n", - pci_name(pdev)); - if (!parent->self) - /* Parent must be a host bridge */ - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(parent), - parent->number); - else - handle = DEVICE_ACPI_HANDLE(&(parent->self->dev)); - pdev = parent->self; + struct pci_bus *pbus; + for (pbus = pdev->bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) + break; + } } while (handle) { @@ -447,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) } dbg("Cannot get control of hotplug hardware for pci %s\n", - pci_name(dev)); + pci_name(pdev)); kfree(string.pointer); return -ENODEV; got_one: - dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev), - (char *)string.pointer); + dbg("Gained control for hotplug HW for pci %s (%s)\n", + pci_name(pdev), (char *)string.pointer); kfree(string.pointer); return 0; } -- cgit v1.2.3 From 151ab36a2ea0b3181d103f7244636e0d16e685de Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:14:36 +0900 Subject: PCI: fix wrong assumption in pci_find_upstream_pcie_bridge Current pci_find_upstream_pcie_bridge() has a wrong assumption that pci_bus->self is NULL on the root pci bus. But it might not true on some platforms. Because of this wrong assumption, current pci_find_upstream_pcie_bridge() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/search.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 5af8bd53814..710d4ea6956 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev) if (pdev->is_pcie) return NULL; while (1) { - if (!pdev->bus->self) + if (!pdev->bus->parent) break; pdev = pdev->bus->self; /* a p2p bridge */ -- cgit v1.2.3 From f92d4e29d785f1d4217dee7f1ae6ff7140547ed5 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:15:16 +0900 Subject: PCI: fix wrong assumption in pci_read_bridge_bases Current pci_read_bridge_bases() has an assumption that pci_bus->self is NULL on the pci root bus (It checks pci_bus->self to see if the pci bus is root bus). But is might not true on some platforms. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 55ec44a27e8..23362e8c696 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) struct resource *res; int i; - if (!dev) /* It's a host bus, nothing to read */ + if (!child->parent) /* It's a host bus, nothing to read */ return; if (dev->transparent) { -- cgit v1.2.3 From c2a3072e010943ac749794622f26b3ef54de25be Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:15:45 +0900 Subject: PCI: fix wrong assumption in pci_get_interrupt_pin Current pci_get_interrupt_pin() seems to have an assumption that pci_bus->self is NULL on the root pci bus. But it might not be true on some platforms. Because of this wrong assumption, current pci_get_interrupt_pin() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0b3e20f1b6f..0cfed9e28ea 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1484,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) if (!pin) return -1; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } -- cgit v1.2.3 From c74d724462d1845535667f4d3f720e02e3432e53 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:16:13 +0900 Subject: PCI: fix wrong assumption in pci_common_swizzle Current pci_common_swizzle() seems to have a assumption that pci_bus->self is NULL on the pci root bus. But it might not be true on some platforms. Because of this wrong assumption, pci_common_swizzle() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0cfed9e28ea..8310dc2f943 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1504,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp) { u8 pin = *pinp; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } -- cgit v1.2.3 From 0994375e9614f78657031e04e30019b9cdb62795 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Mon, 23 Feb 2009 21:52:23 -0800 Subject: PCI: add remove_id sysfs entry This adds a remove_id sysfs entry to allow users of new_id to later remove the added dynid. One use case is management tools that want to dynamically bind/unbind devices to pci-stub driver while devices are assigned to KVM guests. Rather than having to track which driver was originally bound to the driver, a mangement tool can simply: Guest uses device Signed-off-by: Chris Wright Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 93eac142358..87a5ddbb324 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) } static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); +/** + * store_remove_id - remove a PCI device ID from this driver + * @driver: target device driver + * @buf: buffer for scanning device ID data + * @count: input size + * + * Removes a dynamic pci device ID to this driver. + */ +static ssize_t +store_remove_id(struct device_driver *driver, const char *buf, size_t count) +{ + struct pci_dynid *dynid, *n; + struct pci_driver *pdrv = to_pci_driver(driver); + __u32 vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields = 0; + int retval = -ENODEV; + + fields = sscanf(buf, "%x %x %x %x %x %x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + if (fields < 2) + return -EINVAL; + + spin_lock(&pdrv->dynids.lock); + list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) { + struct pci_device_id *id = &dynid->id; + if ((id->vendor == vendor) && + (id->device == device) && + (subvendor == PCI_ANY_ID || id->subvendor == subvendor) && + (subdevice == PCI_ANY_ID || id->subdevice == subdevice) && + !((id->class ^ class) & class_mask)) { + list_del(&dynid->node); + kfree(dynid); + retval = 0; + break; + } + } + spin_unlock(&pdrv->dynids.lock); + + if (retval) + return retval; + return count; +} +static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id); + static void pci_free_dynids(struct pci_driver *drv) { @@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv) { driver_remove_file(&drv->driver, &driver_attr_new_id); } + +static int +pci_create_removeid_file(struct pci_driver *drv) +{ + int error = 0; + if (drv->probe != NULL) + error = driver_create_file(&drv->driver,&driver_attr_remove_id); + return error; +} + +static void pci_remove_removeid_file(struct pci_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_remove_id); +} #else /* !CONFIG_HOTPLUG */ static inline void pci_free_dynids(struct pci_driver *drv) {} static inline int pci_create_newid_file(struct pci_driver *drv) @@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv) return 0; } static inline void pci_remove_newid_file(struct pci_driver *drv) {} +static inline int pci_create_removeid_file(struct pci_driver *drv) +{ + return 0; +} +static inline void pci_remove_removeid_file(struct pci_driver *drv) {} #endif /** @@ -852,13 +917,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, /* register with core */ error = driver_register(&drv->driver); if (error) - return error; + goto out; error = pci_create_newid_file(drv); if (error) - driver_unregister(&drv->driver); + goto out_newid; + error = pci_create_removeid_file(drv); + if (error) + goto out_removeid; +out: return error; + +out_removeid: + pci_remove_newid_file(drv); +out_newid: + driver_unregister(&drv->driver); + goto out; } /** @@ -874,6 +949,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, void pci_unregister_driver(struct pci_driver *drv) { + pci_remove_removeid_file(drv); pci_remove_newid_file(drv); driver_unregister(&drv->driver); pci_free_dynids(drv); -- cgit v1.2.3 From 24d27553390c69d11cdbd930d635193956fc295f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:06 -0400 Subject: PCI MSI: Replace 'type' with 'is_msix' By changing from a 5-bit field to a 1-bit field, we free up some bits that can be used by a later patch. Also rearrange the fields for better packing on 64-bit platforms (reducing the size of msi_desc from 72 bytes to 64 bytes). Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 115 ++++++++++++++++++------------------------------------ 1 file changed, 39 insertions(+), 76 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index dceea56f734..b3db4388f97 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -111,20 +111,10 @@ static void msix_flush_writes(struct irq_desc *desc) entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - /* nothing to do */ - break; - case PCI_CAP_ID_MSIX: - { + if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; } } @@ -143,32 +133,23 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - if (entry->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long)entry->mask_base; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); - } else { - return 0; - } - break; - case PCI_CAP_ID_MSIX: - { + if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; writel(flag, entry->mask_base + offset); readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; + } else { + int pos; + u32 mask_bits; + + if (!entry->msi_attrib.maskbit) + return 0; + + pos = (long)entry->mask_base; + pci_read_config_dword(entry->dev, pos, &mask_bits); + mask_bits &= ~mask; + mask_bits |= flag & mask; + pci_write_config_dword(entry->dev, pos, mask_bits); } entry->msi_attrib.masked = !!flag; return 1; @@ -177,9 +158,14 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch(entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; u16 data; @@ -195,21 +181,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_read_config_word(dev, msi_data_reg(pos, 0), &data); } msg->data = data; - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } } @@ -223,9 +194,17 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + writel(msg->address_lo, + base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + writel(msg->address_hi, + base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -240,23 +219,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_write_config_word(dev, msi_data_reg(pos, 0), msg->data); } - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } entry->msg = *msg; } @@ -393,7 +355,7 @@ static int msi_capability_init(struct pci_dev *dev) if (!entry) return -ENOMEM; - entry->msi_attrib.type = PCI_CAP_ID_MSI; + entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); @@ -475,7 +437,7 @@ static int msix_capability_init(struct pci_dev *dev, break; j = entries[i].entry; - entry->msi_attrib.type = PCI_CAP_ID_MSIX; + entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; entry->msi_attrib.maskbit = 1; @@ -619,12 +581,13 @@ void pci_msi_shutdown(struct pci_dev* dev) struct irq_desc *desc = irq_to_desc(dev->irq); msi_set_mask_bits(desc, mask, ~mask); } - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (!entry->dev || entry->msi_attrib.is_msix) return; /* Restore dev->irq to its default pin-assertion irq */ dev->irq = entry->msi_attrib.default_irq; } + void pci_disable_msi(struct pci_dev* dev) { struct msi_desc *entry; @@ -635,7 +598,7 @@ void pci_disable_msi(struct pci_dev* dev) pci_msi_shutdown(dev); entry = list_entry(dev->msi_list.next, struct msi_desc, list); - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (!entry->dev || entry->msi_attrib.is_msix) return; msi_free_irqs(dev); @@ -654,7 +617,7 @@ static int msi_free_irqs(struct pci_dev* dev) arch_teardown_msi_irqs(dev); list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { - if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) { + if (entry->msi_attrib.is_msix) { writel(1, entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); -- cgit v1.2.3 From 379f5327a86f7822a51ec7d088a085167724df75 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:07 -0400 Subject: PCI MSI: msi_desc->dev is always initialised By passing the pci_dev into alloc_msi_entry() we can be sure that the ->dev entry is always assigned and so we don't need to check it. Also, we used kzalloc() so we don't need to initialise ->irq to 0. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index b3db4388f97..a658c0f34e1 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -110,7 +110,7 @@ static void msix_flush_writes(struct irq_desc *desc) struct msi_desc *entry; entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); + BUG_ON(!entry); if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; @@ -132,7 +132,7 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) struct msi_desc *entry; entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); + BUG_ON(!entry); if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; @@ -248,19 +248,16 @@ void unmask_msi_irq(unsigned int irq) static int msi_free_irqs(struct pci_dev* dev); -static struct msi_desc* alloc_msi_entry(void) +static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) { - struct msi_desc *entry; - - entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL); - if (!entry) + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) return NULL; - INIT_LIST_HEAD(&entry->list); - entry->irq = 0; - entry->dev = NULL; + INIT_LIST_HEAD(&desc->list); + desc->dev = dev; - return entry; + return desc; } static void pci_intx_for_msi(struct pci_dev *dev, int enable) @@ -351,7 +348,7 @@ static int msi_capability_init(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_MSI); pci_read_config_word(dev, msi_control_reg(pos), &control); /* MSI Entry Initialization */ - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) return -ENOMEM; @@ -362,7 +359,6 @@ static int msi_capability_init(struct pci_dev *dev) entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - entry->dev = dev; if (entry->msi_attrib.maskbit) { unsigned int base, maskbits, temp; @@ -432,7 +428,7 @@ static int msix_capability_init(struct pci_dev *dev, /* MSI-X Table Initialization */ for (i = 0; i < nvec; i++) { - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) break; @@ -444,7 +440,6 @@ static int msix_capability_init(struct pci_dev *dev, entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; - entry->dev = dev; entry->mask_base = base; list_add_tail(&entry->list, &dev->msi_list); @@ -581,7 +576,7 @@ void pci_msi_shutdown(struct pci_dev* dev) struct irq_desc *desc = irq_to_desc(dev->irq); msi_set_mask_bits(desc, mask, ~mask); } - if (!entry->dev || entry->msi_attrib.is_msix) + if (entry->msi_attrib.is_msix) return; /* Restore dev->irq to its default pin-assertion irq */ @@ -598,7 +593,7 @@ void pci_disable_msi(struct pci_dev* dev) pci_msi_shutdown(dev); entry = list_entry(dev->msi_list.next, struct msi_desc, list); - if (!entry->dev || entry->msi_attrib.is_msix) + if (entry->msi_attrib.is_msix) return; msi_free_irqs(dev); -- cgit v1.2.3 From 264d9caaa1c574c0274b019a810abfe957391005 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:08 -0400 Subject: PCI MSI: Use mask_pos instead of mask_base when appropriate MSI interrupts have a mask_pos where MSI-X have a mask_base. Use a transparent union to get rid of some ugly casts. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a658c0f34e1..fcde04df6df 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -145,7 +145,7 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) if (!entry->msi_attrib.maskbit) return 0; - pos = (long)entry->mask_base; + pos = entry->mask_pos; pci_read_config_dword(entry->dev, pos, &mask_bits); mask_bits &= ~mask; mask_bits |= flag & mask; @@ -363,8 +363,7 @@ static int msi_capability_init(struct pci_dev *dev) unsigned int base, maskbits, temp; base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_base = (void __iomem *)(long)base; - + entry->mask_pos = base; /* All MSIs are unmasked by default, Mask them all */ pci_read_config_dword(dev, base, &maskbits); temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); -- cgit v1.2.3 From f2440d9acbe866b917b16cc0f927366341ce9215 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:09 -0400 Subject: PCI MSI: Refactor interrupt masking code Since most of the callers already know whether they have an MSI or an MSI-X capability, split msi_set_mask_bits() into msi_mask_irq() and msix_mask_irq(). The only callers which don't (mask_msi_irq() and unmask_msi_irq()) can share code in msi_set_mask_bit(). This then becomes the only caller of msix_flush_writes(), so we can inline it. The flushing read can be to any address that belongs to the device, so we can eliminate the calculation too. We can also get rid of maskbits_mask from struct msi_desc and simply recalculate it on the rare occasion that we need it. The single-bit 'masked' element is replaced by a copy of the 32-bit 'masked' register, so this patch does not affect the size of msi_desc. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 155 ++++++++++++++++++++++++++---------------------------- 1 file changed, 75 insertions(+), 80 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index fcde04df6df..adcc7824257 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -105,17 +105,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) return (1 << (1 << x)) - 1; } -static void msix_flush_writes(struct irq_desc *desc) +static inline __attribute_const__ u32 msi_capable_mask(u16 control) { - struct msi_desc *entry; + return msi_mask((control >> 1) & 7); +} - entry = get_irq_desc_msi(desc); - BUG_ON(!entry); - if (entry->msi_attrib.is_msix) { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - readl(entry->mask_base + offset); - } +static inline __attribute_const__ u32 msi_enabled_mask(u16 control) +{ + return msi_mask((control >> 4) & 7); } /* @@ -127,32 +124,57 @@ static void msix_flush_writes(struct irq_desc *desc) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - struct msi_desc *entry; + u32 mask_bits = desc->masked; - entry = get_irq_desc_msi(desc); - BUG_ON(!entry); - if (entry->msi_attrib.is_msix) { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - writel(flag, entry->mask_base + offset); - readl(entry->mask_base + offset); - } else { - int pos; - u32 mask_bits; + if (!desc->msi_attrib.maskbit) + return; + + mask_bits &= ~mask; + mask_bits |= flag; + pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; +} + +/* + * This internal function does not flush PCI writes to the device. + * All users must ensure that they read from the device before either + * assuming that the device state is up to date, or returning out of this + * file. This saves a few milliseconds when initialising devices with lots + * of MSI-X interrupts. + */ +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + mask_bits &= ~1; + mask_bits |= flag; + writel(mask_bits, desc->mask_base + offset); + desc->masked = mask_bits; +} - if (!entry->msi_attrib.maskbit) - return 0; +static void msi_set_mask_bit(unsigned irq, u32 flag) +{ + struct msi_desc *desc = get_irq_msi(irq); - pos = entry->mask_pos; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~mask; - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); + if (desc->msi_attrib.is_msix) { + msix_mask_irq(desc, flag); + readl(desc->mask_base); /* Flush write to device */ + } else { + msi_mask_irq(desc, 1, flag); } - entry->msi_attrib.masked = !!flag; - return 1; +} + +void mask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 1); +} + +void unmask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 0); } void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) @@ -230,22 +252,6 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) write_msi_msg_desc(desc, msg); } -void mask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 1); - msix_flush_writes(desc); -} - -void unmask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 0); - msix_flush_writes(desc); -} - static int msi_free_irqs(struct pci_dev* dev); static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) @@ -281,13 +287,9 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) { - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, - entry->msi_attrib.masked); - } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; control |= PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); @@ -307,9 +309,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { - struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); + msix_mask_irq(entry, entry->masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -342,6 +343,7 @@ static int msi_capability_init(struct pci_dev *dev) struct msi_desc *entry; int pos, ret; u16 control; + unsigned mask; msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ @@ -356,21 +358,16 @@ static int msi_capability_init(struct pci_dev *dev) entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - if (entry->msi_attrib.maskbit) { - unsigned int base, maskbits, temp; - - base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_pos = base; - /* All MSIs are unmasked by default, Mask them all */ - pci_read_config_dword(dev, base, &maskbits); - temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); - maskbits |= temp; - pci_write_config_dword(dev, base, maskbits); - entry->msi_attrib.maskbits_mask = temp; - } + + entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); + /* All MSIs are unmasked by default, Mask them all */ + if (entry->msi_attrib.maskbit) + pci_read_config_dword(dev, entry->mask_pos, &entry->masked); + mask = msi_capable_mask(control); + msi_mask_irq(entry, mask, mask); + list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ @@ -435,11 +432,12 @@ static int msix_capability_init(struct pci_dev *dev, entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; - entry->msi_attrib.maskbit = 1; - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; entry->mask_base = base; + entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + msix_mask_irq(entry, 1); list_add_tail(&entry->list, &dev->msi_list); } @@ -556,9 +554,11 @@ int pci_enable_msi(struct pci_dev* dev) } EXPORT_SYMBOL(pci_enable_msi); -void pci_msi_shutdown(struct pci_dev* dev) +void pci_msi_shutdown(struct pci_dev *dev) { - struct msi_desc *entry; + struct msi_desc *desc; + u32 mask; + u16 ctrl; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -568,18 +568,13 @@ void pci_msi_shutdown(struct pci_dev* dev) dev->msi_enabled = 0; BUG_ON(list_empty(&dev->msi_list)); - entry = list_entry(dev->msi_list.next, struct msi_desc, list); - /* Return the the pci reset with msi irqs unmasked */ - if (entry->msi_attrib.maskbit) { - u32 mask = entry->msi_attrib.maskbits_mask; - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, mask, ~mask); - } - if (entry->msi_attrib.is_msix) - return; + desc = list_first_entry(&dev->msi_list, struct msi_desc, list); + pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl); + mask = msi_capable_mask(ctrl); + msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ - dev->irq = entry->msi_attrib.default_irq; + dev->irq = desc->msi_attrib.default_irq; } void pci_disable_msi(struct pci_dev* dev) -- cgit v1.2.3 From 1c8d7b0a562da06d3ebe83f01b1ed553205d1ae4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:10 -0400 Subject: PCI MSI: Add support for multiple MSI Add the new API pci_enable_msi_block() to allow drivers to request multiple MSI and reimplement pci_enable_msi in terms of pci_enable_msi_block. Ensure that the architecture back ends don't have to know about multiple MSI. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 91 ++++++++++++++++++++++++++++++++++++++----------------- drivers/pci/msi.h | 6 ---- 2 files changed, 64 insertions(+), 33 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index adcc7824257..6f2e6295e77 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -40,6 +40,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *entry; int ret; + /* + * If an architecture wants to support multiple MSI, it needs to + * override arch_setup_msi_irqs() + */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(entry, &dev->msi_list, list) { ret = arch_setup_msi_irq(dev, entry); if (ret < 0) @@ -58,8 +65,12 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) struct msi_desc *entry; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq != 0) - arch_teardown_msi_irq(entry->irq); + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); } } #endif @@ -163,7 +174,8 @@ static void msi_set_mask_bit(unsigned irq, u32 flag) msix_mask_irq(desc, flag); readl(desc->mask_base); /* Flush write to device */ } else { - msi_mask_irq(desc, 1, flag); + unsigned offset = irq - desc->dev->irq; + msi_mask_irq(desc, 1 << offset, flag << offset); } } @@ -229,6 +241,12 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; + u16 msgctl; + + pci_read_config_word(dev, msi_control_reg(pos), &msgctl); + msgctl &= ~PCI_MSI_FLAGS_QSIZE; + msgctl |= entry->msi_attrib.multiple << 4; + pci_write_config_word(dev, msi_control_reg(pos), msgctl); pci_write_config_dword(dev, msi_lower_address_reg(pos), msg->address_lo); @@ -291,7 +309,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; - control |= PCI_MSI_FLAGS_ENABLE; + control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); } @@ -332,13 +350,15 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state); /** * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate * - * Setup the MSI capability structure of device function with a single - * MSI irq, regardless of device function is capable of handling - * multiple messages. A return of zero indicates the successful setup - * of an entry zero with the new MSI irq or non-zero for otherwise. - **/ -static int msi_capability_init(struct pci_dev *dev) + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI irq. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec) { struct msi_desc *entry; int pos, ret; @@ -371,7 +391,7 @@ static int msi_capability_init(struct pci_dev *dev) list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ - ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI); + ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { msi_free_irqs(dev); return ret; @@ -524,35 +544,48 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) } /** - * pci_enable_msi - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function + * pci_enable_msi_block - configure device's MSI capability structure + * @dev: device to configure + * @nvec: number of interrupts to configure * - * Setup the MSI capability structure of device function with - * a single MSI irq upon its software driver call to request for - * MSI mode enabled on its hardware device function. A return of zero - * indicates the successful setup of an entry zero with the new MSI - * irq or non-zero for otherwise. - **/ -int pci_enable_msi(struct pci_dev* dev) + * Allocate IRQs for a device with the MSI capability. + * This function returns a negative errno if an error occurs. If it + * is unable to allocate the number of interrupts requested, it returns + * the number of interrupts it might be able to allocate. If it successfully + * allocates at least the number of interrupts requested, it returns 0 and + * updates the @dev's irq member to the lowest new interrupt number; the + * other interrupt numbers allocated to this device are consecutive. + */ +int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { - int status; + int status, pos, maxvec; + u16 msgctl; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + if (nvec > maxvec) + return maxvec; - status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); if (status) return status; WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested for MSI-X irqs */ + /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, "can't enable MSI " "(MSI-X already enabled)\n"); return -EINVAL; } - status = msi_capability_init(dev); + + status = msi_capability_init(dev, nvec); return status; } -EXPORT_SYMBOL(pci_enable_msi); +EXPORT_SYMBOL(pci_enable_msi_block); void pci_msi_shutdown(struct pci_dev *dev) { @@ -599,8 +632,12 @@ static int msi_free_irqs(struct pci_dev* dev) struct msi_desc *entry, *tmp; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq) - BUG_ON(irq_has_action(entry->irq)); + int i, nvec; + if (!entry->irq) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + BUG_ON(irq_has_action(entry->irq + i)); } arch_teardown_msi_irqs(dev); diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 3898f523714..71f4df2ef65 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -20,14 +20,8 @@ #define msi_mask_bits_reg(base, is64bit) \ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) #define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE -#define multi_msi_capable(control) \ - (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) -#define multi_msi_enable(control, num) \ - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) -#define msi_enable(control, num) multi_msi_enable(control, num); \ - control |= PCI_MSI_FLAGS_ENABLE #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) -- cgit v1.2.3 From 32a9a682bef2f6fce7026bd94d1ce20028b0e52d Mon Sep 17 00:00:00 2001 From: Yuji Shimada Date: Mon, 16 Mar 2009 17:13:39 +0900 Subject: PCI: allow assignment of memory resources with a specified alignment This patch allows memory resources to be assigned with a specified alignment at boot-time or run-time. The patch is useful when we use PCI pass-through, because page-aligned memory resources are required to securely share PCI resources with guest drivers. If you want to assign the resource at boot time, please set "pci=resource_alignment=" boot parameter. This is format of "pci=resource_alignment=" boot parameter: [@][:]:.[; ...] Specifies alignment and device to reassign aligned memory resources. If is not specified, PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource windows need to be expanded. This is example: pci=resource_alignment=20@07:00.0;18@0f:00.0;00:1d.7 If you want to assign the resource at run-time, please set "/sys/bus/pci/resource_alignment" file, and hot-remove the device and hot-add the device. For this purpose, fakephp or PCI hotplug interfaces can be used. The format of "/sys/bus/pci/resource_alignment" file is the same with boot parameter. You can use "," instead of ";". For example: # cd /sys/bus/pci # echo -n 20@12:00.0 > resource_alignment # echo 1 > devices/0000:12:00.0/remove # echo 1 > rescan Reviewed-by: Alex Chiang Reviewed-by: Yu Zhao Signed-off-by: Yuji Shimada Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 6 +++ drivers/pci/quirks.c | 60 ++++++++++++++++++++++++ drivers/pci/setup-res.c | 15 ++++++ 4 files changed, 201 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8310dc2f943..a35a8b2ba63 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -20,6 +20,8 @@ #include #include #include /* isa_dma_bridge_buggy */ +#include +#include #include "pci.h" unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; @@ -2370,6 +2372,121 @@ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) return 0; } +#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE +static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; +spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED; + +/** + * pci_specified_resource_alignment - get resource alignment specified by user. + * @dev: the PCI device to get + * + * RETURNS: Resource alignment if it is specified. + * Zero if it is not specified. + */ +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +{ + int seg, bus, slot, func, align_order, count; + resource_size_t align = 0; + char *p; + + spin_lock(&resource_alignment_lock); + p = resource_alignment_param; + while (*p) { + count = 0; + if (sscanf(p, "%d%n", &align_order, &count) == 1 && + p[count] == '@') { + p += count + 1; + } else { + align_order = -1; + } + if (sscanf(p, "%x:%x:%x.%x%n", + &seg, &bus, &slot, &func, &count) != 4) { + seg = 0; + if (sscanf(p, "%x:%x.%x%n", + &bus, &slot, &func, &count) != 3) { + /* Invalid format */ + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; + } + } + p += count; + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + if (align_order == -1) { + align = PAGE_SIZE; + } else { + align = 1 << align_order; + } + /* Found */ + break; + } + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + spin_unlock(&resource_alignment_lock); + return align; +} + +/** + * pci_is_reassigndev - check if specified PCI is target device to reassign + * @dev: the PCI device to check + * + * RETURNS: non-zero for PCI device is a target device to reassign, + * or zero is not. + */ +int pci_is_reassigndev(struct pci_dev *dev) +{ + return (pci_specified_resource_alignment(dev) != 0); +} + +ssize_t pci_set_resource_alignment_param(const char *buf, size_t count) +{ + if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1) + count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1; + spin_lock(&resource_alignment_lock); + strncpy(resource_alignment_param, buf, count); + resource_alignment_param[count] = '\0'; + spin_unlock(&resource_alignment_lock); + return count; +} + +ssize_t pci_get_resource_alignment_param(char *buf, size_t size) +{ + size_t count; + spin_lock(&resource_alignment_lock); + count = snprintf(buf, size, "%s", resource_alignment_param); + spin_unlock(&resource_alignment_lock); + return count; +} + +static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf) +{ + return pci_get_resource_alignment_param(buf, PAGE_SIZE); +} + +static ssize_t pci_resource_alignment_store(struct bus_type *bus, + const char *buf, size_t count) +{ + return pci_set_resource_alignment_param(buf, count); +} + +BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, + pci_resource_alignment_store); + +static int __init pci_resource_alignment_sysfs_init(void) +{ + return bus_create_file(&pci_bus_type, + &bus_attr_resource_alignment); +} + +late_initcall(pci_resource_alignment_sysfs_init); + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS @@ -2418,6 +2535,9 @@ static int __init pci_setup(char *str) pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { pci_cardbus_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "resource_alignment=", 19)) { + pci_set_resource_alignment_param(str + 19, + strlen(str + 19)); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 07c0aa5275e..2cd1cba7236 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -195,4 +195,10 @@ static inline int pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } +#ifdef CONFIG_PCI_QUIRKS +extern int pci_is_reassigndev(struct pci_dev *dev); +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#endif + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5aa2afb23ef..50233818a76 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "pci.h" int isa_dma_bridge_buggy; @@ -34,6 +35,65 @@ int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); #ifdef CONFIG_PCI_QUIRKS +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'pci=resource_alignment='. + * It also rounds up size to specified alignment. + * Later on, the kernel will assign page-aligned memory resource back + * to that device. + */ +static void __devinit quirk_resource_alignment(struct pci_dev *dev) +{ + int i; + struct resource *r; + resource_size_t align, size; + + if (!pci_is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + dev_warn(&dev->dev, + "Can't reassign resources to host bridge.\n"); + return; + } + + dev_info(&dev->dev, "Disabling device and release resources.\n"); + pci_disable_device(dev); + + align = pci_specified_resource_alignment(dev); + for (i=0; i < PCI_BRIDGE_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + size = resource_size(r); + if (size < align) { + size = align; + dev_info(&dev->dev, + "Rounding up size of resource #%d to %#llx.\n", + i, (unsigned long long)size); + } + r->end = size - 1; + r->start = 0; + } + /* Need to disable bridge's resource window, + * to enable the kernel to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + r->end = resource_size(r) - 1; + r->start = 0; + } + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 32e8d88a461..3039fcb86af 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_QUIRKS +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disabling bridge window.\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_QUIRKS */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; -- cgit v1.2.3 From 6a3b3e26803fc823058fbb05abb5e0d92a52e1bd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 15 Mar 2009 20:14:37 +0100 Subject: PCI: Use kzalloc() in pci_create_bus() Signed-off-by: Geert Uytterhoeven Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 23362e8c696..9e7d642e66b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1114,7 +1114,7 @@ struct pci_bus * pci_create_bus(struct device *parent, if (!b) return NULL; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev){ kfree(b); return NULL; @@ -1133,7 +1133,6 @@ struct pci_bus * pci_create_bus(struct device *parent, list_add_tail(&b->node, &pci_root_buses); up_write(&pci_bus_sem); - memset(dev, 0, sizeof(*dev)); dev->parent = parent; dev->release = pci_release_bus_bridge_dev; dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus); -- cgit v1.2.3 From 9efb5fe1b80a45130ba659ba755f24534c83301b Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 9 Mar 2009 12:08:15 -0600 Subject: PCI: PCIe portdrv: eliminate double kfree in remove path Commit 55633af3 (PCIe portdrv: Use driver data to simplify code) added a kfree of the driver private data in pcie_port_device_remove but forgot to remove the old kfree from pcie_portdrv_remove. Acked-by: Rafael J. Wysocki Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_pci.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index a61f4930d67..b924e2463f8 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -94,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev) { pcie_port_device_remove(dev); pci_disable_device(dev); - kfree(pci_get_drvdata(dev)); } static int error_detected_iter(struct device *device, void *data) -- cgit v1.2.3 From 745be2e700cdddd5da4e402854a484242c3628df Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sat, 7 Mar 2009 21:46:49 -0700 Subject: PCIe: portdrv: call pci_disable_device during remove The PCIe port driver calls pci_enable_device when registering ports, but never calls pci_disable_device during removal. Acked-by: Rafael J. Wysocki Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 5a5bfe7cdf5..e3998250386 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -473,6 +473,7 @@ void pcie_port_device_remove(struct pci_dev *dev) struct pcie_port_data *port_data = pci_get_drvdata(dev); device_for_each_child(&dev->dev, NULL, remove_iter); + pci_disable_device(dev); switch (port_data->port_irq_mode) { case PCIE_PORT_MSIX_MODE: -- cgit v1.2.3 From dfadd9edff498d767008edc6b2a6e86a7a19934d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Mar 2009 21:35:37 -0700 Subject: PCI/x86: detect host bridge config space size w/o using quirks Many host bridges support a 4k config space, so check them directy instead of using quirks to add them. We only need to do this extra check for host bridges at this point, because only host bridges are known to have extended address space without also having a PCI-X/PCI-E caps. Other devices with this property could be done with quirks (if there are any). As a bonus, we can remove the quirks for AMD host bridges with family 10h and 11h since they're not needed any more. With this patch, we can get correct pci cfg size of new Intel CPUs/IOHs with host bridges. Signed-off-by: Yinghai Lu Acked-by: H. Peter Anvin Reviewed-by: Matthew Wilcox Cc: Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 9e7d642e66b..579a56c8181 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -847,6 +847,11 @@ int pci_cfg_space_size(struct pci_dev *dev) { int pos; u32 status; + u16 class; + + class = dev->class >> 8; + if (class == PCI_CLASS_BRIDGE_HOST) + return pci_cfg_space_size_ext(dev); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -936,7 +941,6 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; - dev->cfg_size = pci_cfg_space_size(dev); dev->error_state = pci_channel_io_normal; set_pcie_port_type(dev); @@ -952,6 +956,9 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return NULL; } + /* need to have dev->class ready */ + dev->cfg_size = pci_cfg_space_size(dev); + return dev; } -- cgit v1.2.3 From 217f45de3d2f68b6d0b646bb4f2a155a71648396 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Mar 2009 05:57:05 +0000 Subject: PCI: expose boot VGA device via sysfs. X really would like to know which VGA device was considered the boot device by the system. The x86 PCI fixups have support for discovering this but we provide no way to expose it to userspace. This adds a sysfs file per VGA class device which has the value 0 for non the boot device or unknown, and 1 if the VGA device is the boot device. Acked-by: Greg Kroah-Hartman Signed-off-by: Dave Airlie Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 1c892980140..ec7a175dcba 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -240,6 +240,17 @@ struct device_attribute pci_dev_attrs[] = { __ATTR_NULL, }; +static ssize_t +boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%u\n", + !!(pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW)); +} +struct device_attribute vga_attr = __ATTR_RO(boot_vga); + static ssize_t pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -899,18 +910,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) pdev->rom_attr = attr; } + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) { + retval = device_create_file(&pdev->dev, &vga_attr); + if (retval) + goto err_rom_file; + } + /* add platform-specific attributes */ retval = pcibios_add_platform_entries(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; /* add sysfs entries for various capabilities */ retval = pci_create_capabilities_sysfs(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; return 0; +err_vga_file: + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + device_remove_file(&pdev->dev, &vga_attr); err_rom_file: if (rom_size) { sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); -- cgit v1.2.3 From 8293b0f629095efbe7c7e3f9b437f8c040c19eb5 Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Mon, 2 Mar 2009 09:51:13 +0100 Subject: PCI: Compaq Evo D510 SMBus quirk using USB instead of VGA On the Compaq Evo D510 SFF/CMT, a PCI quirk activated the SMBus device based on detection of the on-board VGA controller, but the on-board VGA is disabled if an AGP card is inserted, so look for one of the USB controllers instead. Signed-off-by: David O'Shea Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 50233818a76..7ddcfc65e79 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1186,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } - else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2) switch(dev->subsystem_device) { case 0x00b8: /* Compaq Evo D510 CMT */ case 0x00b9: /* Compaq Evo D510 SFF */ + /* Motherboard doesn't have Host bridge + * subvendor/subdevice IDs and on-board VGA + * controller is disabled if an AGP card is + * inserted, therefore checking USB UHCI + * Controller #1 */ asus_hides_smbus = 1; } else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) @@ -1214,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) -- cgit v1.2.3 From d1b054da8f599905f3c18a218961dcf17f9d5f13 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:11 +0800 Subject: PCI: initialize and release SR-IOV capability If a device has the SR-IOV capability, initialize it (set the ARI Capable Hierarchy in the lowest numbered PF if necessary; calculate the System Page Size for the VF MMIO, probe the VF Offset, Stride and BARs). A lock for the VF bus allocation is also initialized if a PF is the lowest numbered PF. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 10 +++ drivers/pci/Makefile | 2 + drivers/pci/iov.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.c | 7 ++ drivers/pci/pci.h | 37 +++++++++++ drivers/pci/probe.c | 4 ++ 6 files changed, 242 insertions(+) create mode 100644 drivers/pci/iov.c (limited to 'drivers/pci') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2a4501dd251..fdc864f9cf2 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -59,3 +59,13 @@ config HT_IRQ This allows native hypertransport devices to use interrupts. If unsure say Y. + +config PCI_IOV + bool "PCI IOV support" + depends on PCI + help + I/O Virtualization is a PCI feature supported by some devices + which allows them to create virtual devices which share their + physical resources. + + If unsure, say N. diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d07ce24f6a..ba6af162fd3 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o +obj-$(CONFIG_PCI_IOV) += iov.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c new file mode 100644 index 00000000000..66cc414ed15 --- /dev/null +++ b/drivers/pci/iov.c @@ -0,0 +1,182 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao + * + * PCI Express I/O Virtualization (IOV) support. + * Single Root IOV 1.0 + */ + +#include +#include +#include +#include +#include "pci.h" + + +static int sriov_init(struct pci_dev *dev, int pos) +{ + int i; + int rc; + int nres; + u32 pgsz; + u16 ctrl, total, offset, stride; + struct pci_sriov *iov; + struct resource *res; + struct pci_dev *pdev; + + if (dev->pcie_type != PCI_EXP_TYPE_RC_END && + dev->pcie_type != PCI_EXP_TYPE_ENDPOINT) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) { + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); + ssleep(1); + } + + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + + ctrl = 0; + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev->is_physfn) + goto found; + + pdev = NULL; + if (pci_ari_enabled(dev->bus)) + ctrl |= PCI_SRIOV_CTRL_ARI; + +found: + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); + pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (total > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + i += __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); + if (!res->flags) + continue; + if (resource_size(res) & (PAGE_SIZE - 1)) { + rc = -EIO; + goto failed; + } + res->end = res->start + resource_size(res) * total - 1; + nres++; + } + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) { + rc = -ENOMEM; + goto failed; + } + + iov->pos = pos; + iov->nres = nres; + iov->ctrl = ctrl; + iov->total = total; + iov->offset = offset; + iov->stride = stride; + iov->pgsz = pgsz; + iov->self = dev; + pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); + pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + + if (pdev) + iov->dev = pci_dev_get(pdev); + else { + iov->dev = dev; + mutex_init(&iov->lock); + } + + dev->sriov = iov; + dev->is_physfn = 1; + + return 0; + +failed: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + res->flags = 0; + } + + return rc; +} + +static void sriov_release(struct pci_dev *dev) +{ + if (dev == dev->sriov->dev) + mutex_destroy(&dev->sriov->lock); + else + pci_dev_put(dev->sriov->dev); + + kfree(dev->sriov); + dev->sriov = NULL; +} + +/** + * pci_iov_init - initialize the IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int pos; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (pos) + return sriov_init(dev, pos); + + return -ENODEV; +} + +/** + * pci_iov_release - release resources used by the IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_release(dev); +} + +/** + * pci_iov_resource_bar - get position of the SR-IOV BAR + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns position of the BAR encapsulated in the SR-IOV capability. + */ +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) + return 0; + + BUG_ON(!dev->is_physfn); + + *type = pci_bar_unknown; + + return dev->sriov->pos + PCI_SRIOV_BAR + + 4 * (resno - PCI_IOV_RESOURCES); +} diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a35a8b2ba63..2b3201ec2b0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2360,12 +2360,19 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_mem32; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2cd1cba7236..7d5327c986f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -201,4 +201,41 @@ resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); extern void pci_disable_bridge_window(struct pci_dev *dev); #endif +/* Single Root I/O Virtualization */ +struct pci_sriov { + int pos; /* capability position */ + int nres; /* number of resources */ + u32 cap; /* SR-IOV Capabilities */ + u16 ctrl; /* SR-IOV Control */ + u16 total; /* total VFs associated with the PF */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + u32 pgsz; /* page size for BAR alignment */ + u8 link; /* Function Dependency Link */ + struct pci_dev *dev; /* lowest numbered PF */ + struct pci_dev *self; /* this PF */ + struct mutex lock; /* lock for VF bus */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline void pci_iov_release(struct pci_dev *dev) + +{ +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 579a56c8181..0471f6ea146 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -785,6 +785,7 @@ static int pci_setup_device(struct pci_dev * dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_vpd_release(dev); + pci_iov_release(dev); } /** @@ -979,6 +980,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Alternative Routing-ID Forwarding */ pci_enable_ari(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) -- cgit v1.2.3 From 8c5cdb6adc6688b9b8fd82ea4a5cf4674dabad79 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:12 +0800 Subject: PCI: restore saved SR-IOV state Restore the volatile registers in the SR-IOV capability after the D3->D0 transition. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 29 +++++++++++++++++++++++++++++ drivers/pci/pci.c | 1 + drivers/pci/pci.h | 4 ++++ 3 files changed, 34 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 66cc414ed15..b121e47402f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -129,6 +129,25 @@ static void sriov_release(struct pci_dev *dev) dev->sriov = NULL; } +static void sriov_restore_state(struct pci_dev *dev) +{ + int i; + u16 ctrl; + struct pci_sriov *iov = dev->sriov; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) + return; + + for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) + pci_update_resource(dev, i); + + pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + if (iov->ctrl & PCI_SRIOV_CTRL_VFE) + msleep(100); +} + /** * pci_iov_init - initialize the IOV capability * @dev: the PCI device @@ -180,3 +199,13 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno, return dev->sriov->pos + PCI_SRIOV_BAR + 4 * (resno - PCI_IOV_RESOURCES); } + +/** + * pci_restore_iov_state - restore the state of the IOV capability + * @dev: the PCI device + */ +void pci_restore_iov_state(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_restore_state(dev); +} diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2b3201ec2b0..676bbcbc272 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -775,6 +775,7 @@ pci_restore_state(struct pci_dev *dev) } pci_restore_pcix_state(dev); pci_restore_msi_state(dev); + pci_restore_iov_state(dev); return 0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 7d5327c986f..fd5ea4d445e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -222,6 +222,7 @@ extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); +extern void pci_restore_iov_state(struct pci_dev *dev); #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -236,6 +237,9 @@ static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, { return 0; } +static inline void pci_restore_iov_state(struct pci_dev *dev) +{ +} #endif /* CONFIG_PCI_IOV */ #endif /* DRIVERS_PCI_H */ -- cgit v1.2.3 From a28724b0fb909d247229a70761c90bb37b13366a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:13 +0800 Subject: PCI: reserve bus range for SR-IOV device Reserve the bus number range used by the Virtual Function when pcibios_assign_all_busses() returns true. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 5 +++++ drivers/pci/probe.c | 3 +++ 3 files changed, 44 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index b121e47402f..5ddfc09a8d3 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -14,6 +14,18 @@ #include "pci.h" +static inline u8 virtfn_bus(struct pci_dev *dev, int id) +{ + return dev->bus->number + ((dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) >> 8); +} + +static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +{ + return (dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) & 0xff; +} + static int sriov_init(struct pci_dev *dev, int pos) { int i; @@ -209,3 +221,27 @@ void pci_restore_iov_state(struct pci_dev *dev) if (dev->is_physfn) sriov_restore_state(dev); } + +/** + * pci_iov_bus_range - find bus range used by Virtual Function + * @bus: the PCI bus + * + * Returns max number of buses (exclude current one) used by Virtual + * Functions. + */ +int pci_iov_bus_range(struct pci_bus *bus) +{ + int max = 0; + u8 busnr; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_physfn) + continue; + busnr = virtfn_bus(dev, dev->sriov->total - 1); + if (busnr > max) + max = busnr; + } + + return max ? max - bus->number : 0; +} diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index fd5ea4d445e..5c29cb2b8e6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -223,6 +223,7 @@ extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); extern void pci_restore_iov_state(struct pci_dev *dev); +extern int pci_iov_bus_range(struct pci_bus *bus); #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -240,6 +241,10 @@ static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, static inline void pci_restore_iov_state(struct pci_dev *dev) { } +static inline int pci_iov_bus_range(struct pci_bus *bus) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0471f6ea146..0ecdaea3915 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1085,6 +1085,9 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn); + /* Reserve buses for SR-IOV capability. */ + max += pci_iov_bus_range(bus); + /* * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. -- cgit v1.2.3 From 480b93b7837fb3cf0579a42f4953ac463a5b9e1e Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:14 +0800 Subject: PCI: centralize device setup code Move the device setup stuff into pci_setup_device() which will be used to setup the Virtual Function later. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 1 + drivers/pci/probe.c | 78 +++++++++++++++++++++++++++-------------------------- 2 files changed, 41 insertions(+), 38 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 5c29cb2b8e6..f4fc10fc587 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -178,6 +178,7 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; +extern int pci_setup_device(struct pci_dev *dev); extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); extern int pci_resource_bar(struct pci_dev *dev, int resno, diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0ecdaea3915..943c49a7842 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev) dev->irq = irq; } +static void set_pcie_port_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + return; + pdev->is_pcie = 1; + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} + #define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED) /** @@ -683,12 +696,34 @@ static void pci_read_irq(struct pci_dev *dev) * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. - * Returns 0 on success and -1 if unknown type of device (not normal, bridge - * or CardBus). + * Returns 0 on success and negative if unknown type of device (not normal, + * bridge or CardBus). */ -static int pci_setup_device(struct pci_dev * dev) +int pci_setup_device(struct pci_dev *dev) { u32 class; + u8 hdr_type; + struct pci_slot *slot; + + if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) + return -EIO; + + dev->sysdata = dev->bus->sysdata; + dev->dev.parent = dev->bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->hdr_type = hdr_type & 0x7f; + dev->multifunction = !!(hdr_type & 0x80); + dev->cfg_size = pci_cfg_space_size(dev); + dev->error_state = pci_channel_io_normal; + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; + + /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) + set this higher, assuming the system even supports it. */ + dev->dma_mask = 0xffffffff; dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), dev->bus->number, PCI_SLOT(dev->devfn), @@ -708,7 +743,6 @@ static int pci_setup_device(struct pci_dev * dev) /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); - class = dev->class >> 8; switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ @@ -770,7 +804,7 @@ static int pci_setup_device(struct pci_dev * dev) default: /* unknown header */ dev_err(&dev->dev, "unknown header type %02x, " "ignoring device\n", dev->hdr_type); - return -1; + return -EIO; bad: dev_err(&dev->dev, "ignoring class %02x (doesn't match header " @@ -804,19 +838,6 @@ static void pci_release_dev(struct device *dev) kfree(pci_dev); } -static void set_pcie_port_type(struct pci_dev *pdev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!pos) - return; - pdev->is_pcie = 1; - pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); - pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; -} - /** * pci_cfg_space_size - get the configuration space size of the PCI device. * @dev: PCI device @@ -897,9 +918,7 @@ EXPORT_SYMBOL(alloc_pci_dev); static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; - struct pci_slot *slot; u32 l; - u8 hdr_type; int delay = 1; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) @@ -926,33 +945,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) } } - if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type)) - return NULL; - dev = alloc_pci_dev(); if (!dev) return NULL; dev->bus = bus; - dev->sysdata = bus->sysdata; - dev->dev.parent = bus->bridge; - dev->dev.bus = &pci_bus_type; dev->devfn = devfn; - dev->hdr_type = hdr_type & 0x7f; - dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; - dev->error_state = pci_channel_io_normal; - set_pcie_port_type(dev); - - list_for_each_entry(slot, &bus->slots, list) - if (PCI_SLOT(devfn) == slot->number) - dev->slot = slot; - /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) - set this higher, assuming the system even supports it. */ - dev->dma_mask = 0xffffffff; - if (pci_setup_device(dev) < 0) { + if (pci_setup_device(dev)) { kfree(dev); return NULL; } -- cgit v1.2.3 From dd7cc44d0bcec5e9c42fe52e88dc254ae62eac8d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:15 +0800 Subject: PCI: add SR-IOV API for Physical Function driver Add or remove the Virtual Function when the SR-IOV is enabled or disabled by the device driver. This can happen anytime rather than only at the device probe stage. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 314 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 2 + 2 files changed, 316 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 5ddfc09a8d3..d0ff8ad8f7b 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include #include "pci.h" +#define VIRTFN_ID_LEN 16 static inline u8 virtfn_bus(struct pci_dev *dev, int id) { @@ -26,6 +27,284 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) dev->sriov->stride * id) & 0xff; } +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + int rc; + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + child->dev.parent = bus->bridge; + rc = pci_bus_add_child(child); + if (rc) { + pci_remove_bus(child); + return NULL; + } + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id, int reset) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = alloc_pci_dev(); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = res->flags; + size = resource_size(res); + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + if (reset) + pci_execute_reset_function(virtfn); + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + rc = pci_bus_add_device(virtfn); + if (rc) + goto failed1; + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id, int reset) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + if (reset) { + device_release_driver(&virtfn->dev); + pci_execute_reset_function(virtfn); + } + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + int nres; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (res->parent) + nres++; + } + if (nres != iov->nres) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + return -ENOMEM; + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i, 0); + if (rc) + goto failed; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed: + for (j = 0; j < i; j++) + virtfn_remove(dev, j, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + iov->nr_virtfn = 0; +} + static int sriov_init(struct pci_dev *dev, int pos) { int i; @@ -132,6 +411,8 @@ failed: static void sriov_release(struct pci_dev *dev) { + BUG_ON(dev->sriov->nr_virtfn); + if (dev == dev->sriov->dev) mutex_destroy(&dev->sriov->lock); else @@ -155,6 +436,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -245,3 +527,35 @@ int pci_iov_bus_range(struct pci_bus *bus) return max ? max - bus->number : 0; } + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f4fc10fc587..0f1c7d10350 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -209,6 +209,8 @@ struct pci_sriov { u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ u16 offset; /* first VF Routing ID offset */ u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ -- cgit v1.2.3 From 74bb1bcc7dbbc9ddef773bf3395d7ff92aaaad2e Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:16 +0800 Subject: PCI: handle SR-IOV Virtual Function Migration Add or remove a Virtual Function after receiving a Migrate In or Out Request. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 4 ++ 2 files changed, 123 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index d0ff8ad8f7b..7227efc760d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,97 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) pci_dev_put(dev); } +static int sriov_migration(struct pci_dev *dev) +{ + u16 status; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return 0; + + if (!(iov->cap & PCI_SRIOV_CAP_VFM)) + return 0; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status); + if (!(status & PCI_SRIOV_STATUS_VFM)) + return 0; + + schedule_work(&iov->mtask); + + return 1; +} + +static void sriov_migration_task(struct work_struct *work) +{ + int i; + u8 state; + u16 status; + struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask); + + for (i = iov->initial; i < iov->nr_virtfn; i++) { + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_MI) { + writeb(PCI_SRIOV_VFM_AV, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 1); + } else if (state == PCI_SRIOV_VFM_MO) { + virtfn_remove(iov->self, i, 1); + writeb(PCI_SRIOV_VFM_UA, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 0); + } + } + + pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status); + status &= ~PCI_SRIOV_STATUS_VFM; + pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status); +} + +static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn) +{ + int bir; + u32 table; + resource_size_t pa; + struct pci_sriov *iov = dev->sriov; + + if (nr_virtfn <= iov->initial) + return 0; + + pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table); + bir = PCI_SRIOV_VFM_BIR(table); + if (bir > PCI_STD_RESOURCE_END) + return -EIO; + + table = PCI_SRIOV_VFM_OFFSET(table); + if (table + nr_virtfn > pci_resource_len(dev, bir)) + return -EIO; + + pa = pci_resource_start(dev, bir) + table; + iov->mstate = ioremap(pa, nr_virtfn); + if (!iov->mstate) + return -ENOMEM; + + INIT_WORK(&iov->mtask, sriov_migration_task); + + iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR; + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + return 0; +} + +static void sriov_disable_migration(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + cancel_work_sync(&iov->mtask); + iounmap(iov->mstate); +} + static int sriov_enable(struct pci_dev *dev, int nr_virtfn) { int rc; @@ -261,6 +352,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) goto failed; } + if (iov->cap & PCI_SRIOV_CAP_VFM) { + rc = sriov_enable_migration(dev, nr_virtfn); + if (rc) + goto failed; + } + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); iov->nr_virtfn = nr_virtfn; @@ -290,6 +387,9 @@ static void sriov_disable(struct pci_dev *dev) if (!iov->nr_virtfn) return; + if (iov->cap & PCI_SRIOV_CAP_VFM) + sriov_disable_migration(dev); + for (i = 0; i < iov->nr_virtfn; i++) virtfn_remove(dev, i, 0); @@ -559,3 +659,22 @@ void pci_disable_sriov(struct pci_dev *dev) sriov_disable(dev); } EXPORT_SYMBOL_GPL(pci_disable_sriov); + +/** + * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration + * @dev: the PCI device + * + * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not. + * + * Physical Function driver is responsible to register IRQ handler using + * VF Migration Interrupt Message Number, and call this function when the + * interrupt is generated by the hardware. + */ +irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + if (!dev->is_physfn) + return IRQ_NONE; + + return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; +} +EXPORT_SYMBOL_GPL(pci_sriov_migration); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0f1c7d10350..22dcfdb75d9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1,6 +1,8 @@ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H +#include + #define PCI_CFG_SPACE_SIZE 256 #define PCI_CFG_SPACE_EXP_SIZE 4096 @@ -218,6 +220,8 @@ struct pci_sriov { struct pci_dev *dev; /* lowest numbered PF */ struct pci_dev *self; /* this PF */ struct mutex lock; /* lock for VF bus */ + struct work_struct mtask; /* VF Migration task */ + u8 __iomem *mstate; /* VF Migration State Array */ }; #ifdef CONFIG_PCI_IOV -- cgit v1.2.3 From 90bdb3117f4209baa6d712b126f0e7791b24dc3f Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Fri, 20 Mar 2009 14:56:00 -0600 Subject: PCI: don't scan existing devices pci_scan_single_device is supposed to add newly discovered devices to pci_bus->devices, but doesn't check to see if the device has already been added. This can cause problems if we ever want to use this interface to rescan the PCI bus. If the device is already added, just return it. Signed-off-by: Trent Piepho Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 943c49a7842..140b9deb482 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1019,6 +1019,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + dev = pci_scan_device(bus, devfn); if (!dev) return NULL; -- cgit v1.2.3 From 1b69dfc649e6658fc38499cf704750d74cabc73d Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Fri, 20 Mar 2009 14:56:05 -0600 Subject: PCI: pci_scan_slot() returns newly found devices pci_scan_slot() has been rewritten to be less complex and will now return the number of *new* devices found. Existing callers need not worry because they already assume that they can't call pci_scan_slot() on an already-scanned slot. Thus, there is no semantic change for existing callers: returning newly found devices (this patch) is exactly equal to returning all found devices (before this patch). This patch adds some more groundwork to allow us to rescan the PCI bus during runtime to discover newly added devices. Signed-off-by: Trent Piepho Reviewed-by: Alex Chiang Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 140b9deb482..f3aabdf28f8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1043,35 +1043,27 @@ EXPORT_SYMBOL(pci_scan_single_device); * Scan a PCI slot on the specified PCI bus for devices, adding * discovered devices to the @bus->devices list. New devices * will not have is_added set. + * + * Returns the number of new devices found. */ int pci_scan_slot(struct pci_bus *bus, int devfn) { - int func, nr = 0; - int scan_all_fns; - - scan_all_fns = pcibios_scan_all_fns(bus, devfn); - - for (func = 0; func < 8; func++, devfn++) { - struct pci_dev *dev; - - dev = pci_scan_single_device(bus, devfn); - if (dev) { - nr++; + int fn, nr = 0; + struct pci_dev *dev; - /* - * If this is a single function device, - * don't scan past the first function. - */ - if (!dev->multifunction) { - if (func > 0) { - dev->multifunction = 1; - } else { - break; - } + dev = pci_scan_single_device(bus, devfn); + if (dev && !dev->is_added) /* new device? */ + nr++; + + if ((dev && dev->multifunction) || + (!dev && pcibios_scan_all_fns(bus, devfn))) { + for (fn = 1; fn < 8; fn++) { + dev = pci_scan_single_device(bus, devfn + fn); + if (dev) { + if (!dev->is_added) + nr++; + dev->multifunction = 1; } - } else { - if (func == 0 && !scan_all_fns) - break; } } -- cgit v1.2.3 From 74710ded8e16fc8dacbb702a5bac1a493d88549a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:10 -0600 Subject: PCI: always scan child buses While scanning bridges, we stop our scan if we encounter a bus that we've seen before, to work around some buggy chipsets. This is a good idea, but prevents us from fully scanning the PCI bus at a future time (to find newly hot-added devices, for example). Change the logic so that we skip _re-adding_ an existing bus that we've seen before, but also allow the scan to descend to all child buses. Now that we're potentially scanning our child buses again, we also need to be sure not to attempt re-initializing their BARs so we avoid that. This patch lays the groundwork to allow the user to issue a rescan of the PCI bus at any time. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f3aabdf28f8..f69256c63b2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, /* * If we already got to this bus through a different bridge, - * ignore it. This can happen with the i450NX chipset. + * don't re-add it. This can happen with the i450NX chipset. + * + * However, we continue to descend down the hierarchy and + * scan remaining child buses. */ - if (pci_find_bus(pci_domain_nr(bus), busnr)) { - dev_info(&dev->dev, "bus %04x:%02x already known\n", - pci_domain_nr(bus), busnr); - goto out; + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (!child) { + child = pci_add_new_bus(bus, dev, busnr); + if (!child) + goto out; + child->primary = buses & 0xFF; + child->subordinate = (buses >> 16) & 0xFF; + child->bridge_ctl = bctl; } - child = pci_add_new_bus(bus, dev, busnr); - if (!child) - goto out; - child->primary = buses & 0xFF; - child->subordinate = (buses >> 16) & 0xFF; - child->bridge_ctl = bctl; - cmax = pci_scan_child_bus(child); if (cmax > max) max = cmax; @@ -1092,8 +1092,14 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. */ - pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - pcibios_fixup_bus(bus); + if (!bus->is_added) { + pr_debug("PCI: Fixups for bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + pcibios_fixup_bus(bus); + if (pci_is_root_bus(bus)) + bus->is_added = 1; + } + for (pass=0; pass < 2; pass++) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || -- cgit v1.2.3 From b73e97d95c168cbc19bd1208c894077f25931ba1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:15 -0600 Subject: PCI: do not initialize bridges more than once In preparation for PCI core hotplug, we need to ensure that we do not attempt to re-initialize bridges that have already been initialized. We only need to worry about non-root buses, since we will not allow root bus removal. Reported-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 170a3eda9dd..334285a8e23 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_bus_region region; u32 l, bu, lu, io_upper16; + if (!pci_is_root_bus(bus) && bus->is_added) + return; + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", pci_domain_nr(bus), bus->number); -- cgit v1.2.3 From 9dd90cafa7a712d283e2e0c625b022e19f746762 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:20 -0600 Subject: PCI: do not enable bridges more than once In preparation for PCI core hotplug, we need to ensure that we do not attempt to re-enable bridges that have already been enabled. Reported-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 118c77778d2..68f91a25259 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) { - retval = pci_enable_device(dev); - pci_set_master(dev); + if (atomic_read(&dev->enable_cnt) == 0) { + retval = pci_enable_device(dev); + pci_set_master(dev); + } pci_enable_bridges(dev->subordinate); } } -- cgit v1.2.3 From 3ed4fd96b3188406ac5357d9290bcffa08c65cf6 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:25 -0600 Subject: PCI: Introduce pci_rescan_bus() This API is used by the PCI core to rescan a bus and rediscover newly added devices. Over time, it is expected that the various PCI hotplug drivers will migrate to this interface and away from the old pci_do_scan_bus() interface. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 6 +++--- drivers/pci/probe.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index d8649e12729..16063745766 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -245,12 +245,12 @@ static int pci_rescan_slot(struct pci_dev *temp) /** - * pci_rescan_bus - Rescan PCI bus + * pci_rescan_bus_local - fakephp version of rescan PCI bus * @bus: the PCI bus to rescan * * Call pci_rescan_slot for each possible function of the bus. */ -static void pci_rescan_bus(const struct pci_bus *bus) +static void pci_rescan_bus_local(const struct pci_bus *bus) { unsigned int devfn; struct pci_dev *dev; @@ -291,7 +291,7 @@ static void pci_rescan_buses(const struct list_head *list) const struct list_head *l; list_for_each(l,list) { const struct pci_bus *b = pci_bus_b(l); - pci_rescan_bus(b); + pci_rescan_bus_local(b); pci_rescan_buses(&b->children); } } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f69256c63b2..60a8e5fec6c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1212,6 +1212,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, EXPORT_SYMBOL(pci_scan_bus_parented); #ifdef CONFIG_HOTPLUG +/** + * pci_rescan_bus - scan a PCI bus for devices. + * @bus: PCI bus to scan + * + * Scan a PCI bus and child buses for new devices, adds them, + * and enables them. + * + * Returns the max number of subordinate bus discovered. + */ +unsigned int __devinit pci_rescan_bus(struct pci_bus *bus) +{ + unsigned int max; + struct pci_dev *dev; + + max = pci_scan_child_bus(bus); + + up_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (dev->subordinate) + pci_bus_size_bridges(dev->subordinate); + down_read(&pci_bus_sem); + + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + pci_bus_add_devices(bus); + + return max; +} +EXPORT_SYMBOL_GPL(pci_rescan_bus); + EXPORT_SYMBOL(pci_add_new_bus); EXPORT_SYMBOL(pci_scan_slot); EXPORT_SYMBOL(pci_scan_bridge); -- cgit v1.2.3 From 705b1aaa823e800490f157cd9366ad8cff385f5f Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:31 -0600 Subject: PCI: Introduce /sys/bus/pci/rescan This interface allows the user to force a rescan of all PCI buses in system, and rediscover devices that have been removed earlier. pci_bus_attrs implementation from Trent Piepho. Thanks to Vegard Nossum for discovering locking issues with the sysfs interface. Cc: Trent Piepho Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 1 + drivers/pci/pci-sysfs.c | 26 ++++++++++++++++++++++++++ drivers/pci/pci.h | 6 ++++++ drivers/pci/probe.c | 4 ++-- 4 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 87a5ddbb324..95d19857029 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1049,6 +1049,7 @@ struct bus_type pci_bus_type = { .remove = pci_device_remove, .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, + .bus_attrs = pci_bus_attrs, .pm = PCI_PM_OPS_PTR, }; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index ec7a175dcba..be7468a5eb7 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -219,6 +219,32 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, return count; } +#ifdef CONFIG_HOTPLUG +static DEFINE_MUTEX(pci_remove_rescan_mutex); +static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf, + size_t count) +{ + unsigned long val; + struct pci_bus *b = NULL; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + while ((b = pci_find_next_bus(b)) != NULL) + pci_rescan_bus(b); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + +struct bus_attribute pci_bus_attrs[] = { + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store), + __ATTR_NULL +}; +#endif + struct device_attribute pci_dev_attrs[] = { __ATTR_RO(resource), __ATTR_RO(vendor), diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 22dcfdb75d9..45833a5bca6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -138,6 +138,12 @@ extern int pcie_mch_quirk; extern struct device_attribute pci_dev_attrs[]; extern struct device_attribute dev_attr_cpuaffinity; extern struct device_attribute dev_attr_cpulistaffinity; +#ifdef CONFIG_HOTPLUG +extern struct bus_attribute pci_bus_attrs[]; +#else +#define pci_bus_attrs NULL +#endif + /** * pci_match_one_device - Tell if a PCI device structure has a matching diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 60a8e5fec6c..56c71e585f3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1228,13 +1228,13 @@ unsigned int __devinit pci_rescan_bus(struct pci_bus *bus) max = pci_scan_child_bus(bus); - up_read(&pci_bus_sem); + down_read(&pci_bus_sem); list_for_each_entry(dev, &bus->devices, bus_list) if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) if (dev->subordinate) pci_bus_size_bridges(dev->subordinate); - down_read(&pci_bus_sem); + up_read(&pci_bus_sem); pci_bus_assign_resources(bus); pci_enable_bridges(bus); -- cgit v1.2.3 From 77c27c7b49d69d45ccb94e481653f024f1ac6650 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:36 -0600 Subject: PCI: Introduce /sys/bus/pci/devices/.../remove This patch adds an attribute named "remove" to a PCI device's sysfs directory. Writing a non-zero value to this attribute will remove the PCI device and any children of it. Trent Piepho wrote the original implementation and documentation. Thanks to Vegard Nossum for testing under kmemcheck and finding locking issues with the sysfs interface. Cc: Trent Piepho Tested-by: Vegard Nossum Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index be7468a5eb7..e16990ecc02 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -243,6 +243,39 @@ struct bus_attribute pci_bus_attrs[] = { __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store), __ATTR_NULL }; + +static void remove_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + mutex_lock(&pci_remove_rescan_mutex); + pci_remove_bus_device(pdev); + mutex_unlock(&pci_remove_rescan_mutex); +} + +static ssize_t +remove_store(struct device *dev, struct device_attribute *dummy, + const char *buf, size_t count) +{ + int ret = 0; + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (pci_is_root_bus(pdev->bus)) + return -EBUSY; + + /* An attribute cannot be unregistered by one of its own methods, + * so we have to use this roundabout approach. + */ + if (val) + ret = device_schedule_callback(dev, remove_callback); + if (ret) + count = ret; + return count; +} #endif struct device_attribute pci_dev_attrs[] = { @@ -263,6 +296,9 @@ struct device_attribute pci_dev_attrs[] = { __ATTR(broken_parity_status,(S_IRUGO|S_IWUSR), broken_parity_status_show,broken_parity_status_store), __ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store), +#ifdef CONFIG_HOTPLUG + __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store), +#endif __ATTR_NULL, }; -- cgit v1.2.3 From 738a6396c223b486304dda778119dbbca563f019 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:41 -0600 Subject: PCI: Introduce /sys/bus/pci/devices/.../rescan This interface allows the user to force a rescan of the device's parent bus and all subordinate buses, and rediscover devices removed earlier from this part of the device tree. Cc: Trent Piepho Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index e16990ecc02..e9a8706a640 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -244,6 +244,24 @@ struct bus_attribute pci_bus_attrs[] = { __ATTR_NULL }; +static ssize_t +dev_rescan_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + pci_rescan_bus(pdev->bus); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + static void remove_callback(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -298,6 +316,7 @@ struct device_attribute pci_dev_attrs[] = { __ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store), #ifdef CONFIG_HOTPLUG __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store), + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store), #endif __ATTR_NULL, }; -- cgit v1.2.3 From 83dbf66f04b96e65c6c18436c16d40f9cf8630aa Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Fri, 20 Mar 2009 14:56:46 -0600 Subject: PCI Hotplug: restore fakephp interface with complete reimplementation A complete re-implementation of fakephp is necessary if it is to present its former interface (pre-2.6.27, when it broke). The reason is that PCI hotplug drivers call pci_hp_register(), which enforces the rule that only one /sys/bus/pci/slots/ file may be created per physical slot. The change breaks the old fakephp's assumption that it could create a file per function. So we re-implement fakephp to avoid using the standard PCI hotplug API so that we can restore the old fakephp user interface. It puts entries in /sys/bus/pci/slots with the names of all PCI devices/functions, exactly symmetrical to what is shown in /sys/bus/pci/devices. Each slots/ entry has a "power" attribute, which works the same way as the fakephp driver's power attribute has worked. There are a few improvements over old fakephp, which couldn't handle PCI devices being added or removed via a means outside of fakephp's knowledge. If a device was added another way, old fakephp didn't notice and didn't create the fake slot for it. If a device was removed another way, old fakephp didn't delete the fake slot for it (and accessing the stale slot caused an oops). The new implementation overcomes these limitations. As a consequence, removing a bridge with other devices behind it now works as well, which is something else old fakephp couldn't do previously. This duplicates a tiny bit of the code in the PCI core that does this same function. Re-using that code ends up being more complex than duplicating it, and it makes code in the PCI core more ugly just to support this legacy fakephp interface compatibility layer. Reviewed-by: James Cameron Signed-off-by: Trent Piepho Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/Makefile | 2 +- drivers/pci/hotplug/fakephp.c | 395 ----------------------------------- drivers/pci/hotplug/legacy_fakephp.c | 162 ++++++++++++++ 3 files changed, 163 insertions(+), 396 deletions(-) delete mode 100644 drivers/pci/hotplug/fakephp.c create mode 100644 drivers/pci/hotplug/legacy_fakephp.c (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 2aa117c8cd8..3314fe88150 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o # Link this last so it doesn't claim devices that have a real hotplug driver -obj-$(CONFIG_HOTPLUG_PCI_FAKE) += fakephp.o +obj-$(CONFIG_HOTPLUG_PCI_FAKE) += legacy_fakephp.o pci_hotplug-objs := pci_hotplug_core.o diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c deleted file mode 100644 index 16063745766..00000000000 --- a/drivers/pci/hotplug/fakephp.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * Fake PCI Hot Plug Controller Driver - * - * Copyright (C) 2003 Greg Kroah-Hartman - * Copyright (C) 2003 IBM Corp. - * Copyright (C) 2003 Rolf Eike Beer - * - * Based on ideas and code from: - * Vladimir Kondratiev - * Rolf Eike Beer - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 2 of the License. - * - * Send feedback to - */ - -/* - * - * This driver will "emulate" removing PCI devices from the system. If - * the "power" file is written to with "0" then the specified PCI device - * will be completely removed from the kernel. - * - * WARNING, this does NOT turn off the power to the PCI device. This is - * a "logical" removal, not a physical or electrical removal. - * - * Use this module at your own risk, you have been warned! - * - * Enabling PCI devices is left as an exercise for the reader... - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "../pci.h" - -#if !defined(MODULE) - #define MY_NAME "fakephp" -#else - #define MY_NAME THIS_MODULE->name -#endif - -#define dbg(format, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ - } while (0) -#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) -#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) - -#define DRIVER_AUTHOR "Greg Kroah-Hartman " -#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver" - -struct dummy_slot { - struct list_head node; - struct hotplug_slot *slot; - struct pci_dev *dev; - struct work_struct remove_work; - unsigned long removed; -}; - -static int debug; -static int dup_slots; -static LIST_HEAD(slot_list); -static struct workqueue_struct *dummyphp_wq; - -static void pci_rescan_worker(struct work_struct *work); -static DECLARE_WORK(pci_rescan_work, pci_rescan_worker); - -static int enable_slot (struct hotplug_slot *slot); -static int disable_slot (struct hotplug_slot *slot); - -static struct hotplug_slot_ops dummy_hotplug_slot_ops = { - .owner = THIS_MODULE, - .enable_slot = enable_slot, - .disable_slot = disable_slot, -}; - -static void dummy_release(struct hotplug_slot *slot) -{ - struct dummy_slot *dslot = slot->private; - - list_del(&dslot->node); - kfree(dslot->slot->info); - kfree(dslot->slot); - pci_dev_put(dslot->dev); - kfree(dslot); -} - -#define SLOT_NAME_SIZE 8 - -static int add_slot(struct pci_dev *dev) -{ - struct dummy_slot *dslot; - struct hotplug_slot *slot; - char name[SLOT_NAME_SIZE]; - int retval = -ENOMEM; - static int count = 1; - - slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); - if (!slot) - goto error; - - slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); - if (!slot->info) - goto error_slot; - - slot->info->power_status = 1; - slot->info->max_bus_speed = PCI_SPEED_UNKNOWN; - slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN; - - dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL); - if (!dslot) - goto error_info; - - if (dup_slots) - snprintf(name, SLOT_NAME_SIZE, "fake"); - else - snprintf(name, SLOT_NAME_SIZE, "fake%d", count++); - dbg("slot->name = %s\n", name); - slot->ops = &dummy_hotplug_slot_ops; - slot->release = &dummy_release; - slot->private = dslot; - - retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name); - if (retval) { - err("pci_hp_register failed with error %d\n", retval); - goto error_dslot; - } - - dbg("slot->name = %s\n", hotplug_slot_name(slot)); - dslot->slot = slot; - dslot->dev = pci_dev_get(dev); - list_add (&dslot->node, &slot_list); - return retval; - -error_dslot: - kfree(dslot); -error_info: - kfree(slot->info); -error_slot: - kfree(slot); -error: - return retval; -} - -static int __init pci_scan_buses(void) -{ - struct pci_dev *dev = NULL; - int lastslot = 0; - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (PCI_FUNC(dev->devfn) > 0 && - lastslot == PCI_SLOT(dev->devfn)) - continue; - lastslot = PCI_SLOT(dev->devfn); - add_slot(dev); - } - - return 0; -} - -static void remove_slot(struct dummy_slot *dslot) -{ - int retval; - - dbg("removing slot %s\n", hotplug_slot_name(dslot->slot)); - retval = pci_hp_deregister(dslot->slot); - if (retval) - err("Problem unregistering a slot %s\n", - hotplug_slot_name(dslot->slot)); -} - -/* called from the single-threaded workqueue handler to remove a slot */ -static void remove_slot_worker(struct work_struct *work) -{ - struct dummy_slot *dslot = - container_of(work, struct dummy_slot, remove_work); - remove_slot(dslot); -} - -/** - * pci_rescan_slot - Rescan slot - * @temp: Device template. Should be set: bus and devfn. - * - * Tries hard not to re-enable already existing devices; - * also handles scanning of subfunctions. - */ -static int pci_rescan_slot(struct pci_dev *temp) -{ - struct pci_bus *bus = temp->bus; - struct pci_dev *dev; - int func; - u8 hdr_type; - int count = 0; - - if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) { - temp->hdr_type = hdr_type & 0x7f; - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - /* multifunction device? */ - if (!(hdr_type & 0x80)) - return count; - - /* continue scanning for other functions */ - for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) { - if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) - continue; - temp->hdr_type = hdr_type & 0x7f; - - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - } - } - - return count; -} - - -/** - * pci_rescan_bus_local - fakephp version of rescan PCI bus - * @bus: the PCI bus to rescan - * - * Call pci_rescan_slot for each possible function of the bus. - */ -static void pci_rescan_bus_local(const struct pci_bus *bus) -{ - unsigned int devfn; - struct pci_dev *dev; - int retval; - int found = 0; - dev = alloc_pci_dev(); - if (!dev) - return; - - dev->bus = (struct pci_bus*)bus; - dev->sysdata = bus->sysdata; - for (devfn = 0; devfn < 0x100; devfn += 8) { - dev->devfn = devfn; - found += pci_rescan_slot(dev); - } - - if (found) { - pci_bus_assign_resources(bus); - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Skip already-added devices */ - if (dev->is_added) - continue; - retval = pci_bus_add_device(dev); - if (retval) - dev_err(&dev->dev, - "Error adding device, continuing\n"); - else - add_slot(dev); - } - pci_bus_add_devices(bus); - } - kfree(dev); -} - -/* recursively scan all buses */ -static void pci_rescan_buses(const struct list_head *list) -{ - const struct list_head *l; - list_for_each(l,list) { - const struct pci_bus *b = pci_bus_b(l); - pci_rescan_bus_local(b); - pci_rescan_buses(&b->children); - } -} - -/* initiate rescan of all pci buses */ -static inline void pci_rescan(void) { - pci_rescan_buses(&pci_root_buses); -} - -/* called from the single-threaded workqueue handler to rescan all pci buses */ -static void pci_rescan_worker(struct work_struct *work) -{ - pci_rescan(); -} - -static int enable_slot(struct hotplug_slot *hotplug_slot) -{ - /* mis-use enable_slot for rescanning of the pci bus */ - cancel_work_sync(&pci_rescan_work); - queue_work(dummyphp_wq, &pci_rescan_work); - return 0; -} - -static int disable_slot(struct hotplug_slot *slot) -{ - struct dummy_slot *dslot; - struct pci_dev *dev; - int func; - - if (!slot) - return -ENODEV; - dslot = slot->private; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot)); - - for (func = 7; func >= 0; func--) { - dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func); - if (!dev) - continue; - - if (test_and_set_bit(0, &dslot->removed)) { - dbg("Slot already scheduled for removal\n"); - pci_dev_put(dev); - return -ENODEV; - } - - /* remove the device from the pci core */ - pci_remove_bus_device(dev); - - /* queue work item to blow away this sysfs entry and other - * parts. - */ - INIT_WORK(&dslot->remove_work, remove_slot_worker); - queue_work(dummyphp_wq, &dslot->remove_work); - - pci_dev_put(dev); - } - return 0; -} - -static void cleanup_slots (void) -{ - struct list_head *tmp; - struct list_head *next; - struct dummy_slot *dslot; - - destroy_workqueue(dummyphp_wq); - list_for_each_safe (tmp, next, &slot_list) { - dslot = list_entry (tmp, struct dummy_slot, node); - remove_slot(dslot); - } - -} - -static int __init dummyphp_init(void) -{ - info(DRIVER_DESC "\n"); - - dummyphp_wq = create_singlethread_workqueue(MY_NAME); - if (!dummyphp_wq) - return -ENOMEM; - - return pci_scan_buses(); -} - - -static void __exit dummyphp_exit(void) -{ - cleanup_slots(); -} - -module_init(dummyphp_init); -module_exit(dummyphp_exit); - -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL"); -module_param(debug, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -module_param(dup_slots, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging"); diff --git a/drivers/pci/hotplug/legacy_fakephp.c b/drivers/pci/hotplug/legacy_fakephp.c new file mode 100644 index 00000000000..2dc7828df48 --- /dev/null +++ b/drivers/pci/hotplug/legacy_fakephp.c @@ -0,0 +1,162 @@ +/* Works like the fakephp driver used to, except a little better. + * + * - It's possible to remove devices with subordinate busses. + * - New PCI devices that appear via any method, not just a fakephp triggered + * rescan, will be noticed. + * - Devices that are removed via any method, not just a fakephp triggered + * removal, will also be noticed. + * + * Uses nothing from the pci-hotplug subsystem. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../pci.h" + +struct legacy_slot { + struct kobject kobj; + struct pci_dev *dev; + struct list_head list; +}; + +static LIST_HEAD(legacy_list); + +static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + strcpy(buf, "1\n"); + return 2; +} + +static void remove_callback(void *data) +{ + pci_remove_bus_device((struct pci_dev *)data); +} + +static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + unsigned long val; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) + pci_rescan_bus(slot->dev->bus); + else + sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback, + slot->dev, THIS_MODULE); + return len; +} + +static struct attribute *legacy_attrs[] = { + &(struct attribute){ .name = "power", .mode = 0644 }, + NULL, +}; + +static void legacy_release(struct kobject *kobj) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + + pci_dev_put(slot->dev); + kfree(slot); +} + +static struct kobj_type legacy_ktype = { + .sysfs_ops = &(struct sysfs_ops){ + .store = legacy_store, .show = legacy_show + }, + .release = &legacy_release, + .default_attrs = legacy_attrs, +}; + +static int legacy_add_slot(struct pci_dev *pdev) +{ + struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL); + + if (!slot) + return -ENOMEM; + + if (kobject_init_and_add(&slot->kobj, &legacy_ktype, + &pci_slots_kset->kobj, "%s", + pdev->dev.bus_id)) { + dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); + return -EINVAL; + } + slot->dev = pci_dev_get(pdev); + + list_add(&slot->list, &legacy_list); + + return 0; +} + +static int legacy_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_ADD_DEVICE) { + legacy_add_slot(pdev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + struct legacy_slot *slot; + + list_for_each_entry(slot, &legacy_list, list) + if (slot->dev == pdev) + goto found; + + dev_warn(&pdev->dev, "Missing legacy fake slot?"); + return -ENODEV; +found: + kobject_del(&slot->kobj); + list_del(&slot->list); + kobject_put(&slot->kobj); + } + + return 0; +} + +static struct notifier_block legacy_notifier = { + .notifier_call = legacy_notify +}; + +static int __init init_legacy(void) +{ + struct pci_dev *pdev = NULL; + + /* Add existing devices */ + while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) + legacy_add_slot(pdev); + + /* Be alerted of any new ones */ + bus_register_notifier(&pci_bus_type, &legacy_notifier); + return 0; +} +module_init(init_legacy); + +static void __exit remove_legacy(void) +{ + struct legacy_slot *slot, *tmp; + + bus_unregister_notifier(&pci_bus_type, &legacy_notifier); + + list_for_each_entry_safe(slot, tmp, &legacy_list, list) { + list_del(&slot->list); + kobject_del(&slot->kobj); + kobject_put(&slot->kobj); + } +} +module_exit(remove_legacy); + + +MODULE_AUTHOR("Trent Piepho "); +MODULE_DESCRIPTION("Legacy version of the fakephp interface"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 8ffd25454738fb9ed76ee18cc0f180fb0b360401 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:51 -0600 Subject: PCI Hotplug: rename legacy_fakephp to fakephp We wanted to replace fakephp wholesale, so rename legacy_fakephp back to fakephp. Yes, this is a silly commit, but it produces a much easier patch to read and review. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/Makefile | 2 +- drivers/pci/hotplug/fakephp.c | 162 +++++++++++++++++++++++++++++++++++ drivers/pci/hotplug/legacy_fakephp.c | 162 ----------------------------------- 3 files changed, 163 insertions(+), 163 deletions(-) create mode 100644 drivers/pci/hotplug/fakephp.c delete mode 100644 drivers/pci/hotplug/legacy_fakephp.c (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 3314fe88150..2aa117c8cd8 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o # Link this last so it doesn't claim devices that have a real hotplug driver -obj-$(CONFIG_HOTPLUG_PCI_FAKE) += legacy_fakephp.o +obj-$(CONFIG_HOTPLUG_PCI_FAKE) += fakephp.o pci_hotplug-objs := pci_hotplug_core.o diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c new file mode 100644 index 00000000000..2dc7828df48 --- /dev/null +++ b/drivers/pci/hotplug/fakephp.c @@ -0,0 +1,162 @@ +/* Works like the fakephp driver used to, except a little better. + * + * - It's possible to remove devices with subordinate busses. + * - New PCI devices that appear via any method, not just a fakephp triggered + * rescan, will be noticed. + * - Devices that are removed via any method, not just a fakephp triggered + * removal, will also be noticed. + * + * Uses nothing from the pci-hotplug subsystem. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../pci.h" + +struct legacy_slot { + struct kobject kobj; + struct pci_dev *dev; + struct list_head list; +}; + +static LIST_HEAD(legacy_list); + +static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + strcpy(buf, "1\n"); + return 2; +} + +static void remove_callback(void *data) +{ + pci_remove_bus_device((struct pci_dev *)data); +} + +static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + unsigned long val; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) + pci_rescan_bus(slot->dev->bus); + else + sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback, + slot->dev, THIS_MODULE); + return len; +} + +static struct attribute *legacy_attrs[] = { + &(struct attribute){ .name = "power", .mode = 0644 }, + NULL, +}; + +static void legacy_release(struct kobject *kobj) +{ + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + + pci_dev_put(slot->dev); + kfree(slot); +} + +static struct kobj_type legacy_ktype = { + .sysfs_ops = &(struct sysfs_ops){ + .store = legacy_store, .show = legacy_show + }, + .release = &legacy_release, + .default_attrs = legacy_attrs, +}; + +static int legacy_add_slot(struct pci_dev *pdev) +{ + struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL); + + if (!slot) + return -ENOMEM; + + if (kobject_init_and_add(&slot->kobj, &legacy_ktype, + &pci_slots_kset->kobj, "%s", + pdev->dev.bus_id)) { + dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); + return -EINVAL; + } + slot->dev = pci_dev_get(pdev); + + list_add(&slot->list, &legacy_list); + + return 0; +} + +static int legacy_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_ADD_DEVICE) { + legacy_add_slot(pdev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + struct legacy_slot *slot; + + list_for_each_entry(slot, &legacy_list, list) + if (slot->dev == pdev) + goto found; + + dev_warn(&pdev->dev, "Missing legacy fake slot?"); + return -ENODEV; +found: + kobject_del(&slot->kobj); + list_del(&slot->list); + kobject_put(&slot->kobj); + } + + return 0; +} + +static struct notifier_block legacy_notifier = { + .notifier_call = legacy_notify +}; + +static int __init init_legacy(void) +{ + struct pci_dev *pdev = NULL; + + /* Add existing devices */ + while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) + legacy_add_slot(pdev); + + /* Be alerted of any new ones */ + bus_register_notifier(&pci_bus_type, &legacy_notifier); + return 0; +} +module_init(init_legacy); + +static void __exit remove_legacy(void) +{ + struct legacy_slot *slot, *tmp; + + bus_unregister_notifier(&pci_bus_type, &legacy_notifier); + + list_for_each_entry_safe(slot, tmp, &legacy_list, list) { + list_del(&slot->list); + kobject_del(&slot->kobj); + kobject_put(&slot->kobj); + } +} +module_exit(remove_legacy); + + +MODULE_AUTHOR("Trent Piepho "); +MODULE_DESCRIPTION("Legacy version of the fakephp interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pci/hotplug/legacy_fakephp.c b/drivers/pci/hotplug/legacy_fakephp.c deleted file mode 100644 index 2dc7828df48..00000000000 --- a/drivers/pci/hotplug/legacy_fakephp.c +++ /dev/null @@ -1,162 +0,0 @@ -/* Works like the fakephp driver used to, except a little better. - * - * - It's possible to remove devices with subordinate busses. - * - New PCI devices that appear via any method, not just a fakephp triggered - * rescan, will be noticed. - * - Devices that are removed via any method, not just a fakephp triggered - * removal, will also be noticed. - * - * Uses nothing from the pci-hotplug subsystem. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "../pci.h" - -struct legacy_slot { - struct kobject kobj; - struct pci_dev *dev; - struct list_head list; -}; - -static LIST_HEAD(legacy_list); - -static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); - strcpy(buf, "1\n"); - return 2; -} - -static void remove_callback(void *data) -{ - pci_remove_bus_device((struct pci_dev *)data); -} - -static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); - unsigned long val; - - if (strict_strtoul(buf, 0, &val) < 0) - return -EINVAL; - - if (val) - pci_rescan_bus(slot->dev->bus); - else - sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback, - slot->dev, THIS_MODULE); - return len; -} - -static struct attribute *legacy_attrs[] = { - &(struct attribute){ .name = "power", .mode = 0644 }, - NULL, -}; - -static void legacy_release(struct kobject *kobj) -{ - struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); - - pci_dev_put(slot->dev); - kfree(slot); -} - -static struct kobj_type legacy_ktype = { - .sysfs_ops = &(struct sysfs_ops){ - .store = legacy_store, .show = legacy_show - }, - .release = &legacy_release, - .default_attrs = legacy_attrs, -}; - -static int legacy_add_slot(struct pci_dev *pdev) -{ - struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL); - - if (!slot) - return -ENOMEM; - - if (kobject_init_and_add(&slot->kobj, &legacy_ktype, - &pci_slots_kset->kobj, "%s", - pdev->dev.bus_id)) { - dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); - return -EINVAL; - } - slot->dev = pci_dev_get(pdev); - - list_add(&slot->list, &legacy_list); - - return 0; -} - -static int legacy_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct pci_dev *pdev = to_pci_dev(data); - - if (action == BUS_NOTIFY_ADD_DEVICE) { - legacy_add_slot(pdev); - } else if (action == BUS_NOTIFY_DEL_DEVICE) { - struct legacy_slot *slot; - - list_for_each_entry(slot, &legacy_list, list) - if (slot->dev == pdev) - goto found; - - dev_warn(&pdev->dev, "Missing legacy fake slot?"); - return -ENODEV; -found: - kobject_del(&slot->kobj); - list_del(&slot->list); - kobject_put(&slot->kobj); - } - - return 0; -} - -static struct notifier_block legacy_notifier = { - .notifier_call = legacy_notify -}; - -static int __init init_legacy(void) -{ - struct pci_dev *pdev = NULL; - - /* Add existing devices */ - while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) - legacy_add_slot(pdev); - - /* Be alerted of any new ones */ - bus_register_notifier(&pci_bus_type, &legacy_notifier); - return 0; -} -module_init(init_legacy); - -static void __exit remove_legacy(void) -{ - struct legacy_slot *slot, *tmp; - - bus_unregister_notifier(&pci_bus_type, &legacy_notifier); - - list_for_each_entry_safe(slot, tmp, &legacy_list, list) { - list_del(&slot->list); - kobject_del(&slot->kobj); - kobject_put(&slot->kobj); - } -} -module_exit(remove_legacy); - - -MODULE_AUTHOR("Trent Piepho "); -MODULE_DESCRIPTION("Legacy version of the fakephp interface"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 58c610bd1a3f50820e45a7c09ec0e44d2cda15dd Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 18 Mar 2009 15:33:05 +0800 Subject: intel-iommu: Snooping control support Snooping control enabled IOMMU to guarantee DMA cache coherency and thus reduce software effort (VMM) in maintaining effective memory type. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f3f686581a9..be999ff025a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -231,6 +231,7 @@ struct dmar_domain { int flags; /* flags to find out type of domain */ int iommu_coherency;/* indicate coherency of iommu access */ + int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ @@ -421,7 +422,6 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) return g_iommus[iommu_id]; } -/* "Coherency" capability may be different across iommus */ static void domain_update_iommu_coherency(struct dmar_domain *domain) { int i; @@ -438,6 +438,29 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) } } +static void domain_update_iommu_snooping(struct dmar_domain *domain) +{ + int i; + + domain->iommu_snooping = 1; + + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); + for (; i < g_num_of_iommus; ) { + if (!ecap_sc_support(g_iommus[i]->ecap)) { + domain->iommu_snooping = 0; + break; + } + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); + } +} + +/* Some capabilities may be different across iommus */ +static void domain_update_iommu_cap(struct dmar_domain *domain) +{ + domain_update_iommu_coherency(domain); + domain_update_iommu_snooping(domain); +} + static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) { struct dmar_drhd_unit *drhd = NULL; @@ -1429,6 +1452,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_coherency = 0; + if (ecap_sc_support(iommu->ecap)) + domain->iommu_snooping = 1; + else + domain->iommu_snooping = 0; + domain->iommu_count = 1; /* always allocate the top pgd */ @@ -1557,7 +1585,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, spin_lock_irqsave(&domain->iommu_lock, flags); if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { domain->iommu_count++; - domain_update_iommu_coherency(domain); + domain_update_iommu_cap(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags); return 0; @@ -2820,7 +2848,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&domain->iommu_lock, tmp_flags); clear_bit(iommu->seq_id, &domain->iommu_bmp); domain->iommu_count--; - domain_update_iommu_coherency(domain); + domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); } @@ -2848,13 +2876,13 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) iommu_detach_dev(iommu, info->bus, info->devfn); /* clear this iommu in iommu_bmp, update iommu count - * and coherency + * and capabilities */ spin_lock_irqsave(&domain->iommu_lock, flags2); if (test_and_clear_bit(iommu->seq_id, &domain->iommu_bmp)) { domain->iommu_count--; - domain_update_iommu_coherency(domain); + domain_update_iommu_cap(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags2); -- cgit v1.2.3 From dbb9fd8630e95b6155aff658a2b5f80e95ca2bc6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 18 Mar 2009 15:33:06 +0800 Subject: iommu: Add domain_has_cap iommu_ops This iommu_op can tell if domain have a specific capability, like snooping control for Intel IOMMU, which can be used by other components of kernel to adjust the behaviour. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index be999ff025a..3778ab149ba 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3158,6 +3158,17 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static int intel_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + struct dmar_domain *dmar_domain = domain->priv; + + if (cap == IOMMU_CAP_CACHE_COHERENCY) + return dmar_domain->iommu_snooping; + + return 0; +} + static struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, @@ -3166,6 +3177,7 @@ static struct iommu_ops intel_iommu_ops = { .map = intel_iommu_map_range, .unmap = intel_iommu_unmap_range, .iova_to_phys = intel_iommu_iova_to_phys, + .domain_has_cap = intel_iommu_domain_has_cap, }; static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) -- cgit v1.2.3 From 9cf0669746be19a4906a6c48920060bcf54c708b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 18 Mar 2009 15:33:07 +0800 Subject: intel-iommu: VT-d page table to support snooping control bit The user can request to enable snooping control through VT-d page table. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3778ab149ba..a0ba568b831 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -164,7 +164,8 @@ static inline void context_clear_entry(struct context_entry *context) * 1: writable * 2-6: reserved * 7: super page - * 8-11: available + * 8-10: available + * 11: snoop behavior * 12-63: Host physcial address */ struct dma_pte { @@ -186,6 +187,11 @@ static inline void dma_set_pte_writable(struct dma_pte *pte) pte->val |= DMA_PTE_WRITE; } +static inline void dma_set_pte_snp(struct dma_pte *pte) +{ + pte->val |= DMA_PTE_SNP; +} + static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) { pte->val = (pte->val & ~3) | (prot & 3); @@ -1685,6 +1691,8 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, BUG_ON(dma_pte_addr(pte)); dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); dma_set_pte_prot(pte, prot); + if (prot & DMA_PTE_SNP) + dma_set_pte_snp(pte); domain_flush_cache(domain, pte, sizeof(*pte)); start_pfn++; index++; @@ -3105,6 +3113,8 @@ static int intel_iommu_map_range(struct iommu_domain *domain, prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) prot |= DMA_PTE_WRITE; + if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) + prot |= DMA_PTE_SNP; max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); if (dmar_domain->max_addr < max_addr) { -- cgit v1.2.3 From 54cc6954a431dad42fb73e0a50b6d318a70594f6 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 24 Mar 2009 16:38:21 -0700 Subject: pci: struct device - replace bus_id with dev_name(), dev_set_name() Cc: jbarnes@virtuousgeek.org Cc: linux-pci@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers --- drivers/pci/hotplug/cpqphp_sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index a13abf55d78..8450f4a6568 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -225,7 +225,8 @@ void cpqhp_shutdown_debugfs(void) void cpqhp_create_debugfs_files(struct controller *ctrl) { - ctrl->dentry = debugfs_create_file(ctrl->pci_dev->dev.bus_id, S_IRUGO, root, ctrl, &debug_ops); + ctrl->dentry = debugfs_create_file(dev_name(&ctrl->pci_dev->dev), + S_IRUGO, root, ctrl, &debug_ops); } void cpqhp_remove_debugfs_files(struct controller *ctrl) -- cgit v1.2.3 From 3199aa6bc8766e17b8f60820c4f78d59c25fce0e Mon Sep 17 00:00:00 2001 From: "Han, Weidong" Date: Thu, 26 Feb 2009 17:31:12 +0800 Subject: intel-iommu: fix PCI device detach from virtual machine When assign a device behind conventional PCI bridge or PCIe to PCI/PCI-x bridge to a domain, it must assign its bridge and may also need to assign secondary interface to the same domain. Dependent assignment is already there, but dependent deassignment is missed when detach device from virtual machine. This results in conventional PCI device assignment failure after it has been assigned once. This patch addes dependent deassignment, and fixes the issue. Signed-off-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a0ba568b831..e541c3bdbf0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2808,6 +2808,33 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain, return 0; } +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, + struct pci_dev *pdev) +{ + struct pci_dev *tmp, *parent; + + if (!iommu || !pdev) + return; + + /* dependent device detach */ + tmp = pci_find_upstream_pcie_bridge(pdev); + /* Secondary interface's bus number and devfn 0 */ + if (tmp) { + parent = pdev->bus->self; + while (parent != tmp) { + iommu_detach_dev(iommu, parent->bus->number, + parent->devfn); + parent = parent->bus->self; + } + if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ + iommu_detach_dev(iommu, + tmp->subordinate->number, 0); + else /* this is a legacy PCI bridge */ + iommu_detach_dev(iommu, + tmp->bus->number, tmp->devfn); + } +} + static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, struct pci_dev *pdev) { @@ -2833,6 +2860,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); iommu_detach_dev(iommu, info->bus, info->devfn); + iommu_detach_dependent_devices(iommu, pdev); free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); @@ -2882,6 +2910,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) iommu = device_to_iommu(info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); + iommu_detach_dependent_devices(iommu, info->dev); /* clear this iommu in iommu_bmp, update iommu count * and capabilities -- cgit v1.2.3 From 4cf2e75d0bec15d945972b005056c4a8731b82cf Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Feb 2009 17:23:43 +0000 Subject: intel-iommu: Enable DMAR on 32-bit kernel. If we fix a few highmem-related thinkos and a couple of printk format warnings, the Intel IOMMU driver works fine in a 32-bit kernel. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e541c3bdbf0..0c12d06bade 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2315,7 +2315,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, error: if (iova) __free_iova(&domain->iovad, iova); - printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", + printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", pci_name(pdev), size, (unsigned long long)paddr, dir); return 0; } @@ -2411,7 +2411,7 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, start_addr = iova->pfn_lo << PAGE_SHIFT; size = aligned_size((u64)dev_addr, size); - pr_debug("Device %s unmapping: %lx@%llx\n", + pr_debug("Device %s unmapping: %zx@%llx\n", pci_name(pdev), size, (unsigned long long)start_addr); /* clear the whole page */ @@ -2469,8 +2469,6 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, order); } -#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) - void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, int dir) { @@ -2480,7 +2478,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, unsigned long start_addr; struct iova *iova; size_t size = 0; - void *addr; + phys_addr_t addr; struct scatterlist *sg; struct intel_iommu *iommu; @@ -2496,7 +2494,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, if (!iova) return; for_each_sg(sglist, sg, nelems, i) { - addr = SG_ENT_VIRT_ADDRESS(sg); + addr = page_to_phys(sg_page(sg)) + sg->offset; size += aligned_size((u64)addr, sg->length); } @@ -2523,7 +2521,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, for_each_sg(sglist, sg, nelems, i) { BUG_ON(!sg_page(sg)); - sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)); + sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; sg->dma_length = sg->length; } return nelems; @@ -2532,7 +2530,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, int dir) { - void *addr; + phys_addr_t addr; int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; @@ -2556,8 +2554,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, iommu = domain_get_iommu(domain); for_each_sg(sglist, sg, nelems, i) { - addr = SG_ENT_VIRT_ADDRESS(sg); - addr = (void *)virt_to_phys(addr); + addr = page_to_phys(sg_page(sg)) + sg->offset; size += aligned_size((u64)addr, sg->length); } @@ -2580,8 +2577,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, start_addr = iova->pfn_lo << PAGE_SHIFT; offset = 0; for_each_sg(sglist, sg, nelems, i) { - addr = SG_ENT_VIRT_ADDRESS(sg); - addr = (void *)virt_to_phys(addr); + addr = page_to_phys(sg_page(sg)) + sg->offset; size = aligned_size((u64)addr, sg->length); ret = domain_page_mapping(domain, start_addr + offset, ((u64)addr) & PAGE_MASK, -- cgit v1.2.3 From afeeb7cebbd223ffee303fd8de4ba97458b13581 Mon Sep 17 00:00:00 2001 From: "Zhao, Yu" Date: Fri, 13 Feb 2009 17:55:49 +0800 Subject: intel-iommu: Fix address wrap on 32-bit kernel. The problem is in dma_pte_clear_range and dma_pte_free_pagetable. When intel_unmap_single and intel_unmap_sg call them, the end address may be zero if the 'start_addr + size' rounds up. So no PTE gets cleared. The uncleared PTE fires the BUG_ON when it's used again to create new mappings. After I modified dma_pte_clear_range a bit, the BUG_ON is gone. Tested both 32 and 32 PAE modes on Intel X58 and Q35 platforms. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 0c12d06bade..002c8b95edf 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -718,15 +718,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) { int addr_width = agaw_to_width(domain->agaw); + int npages; start &= (((u64)1) << addr_width) - 1; end &= (((u64)1) << addr_width) - 1; /* in case it's partial page */ start = PAGE_ALIGN(start); end &= PAGE_MASK; + npages = (end - start) / VTD_PAGE_SIZE; /* we don't need lock here, nobody else touches the iova range */ - while (start < end) { + while (npages--) { dma_pte_clear_one(domain, start); start += VTD_PAGE_SIZE; } -- cgit v1.2.3 From 853346e4354c948b50a6fb0002f8af2cf5fbf2ae Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 21 Mar 2009 22:05:11 +0800 Subject: PCI: fix conflict between SR-IOV and config space sizing New pci_cfg_space_size() needs invalid pdev->class, put it in the right place in the pci_setup_device(). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 56c71e585f3..e2f3dd098cf 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -713,7 +713,6 @@ int pci_setup_device(struct pci_dev *dev) dev->dev.bus = &pci_bus_type; dev->hdr_type = hdr_type & 0x7f; dev->multifunction = !!(hdr_type & 0x80); - dev->cfg_size = pci_cfg_space_size(dev); dev->error_state = pci_channel_io_normal; set_pcie_port_type(dev); @@ -738,6 +737,9 @@ int pci_setup_device(struct pci_dev *dev) dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n", dev->vendor, dev->device, class, dev->hdr_type); + /* need to have dev->class ready */ + dev->cfg_size = pci_cfg_space_size(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; @@ -959,9 +961,6 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return NULL; } - /* need to have dev->class ready */ - dev->cfg_size = pci_cfg_space_size(dev); - return dev; } -- cgit v1.2.3 From 7ae0567fd3f4f51d55c4c638ecc6836347992de2 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 26 Mar 2009 16:49:52 +0900 Subject: PCI: fix kernel oops on bridge removal Fix the following kernel oops problem that happens when removing PCI bridge with pciehp loaded. It should also occur with other hotplug driver that is implemented as a bridge's driver. [ 459.997257] pciehp 0000:2f:04.0:pcie24: unloading service driver pciehp [ 459.997495] general protection fault: 0000 [#1] SMP [ 459.997737] last sysfs file: /sys/devices/pci0000:00/0000:00:04.0/0000:2e:00.0/0000:2f:04.0/remove [ 459.997964] CPU 4 [ 459.998129] Modules linked in: pciehp ipv6 autofs4 hidp rfcomm l2cap bluetooth sunrpc cpufreq_ondemand acpi_cpufreq dm_mirror dm_region_hash dm_log dm_multipath scsi_dh dm_mod sbs sbshc battery ac parport_pc lp parport mptspi mptscsih mptbase scsi_transport_spi e1000e sg sr_mod cdrom button serio_raw i2c_i801 i2c_core shpchp pcspkr ata_piix libata megaraid_sas sd_mod scsi_mod crc_t10dif ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last unloaded: microcode] [ 459.998129] Pid: 56, comm: events/4 Not tainted 2.6.29-rc8-kk #1 PRIMERGY [ 459.998129] RIP: 0010:[] [] pci_slot_release+0x37/0x100 [ 459.998129] RSP: 0018:ffff88083b3bf9e0 EFLAGS: 00010246 [ 459.998129] RAX: ffff88083adc5158 RBX: ffff880836c1bc80 RCX: 6b6b6b6b6b6b6b6b [ 459.998129] RDX: 0000000000000000 RSI: ffffffff803a77f0 RDI: ffff880836c1bc48 [ 459.998129] RBP: ffff88083b3bfa00 R08: 0000000000000002 R09: 0000000000000000 [ 459.998129] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880836c1bc48 [ 459.998129] R13: ffff880836c1bc20 R14: ffff880836c1bc48 R15: ffff880836d1ec38 [ 459.998129] FS: 0000000000000000(0000) GS:ffff88083ccc3770(0000) knlGS:0000000000000000 [ 459.998129] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b [ 459.998129] CR2: 00007f1562f1d558 CR3: 0000000838090000 CR4: 00000000000006e0 [ 459.998129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 459.998129] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 459.998129] Process events/4 (pid: 56, threadinfo ffff88083b3be000, task ffff88083b3b3e40) [ 459.998129] Stack: [ 459.998129] ffff880836c1bc80 ffff880836c1bc48 ffffffff80793320 ffff88083b0d0960 [ 459.998129] ffff88083b3bfa30 ffffffff803a788a ffff880836c1bc80 ffffffff803a77f0 [ 459.998129] ffff880836c1bc20 ffff880836d1ec38 ffff88083b3bfa50 ffffffff803a8ce7 [ 459.998129] Call Trace: [ 459.998129] [] kobject_release+0x9a/0x290 [ 459.998129] [] ? kobject_release+0x0/0x290 [ 459.998129] [] kref_put+0x37/0x80 [ 459.998129] [] kobject_put+0x27/0x60 [ 459.998129] [] ? pci_destroy_slot+0x3c/0xc0 [ 459.998129] [] pci_destroy_slot+0x45/0xc0 [ 459.998129] [] pci_hp_deregister+0x13d/0x210 [ 459.998129] [] cleanup_slots+0x2d/0x80 [pciehp] [ 459.998129] [] pciehp_remove+0x15/0x30 [pciehp] [ 459.998129] [] pcie_port_remove_service+0x69/0x90 [ 459.998129] [] __device_release_driver+0x59/0x90 [ 459.998129] [] device_release_driver+0x2b/0x40 [ 459.998129] [] bus_remove_device+0xa6/0x120 [ 459.998129] [] device_del+0x12b/0x190 [ 459.998129] [] ? remove_iter+0x0/0x40 [ 459.998129] [] device_unregister+0x26/0x70 [ 459.998129] [] remove_iter+0x2f/0x40 [ 459.998129] [] device_for_each_child+0x33/0x60 [ 459.998129] [] ? sysfs_schedule_callback_work+0x0/0x50 [ 459.998129] [] pcie_port_device_remove+0x30/0x80 [ 459.998129] [] pcie_portdrv_remove+0x11/0x20 [ 459.998129] [] pci_device_remove+0x32/0x70 [ 459.998129] [] __device_release_driver+0x59/0x90 [ 459.998129] [] device_release_driver+0x2b/0x40 [ 459.998129] [] bus_remove_device+0xa6/0x120 [ 459.998129] [] device_del+0x12b/0x190 [ 459.998129] [] device_unregister+0x26/0x70 [ 459.998129] [] pci_stop_dev+0x49/0x60 [ 459.998129] [] pci_remove_bus_device+0x40/0xc0 [ 459.998129] [] remove_callback+0x29/0x40 [ 459.998129] [] sysfs_schedule_callback_work+0x1f/0x50 [ 459.998129] [] run_workqueue+0x15a/0x230 [ 459.998129] [] ? run_workqueue+0x108/0x230 [ 459.998129] [] worker_thread+0x9f/0x100 [ 459.998129] [] ? autoremove_wake_function+0x0/0x40 [ 459.998129] [] ? worker_thread+0x0/0x100 [ 459.998129] [] kthread+0x4d/0x80 [ 459.998129] [] child_rip+0xa/0x20 [ 459.998129] [] ? restore_args+0x0/0x30 [ 459.998129] [] ? kthread+0x0/0x80 [ 459.998129] [] ? child_rip+0x0/0x20 [ 459.998129] Code: 56 49 89 fe 41 55 4c 8d 6f d8 41 54 53 74 09 f6 05 b8 05 c7 00 08 75 72 49 8b 45 00 48 8b 48 28 eb 05 66 90 48 89 f1 49 8b 45 00 <48> 8b 31 48 83 c0 28 0f 18 0e 48 39 c1 74 1c 8b 41 38 41 0f b6 [ 459.998129] RIP [] pci_slot_release+0x37/0x100 [ 459.998129] RSP [ 460.018595] ---[ end trace 5a08d2095374aedc ]--- The pci_remove_bus_device() removes all buses and devices under the bridge, and then removes the bridge. So the remove() callback of the hotplug drivers implemented as a bridge's driver is executed after the struct pci_bus of the bridge's secondary bus is removed. The remove() callback of those driver unregisters the slot using pci_destroy_slot(), and slot's release callback refers to the the struct pci_bus that was already freed. This is the cause of the kernel oops. This patch solves the problem by stopping bus drivers before removing the bridge and its child bus and devices. Acked-by: Alex Chiang Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/remove.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index caf8e1eae45..86503c14ce7 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -95,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus); */ void pci_remove_bus_device(struct pci_dev *dev) { + pci_stop_bus_device(dev); if (dev->subordinate) { struct pci_bus *b = dev->subordinate; -- cgit v1.2.3 From 7bb2cb3e90dc49be1cd14956c155451499c857a7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 26 Mar 2009 18:34:33 +1100 Subject: PCI: update fakephp for bus_id removal Get rid of a new use of bus_id that snuck in. Signed-off-by: Stephen Rothwell Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 2dc7828df48..6151389fd90 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../pci.h" struct legacy_slot { @@ -88,7 +89,7 @@ static int legacy_add_slot(struct pci_dev *pdev) if (kobject_init_and_add(&slot->kobj, &legacy_ktype, &pci_slots_kset->kobj, "%s", - pdev->dev.bus_id)) { + dev_name(&pdev->dev))) { dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); return -EINVAL; } -- cgit v1.2.3 From 898585172fa729513d8636257b44bd1cfd279096 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 16 Feb 2009 02:55:47 +0800 Subject: PCI: save and restore PCIe 2.0 registers PCIe 2.0 defines several new registers (Device Control 2, Link Control 2, and Slot Control 2). Save and retore them in pci_save_pcie_state() and pci_restore_pcie_state(). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 676bbcbc272..59569b8cf1d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -647,6 +647,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) EXPORT_SYMBOL(pci_choose_state); +#define PCI_EXP_SAVE_REGS 7 + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; @@ -668,6 +670,9 @@ static int pci_save_pcie_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -688,6 +693,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } @@ -1372,7 +1380,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) { int error; - error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16)); + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, + PCI_EXP_SAVE_REGS * sizeof(u16)); if (error) dev_err(&dev->dev, "unable to preallocate PCI Express save buffer\n"); -- cgit v1.2.3 From de7453065d5d5243686467998f1fcc58d20e0a7c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Mar 2009 19:29:41 -0700 Subject: PCI: don't enable too much HT MSI mapping Impact: fix bug Prakash reported that his c51-mcp51 system ondie sound card doesn't work MSI but if he hack out the HT-MSI on mcp51, the MSI will work well with sound card. This patch reworks nv_msi_ht_cap_quirk() and will only avoid enabling ht_msi on devices following that root device. Reported-by: Prakash Punnoor Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 118 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 36 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 7ddcfc65e79..faf02dd0001 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2147,6 +2147,65 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15, nvenet_msi_disable); +static int __devinit ht_check_msi_mapping(struct pci_dev *dev) +{ + int pos, ttl = 48; + int found = 0; + + /* check if there is HT MSI cap or enabled on this device */ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); + while (pos && ttl--) { + u8 flags; + + if (found < 1) + found = 1; + if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, + &flags) == 0) { + if (flags & HT_MSI_FLAGS_ENABLE) { + if (found < 2) { + found = 2; + break; + } + } + } + pos = pci_find_next_ht_capability(dev, pos, + HT_CAPTYPE_MSI_MAPPING); + } + + return found; +} + +static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge) +{ + struct pci_dev *dev; + int pos; + int i, dev_no; + int found = 0; + + dev_no = host_bridge->devfn >> 3; + for (i = dev_no + 1; i < 0x20; i++) { + dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0)); + if (!dev) + continue; + + /* found next host bridge ?*/ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + if (pos != 0) { + pci_dev_put(dev); + break; + } + + if (ht_check_msi_mapping(dev)) { + found = 1; + pci_dev_put(dev); + break; + } + pci_dev_put(dev); + } + + return found; +} + static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) { struct pci_dev *host_bridge; @@ -2171,6 +2230,10 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) if (!found) return; + /* don't enable host_bridge with leaf directly here */ + if (host_bridge == dev && host_bridge_with_leaf(host_bridge)) + goto out; + /* root did that ! */ if (msi_ht_cap_enabled(host_bridge)) goto out; @@ -2201,44 +2264,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev) } } -static int __devinit ht_check_msi_mapping(struct pci_dev *dev) -{ - int pos, ttl = 48; - int found = 0; - - /* check if there is HT MSI cap or enabled on this device */ - pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); - while (pos && ttl--) { - u8 flags; - - if (found < 1) - found = 1; - if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, - &flags) == 0) { - if (flags & HT_MSI_FLAGS_ENABLE) { - if (found < 2) { - found = 2; - break; - } - } - } - pos = pci_find_next_ht_capability(dev, pos, - HT_CAPTYPE_MSI_MAPPING); - } - - return found; -} - -static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) +static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all) { struct pci_dev *host_bridge; int pos; int found; - /* Enabling HT MSI mapping on this device breaks MCP51 */ - if (dev->device == 0x270) - return; - /* check if there is HT MSI cap or enabled on this device */ found = ht_check_msi_mapping(dev); @@ -2262,7 +2293,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is to HT */ if (found == 1) { /* it is not enabled, try to enable it */ - nv_ht_enable_msi_mapping(dev); + if (all) + ht_enable_msi_mapping(dev); + else + nv_ht_enable_msi_mapping(dev); } return; } @@ -2274,8 +2308,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is not to HT, disable HT MSI mapping on this device */ ht_disable_msi_mapping(dev); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk); + +static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 1); +} + +static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 0); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev) { -- cgit v1.2.3 From 877d03105d04b2c13e241130277fa69c8d2564f0 Mon Sep 17 00:00:00 2001 From: Nick Andrew Date: Mon, 26 Jan 2009 11:06:57 +0100 Subject: trivial: Fix misspelling of firmware Fix misspelling of firmware. Signed-off-by: Nick Andrew Signed-off-by: Jiri Kosina --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6d6120007af..dab33a21d49 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -550,7 +550,7 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) * @dev: PCI device to handle. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. * - * Transition a device to a new power state, using the platform formware and/or + * Transition a device to a new power state, using the platform firmware and/or * the device's PCI PM registers. * * RETURN VALUE: -- cgit v1.2.3 From eeafda70bf2807544e96fa4e52b2433cd470ff46 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 29 Mar 2009 12:30:05 -0700 Subject: PCI: fix HT MSI mapping fix Impact: fix bug This patch reworks the nv_msi_ht_cap_quirk() and will only try to avoid to enable ht_msi on device following that root dev, and don't touch that root dev, but only do that trick with end_device on the chain. Reported-by: Prakash Punnoor Tested-by: Prakash Punnoor Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index faf02dd0001..9b2f0d96900 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2206,6 +2206,33 @@ static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge) return found; } +#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */ +#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */ + +static int __devinit is_end_of_ht_chain(struct pci_dev *dev) +{ + int pos, ctrl_off; + int end = 0; + u16 flags, ctrl; + + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + + if (!pos) + goto out; + + pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags); + + ctrl_off = ((flags >> 10) & 1) ? + PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1; + pci_read_config_word(dev, pos + ctrl_off, &ctrl); + + if (ctrl & (1 << 6)) + end = 1; + +out: + return end; +} + static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) { struct pci_dev *host_bridge; @@ -2230,8 +2257,9 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) if (!found) return; - /* don't enable host_bridge with leaf directly here */ - if (host_bridge == dev && host_bridge_with_leaf(host_bridge)) + /* don't enable end_device/host_bridge with leaf directly here */ + if (host_bridge == dev && is_end_of_ht_chain(host_bridge) && + host_bridge_with_leaf(host_bridge)) goto out; /* root did that ! */ -- cgit v1.2.3 From 57ef80266e14ecc363380268fedc64e519047b4a Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Mon, 16 Mar 2009 22:39:56 +0100 Subject: PCI PM: Consistently use variable name "error" for pm call return values I noticed two functions use a variable "i" to store the return value of PM function calls while the rest of the file uses "error". As "i" normally indicates a counter of some sort it seems better to keep this consistent. Signed-off-by: Frans Pop Signed-off-by: Jesse Barnes Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 93eac142358..a5f11ad975b 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -352,17 +352,17 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int i = 0; + int error = 0; if (drv && drv->suspend) { pci_power_t prev = pci_dev->current_state; pci_dev->state_saved = false; - i = drv->suspend(pci_dev, state); - suspend_report_result(drv->suspend, i); - if (i) - return i; + error = drv->suspend(pci_dev, state); + suspend_report_result(drv->suspend, error); + if (error) + return error; if (pci_dev->state_saved) goto Fixup; @@ -385,20 +385,20 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return i; + return error; } static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int i = 0; + int error = 0; if (drv && drv->suspend_late) { - i = drv->suspend_late(pci_dev, state); - suspend_report_result(drv->suspend_late, i); + error = drv->suspend_late(pci_dev, state); + suspend_report_result(drv->suspend_late, error); } - return i; + return error; } static int pci_legacy_resume_early(struct device *dev) -- cgit v1.2.3 From f00a20ef46b1795c495869163a9a7333f899713a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:08 +0100 Subject: PCI PM: Use pci_set_power_state during early resume Once we have allowed timer interrupts to be enabled during the early phase of resuming devices, we are now able to use the generic pci_set_power_state() to put PCI devices into D0 at that time. Then, the platform-specific PM code will have a chance to handle devices that don't implement the native PCI PM or that require some additional, platform-specific operations to be carried out to power them up. Also, by doing this we can simplify the code quite a bit. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci.c | 48 +++++++++--------------------------------------- 1 file changed, 9 insertions(+), 39 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6d6120007af..3acb1da296d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -426,7 +426,6 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * given PCI device * @dev: PCI device to handle. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * @wait: If 'true', wait for the device to change its power state * * RETURN VALUE: * -EINVAL if the requested state is invalid. @@ -435,8 +434,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int -pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) +static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) { u16 pmcsr; bool need_restore = false; @@ -481,10 +479,8 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) break; case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot - && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) { + && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) need_restore = true; - wait = true; - } /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -494,9 +490,6 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) /* enter specified state */ pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); - if (!wait) - return 0; - /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) @@ -521,7 +514,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) if (need_restore) pci_restore_bars(dev); - if (wait && dev->bus->self) + if (dev->bus->self) pcie_aspm_pm_state_change(dev->bus->self); return 0; @@ -591,7 +584,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) return 0; - error = pci_raw_set_power_state(dev, state, true); + error = pci_raw_set_power_state(dev, state); if (state > PCI_D0 && platform_pci_power_manageable(dev)) { /* Allow the platform to finalize the transition */ @@ -1390,37 +1383,14 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) */ int pci_restore_standard_config(struct pci_dev *dev) { - pci_power_t prev_state; - int error; - - pci_update_current_state(dev, PCI_D0); - - prev_state = dev->current_state; - if (prev_state == PCI_D0) - goto Restore; - - error = pci_raw_set_power_state(dev, PCI_D0, false); - if (error) - return error; + pci_update_current_state(dev, PCI_UNKNOWN); - /* - * This assumes that we won't get a bus in B2 or B3 from the BIOS, but - * we've made this assumption forever and it appears to be universally - * satisfied. - */ - switch(prev_state) { - case PCI_D3cold: - case PCI_D3hot: - mdelay(pci_pm_d3_delay); - break; - case PCI_D2: - udelay(PCI_PM_D2_DELAY); - break; + if (dev->current_state != PCI_D0) { + int error = pci_set_power_state(dev, PCI_D0); + if (error) + return error; } - pci_update_current_state(dev, PCI_D0); - - Restore: return dev->state_saved ? pci_restore_state(dev) : 0; } -- cgit v1.2.3 From 0128a89cf75124500b5b69f0c3c7b7c5aa60676f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:18 +0100 Subject: PCI PM: Move pci_restore_standard_config to pci-driver.c Move pci_restore_standard_config() from pci.c to pci-driver.c and make it static. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 17 +++++++++++++++++ drivers/pci/pci.c | 21 --------------------- drivers/pci/pci.h | 1 - 3 files changed, 17 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a5f11ad975b..8395206d1ae 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -423,6 +423,23 @@ static int pci_legacy_resume(struct device *dev) /* Auxiliary functions used by the new power management framework */ +/** + * pci_restore_standard_config - restore standard config registers of PCI device + * @pci_dev: PCI device to handle + */ +static int pci_restore_standard_config(struct pci_dev *pci_dev) +{ + pci_update_current_state(pci_dev, PCI_UNKNOWN); + + if (pci_dev->current_state != PCI_D0) { + int error = pci_set_power_state(pci_dev, PCI_D0); + if (error) + return error; + } + + return pci_dev->state_saved ? pci_restore_state(pci_dev) : 0; +} + static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) { pci_restore_standard_config(pci_dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3acb1da296d..a4ecc2f1512 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1373,27 +1373,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) "unable to preallocate PCI-X save buffer\n"); } -/** - * pci_restore_standard_config - restore standard config registers of PCI device - * @dev: PCI device to handle - * - * This function assumes that the device's configuration space is accessible. - * If the device needs to be powered up, the function will wait for it to - * change the state. - */ -int pci_restore_standard_config(struct pci_dev *dev) -{ - pci_update_current_state(dev, PCI_UNKNOWN); - - if (dev->current_state != PCI_D0) { - int error = pci_set_power_state(dev, PCI_D0); - if (error) - return error; - } - - return dev->state_saved ? pci_restore_state(dev) : 0; -} - /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 07c0aa5275e..149fff65891 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -49,7 +49,6 @@ extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); -extern int pci_restore_standard_config(struct pci_dev *dev); static inline bool pci_is_bridge(struct pci_dev *pci_dev) { -- cgit v1.2.3 From 46939f8b15e44f065d052e89ea4f2adc81fdc740 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:26 +0100 Subject: PCI PM: Put devices into low power states during late suspend (rev. 2) Once we have allowed timer interrupts to be enabled during the late phase of suspending devices, we are now able to use the generic pci_set_power_state() to put PCI devices into low power states at that time. We can also use some related platform callbacks, like the ones preparing devices for wake-up, during the late suspend. Doing this will allow us to avoid the race condition where a device using shared interrupts is put into a low power state with interrupts enabled and then an interrupt (for another device) comes in and confuses its driver. At the same time, devices that don't support the native PCI PM or that require some additional, platform-specific operations to be carried out to put them into low power states will be handled as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 134 ++++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 53 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 8395206d1ae..3c1831c82f5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -352,53 +352,60 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int error = 0; + + pci_dev->state_saved = false; if (drv && drv->suspend) { pci_power_t prev = pci_dev->current_state; - - pci_dev->state_saved = false; + int error; error = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, error); if (error) return error; - if (pci_dev->state_saved) - goto Fixup; - - if (pci_dev->current_state != PCI_D0 + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, "PCI PM: Device state not saved by %pF\n", drv->suspend); - goto Fixup; } } - pci_save_state(pci_dev); - /* - * This is for compatibility with existing code with legacy PM support. - */ - pci_pm_set_unknown_state(pci_dev); - - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return error; + return 0; } static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - int error = 0; if (drv && drv->suspend_late) { + pci_power_t prev = pci_dev->current_state; + int error; + error = drv->suspend_late(pci_dev, state); suspend_report_result(drv->suspend_late, error); + if (error) + return error; + + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: Device state not saved by %pF\n", + drv->suspend_late); + return 0; + } } - return error; + + if (!pci_dev->state_saved) + pci_save_state(pci_dev); + + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_legacy_resume_early(struct device *dev) @@ -460,7 +467,6 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) /* Disable non-bridge devices without PM support */ if (!pci_is_bridge(pci_dev)) pci_disable_enabled_device(pci_dev); - pci_save_state(pci_dev); } static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) @@ -526,24 +532,14 @@ static int pci_pm_suspend(struct device *dev) if (error) return error; - if (pci_dev->state_saved) - goto Fixup; - - if (pci_dev->current_state != PCI_D0 + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, "PCI PM: State of device not saved by %pF\n", pm->suspend); - goto Fixup; } } - if (!pci_dev->state_saved) { - pci_save_state(pci_dev); - if (!pci_is_bridge(pci_dev)) - pci_prepare_to_sleep(pci_dev); - } - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); @@ -553,21 +549,41 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; - int error = 0; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); - if (drv && drv->pm && drv->pm->suspend_noirq) { - error = drv->pm->suspend_noirq(dev); - suspend_report_result(drv->pm->suspend_noirq, error); + if (!pm) + return 0; + + if (pm->suspend_noirq) { + pci_power_t prev = pci_dev->current_state; + int error; + + error = pm->suspend_noirq(dev); + suspend_report_result(pm->suspend_noirq, error); + if (error) + return error; + + if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: State of device not saved by %pF\n", + pm->suspend_noirq); + return 0; + } } - if (!error) - pci_pm_set_unknown_state(pci_dev); + if (!pci_dev->state_saved) { + pci_save_state(pci_dev); + if (!pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + } - return error; + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_pm_resume_noirq(struct device *dev) @@ -650,9 +666,6 @@ static int pci_pm_freeze(struct device *dev) return error; } - if (!pci_dev->state_saved) - pci_save_state(pci_dev); - return 0; } @@ -660,20 +673,25 @@ static int pci_pm_freeze_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_FREEZE); if (drv && drv->pm && drv->pm->freeze_noirq) { + int error; + error = drv->pm->freeze_noirq(dev); suspend_report_result(drv->pm->freeze_noirq, error); + if (error) + return error; } - if (!error) - pci_pm_set_unknown_state(pci_dev); + if (!pci_dev->state_saved) + pci_save_state(pci_dev); - return error; + pci_pm_set_unknown_state(pci_dev); + + return 0; } static int pci_pm_thaw_noirq(struct device *dev) @@ -716,7 +734,6 @@ static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -729,33 +746,44 @@ static int pci_pm_poweroff(struct device *dev) pci_dev->state_saved = false; if (pm->poweroff) { + int error; + error = pm->poweroff(dev); suspend_report_result(pm->poweroff, error); + if (error) + return error; } - if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) - pci_prepare_to_sleep(pci_dev); - Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); - return error; + return 0; } static int pci_pm_poweroff_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error = 0; if (pci_has_legacy_pm_support(to_pci_dev(dev))) return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); - if (drv && drv->pm && drv->pm->poweroff_noirq) { + if (!drv || !drv->pm) + return 0; + + if (drv->pm->poweroff_noirq) { + int error; + error = drv->pm->poweroff_noirq(dev); suspend_report_result(drv->pm->poweroff_noirq, error); + if (error) + return error; } - return error; + if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + + return 0; } static int pci_pm_restore_noirq(struct device *dev) -- cgit v1.2.3 From 4a865905f685eaefaedf6ade362323dc52aa703b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:36 +0100 Subject: PCI PM: Make pci_set_power_state() handle devices with no PM support There is a problem with PCI devices without any PM support (either native or through the platform) that pci_set_power_state() always returns error code for them, even if they are being put into D0. However, such devices are always in D0, so pci_set_power_state() should return success when attempting to put such a device into D0. It also should update the current_state field for these devices as appropriate. This modification is necessary so that the standard configuration registers of these devices are successfully restored by pci_restore_standard_config() during the "early" phase of resume. In addition, pci_set_power_state() should check the value of current_state before calling the platform to change the power state of the device to avoid doing that unnecessarily. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a4ecc2f1512..979ceb1d37e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -439,6 +439,10 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) u16 pmcsr; bool need_restore = false; + /* Check if we're already there */ + if (dev->current_state == state) + return 0; + if (!dev->pm_cap) return -EIO; @@ -449,10 +453,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ - if (dev->current_state == state) { - /* we're already there */ - return 0; - } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold + if (state != PCI_D0 && dev->current_state <= PCI_D3cold && dev->current_state > state) { dev_err(&dev->dev, "invalid power transition " "(from state %d to %d)\n", dev->current_state, state); @@ -570,12 +571,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) */ return 0; - if (state == PCI_D0 && platform_pci_power_manageable(dev)) { + /* Check if we're already there */ + if (dev->current_state == state) + return 0; + + if (state == PCI_D0) { /* * Allow the platform to change the state, for example via ACPI * _PR0, _PS0 and some such, but do not trust it. */ - int ret = platform_pci_set_power_state(dev, PCI_D0); + int ret = platform_pci_power_manageable(dev) ? + platform_pci_set_power_state(dev, PCI_D0) : 0; if (!ret) pci_update_current_state(dev, PCI_D0); } -- cgit v1.2.3 From 931ff68a5a53fa84bcdf9b1b179a80e54e034bd0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2009 22:40:50 +0100 Subject: PCI PM: Restore config spaces of all devices during early resume At present the configuration spaces of PCI devices that have no drivers or no PM support in the drivers (either legacy or through a pm object) are not saved during suspend and, consequently, they are not restored during resume. This generally may lead to the state of the system being slightly inconsistent after the resume, so it's better to save and restore the configuration spaces of these devices as well. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3c1831c82f5..267de88551c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -516,13 +516,13 @@ static int pci_pm_suspend(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); goto Fixup; } - pci_dev->state_saved = false; - if (pm->suspend) { pci_power_t prev = pci_dev->current_state; int error; @@ -554,8 +554,10 @@ static int pci_pm_suspend_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); - if (!pm) + if (!pm) { + pci_save_state(pci_dev); return 0; + } if (pm->suspend_noirq) { pci_power_t prev = pci_dev->current_state; @@ -650,13 +652,13 @@ static int pci_pm_freeze(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); return 0; } - pci_dev->state_saved = false; - if (pm->freeze) { int error; @@ -738,13 +740,13 @@ static int pci_pm_poweroff(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); + pci_dev->state_saved = false; + if (!pm) { pci_pm_default_suspend(pci_dev); goto Fixup; } - pci_dev->state_saved = false; - if (pm->poweroff) { int error; -- cgit v1.2.3 From 0e5dd46b761195356065a30611f265adec286d0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Mar 2009 22:51:40 +0100 Subject: PCI PM: Introduce __pci_[start|complete]_power_transition() (rev. 2) The radeonfb driver needs to program the device's PMCSR directly due to some quirky hardware it has to handle (see http://bugzilla.kernel.org/show_bug.cgi?id=12846 for details) and after doing that it needs to call the platform (usually ACPI) to finish the power transition of the device. Currently it uses pci_set_power_state() for this purpose, however making a specific assumption about the internal behavior of this function, which has changed recently so that this assumption is no longer satisfied. For this reason, introduce __pci_complete_power_transition() that may be called by the radeonfb driver to complete the power transition of the device. For symmetry, introduce __pci_start_power_transition(). Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes --- drivers/pci/pci.c | 69 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 979ceb1d37e..de54fd643ba 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -539,6 +539,53 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) } } +/** + * pci_platform_power_transition - Use platform to change device power state + * @dev: PCI device to handle. + * @state: State to put the device into. + */ +static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state) +{ + int error; + + if (platform_pci_power_manageable(dev)) { + error = platform_pci_set_power_state(dev, state); + if (!error) + pci_update_current_state(dev, state); + } else { + error = -ENODEV; + /* Fall back to PCI_D0 if native PM is not supported */ + pci_update_current_state(dev, PCI_D0); + } + + return error; +} + +/** + * __pci_start_power_transition - Start power transition of a PCI device + * @dev: PCI device to handle. + * @state: State to put the device into. + */ +static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state) +{ + if (state == PCI_D0) + pci_platform_power_transition(dev, PCI_D0); +} + +/** + * __pci_complete_power_transition - Complete power transition of a PCI device + * @dev: PCI device to handle. + * @state: State to put the device into. + * + * This function should not be called directly by device drivers. + */ +int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state) +{ + return state > PCI_D0 ? + pci_platform_power_transition(dev, state) : -EINVAL; +} +EXPORT_SYMBOL_GPL(__pci_complete_power_transition); + /** * pci_set_power_state - Set the power state of a PCI device * @dev: PCI device to handle. @@ -575,16 +622,8 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (dev->current_state == state) return 0; - if (state == PCI_D0) { - /* - * Allow the platform to change the state, for example via ACPI - * _PR0, _PS0 and some such, but do not trust it. - */ - int ret = platform_pci_power_manageable(dev) ? - platform_pci_set_power_state(dev, PCI_D0) : 0; - if (!ret) - pci_update_current_state(dev, PCI_D0); - } + __pci_start_power_transition(dev, state); + /* This device is quirked not to be put into D3, so don't put it in D3 */ if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) @@ -592,14 +631,8 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) error = pci_raw_set_power_state(dev, state); - if (state > PCI_D0 && platform_pci_power_manageable(dev)) { - /* Allow the platform to finalize the transition */ - int ret = platform_pci_set_power_state(dev, state); - if (!ret) { - pci_update_current_state(dev, state); - error = 0; - } - } + if (!__pci_complete_power_transition(dev, state)) + error = 0; return error; } -- cgit v1.2.3 From 8efb8c76fcdccf5050c0ea059dac392789baaff2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Mar 2009 21:46:27 +0200 Subject: PCI PM: Make pci_prepare_to_sleep() disable wake-up if needed If the device is not supposed to wake up the system, ie. when device_may_wakeup(&dev->dev) returns 'false', pci_prepare_to_sleep() should pass 'false' to pci_enable_wake() so that it calls the platform to disable the wake-up capability of the device. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index de54fd643ba..0195066251e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1263,7 +1263,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; - pci_enable_wake(dev, target_state, true); + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); -- cgit v1.2.3