From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/percpu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393e71e..2edacc8e6b8 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,12 +23,19 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.page_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -- cgit v1.2.3 From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b84755..3ab07e42558 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e42558..c4e96eb2961 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; -- cgit v1.2.3 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 120 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb2961..8a0238dd2c1 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c1..324bbca85a2 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3 From 72b1e22dfcad1daca6906148fd956ffe404bb0bc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:45 -0700 Subject: x64, x2apic/intr-remap: generic irq migration support from process context Generic infrastructure for migrating the irq from the process context in the presence of CONFIG_GENERIC_PENDING_IRQ. This will be used later for migrating irq in the presence of interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c..c211984b55e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) -- cgit v1.2.3 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a2..bf41ffa7470 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v1.2.3 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa7470..c360c558e59 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v1.2.3 From 3cac97cbb14aed00d83eb33d4613b0fe3aaea863 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:55 +0800 Subject: rcu classic: simplify the next pending batch use a batch number(rcp->pending) instead of a flag(rcp->next_pending) rcu_start_batch() need to change this flag, so mb()s is needed for memory-access safe. but(after this patch applied) rcu_start_batch() do not change this batch number(rcp->pending), rcp->pending is managed by __rcu_process_callbacks only, and troublesome mb()s are eliminated. And codes look simpler and clearer. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 8c774905dcf..c847e59c600 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -45,7 +45,7 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ - int next_pending; /* Is the next batch already waiting? */ + long pending; /* Number of the last pending batch */ int signaled; -- cgit v1.2.3 From 5127bed588a2f8f3a1f732de2a8a190b7df5dce3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:59 +0800 Subject: rcu classic: new algorithm for callbacks-processing(v2) This is v2, it's a little deference from v1 that I had send to lkml. use ACCESS_ONCE use rcu_batch_after/rcu_batch_before for batch # comparison. rcutorture test result: (hotplugs: do cpu-online/offline once per second) No CONFIG_NO_HZ: OK, 12hours No CONFIG_NO_HZ, hotplugs: OK, 12hours CONFIG_NO_HZ=y: OK, 24hours CONFIG_NO_HZ=y, hotplugs: Failed. (Failed also without my patch applied, exactly the same bug occurred, http://lkml.org/lkml/2008/7/3/24) v1's email thread: http://lkml.org/lkml/2008/6/2/539 v1's description: The code/algorithm of the implement of current callbacks-processing is very efficient and technical. But when I studied it and I found a disadvantage: In multi-CPU systems, when a new RCU callback is being queued(call_rcu[_bh]), this callback will be invoked after the grace period for the batch with batch number = rcp->cur+2 has completed very very likely in current implement. Actually, this callback can be invoked after the grace period for the batch with batch number = rcp->cur+1 has completed. The delay of invocation means that latency of synchronize_rcu() is extended. But more important thing is that the callbacks usually free memory, and these works are delayed too! it's necessary for reclaimer to free memory as soon as possible when left memory is few. A very simple way can solve this problem: a field(struct rcu_head::batch) is added to record the batch number for the RCU callback. And when a new RCU callback is being queued, we determine the batch number for this callback(head->batch = rcp->cur+1) and we move this callback to rdp->donelist if we find that head->batch <= rcp->completed when we process callbacks. This simple way reduces the wait time for invocation a lot. (about 2.5Grace Period -> 1.5Grace Period in average in multi-CPU systems) This is my algorithm. But I do not add any field for struct rcu_head in my implement. We just need to memorize the last 2 batches and their batch number, because these 2 batches include all entries that for whom the grace period hasn't completed. So we use a special linked-list rather than add a field. Please see the comment of struct rcu_data. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index c847e59c600..04c728147be 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -66,11 +66,7 @@ static inline int rcu_batch_after(long a, long b) return (a - b) > 0; } -/* - * Per-CPU data for Read-Copy UPdate. - * nxtlist - new callbacks are added here - * curlist - current batch for which quiescent cycle started if any - */ +/* Per-CPU data for Read-Copy UPdate. */ struct rcu_data { /* 1) quiescent state handling : */ long quiescbatch; /* Batch # for grace period */ @@ -78,12 +74,24 @@ struct rcu_data { int qs_pending; /* core waits for quiesc state */ /* 2) batch handling */ - long batch; /* Batch # for current RCU batch */ + /* + * if nxtlist is not NULL, then: + * batch: + * The batch # for the last entry of nxtlist + * [*nxttail[1], NULL = *nxttail[2]): + * Entries that batch # <= batch + * [*nxttail[0], *nxttail[1]): + * Entries that batch # <= batch - 1 + * [nxtlist, *nxttail[0]): + * Entries that batch # <= batch - 2 + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + long batch; struct rcu_head *nxtlist; - struct rcu_head **nxttail; + struct rcu_head **nxttail[3]; long qlen; /* # of queued callbacks */ - struct rcu_head *curlist; - struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; long blimit; /* Upper limit on a processed batch */ -- cgit v1.2.3 From 67182ae1c42206e516f7efb292b745e826497b24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Aug 2008 18:35:38 -0700 Subject: rcu, debug: detect stalled grace periods this is a diagnostic patch for Classic RCU. The approach is to record a timestamp at the beginning of the grace period (in rcu_start_batch()), then have rcu_check_callbacks() complain if: 1. it is running on a CPU that has holding up grace periods for a long time (say one second). This will identify the culprit assuming that the culprit has not disabled hardware irqs, instruction execution, or some such. 2. it is running on a CPU that is not holding up grace periods, but grace periods have been held up for an even longer time (say two seconds). It is enabled via the default-off CONFIG_DEBUG_RCU_STALL kernel parameter. Rather than exponential backoff, it backs off to once per 30 seconds. My feeling upon thinking on it was that if you have stalled RCU grace periods for that long, a few extra printk() messages are probably the least of your worries... Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Yinghai Lu Cc: David Witbrodt Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 04c728147be..16589958b40 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -46,6 +46,9 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ long pending; /* Number of the last pending batch */ +#ifdef CONFIG_DEBUG_RCU_STALL + unsigned long gp_check; /* Time grace period should end, in seconds. */ +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */ int signaled; -- cgit v1.2.3 From c1bc667e844c2677cdf927102ab384fe7b033762 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 7 Aug 2008 16:43:38 +0200 Subject: IPVS: Add genetlink interface definitions to ip_vs.h Add IPVS Generic Netlink interface definitions to include/linux/ip_vs.h. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/linux/ip_vs.h | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index ec6eb49af2d..0f434a28fb5 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -242,4 +242,164 @@ struct ip_vs_daemon_user { int syncid; }; +/* + * + * IPVS Generic Netlink interface definitions + * + */ + +/* Generic Netlink family info */ + +#define IPVS_GENL_NAME "IPVS" +#define IPVS_GENL_VERSION 0x1 + +struct ip_vs_flags { + __be32 flags; + __be32 mask; +}; + +/* Generic Netlink command attributes */ +enum { + IPVS_CMD_UNSPEC = 0, + + IPVS_CMD_NEW_SERVICE, /* add service */ + IPVS_CMD_SET_SERVICE, /* modify service */ + IPVS_CMD_DEL_SERVICE, /* delete service */ + IPVS_CMD_GET_SERVICE, /* get service info */ + + IPVS_CMD_NEW_DEST, /* add destination */ + IPVS_CMD_SET_DEST, /* modify destination */ + IPVS_CMD_DEL_DEST, /* delete destination */ + IPVS_CMD_GET_DEST, /* get destination info */ + + IPVS_CMD_NEW_DAEMON, /* start sync daemon */ + IPVS_CMD_DEL_DAEMON, /* stop sync daemon */ + IPVS_CMD_GET_DAEMON, /* get sync daemon status */ + + IPVS_CMD_SET_CONFIG, /* set config settings */ + IPVS_CMD_GET_CONFIG, /* get config settings */ + + IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */ + IPVS_CMD_GET_INFO, /* get general IPVS info */ + + IPVS_CMD_ZERO, /* zero all counters and stats */ + IPVS_CMD_FLUSH, /* flush services and dests */ + + __IPVS_CMD_MAX, +}; + +#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1) + +/* Attributes used in the first level of commands */ +enum { + IPVS_CMD_ATTR_UNSPEC = 0, + IPVS_CMD_ATTR_SERVICE, /* nested service attribute */ + IPVS_CMD_ATTR_DEST, /* nested destination attribute */ + IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */ + IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */ + IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */ + IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */ + __IPVS_CMD_ATTR_MAX, +}; + +#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a service + * + * Used inside nested attribute IPVS_CMD_ATTR_SERVICE + */ +enum { + IPVS_SVC_ATTR_UNSPEC = 0, + IPVS_SVC_ATTR_AF, /* address family */ + IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */ + IPVS_SVC_ATTR_ADDR, /* virtual service address */ + IPVS_SVC_ATTR_PORT, /* virtual service port */ + IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */ + + IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */ + IPVS_SVC_ATTR_FLAGS, /* virtual service flags */ + IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */ + IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ + + IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + __IPVS_SVC_ATTR_MAX, +}; + +#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a destination (real server) + * + * Used inside nested attribute IPVS_CMD_ATTR_DEST + */ +enum { + IPVS_DEST_ATTR_UNSPEC = 0, + IPVS_DEST_ATTR_ADDR, /* real server address */ + IPVS_DEST_ATTR_PORT, /* real server port */ + + IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */ + IPVS_DEST_ATTR_WEIGHT, /* destination weight */ + + IPVS_DEST_ATTR_U_THRESH, /* upper threshold */ + IPVS_DEST_ATTR_L_THRESH, /* lower threshold */ + + IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */ + IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */ + IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ + + IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + __IPVS_DEST_ATTR_MAX, +}; + +#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1) + +/* + * Attributes describing a sync daemon + * + * Used inside nested attribute IPVS_CMD_ATTR_DAEMON + */ +enum { + IPVS_DAEMON_ATTR_UNSPEC = 0, + IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ + IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ + IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ + __IPVS_DAEMON_ATTR_MAX, +}; + +#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1) + +/* + * Attributes used to describe service or destination entry statistics + * + * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS + */ +enum { + IPVS_STATS_ATTR_UNSPEC = 0, + IPVS_STATS_ATTR_CONNS, /* connections scheduled */ + IPVS_STATS_ATTR_INPKTS, /* incoming packets */ + IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */ + IPVS_STATS_ATTR_INBYTES, /* incoming bytes */ + IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */ + + IPVS_STATS_ATTR_CPS, /* current connection rate */ + IPVS_STATS_ATTR_INPPS, /* current in packet rate */ + IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */ + IPVS_STATS_ATTR_INBPS, /* current in byte rate */ + IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */ + __IPVS_STATS_ATTR_MAX, +}; + +#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1) + +/* Attributes used in response to IPVS_CMD_GET_INFO command */ +enum { + IPVS_INFO_ATTR_UNSPEC = 0, + IPVS_INFO_ATTR_VERSION, /* IPVS version number */ + IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */ + __IPVS_INFO_ATTR_MAX, +}; + +#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1) + #endif /* _IP_VS_H */ -- cgit v1.2.3 From ff9cf2ce7afe76435d66c898cc9dacaa68e79d41 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:10:02 -0700 Subject: rcu: fixes to include/linux/rcupreempt.h Hello! Compared tip/core/rcu to my latest patchset, and found the following issues: o the memory barrier in rcu_exit_nohz() somehow got out of place (it is correct in mainline as of 2.6.26-rc7). o There is a duplicate declaration of rcu_dyntick_sched. The attached patch fixes these. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupreempt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 0967f03b070..addb5e282f3 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -111,7 +111,6 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { @@ -126,8 +125,8 @@ static inline void rcu_exit_nohz(void) { static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), &rs); } -- cgit v1.2.3 From 34d7c2b38d124219b7034356716e3455c439acd3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:11:05 -0700 Subject: rcu: remove list_for_each_rcu() All of the in-tree uses of list_for_each_rcu() have been converted to list_for_each_entry_rcu(), so list_for_each_rcu() can now be removed. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index eb4443c7e05..e649bd3f2c9 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,20 +198,6 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ -- cgit v1.2.3 From 07dd20e0324f4d3e33bde1944d4f7771a09c498c Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:18:04 +0100 Subject: sched: reorder signal_struct to remove 8 bytes on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87b99f..5a8058e44f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - struct task_struct *group_exit_task; int notify_count; + struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; -- cgit v1.2.3 From bee367ed066e26c14263d808136fba8eec3bd70a Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:24:08 +0100 Subject: sched: reorder struct sched_rt_entity to remove padding on 64 bit builds remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a8058e44f5..08a87b5f29e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1010,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned int time_slice; unsigned long timeout; + unsigned int time_slice; int nr_cpus_allowed; struct sched_rt_entity *back; -- cgit v1.2.3 From 3fb669dd6ec11e14819c0114a0e68a9ddcec65e1 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:36:28 +0100 Subject: reorder struct prop_local_single to remove padding on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Cc: peterz@infradead.org Signed-off-by: Ingo Molnar --- include/linux/proportions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5afc1b23346..cf793bbbd05 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -104,8 +104,8 @@ struct prop_local_single { * snapshot of the last seen global state * and a lock protecting this state */ - int shift; unsigned long period; + int shift; spinlock_t lock; /* protect the snapshot state */ }; -- cgit v1.2.3 From dd0078f4f04d939950a792c493d7d97d7ce663b8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2008 14:20:54 -0400 Subject: rcu: just rename call_rcu_bh instead of making it a macro Seems that I found a box that has a config that passes call_rcu_bh as a function pointer (see net/sctp/sm_make_chunk.c), so declaring the call_rcu_bh has a macro function isn't good enough. This patch makes it just another name of call_rcu for rcupreempt. Signed-off-by: Steven Rostedt Reviewed-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar --- include/linux/rcupreempt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index addb5e282f3..3e05c09b54a 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -57,7 +57,13 @@ static inline void rcu_qsctr_inc(int cpu) rdssp->sched_qs++; } #define rcu_bh_qsctr_inc(cpu) -#define call_rcu_bh(head, rcu) call_rcu(head, rcu) + +/* + * Someone might want to pass call_rcu_bh as a function pointer. + * So this needs to just be a rename and not a macro function. + * (no parentheses) + */ +#define call_rcu_bh call_rcu /** * call_rcu_sched - Queue RCU callback for invocation after sched grace period. -- cgit v1.2.3 From ded00a56e99555c3f4000ef3eebfd5fe0d574565 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Aug 2008 12:50:36 -0700 Subject: rcu: remove redundant ACCESS_ONCE definition from rcupreempt.c Remove the redundant definition of ACCESS_ONCE() from rcupreempt.c in favor of the one in compiler.h. Also merge the comment header from rcupreempt.c's definition into that in compiler.h. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c8bd2daf95e..8322141ee48 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -190,7 +190,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); * ACCESS_ONCE() in different C statements. * * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. + * merging, or refetching absolutely anything at any time. Its main intended + * use is to mediate communication between process-level code and irq/NMI + * handlers, all running on the same CPU. */ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -- cgit v1.2.3 From 1f7c14c62ce63805f9574664a6c6de3633d4a354 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Thu, 9 Oct 2008 12:50:59 -0400 Subject: percpu counter: clean up percpu_counter_sum_and_set() percpu_counter_sum_and_set() and percpu_counter_sum() is the same except the former updates the global counter after accounting. Since we are taking the fbc->lock to calculate the precise value of the counter in percpu_counter_sum() anyway, it should simply set fbc->count too, as the percpu_counter_sum_and_set() does. This patch merges these two interfaces into one. Signed-off-by: Mingming Cao Acked-by: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- include/linux/percpu_counter.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 20838883535..9007ccdfc11 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); +s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc, 0); + s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } -static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) -{ - return __percpu_counter_sum(fbc, 1); -} - - static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc, 0); + return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) -- cgit v1.2.3 From d9105c2b01eedb620cae96073dde4f760367817f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Va=C5=A1ut?= Date: Sun, 3 Aug 2008 21:34:08 +0100 Subject: [ARM] 5184/1: Split ucb1400_ts into core and touchscreen This patch splits ucb1400_ts into ucb1400_ts and ucb1400_core. Since this chip supports more features than only touchscreen, it was necessary to prepare it for feature addition. The previous functionality is preserved by applying this patch. [Build fixes for non-ARM by Stephen Rothwell and Takashi Iwai] Signed-off-by: Marek Vasut Signed-off-by: Russell King --- include/linux/ucb1400.h | 161 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 include/linux/ucb1400.h (limited to 'include/linux') diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h new file mode 100644 index 00000000000..970473bf8d5 --- /dev/null +++ b/include/linux/ucb1400.h @@ -0,0 +1,161 @@ +/* + * Register definitions and functions for: + * Philips UCB1400 driver + * + * Based on ucb1400_ts: + * Author: Nicolas Pitre + * Created: September 25, 2006 + * Copyright: MontaVista Software, Inc. + * + * Spliting done by: Marek Vasut + * If something doesnt work and it worked before spliting, e-mail me, + * dont bother Nicolas please ;-) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This code is heavily based on ucb1x00-*.c copyrighted by Russell King + * covering the UCB1100, UCB1200 and UCB1300.. Support for the UCB1400 has + * been made separate from ucb1x00-core/ucb1x00-ts on Russell's request. + */ + +#ifndef _LINUX__UCB1400_H +#define _LINUX__UCB1400_H + +#include +#include +#include + +/* + * UCB1400 AC-link registers + */ + +#define UCB_IO_DATA 0x5a +#define UCB_IO_DIR 0x5c +#define UCB_IE_RIS 0x5e +#define UCB_IE_FAL 0x60 +#define UCB_IE_STATUS 0x62 +#define UCB_IE_CLEAR 0x62 +#define UCB_IE_ADC (1 << 11) +#define UCB_IE_TSPX (1 << 12) + +#define UCB_TS_CR 0x64 +#define UCB_TS_CR_TSMX_POW (1 << 0) +#define UCB_TS_CR_TSPX_POW (1 << 1) +#define UCB_TS_CR_TSMY_POW (1 << 2) +#define UCB_TS_CR_TSPY_POW (1 << 3) +#define UCB_TS_CR_TSMX_GND (1 << 4) +#define UCB_TS_CR_TSPX_GND (1 << 5) +#define UCB_TS_CR_TSMY_GND (1 << 6) +#define UCB_TS_CR_TSPY_GND (1 << 7) +#define UCB_TS_CR_MODE_INT (0 << 8) +#define UCB_TS_CR_MODE_PRES (1 << 8) +#define UCB_TS_CR_MODE_POS (2 << 8) +#define UCB_TS_CR_BIAS_ENA (1 << 11) +#define UCB_TS_CR_TSPX_LOW (1 << 12) +#define UCB_TS_CR_TSMX_LOW (1 << 13) + +#define UCB_ADC_CR 0x66 +#define UCB_ADC_SYNC_ENA (1 << 0) +#define UCB_ADC_VREFBYP_CON (1 << 1) +#define UCB_ADC_INP_TSPX (0 << 2) +#define UCB_ADC_INP_TSMX (1 << 2) +#define UCB_ADC_INP_TSPY (2 << 2) +#define UCB_ADC_INP_TSMY (3 << 2) +#define UCB_ADC_INP_AD0 (4 << 2) +#define UCB_ADC_INP_AD1 (5 << 2) +#define UCB_ADC_INP_AD2 (6 << 2) +#define UCB_ADC_INP_AD3 (7 << 2) +#define UCB_ADC_EXT_REF (1 << 5) +#define UCB_ADC_START (1 << 7) +#define UCB_ADC_ENA (1 << 15) + +#define UCB_ADC_DATA 0x68 +#define UCB_ADC_DAT_VALID (1 << 15) +#define UCB_ADC_DAT_MASK 0x3ff + +#define UCB_ID 0x7e +#define UCB_ID_1400 0x4304 + +struct ucb1400_ts { + struct input_dev *ts_idev; + struct task_struct *ts_task; + int id; + wait_queue_head_t ts_wait; + unsigned int ts_restart:1; + int irq; + unsigned int irq_pending; /* not bit field shared */ + struct snd_ac97 *ac97; +}; + +struct ucb1400 { + struct platform_device *ucb1400_ts; +}; + +static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) +{ + return ac97->bus->ops->read(ac97, reg); +} + +static inline void ucb1400_reg_write(struct snd_ac97 *ac97, u16 reg, u16 val) +{ + ac97->bus->ops->write(ac97, reg, val); +} + +static inline u16 ucb1400_gpio_get_value(struct snd_ac97 *ac97, u16 gpio) +{ + return ucb1400_reg_read(ac97, UCB_IO_DATA) & (1 << gpio); +} + +static inline void ucb1400_gpio_set_value(struct snd_ac97 *ac97, u16 gpio, + u16 val) +{ + ucb1400_reg_write(ac97, UCB_IO_DATA, val ? + ucb1400_reg_read(ac97, UCB_IO_DATA) | (1 << gpio) : + ucb1400_reg_read(ac97, UCB_IO_DATA) & ~(1 << gpio)); +} + +static inline u16 ucb1400_gpio_get_direction(struct snd_ac97 *ac97, u16 gpio) +{ + return ucb1400_reg_read(ac97, UCB_IO_DIR) & (1 << gpio); +} + +static inline void ucb1400_gpio_set_direction(struct snd_ac97 *ac97, u16 gpio, + u16 dir) +{ + ucb1400_reg_write(ac97, UCB_IO_DIR, dir ? + ucb1400_reg_read(ac97, UCB_IO_DIR) | (1 << gpio) : + ucb1400_reg_read(ac97, UCB_IO_DIR) & ~(1 << gpio)); +} + +static inline void ucb1400_adc_enable(struct snd_ac97 *ac97) +{ + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA); +} + +static unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, + int adcsync) +{ + unsigned int val; + + if (adcsync) + adc_channel |= UCB_ADC_SYNC_ENA; + + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel); + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel | + UCB_ADC_START); + + while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA)) + & UCB_ADC_DAT_VALID)) + schedule_timeout_uninterruptible(1); + + return val & UCB_ADC_DAT_MASK; +} + +static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) +{ + ucb1400_reg_write(ac97, UCB_ADC_CR, 0); +} + +#endif -- cgit v1.2.3 From 9961920199ec88d6b581d3e38502088935925c04 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:58 -0300 Subject: rfkill: add default global states (v2) Add a second set of global states, "rfkill_default_states", to track the state that will be used when the first rfkill class of a given type is registered, and also to save "undo" information when rfkill_epo is called. Add a new exported function, rfkill_set_default(), which can be used by platform drivers to restore radio state saved by the platform across reboots or shutdown. Also, fix rfkill_epo to properly update rfkill_states, but still preserve a copy of the state so that we can undo the effect of rfkill_epo later if we want to. Add rfkill_restore_states() to restore rfkill_states from the copy. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 741d1a62cc3..aa3c7d5852f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -116,6 +116,7 @@ int rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); +int rfkill_set_default(enum rfkill_type type, enum rfkill_state state); /** * rfkill_state_complement - return complementar state -- cgit v1.2.3 From 77fba13ccc3a2a3db100892a4a6cc5e2f8290cc7 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:10:59 -0300 Subject: rfkill: add __must_check annotations rfkill is not a small, mere detail in wireless support. Once it starts supporting rfkill and users start counting on that support, a wireless device is at risk of operating in dangerous conditions should rfkill support fail to properly activate. Therefore, add the required __must_check annotations on some key functions of the rfkill API, for which the wireless drivers absolutely MUST handle the failure mode safely in order to avoid a potentially dangerous situation where the wireless transmitter is left enabled when the user don't want it to. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Signed-off-by: John W. Linville --- include/linux/rfkill.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index aa3c7d5852f..e92d8e94bb8 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -110,9 +110,10 @@ struct rfkill { }; #define to_rfkill(d) container_of(d, struct rfkill, dev) -struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type); +struct rfkill * __must_check rfkill_allocate(struct device *parent, + enum rfkill_type type); void rfkill_free(struct rfkill *rfkill); -int rfkill_register(struct rfkill *rfkill); +int __must_check rfkill_register(struct rfkill *rfkill); void rfkill_unregister(struct rfkill *rfkill); int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); -- cgit v1.2.3 From 96c87607ac8f9b0e641d11ba6e57f8ec0214ea1c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 2 Aug 2008 15:11:00 -0300 Subject: rfkill: introduce RFKILL_STATE_MAX While it is interesting to not add last-enum-markers because it allows gcc to warn us of switch() statements missing a valid state, we really should be handling memory corruption on a rfkill state with default clauses, anyway. So add RFKILL_STATE_MAX and use it where applicable. It makes for safer code in the long run. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e92d8e94bb8..4cd64b0d982 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -49,6 +49,7 @@ enum rfkill_state { RFKILL_STATE_SOFT_BLOCKED = 0, /* Radio output blocked */ RFKILL_STATE_UNBLOCKED = 1, /* Radio output allowed */ RFKILL_STATE_HARD_BLOCKED = 2, /* Output blocked, non-overrideable */ + RFKILL_STATE_MAX, /* marker for last valid state */ }; /* -- cgit v1.2.3 From fef1643bf0cdd092a52dc3378479e4811fd65152 Mon Sep 17 00:00:00 2001 From: Jasper Bryant-Greene Date: Sun, 3 Aug 2008 11:30:55 +1200 Subject: move ETH_P_PAE from ieee80211_i.h to if_ether.h ETH_P_PAE belongs in if_ether.h with the other ETH_P_* definitions. This patch moves it there. Signed-off-by: Jasper Bryant-Greene Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index e157c1399b6..5028e0b6082 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -74,6 +74,7 @@ #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport * over Ethernet */ +#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ -- cgit v1.2.3 From 65eb3dc609dec17deea48dcd4de2e549d29a9824 Mon Sep 17 00:00:00 2001 From: Kevin Diggs Date: Tue, 26 Aug 2008 10:26:54 +0200 Subject: sched: add kernel doc for the completion, fix kernel-doc-nano-HOWTO.txt This patch adds kernel doc for the completion feature. An error in the split-man.pl PERL snippet in kernel-doc-nano-HOWTO.txt is also fixed. Signed-off-by: Kevin Diggs Signed-off-by: Ingo Molnar --- include/linux/completion.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 02ef8835999..4a6b604ef7e 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,6 +10,18 @@ #include +/** + * struct completion - structure used to maintain state for a "completion" + * + * This is the opaque structure used to maintain the state for a "completion". + * Completions currently use a FIFO to queue threads that have to wait for + * the "completion" event. + * + * See also: complete(), wait_for_completion() (and friends _timeout, + * _interruptible, _interruptible_timeout, and _killable), init_completion(), + * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and + * INIT_COMPLETION(). + */ struct completion { unsigned int done; wait_queue_head_t wait; @@ -21,6 +33,14 @@ struct completion { #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) +/** + * DECLARE_COMPLETION: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure. Generally used + * for static declarations. You should use the _ONSTACK variant for automatic + * variables. + */ #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) @@ -29,6 +49,13 @@ struct completion { * completions - so we use the _ONSTACK() variant for those that * are on the kernel stack: */ +/** + * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure on the kernel + * stack. + */ #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) @@ -36,6 +63,13 @@ struct completion { # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) #endif +/** + * init_completion: - Initialize a dynamically allocated completion + * @x: completion structure that is to be initialized + * + * This inline function will initialize a dynamically created completion + * structure. + */ static inline void init_completion(struct completion *x) { x->done = 0; @@ -55,6 +89,13 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); +/** + * INIT_COMPLETION: - reinitialize a completion structure + * @x: completion structure to be reinitialized + * + * This macro should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. + */ #define INIT_COMPLETION(x) ((x).done = 0) -- cgit v1.2.3 From da31894ed7b654e2e1741e7ac4ef6c15be0dd14b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 22 Aug 2008 11:35:57 -0400 Subject: securityfs: do not depend on CONFIG_SECURITY Add a new Kconfig option SECURITYFS which will build securityfs support but does not require CONFIG_SECURITY. The only current user of securityfs does not depend on CONFIG_SECURITY and there is no reason the full LSM needs to be built to build this fs. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 80c4d002864..f5c4a51eb42 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1560,11 +1560,6 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -extern struct dentry *securityfs_create_file(const char *name, mode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); -extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); -extern void securityfs_remove(struct dentry *dentry); /* Security operations */ int security_ptrace_may_access(struct task_struct *child, unsigned int mode); @@ -2424,25 +2419,6 @@ static inline int security_netlink_recv(struct sk_buff *skb, int cap) return cap_netlink_recv(skb, cap); } -static inline struct dentry *securityfs_create_dir(const char *name, - struct dentry *parent) -{ - return ERR_PTR(-ENODEV); -} - -static inline struct dentry *securityfs_create_file(const char *name, - mode_t mode, - struct dentry *parent, - void *data, - const struct file_operations *fops) -{ - return ERR_PTR(-ENODEV); -} - -static inline void securityfs_remove(struct dentry *dentry) -{ -} - static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { return -EOPNOTSUPP; @@ -2806,5 +2782,35 @@ static inline void security_audit_rule_free(void *lsmrule) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_SECURITYFS + +extern struct dentry *securityfs_create_file(const char *name, mode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); +extern void securityfs_remove(struct dentry *dentry); + +#else /* CONFIG_SECURITYFS */ + +static inline struct dentry *securityfs_create_dir(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *securityfs_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + +static inline void securityfs_remove(struct dentry *dentry) +{} + +#endif + #endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.3 From 0f8e0d9a317406612700426fad3efab0b7bbc467 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Aug 2008 13:30:24 -0500 Subject: dlm: allow multiple lockspace creates Add a count for lockspace create and release so that create can be called multiple times to use the lockspace from different places. Also add the new flag DLM_LSFL_NEWEXCL to create a lockspace with the previous behavior of returning -EEXIST if the lockspace already exists. Signed-off-by: David Teigland --- include/linux/dlm.h | 5 ++++- include/linux/dlm_device.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 203a025e30e..b9cd38603fd 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -65,9 +65,12 @@ struct dlm_lksb { char * sb_lvbptr; }; +/* dlm_new_lockspace() flags */ + #define DLM_LSFL_NODIR 0x00000001 #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_FS 0x00000004 +#define DLM_LSFL_NEWEXCL 0x00000008 #ifdef __KERNEL__ diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h index c6034508fed..3060783c419 100644 --- a/include/linux/dlm_device.h +++ b/include/linux/dlm_device.h @@ -26,7 +26,7 @@ /* Version of the device interface */ #define DLM_DEVICE_VERSION_MAJOR 6 #define DLM_DEVICE_VERSION_MINOR 0 -#define DLM_DEVICE_VERSION_PATCH 0 +#define DLM_DEVICE_VERSION_PATCH 1 /* struct passed to the lock write */ struct dlm_lock_params { -- cgit v1.2.3 From da7f033ddc9fdebb3223b0bf88a2a2ab5b797608 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Jul 2008 17:08:25 +0800 Subject: crypto: cryptomgr - Add test infrastructure This patch moves the newly created alg_test infrastructure into cryptomgr. This shall allow us to use it for testing at algorithm registrations. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c43dc47fdf7..7ea0a4bc4ce 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -515,6 +515,8 @@ struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags); struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_free_tfm(struct crypto_tfm *tfm); +int alg_test(const char *driver, const char *alg, u32 type, u32 mask); + /* * Transform helpers which query the underlying algorithm. */ -- cgit v1.2.3 From 73d3864a4823abda19ebc4387b6ddcbf416e3a77 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Aug 2008 21:15:23 +0800 Subject: crypto: api - Use test infrastructure This patch makes use of the new testing infrastructure by requiring algorithms to pass a run-time test before they're made available to users. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7ea0a4bc4ce..81d994a3bda 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -60,6 +60,14 @@ */ #define CRYPTO_ALG_GENIV 0x00000200 +/* + * Set if the algorithm has passed automated run-time testing. Note that + * if there is no run-time testing for a given algorithm it is considered + * to have passed. + */ + +#define CRYPTO_ALG_TESTED 0x00000400 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From 17f0f4a47df9aea9ee26c939f8057c35e0be1847 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 14 Aug 2008 22:15:52 +1000 Subject: crypto: rng - RNG interface and implementation This patch adds a random number generator interface as well as a cryptographic pseudo-random number generator based on AES. It is meant to be used in cases where a deterministic CPRNG is required. One of the first applications will be as an input in the IPsec IV generation process. Signed-off-by: Neil Horman Signed-off-by: Herbert Xu --- include/linux/crypto.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 81d994a3bda..3d2317e4af2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -38,6 +38,7 @@ #define CRYPTO_ALG_TYPE_DIGEST 0x00000008 #define CRYPTO_ALG_TYPE_HASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a +#define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c @@ -113,6 +114,7 @@ struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; struct crypto_ahash; +struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -298,6 +300,15 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +struct rng_alg { + int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen); + int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); + + unsigned int seedsize; +}; + + #define cra_ablkcipher cra_u.ablkcipher #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher @@ -306,6 +317,7 @@ struct compress_alg { #define cra_hash cra_u.hash #define cra_ahash cra_u.ahash #define cra_compress cra_u.compress +#define cra_rng cra_u.rng struct crypto_alg { struct list_head cra_list; @@ -333,6 +345,7 @@ struct crypto_alg { struct hash_alg hash; struct ahash_alg ahash; struct compress_alg compress; + struct rng_alg rng; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); @@ -438,6 +451,12 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; +struct rng_tfm { + int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen); + int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); +}; + #define crt_ablkcipher crt_u.ablkcipher #define crt_aead crt_u.aead #define crt_blkcipher crt_u.blkcipher @@ -445,6 +464,7 @@ struct compress_tfm { #define crt_hash crt_u.hash #define crt_ahash crt_u.ahash #define crt_compress crt_u.compress +#define crt_rng crt_u.rng struct crypto_tfm { @@ -458,6 +478,7 @@ struct crypto_tfm { struct hash_tfm hash; struct ahash_tfm ahash; struct compress_tfm compress; + struct rng_tfm rng; } crt_u; struct crypto_alg *__crt_alg; @@ -489,6 +510,10 @@ struct crypto_hash { struct crypto_tfm base; }; +struct crypto_rng { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, -- cgit v1.2.3 From 9f1ba9062e032fb7b395cd27fc564754fe4e9867 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 7 Aug 2008 20:07:01 +0300 Subject: mac80211/cfg80211: Add BSS configuration options for AP mode This change adds a new cfg80211 command, NL80211_CMD_SET_BSS, to allow AP mode BSS parameters to be changed from user space (e.g., hostapd). The drivers using mac80211 are expected to be modified with separate changes to use the new BSS info parameter for short slot time in the bss_info_changed() handler. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2be7c63bc0f..447c02a5190 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -89,6 +89,8 @@ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -127,6 +129,8 @@ enum nl80211_commands { NL80211_CMD_NEW_MPATH, NL80211_CMD_DEL_MPATH, + NL80211_CMD_SET_BSS, + /* add commands here */ /* used to define NL80211_CMD_MAX below */ @@ -134,6 +138,11 @@ enum nl80211_commands { NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS /** * enum nl80211_attrs - nl80211 netlink attributes @@ -192,6 +201,12 @@ enum nl80211_commands { * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -235,6 +250,10 @@ enum nl80211_attrs { NL80211_ATTR_MPATH_NEXT_HOP, NL80211_ATTR_MPATH_INFO, + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 18d7c65ba6628d2759bd650e7039d6307f2c99e9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 14 Aug 2008 13:27:41 +0530 Subject: mac80211: Add an 802.11n definition This patch adds a HT Capability (DSSS/CCK Mode in 40MHz BSS) definition to ieee80211.h Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7f4df7c7659..be456450cd2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -714,6 +714,7 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_CAP_SGI_40 0x0040 #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 /* 802.11n HT capability AMPDU settings */ #define IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 #define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -- cgit v1.2.3 From 095f695cbb07281682462da0618fffabb499d0be Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 19 Aug 2008 12:50:31 -0500 Subject: ssb: Update for Rev. 5 SPROM Although a revision 5 SPROM has not been seen in the wild, the open-source portion of the MIPS driver 4.150.10.5 describes its layout, which is mostly inherited from revision 4. This patch implements the differences. Signed-off-by: Larry Finger Acked-by: Michael Buesch Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index ebad0bac980..271bb4b6446 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -316,6 +316,21 @@ #define SSB_SPROM4_PA1B1 0x1090 #define SSB_SPROM4_PA1B2 0x1092 +/* SPROM Revision 5 (inherits most data from rev 4) */ +#define SSB_SPROM5_BFLLO 0x104A /* Boardflags (low 16 bits) */ +#define SSB_SPROM5_BFLHI 0x104C /* Board Flags Hi */ +#define SSB_SPROM5_IL0MAC 0x1052 /* 6 byte MAC address for a/b/g/n */ +#define SSB_SPROM5_CCODE 0x1044 /* Country Code (2 bytes) */ +#define SSB_SPROM5_GPIOA 0x1076 /* Gen. Purpose IO # 0 and 1 */ +#define SSB_SPROM5_GPIOA_P0 0x00FF /* Pin 0 */ +#define SSB_SPROM5_GPIOA_P1 0xFF00 /* Pin 1 */ +#define SSB_SPROM5_GPIOA_P1_SHIFT 8 +#define SSB_SPROM5_GPIOB 0x1078 /* Gen. Purpose IO # 2 and 3 */ +#define SSB_SPROM5_GPIOB_P2 0x00FF /* Pin 2 */ +#define SSB_SPROM5_GPIOB_P3 0xFF00 /* Pin 3 */ +#define SSB_SPROM5_GPIOB_P3_SHIFT 8 + + /* Values for SSB_SPROM1_BINF_CCODE */ enum { SSB_SPROM1CCODE_WORLD = 0, -- cgit v1.2.3 From 31ce12fb3ebf88b054deb99ad729e84888bf6125 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Aug 2008 17:45:06 -0500 Subject: ssb: Clean up extraction of MAC addresses from SPROM Only rev 1 and 2 ssb SPROMs have fields named et0mac and et1mac; however, all of the extraction routines extract pseudo data for these fields from regions that are all 1's resulting in a hardware address of FF:FF:FF:FF:FF:FF. This patch forces such a fill at the beginning of the data extraction process, and only does the formal extraction if the SPROM rev is 1 or 2. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- include/linux/ssb/ssb_regs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 271bb4b6446..99a0f991e85 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -245,8 +245,6 @@ /* SPROM Revision 3 (inherits most data from rev 2) */ #define SSB_SPROM3_IL0MAC 0x104A /* 6 bytes MAC address for 802.11b/g */ -#define SSB_SPROM3_ET0MAC 0x1050 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM3_ET1MAC 0x1050 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM3_OFDMAPO 0x102C /* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMALPO 0x1030 /* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */ #define SSB_SPROM3_OFDMAHPO 0x1034 /* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */ @@ -267,8 +265,6 @@ /* SPROM Revision 4 */ #define SSB_SPROM4_IL0MAC 0x104C /* 6 byte MAC address for a/b/g/n */ -#define SSB_SPROM4_ET0MAC 0x1018 /* 6 bytes MAC address for Ethernet ?? */ -#define SSB_SPROM4_ET1MAC 0x1018 /* 6 bytes MAC address for 802.11a ?? */ #define SSB_SPROM4_ETHPHY 0x105A /* Ethernet PHY settings ?? */ #define SSB_SPROM4_ETHPHY_ET0A 0x001F /* MII Address for enet0 */ #define SSB_SPROM4_ETHPHY_ET1A 0x03E0 /* MII Address for enet1 */ -- cgit v1.2.3 From 36aedc903ea11a4188de0a118d26c9f20afdd272 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Aug 2008 11:58:58 +0300 Subject: mac80211/cfg80211: HT capabilities for NEW_STA Allow userspace (e.g., hostapd) to set HT capabilities for associated STAs. This is based on a patch from Zhu Yi (only the NL80211_ATTR_HT_CAPABILITY for NEW_STA part is included here). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 447c02a5190..0c1147de3ec 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -207,6 +207,9 @@ enum nl80211_commands { * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -254,16 +257,25 @@ enum nl80211_attrs { NL80211_ATTR_BSS_SHORT_PREAMBLE, NL80211_ATTR_BSS_SHORT_SLOT_TIME, + NL80211_ATTR_HT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY + #define NL80211_MAX_SUPP_RATES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 /** * enum nl80211_iftype - (virtual) interface types -- cgit v1.2.3 From b4eec206370b7154dc354dc30f0a3f02ea8468b2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449fbec..3978aff197d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -- cgit v1.2.3 From 828755cee087e4a34f45d6c9db661ccd0631cc6d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff197d..484b8a1fb02 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; -- cgit v1.2.3 From 71bb49596bbf4e5a3328e1704d18604e822ba181 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1fb02..d3ac1bde60b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 668144f7b41716a9efe1b398e15ead32a26cd101 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bde60b..6eaaca9b037 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 -- cgit v1.2.3 From 20f41eee82864e308a5499308a1722dc3181cc3a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9b037..5a5a89935db 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From 17c30b40ed79e9f3955e884632c8f01e577b204a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a89935db..eda389ce04f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From fade756f18d42694e3acb00e3471ab43002cba16 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Note on the maximum number of CCIDs that can be registered: ----------------------------------------------------------- The maximum number of CCIDs that can be registered on the socket is constrained by the space in a Confirm/Change feature negotiation option. The space in these in turn depends on the size of header options as defined in RFC 4340, 5.8. Since this is a recurring constant, it has been moved from ackvec.h into linux/dccp.h, clarifying its purpose. Relative to this size, the maximum number of CCID identifiers that can be present in a Confirm option (which always consumes 1 byte more than a Change option, cf. 6.1) is 2 bytes less than the maximum TLV size: one for the CCID-feature-type and one for the selected value. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389ce04f..6a72ff52a8a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.2.3 From 78673e24df27c76ec75565f4024d45c2c74ef148 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs earlier in this patch set. Also removed the constructors for the TX CCID and the RX CCID, since the switch rx/non-rx is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff52a8a..46daea312d9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; -- cgit v1.2.3 From 68e074bfcef269bc61006c2740d7f89ccbbd93d7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature is with the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea312d9..60e94438ead 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.2.3 From b235dc4abbc1356284bd0dc730efa711f394e0e2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, which is now handled fully dynamically via feature negotiation; i.e. when CCID2 is enabled, Ack Vectors are automatically enabled (as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e94438ead..61734e27abb 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.2.3 From 5d3dac267a7fd0811ec777e76a81f97f5cdcb395 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation framework for feature negotiation This initialises feature negotiation from two tables, which are initialised from sysctls. As a novel feature, specifics of the implementation (e.g. currently short seqnos and ECN are not supported) are advertised for robustness. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61734e27abb..990e97fa1f0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -369,28 +369,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_pending - List of features being negotiated - * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - struct list_head dccpms_pending; - struct list_head dccpms_conf; -}; - -struct dccp_opt_conf { - __u8 *dccpoc_val; - __u8 dccpoc_len; -}; - -struct dccp_opt_pend { - struct list_head dccpop_node; - __u8 dccpop_type; - __u8 dccpop_feat; - __u8 *dccpop_val; - __u8 dccpop_len; - int dccpop_conf; - struct dccp_opt_conf *dccpop_sc; }; extern void dccp_minisock_init(struct dccp_minisock *dmsk); -- cgit v1.2.3 From 51c7d4fa2675c106a980ddcdbe308b54b5151945 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Implement both feature-local and feature-remote Sequence Window feature This adds full support for local/remote Sequence Window feature, from which the * sequence-number-validity (W) and * acknowledgment-number-validity (W') windows derive as specified in RFC 4340, 7.5.3. Specifically, the following changes are introduced: * integrated new socket fields into dccp_sk; * updated the update_gsr/gss routines with regard to these fields; * updated handler code: the Sequence Window feature is located at the TX side, so the local feature is meant if the handler-rx flag is false; * the initialisation of `rcv_wnd' in reqsk is removed, since - rcv_wnd is not used by the code anywhere; - sequence number checks are not done in the LISTEN state (cf. 7.5.3); - dccp_check_req checks the Ack number validity more rigorously; * the `struct dccp_minisock' became empty and is now removed. Until the handshake completes with activating negotiated values, the local/remote Sequence-Window values are undefined and thus can not reliably be estimated. This issue is addressed in a separate patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 990e97fa1f0..7a0502ab383 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 -/** - * struct dccp_minisock - Minimal DCCP connection representation - * - * Will be used to pass the state from dccp_request_sock to dccp_sock. - * - * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - */ -struct dccp_minisock { - __u64 dccpms_sequence_window; -}; - -extern void dccp_minisock_init(struct dccp_minisock *dmsk); - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from @@ -464,13 +451,14 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio + * @dccps_l_seq_win - local Sequence Window (influences ack number validity) + * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) @@ -504,12 +492,13 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; + __u64 dccps_l_seq_win:48; + __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct dccp_minisock dccps_minisock; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; @@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } -static inline struct dccp_minisock *dccp_msk(const struct sock *sk) -{ - return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { -- cgit v1.2.3 From 0a4822679d94e2b0117aeead06a19fad59533905 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly act on the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Further changes: ---------------- Performed s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- include/linux/dccp.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7a0502ab383..7434a8353e2 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } - -/* initial values for each feature */ -#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 -#define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID DCCPC_CCID2 -/* FIXME: for now we're default to 1 but it should really be 0 */ -#define DCCPF_INITIAL_SEND_NDP_COUNT 1 - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from -- cgit v1.2.3 From f10ecaee6dc2c6d56783462b2a82e98bc81b55f4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Replace magic CCID-specific numbers by symbolic constants The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but instead for the CCID-specific options numbers are used. This patch unifies the use of CCID-specific option numbers, by adding symbolic names reflecting the definitions in RFC 4340, 10.3. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7434a8353e2..7187bd8a75f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,8 +165,10 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_CCID_SPECIFIC = 128, - DCCPO_MAX_CCID_SPECIFIC = 255, + DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ + DCCPO_MAX_RX_CCID_SPECIFIC = 191, + DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ + DCCPO_MAX_TX_CCID_SPECIFIC = 255, }; /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ #define DCCP_SINGLE_OPT_MAXLEN 253 -- cgit v1.2.3 From c2f42077bd06f300ae959204f3c007f820f5e769 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS (previous patch), and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. It can thus serve other parts of the DCCP code as well. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7187bd8a75f..83197b601a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,6 +462,7 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) + * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ @@ -502,6 +503,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + __u8 dccps_sync_scheduled:1; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.3 From e7937772d7a2b0127cc4cbc67bc594e139fdaf63 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeuing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds). The main purpose is to take CCID2 out of its polling mode (when it is network- limited, it tries every millisecond to send, without interruption). The interface can also be used to support other CCIDs. The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the taskled function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. If the tasklet finds that the socket is locked, it re-schedules the tasklet function (not the tasklet) after one jiffy. Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets). Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 83197b601a4..eed52bcd35d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -463,7 +463,8 @@ struct dccp_ackvec; * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmit_timer - timer for when CCID is not ready to send + * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets + * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -504,6 +505,7 @@ struct dccp_sock { __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; __u8 dccps_sync_scheduled:1; + struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; -- cgit v1.2.3 From d6da3511d6b558d0b017777b61dc08b8fbc06ea4 Mon Sep 17 00:00:00 2001 From: Tomasz Grobelny Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Policy-based packet dequeueing infrastructure This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign an u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3), the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eed52bcd35d..010e2d87ed7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -197,6 +197,21 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket control message types for cmsg */ +enum dccp_cmsg_type { + DCCP_SCM_PRIORITY = 1, + DCCP_SCM_QPOLICY_MAX = 0xFFFF, + /* ^-- Up to here reserved exclusively for qpolicy parameters */ + DCCP_SCM_MAX +}; + +/* DCCP priorities for outgoing/queued packets */ +enum dccp_packet_dequeueing_policy { + DCCPQ_POLICY_SIMPLE, + DCCPQ_POLICY_PRIO, + DCCPQ_POLICY_MAX +}; + /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 @@ -210,6 +225,8 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_CCID 13 #define DCCP_SOCKOPT_TX_CCID 14 #define DCCP_SOCKOPT_RX_CCID 15 +#define DCCP_SOCKOPT_QPOLICY_ID 16 +#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -458,6 +475,8 @@ struct dccp_ackvec; * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options + * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy + * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending @@ -500,6 +519,8 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + __u8 dccps_qpolicy; + __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; -- cgit v1.2.3 From 268364a0f48aee2f851f9d1ef8a6cda0f3039ef1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:02:44 +0200 Subject: IO resources: add reserve_region_with_split() add reserve_region_with_split() to not lose e820 reserved entries if they overlap with existing IO regions: with test case by extend 0xe0000000 - 0xeffffff to 0xdd800000 - we get: e0000000-efffffff : PCI MMCONFIG 0 e0000000-efffffff : reserved and in /proc/iomem we get: found conflict for reserved [dd800000, efffffff], try to reserve with split __reserve_region_with_split: (PCI Bus #80) [dd000000, ddffffff], res: (reserved) [dd800000, efffffff] __reserve_region_with_split: (PCI Bus #00) [de000000, dfffffff], res: (reserved) [de000000, efffffff] initcall pci_subsys_init+0x0/0x121 returned 0 after 381 msecs in dmesg various fixes and improvements suggested by Linus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8d3b7a9afd1..fded376b94e 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -108,6 +108,9 @@ extern struct resource iomem_resource; extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); +extern void reserve_region_with_split(struct resource *root, + resource_size_t start, resource_size_t end, + const char *name); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int allocate_resource(struct resource *root, struct resource *new, -- cgit v1.2.3 From f7981c1c67b53abb4a7d8a501e68585b9826179a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 10:23:22 +0200 Subject: mv643xx_eth: require contiguous receive and transmit queue numbering Simplify receive and transmit queue handling by requiring the set of queue numbers to be contiguous starting from zero. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12078577aef..eb78b00edcd 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -49,10 +49,10 @@ struct mv643xx_eth_platform_data { int duplex; /* - * Which RX/TX queues to use. + * How many RX/TX queues to use. */ - int rx_queue_mask; - int tx_queue_mask; + int rx_queue_count; + int tx_queue_count; /* * Override default RX/TX queue sizes if nonzero. -- cgit v1.2.3 From fc0eb9f226d8ecc8e3b563bf808bd6d61a6153a1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 12:56:56 +0200 Subject: mv643xx_eth: smi sharing is a per-unit property, not a per-port one Which top-level unit's SMI interface to use should be a property of the top-level unit, not of the individual ports. This patch moves the ->shared_smi pointer from the per-port platform data to the global platform data. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index eb78b00edcd..12339eb6570 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -17,6 +17,7 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; + struct platform_device *shared_smi; unsigned int t_clk; }; @@ -30,7 +31,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - struct platform_device *shared_smi; int force_phy_addr; int phy_addr; -- cgit v1.2.3 From ac840605f3b1d9b99e1e6629a54994f8e003ff91 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 14:06:47 +0200 Subject: mv643xx_eth: remove force_phy_addr field Currently, there are two different fields in the mv643xx_eth_platform_data struct that together describe the PHY address -- one field (phy_addr) has the address of the PHY, but if that address is zero, a second field (force_phy_addr) needs to be set to distinguish the actual address zero from a zero due to not having filled in the PHY address explicitly (which should mean 'use the default PHY address'). If we are a bit smarter about the encoding of the phy_addr field, we can avoid the need for a second field -- this patch does that. Signed-off-by: Lennert Buytenhek --- include/linux/mv643xx_eth.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 12339eb6570..cbbbe9bfeca 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -21,6 +21,10 @@ struct mv643xx_eth_shared_platform_data { unsigned int t_clk; }; +#define MV643XX_ETH_PHY_ADDR_DEFAULT 0 +#define MV643XX_ETH_PHY_ADDR(x) (0x80 | (x)) +#define MV643XX_ETH_PHY_NONE 0xff + struct mv643xx_eth_platform_data { /* * Pointer back to our parent instance, and our port number. @@ -31,7 +35,6 @@ struct mv643xx_eth_platform_data { /* * Whether a PHY is present, and if yes, at which address. */ - int force_phy_addr; int phy_addr; /* -- cgit v1.2.3 From ca1af29a733629b9158a4a32a927d16ff9009a95 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Sep 2008 13:13:39 +0200 Subject: x86, pci: add northbridge pci ids for fam 0x11 processors The PCI device ids for AMD family 0x11 processors are missing in pci_ids.h. This patch adds them. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9ec2bcce8e8..0d0b314476c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -497,6 +497,11 @@ #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 +#define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300 +#define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301 +#define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 +#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 +#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v1.2.3 From f59ac0481660e66cec67f1d6b024e78b9dc715fe Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Aug 2008 16:26:43 -0700 Subject: cfg80211: keep track of supported interface modes It is obviously good for userspace to know up front which interface modes a given piece of hardware might support (even if adding such an interface might fail later because of concurrency issues), so let's make cfg80211 aware of that. For good measure, disallow adding interfaces in all other modes so drivers don't forget to announce support for one mode when they add it. Signed-off-by: Johannes Berg Signed-off-by: Stephen Blackheath Signed-off-by: Ivo van Doorn Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0c1147de3ec..5e51f4e7600 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -210,6 +210,10 @@ enum nl80211_commands { * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -259,6 +263,8 @@ enum nl80211_attrs { NL80211_ATTR_HT_CAPABILITY, + NL80211_ATTR_SUPPORTED_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From b0dbcf511c4bd10350902e79a1bdd4f5dcca66b6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Sep 2008 21:13:37 +0100 Subject: [NET] smc91x: provide configurable leds This patch provides a mechanism for platforms to be able to supply the LED configuration via platform data, rather than having to hard code it in smc91x.h. Acked-by: Eric Miao Acked-by: Nicolas Pitre Acked-by: Jeff Garzik Signed-off-by: Russell King --- include/linux/smc91x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index 3827b922ba1..ed25483d25d 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -18,6 +18,8 @@ struct smc91x_platdata { unsigned long flags; + unsigned char leda; + unsigned char ledb; }; #endif /* __SMC91X_H__ */ -- cgit v1.2.3 From 05496769e5da83ce22ed97345afd9c7b71d6bd24 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 16 Sep 2008 14:36:17 -0400 Subject: jbd2: clean up how the journal device name is printed Calculate the journal device name once and stash it away in the journal_s structure. This avoids needing to call bdevname() everywhere and reduces stack usage by not needing to allocate an on-stack buffer. In addition, we eliminate the '/' that can appear in device names (e.g. "cciss/c0d0p9" --- see kernel bugzilla #11321) that can cause problems when creating proc directory names, and include the inode number to support ocfs2 which creates multiple journals with different inode numbers. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3dd20900709..66c3499478b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -850,7 +850,8 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - unsigned long long j_blk_offset; + unsigned long long j_blk_offset; + char j_devname[BDEVNAME_SIZE+24]; /* * Device which holds the client fs. For internal journal this will be -- cgit v1.2.3 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel function would need that notification, e.g. kvm works around by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 1 + include/linux/notifier.h | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d7faf880849..c2747ac2ae4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); +void notify_cpu_starting(unsigned int cpu); extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index da2698b0fdd..b86fa2ffca0 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead */ + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ -- cgit v1.2.3 From 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 9 Sep 2008 13:27:22 +0200 Subject: This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp" as it accentally contained the wrong set of patches. These will be submitted separately. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 122 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed7..6080449fbec 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,13 +165,9 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ - DCCPO_MAX_RX_CCID_SPECIFIC = 191, - DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ - DCCPO_MAX_TX_CCID_SPECIFIC = 255, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, }; -/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ -#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -180,36 +176,27 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum dccp_feature_numbers { +enum { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, + DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, + DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, + DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, - DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* DCCP socket control message types for cmsg */ -enum dccp_cmsg_type { - DCCP_SCM_PRIORITY = 1, - DCCP_SCM_QPOLICY_MAX = 0xFFFF, - /* ^-- Up to here reserved exclusively for qpolicy parameters */ - DCCP_SCM_MAX -}; - -/* DCCP priorities for outgoing/queued packets */ -enum dccp_packet_dequeueing_policy { - DCCPQ_POLICY_SIMPLE, - DCCPQ_POLICY_PRIO, - DCCPQ_POLICY_MAX +/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ +struct dccp_so_feat { + __u8 dccpsf_feat; + __u8 __user *dccpsf_val; + __u8 dccpsf_len; }; /* DCCP socket options */ @@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 -#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 -#define DCCP_SOCKOPT_CCID 13 -#define DCCP_SOCKOPT_TX_CCID 14 -#define DCCP_SOCKOPT_RX_CCID 15 -#define DCCP_SOCKOPT_QPOLICY_ID 16 -#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +#define DCCPF_INITIAL_ACK_RATIO 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +/** + * struct dccp_minisock - Minimal DCCP connection representation + * + * Will be used to pass the state from dccp_request_sock to dccp_sock. + * + * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpms_ccid - Congestion Control Id (CCID) (section 10) + * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) + * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) + * @dccpms_pending - List of features being negotiated + * @dccpms_conf - + */ +struct dccp_minisock { + __u64 dccpms_sequence_window; + __u8 dccpms_rx_ccid; + __u8 dccpms_tx_ccid; + __u8 dccpms_send_ack_vector; + __u8 dccpms_send_ndp_count; + __u8 dccpms_ack_ratio; + struct list_head dccpms_pending; + struct list_head dccpms_conf; +}; + +struct dccp_opt_conf { + __u8 *dccpoc_val; + __u8 dccpoc_len; +}; + +struct dccp_opt_pend { + struct list_head dccpop_node; + __u8 dccpop_type; + __u8 dccpop_feat; + __u8 *dccpop_val; + __u8 dccpop_len; + int dccpop_conf; + struct dccp_opt_conf *dccpop_sc; +}; + +extern void dccp_minisock_init(struct dccp_minisock *dmsk); + /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -390,7 +420,6 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; - struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -462,28 +491,21 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) + * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) + * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -507,26 +529,19 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; + __u16 dccps_pcslen; + __u16 dccps_pcrlen; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct list_head dccps_featneg; + struct dccp_minisock dccps_minisock; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; @@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline struct dccp_minisock *dccp_msk(const struct sock *sk) +{ + return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { -- cgit v1.2.3 From fb683f1627745e937ef199edd3428ac4b2ef1e08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Aug 2008 16:36:28 +0200 Subject: Export smc91x led definitions Now that we can configure smc91x leds from its platform data, it seems rather useful to move the led definitions to the externally visible header file. Signed-off-by: Marc Zyngier --- include/linux/smc91x.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index ed25483d25d..bc21db598c0 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -16,6 +16,15 @@ #define SMC91X_USE_DMA (1 << 6) +#define RPC_LED_100_10 (0x00) /* LED = 100Mbps OR's with 10Mbps link detect */ +#define RPC_LED_RES (0x01) /* LED = Reserved */ +#define RPC_LED_10 (0x02) /* LED = 10Mbps link detect */ +#define RPC_LED_FD (0x03) /* LED = Full Duplex Mode */ +#define RPC_LED_TX_RX (0x04) /* LED = TX or RX packet occurred */ +#define RPC_LED_100 (0x05) /* LED = 100Mbps link dectect */ +#define RPC_LED_TX (0x06) /* LED = TX packet occurred */ +#define RPC_LED_RX (0x07) /* LED = RX packet occurred */ + struct smc91x_platdata { unsigned long flags; unsigned char leda; -- cgit v1.2.3 From 636dc67cbf8c481a996faf6c23f0532d0f02ebad Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:46 +0900 Subject: add is_buffer_dma_capable helper function is_buffer_dma_capable helper function is to see if a memory region is DMA-capable or not. The arugments are the dma_mask (or coherent_dma_mask) of a device and the address and size of a memory region. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 952e0f857ac..6ed50c1642f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -48,6 +48,11 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } +static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) +{ + return addr + size <= mask; +} + #ifdef CONFIG_HAS_DMA #include #else -- cgit v1.2.3 From 271bff7afbb2cbaa81e744006ad2fff1f3e10b1b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:48:58 -0700 Subject: net: Add DMA mapping tokens to skb_shared_info. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 90992371783..4b2be23903c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -146,8 +146,14 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; +#ifdef CONFIG_HAS_DMA + unsigned int num_dma_maps; +#endif struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; +#ifdef CONFIG_HAS_DMA + dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; +#endif }; /* We divide dataref into two halves. The higher 16 bits hold references -- cgit v1.2.3 From a40c24a13366e324bc0ff8c3bb107db89312c984 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Sep 2008 04:51:14 -0700 Subject: net: Add SKB DMA mapping helper functions. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b2be23903c..aa80ad9cbc8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -359,6 +359,14 @@ struct sk_buff { #include +#ifdef CONFIG_HAS_DMA +#include +extern int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir); +#endif + extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, -- cgit v1.2.3 From 00c5ae2fa0f8191a1b204e71f0ee11359e3b2c06 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 3 Sep 2008 11:26:42 +0800 Subject: mac80211: change MIMO_PS to SM_PS This patch follows 11n spec naming more rigorously replacing MIMO_PS with SM_PS (Spatial Multiplexing Power Save). (Originally submitted as 4 patches, "mac80211: change MIMO_PS to SM_PS", "iwlwifi: change MIMO_PS to SM_PS", "ath9k: change MIMO_PS to SM_PS", and "iwlwifi: remove double definition of SM PS". -- JWL) Signed-off-by: Ron Rindjunsky Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index be456450cd2..333d3ae7688 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -708,7 +708,7 @@ struct ieee80211_ht_addt_info { /* 802.11n HT capabilities masks */ #define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_MIMO_PS 0x000C +#define IEEE80211_HT_CAP_SM_PS 0x000C #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 @@ -737,11 +737,11 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 -/* MIMO Power Save Modes */ -#define WLAN_HT_CAP_MIMO_PS_STATIC 0 -#define WLAN_HT_CAP_MIMO_PS_DYNAMIC 1 -#define WLAN_HT_CAP_MIMO_PS_INVALID 2 -#define WLAN_HT_CAP_MIMO_PS_DISABLED 3 +/* Spatial Multiplexing Power Save Modes */ +#define WLAN_HT_CAP_SM_PS_STATIC 0 +#define WLAN_HT_CAP_SM_PS_DYNAMIC 1 +#define WLAN_HT_CAP_SM_PS_INVALID 2 +#define WLAN_HT_CAP_SM_PS_DISABLED 3 /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 -- cgit v1.2.3 From 44d414dbff9d5bf46fc09f2e68567b5848cbbfd3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Sep 2008 17:44:28 +0200 Subject: mac80211: move some HT code out of mlme.c Some of the HT code in mlme.c is misplaced: * constants/definitions belong to the ieee80211.h header * code being used in other modes as well shouldn't be there Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 333d3ae7688..abc1abc63bf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -643,6 +643,9 @@ struct ieee80211_mgmt { } u; } __attribute__ ((packed)); +/* mgmt header + 1 byte category code */ +#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) + /* Control frames */ struct ieee80211_rts { @@ -737,6 +740,21 @@ struct ieee80211_ht_addt_info { #define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 +#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C +#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 +#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 +#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 + +/* + * A-PMDU buffer sizes + * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) + */ +#define IEEE80211_MIN_AMPDU_BUF 0x8 +#define IEEE80211_MAX_AMPDU_BUF 0x40 + + /* Spatial Multiplexing Power Save Modes */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 -- cgit v1.2.3 From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:29:34 -0700 Subject: pkt_sched: Add multiqueue scheduler support This patch is intended to add a qdisc to support the new tx multiqueue architecture by providing a band for each hardware queue. By doing this it is possible to support a different qdisc per physical hardware queue. This qdisc uses the skb->queue_mapping to select which band to place the traffic onto. It then uses a round robin w/ a check to see if the subqueue is stopped to determine which band to dequeue the packet from. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e5de421ac7b..5d921fa91a5 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -123,6 +123,13 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + /* TBF section */ struct tc_tbf_qopt -- cgit v1.2.3 From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:30:20 -0700 Subject: pkt_action: add new action skbedit This new action will have the ability to change the priority and/or queue_mapping fields on an sk_buff. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_skbedit.h | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 include/linux/tc_act/tc_skbedit.h (limited to 'include/linux') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 6dac0d7365c..76990937f4c 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -3,3 +3,4 @@ header-y += tc_ipt.h header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h +header-y += tc_skbedit.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h new file mode 100644 index 00000000000..a14e461a7af --- /dev/null +++ b/include/linux/tc_act/tc_skbedit.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __LINUX_TC_SKBEDIT_H +#define __LINUX_TC_SKBEDIT_H + +#include + +#define TCA_ACT_SKBEDIT 11 + +#define SKBEDIT_F_PRIORITY 0x1 +#define SKBEDIT_F_QUEUE_MAPPING 0x2 + +struct tc_skbedit { + tc_gen; +}; + +enum { + TCA_SKBEDIT_UNSPEC, + TCA_SKBEDIT_TM, + TCA_SKBEDIT_PARMS, + TCA_SKBEDIT_PRIORITY, + TCA_SKBEDIT_QUEUE_MAPPING, + __TCA_SKBEDIT_MAX +}; +#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) + +#endif -- cgit v1.2.3 From eecfffc154ffbfe70686a9905c090b488778c28e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:33 +0900 Subject: iommu: add iommu_device_max_index IOMMU helper function This function helps IOMMUs to know the highest address that a device can access to. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index c975caf7538..58f41107e4a 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,13 @@ +static inline unsigned long iommu_device_max_index(unsigned long size, + unsigned long offset, + u64 dma_mask) +{ + if (size + offset > dma_mask) + return dma_mask - offset + 1; + else + return size; +} + extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); -- cgit v1.2.3 From 589fc9a6e2102b498978f6350581ec7fa5aeb032 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:34 +0900 Subject: iommu: add dma_get_mask helper function Several IOMMUs do the same thing to get the dma_mask of a device. This adds a helper function to do the same thing to sweep them. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6ed50c1642f..0dba7433af1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -63,6 +63,13 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) #define dma_sync_single dma_sync_single_for_cpu #define dma_sync_sg dma_sync_sg_for_cpu +static inline u64 dma_get_mask(struct device *dev) +{ + if (dev->dma_mask && *dev->dma_mask) + return *dev->dma_mask; + return DMA_32BIT_MASK; +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) -- cgit v1.2.3 From b2e1b30290539b344cbaff0d9da38012e03aa347 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 9 Sep 2008 23:19:48 -0700 Subject: cfg80211: Add new wireless regulatory infrastructure This adds the new wireless regulatory infrastructure. The main motiviation behind this was to centralize regulatory code as each driver was implementing their own regulatory solution, and to replace the initial centralized code we have where: * only 3 regulatory domains are supported: US, JP and EU * regulatory domains can only be changed through module parameter * all rules were built statically in the kernel We now have support for regulatory domains for many countries and regulatory domains are now queried through a userspace agent through udev allowing distributions to update regulatory rules without updating the kernel. Each driver can regulatory_hint() a regulatory domain based on either their EEPROM mapped regulatory domain value to a respective ISO/IEC 3166-1 country code or pass an internally built regulatory domain. We also add support to let the user set the regulatory domain through userspace in case of faulty EEPROMs to further help compliance. Support for world roaming will be added soon for cards capable of this. For more information see: http://wireless.kernel.org/en/developers/Regulatory/CRDA For now we leave an option to enable the old module parameter, ieee80211_regdom, and to build the 3 old regdomains statically (US, JP and EU). This option is CONFIG_WIRELESS_OLD_REGULATORY. These old static definitions and the module parameter is being scheduled for removal for 2.6.29. Note that if you use this you won't make use of a world regulatory domain as its pointless. If you leave this option enabled and if CRDA is present and you use US or JP we will try to ask CRDA to update us a regulatory domain for us. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5e51f4e7600..9bad65400fb 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -92,6 +92,20 @@ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by * %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -131,7 +145,10 @@ enum nl80211_commands { NL80211_CMD_SET_BSS, - /* add commands here */ + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ __NL80211_CMD_AFTER_LAST, @@ -197,10 +214,21 @@ enum nl80211_commands { * info given for %NL80211_CMD_GET_MPATH, nested attribute described at * &enum nl80211_mpath_info. * - * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11d country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled * (u8, 0 or 1) @@ -265,6 +293,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_IFTYPES, + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -278,6 +309,7 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_REG_RULES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 @@ -472,6 +504,66 @@ enum nl80211_bitrate_attr { NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_PASSIVE_SCAN: passive scan is required + * @NL80211_RRF_NO_IBSS: no IBSS is allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_PASSIVE_SCAN = 1<<7, + NL80211_RRF_NO_IBSS = 1<<8, +}; + /** * enum nl80211_mntr_flags - monitor configuration flags * -- cgit v1.2.3 From b08508c40adf3fd1330aabc4f37d3254179776c4 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Tue, 26 Aug 2008 08:20:34 -0700 Subject: PCI: fix compiler warnings in pci_get_subsys() pci_get_subsys() changed in 2.6.26 so that the from pointer is modified when the call is being invoked, so fix up the 'const' marking of it that the compiler is complaining about. Reported-by: Rufus & Azrael Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes --- include/linux/pci.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e14008a3c..98dc6243a70 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -534,7 +534,7 @@ extern void pci_sort_breadthfirst(void); #ifdef CONFIG_PCI_LEGACY struct pci_dev __deprecated *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev __deprecated *pci_find_slot(unsigned int bus, unsigned int devfn); #endif /* CONFIG_PCI_LEGACY */ @@ -550,7 +550,7 @@ struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); @@ -816,7 +816,7 @@ _PCI_NOP_ALL(write,) static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } @@ -838,7 +838,7 @@ static inline struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } -- cgit v1.2.3 From 6ae19b04ab41a4db0f0c48ec0b78950f6b028823 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 10 Sep 2008 12:06:15 -0400 Subject: Input: ads7846 - introduce .gpio_pendown to get pendown state The GPIO connected to ADS7846 nPENIRQ signal is usually used to get the pendown state as well. Introduce a .gpio_pendown, and use this to decide the pendown state if .get_pendown_state is NULL. Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- include/linux/spi/ads7846.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index daf744017a3..05eab2f11e6 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -43,6 +43,9 @@ struct ads7846_platform_data { u16 debounce_tol; /* tolerance used for filtering */ u16 debounce_rep; /* additional consecutive good readings * required after the first two */ + int gpio_pendown; /* the GPIO used to decide the pendown + * state if get_pendown_state == NULL + */ int (*get_pendown_state)(void); int (*filter_init) (struct ads7846_platform_data *pdata, void **filter_data); -- cgit v1.2.3 From 452c1ce218a68b5dbd626397ecfc65ca89dd3cbb Mon Sep 17 00:00:00 2001 From: Chris Snook Date: Sun, 14 Sep 2008 19:56:10 -0500 Subject: atl2: add atl2 driver Driver for Atheros L2 10/100 network device. Includes necessary changes for Kconfig, Makefile, and pci_ids.h. Signed-off-by: Chris Snook Signed-off-by: Jay Cliburn Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f1624b39675..90a132ab84a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2213,6 +2213,7 @@ #define PCI_VENDOR_ID_ATTANSIC 0x1969 #define PCI_DEVICE_ID_ATTANSIC_L1 0x1048 +#define PCI_DEVICE_ID_ATTANSIC_L2 0x2048 #define PCI_VENDOR_ID_JMICRON 0x197B #define PCI_DEVICE_ID_JMICRON_JMB360 0x2360 -- cgit v1.2.3 From 01f2e4ead2c51226ed1283ef6a8388ca6f4cff8f Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 15 Sep 2008 09:17:11 -0700 Subject: enic: add Cisco 10G Ethernet NIC driver Signed-off-by: Scott Feldman Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 90a132ab84a..6f4276d461c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1411,6 +1411,8 @@ #define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013 #define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014 +#define PCI_VENDOR_ID_CISCO 0x1137 + #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 -- cgit v1.2.3 From 4fd5f812c23c7deee6425f4a318e85c317cd1d6c Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 26 Aug 2008 13:08:46 +0200 Subject: phylib: allow incremental scanning of an mii bus This patch splits the bus scanning code in mdiobus_register() off into a separate function, and makes this function available for calling from external code. This allows incrementally scanning an mii bus, e.g. as information about which addresses are 'safe' to scan becomes available. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7224c4099a2..5f170f5b1a3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -410,6 +410,8 @@ int phy_start_aneg(struct phy_device *phydev); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); + void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.3 From 07a2c01a0c2a0cb4581a67d50d4f17cb4d2457c4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 19 Sep 2008 02:02:05 +0900 Subject: convert swiotlb to use dma_get_mask swiotlb can use dma_get_mask() instead of the homegrown function. Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0dba7433af1..ba9114ec5d3 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -65,7 +65,7 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) static inline u64 dma_get_mask(struct device *dev) { - if (dev->dma_mask && *dev->dma_mask) + if (dev && dev->dma_mask && *dev->dma_mask) return *dev->dma_mask; return DMA_32BIT_MASK; } -- cgit v1.2.3 From 006f582c73f4eda35e06fd323193c3df43fb3459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:20:20 -0700 Subject: tcp: convert retransmit_cnt_hint to seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main benefit in this is that we can then freely point the retransmit_skb_hint to anywhere we want to because there's no longer need to know what would be the count changes involve, and since this is really used only as a terminator, unnecessary work is one time walk at most, and if some retransmissions are necessary after that point later on, the walk is not full waste of time anyway. Since retransmit_high must be kept valid, all lost markers must ensure that. Now I also have learned how those "holes" in the rexmittable skbs can appear, mtu probe does them. So I removed the misleading comment as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2e2557388e3..d7637c4b284 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -358,7 +358,7 @@ struct tcp_sock { */ int lost_cnt_hint; - int retransmit_cnt_hint; + u32 retransmit_high; /* L-bits may be on up to this seqno */ u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ -- cgit v1.2.3 From 0e1c54c2a405494281e0639aacc90db03b50ae77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 20 Sep 2008 21:24:21 -0700 Subject: tcp: reorganize retransmit code loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both loops are quite similar, so they can be combined with little effort. As a result, forward_skb_hint becomes obsolete as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7637c4b284..76729062829 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,7 +342,6 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *scoreboard_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff *forward_skb_hint; struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ -- cgit v1.2.3 From 67fed45930fa31e92c11beb3a3dbf83a1a92a58d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 22:36:24 -0700 Subject: net: Add new interfaces for SKB list light-weight init and splicing. This will be used by subsequent changesets. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa80ad9cbc8..027b06170b4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -660,6 +660,22 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +/** + * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head + * @list: queue to initialize + * + * This initializes only the list and queue length aspects of + * an sk_buff_head object. This allows to initialize the list + * aspects of an sk_buff_head without reinitializing things like + * the spinlock. It can also be used for on-stack sk_buff_head + * objects where the spinlock is known to not be used. + */ +static inline void __skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} + /* * This function creates a split out lock class for each invocation; * this is needed for now since a whole lot of users of the skb-queue @@ -671,8 +687,7 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; + __skb_queue_head_init(list); } static inline void skb_queue_head_init_class(struct sk_buff_head *list, @@ -699,6 +714,83 @@ static inline void __skb_insert(struct sk_buff *newsk, list->qlen++; } +static inline void __skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff *prev, + struct sk_buff *next) +{ + struct sk_buff *first = list->next; + struct sk_buff *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * skb_queue_splice - join two skb lists, this is designed for stacks + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, (struct sk_buff *) head, head->next); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + +/** + * skb_queue_splice_tail - join two skb lists, each list being a queue + * @list: the new list to add + * @head: the place to add it in the first list + */ +static inline void skb_queue_splice_tail(const struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + } +} + +/** + * skb_queue_splice_tail - join two skb lists and reinitialise the emptied list + * @list: the new list to add + * @head: the place to add it in the first list + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + if (!skb_queue_empty(list)) { + __skb_queue_splice(list, head->prev, (struct sk_buff *) head); + head->qlen = list->qlen; + __skb_queue_head_init(list); + } +} + /** * __skb_queue_after - queue a buffer at the list head * @list: list to use -- cgit v1.2.3 From 6d80c39f9155e289fe8037a8b6352931ff916ceb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 22 Sep 2008 07:29:31 +0100 Subject: GFS2: Add UUID to GFS2 sb This patch adds a UUID to the GFS2 sb structure. This field is not actually referenced from kernel space at all, but is added for completeness and due to the userland tools which get their on-disk structure information from the gfs2_ondisk.h header file. Since we have to be backwards compatible, we will assume that any GFS2 sb for which the UUID is all 0 does not have a UUID as such. We should then be (after some userland changes) able to support the -U mount option. This addresses Fedora bugzilla #242689 Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c3c19f926e6..14d0df0b574 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -118,7 +118,11 @@ struct gfs2_sb { char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; - /* In gfs1, quota and license dinodes followed */ + + struct gfs2_inum __pad3; /* Was quota inode in gfs1 */ + struct gfs2_inum __pad4; /* Was licence inode in gfs1 */ +#define GFS2_HAS_UUID 1 + __u8 sb_uuid[16]; /* The UUID, maybe 0 for backwards compat */ }; /* -- cgit v1.2.3 From 38783e671399b5405f1fd177d602c400a9577ae6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 01:15:02 -0700 Subject: isdn: isdn_ppp: Use SKB list facilities instead of home-grown implementation. Signed-off-by: David S. Miller --- include/linux/isdn_ppp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8687a7dc063..4c218ee7587 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -157,7 +157,7 @@ typedef struct { typedef struct { int mp_mrru; /* unused */ - struct sk_buff * frags; /* fragments sl list -- use skb->next */ + struct sk_buff_head frags; /* fragments sl list */ long frames; /* number of frames in the frame list */ unsigned int seq; /* last processed packet seq #: any packets * with smaller seq # will be dropped -- cgit v1.2.3 From 15afe09bf496ae10c989e1a375a6b5da7bd3e16e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 20 Sep 2008 23:38:02 +0200 Subject: sched: wakeup preempt when small overlap Lin Ming reported a 10% OLTP regression against 2.6.27-rc4. The difference seems to come from different preemption agressiveness, which affects the cache footprint of the workload and its effective cache trashing. Aggresively preempt a task if its avg overlap is very small, this should avoid the task going to sleep and find it still running when we schedule back to it - saving a wakeup. Reported-by: Lin Ming Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b3b7a8f3247..d8e699b5585 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,7 @@ struct sched_class { void (*yield_task) (struct rq *rq); int (*select_task_rq)(struct task_struct *p, int sync); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); -- cgit v1.2.3 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 58f41107e4a..786539e432d 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -11,6 +11,7 @@ static inline unsigned long iommu_device_max_index(unsigned long size, extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); +extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, -- cgit v1.2.3 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the currently xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accomodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f..cbba7760545 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[6]; + long args[7]; }; struct netlink_notify -- cgit v1.2.3 From bce7b15426cac3000bf6a9cf59d9356ef0be2dec Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 19:51:15 -0700 Subject: Phonet: global definitions Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/if_phonet.h | 14 ++++++ include/linux/phonet.h | 125 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 4 ++ include/linux/socket.h | 4 +- 5 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 include/linux/if_phonet.h create mode 100644 include/linux/phonet.h (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 5028e0b6082..723a1c5fbc6 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -100,6 +100,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* * This is an Ethernet frame header. diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h new file mode 100644 index 00000000000..22df25fbc4e --- /dev/null +++ b/include/linux/if_phonet.h @@ -0,0 +1,14 @@ +/* + * File: if_phonet.h + * + * Phonet interface kernel definitions + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + */ + +#define PHONET_HEADER_LEN 8 /* Phonet header length */ + +#define PHONET_MIN_MTU 6 +/* 6 bytes header + 65535 bytes payload */ +#define PHONET_MAX_MTU 65541 +#define PHONET_DEV_MTU PHONET_MAX_MTU diff --git a/include/linux/phonet.h b/include/linux/phonet.h new file mode 100644 index 00000000000..6a764f8584a --- /dev/null +++ b/include/linux/phonet.h @@ -0,0 +1,125 @@ +/** + * file phonet.h + * + * Phonet sockets kernel interface + * + * Copyright (C) 2008 Nokia Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef LINUX_PHONET_H +#define LINUX_PHONET_H + +/* Automatic protocol selection */ +#define PN_PROTO_TRANSPORT 0 +/* Phonet datagram socket */ +#define PN_PROTO_PHONET 1 +#define PHONET_NPROTO 2 + +#define PNADDR_ANY 0 +#define PNPORT_RESOURCE_ROUTING 0 + +/* Phonet protocol header */ +struct phonethdr { + __u8 pn_rdev; + __u8 pn_sdev; + __u8 pn_res; + __be16 pn_length; + __u8 pn_robj; + __u8 pn_sobj; +} __attribute__((packed)); + +/* Phonet socket address structure */ +struct sockaddr_pn { + sa_family_t spn_family; + __u8 spn_obj; + __u8 spn_dev; + __u8 spn_resource; + __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; +} __attribute__ ((packed)); + +static inline __u16 pn_object(__u8 addr, __u16 port) +{ + return (addr << 8) | (port & 0x3ff); +} + +static inline __u8 pn_obj(__u16 handle) +{ + return handle & 0xff; +} + +static inline __u8 pn_dev(__u16 handle) +{ + return handle >> 8; +} + +static inline __u16 pn_port(__u16 handle) +{ + return handle & 0x3ff; +} + +static inline __u8 pn_addr(__u16 handle) +{ + return (handle >> 8) & 0xfc; +} + +static inline void pn_sockaddr_set_addr(struct sockaddr_pn *spn, __u8 addr) +{ + spn->spn_dev &= 0x03; + spn->spn_dev |= addr & 0xfc; +} + +static inline void pn_sockaddr_set_port(struct sockaddr_pn *spn, __u16 port) +{ + spn->spn_dev &= 0xfc; + spn->spn_dev |= (port >> 8) & 0x03; + spn->spn_obj = port & 0xff; +} + +static inline void pn_sockaddr_set_object(struct sockaddr_pn *spn, + __u16 handle) +{ + spn->spn_dev = pn_dev(handle); + spn->spn_obj = pn_obj(handle); +} + +static inline void pn_sockaddr_set_resource(struct sockaddr_pn *spn, + __u8 resource) +{ + spn->spn_resource = resource; +} + +static inline __u8 pn_sockaddr_get_addr(const struct sockaddr_pn *spn) +{ + return spn->spn_dev & 0xfc; +} + +static inline __u16 pn_sockaddr_get_port(const struct sockaddr_pn *spn) +{ + return ((spn->spn_dev & 0x03) << 8) | spn->spn_obj; +} + +static inline __u16 pn_sockaddr_get_object(const struct sockaddr_pn *spn) +{ + return pn_object(spn->spn_dev, spn->spn_obj); +} + +static inline __u8 pn_sockaddr_get_resource(const struct sockaddr_pn *spn) +{ + return spn->spn_resource; +} + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ca643b13b02..2b3d51c6ec9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -582,6 +582,10 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE RTNLGRP_ND_USEROPT, #define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/linux/socket.h b/include/linux/socket.h index dc5086fe773..818ca33bf79 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -190,7 +190,8 @@ struct ucred { #define AF_IUCV 32 /* IUCV sockets */ #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ -#define AF_MAX 35 /* For now.. */ +#define AF_PHONET 35 /* Phonet sockets */ +#define AF_MAX 36 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -227,6 +228,7 @@ struct ucred { #define PF_IUCV AF_IUCV #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN +#define PF_PHONET AF_PHONET #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -- cgit v1.2.3 From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:19 -0700 Subject: Phonet: common socket glue This provides the socket API for the Phonet protocols family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 6a764f8584a..001c0e67909 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -32,6 +32,9 @@ #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* ioctls */ +#define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) + /* Phonet protocol header */ struct phonethdr { __u8 pn_rdev; -- cgit v1.2.3 From 5f77076d75d35c9f5619e1f9d7e7428a627f65e6 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:04 -0700 Subject: Phonet: provide MAC header operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 22df25fbc4e..7e989216ec1 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -12,3 +12,7 @@ /* 6 bytes header + 65535 bytes payload */ #define PHONET_MAX_MTU 65541 #define PHONET_DEV_MTU PHONET_MAX_MTU + +#ifdef __KERNEL__ +extern struct header_ops phonet_header_ops; +#endif -- cgit v1.2.3 From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:09:13 -0700 Subject: Phonet: emit errors when a packet cannot be delivered locally When there is no listener socket for a received packet, send an error back to the sender. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 001c0e67909..3a027f588a4 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -45,6 +45,38 @@ struct phonethdr { __u8 pn_sobj; } __attribute__((packed)); +/* Common Phonet payload header */ +struct phonetmsg { + __u8 pn_trans_id; /* transaction ID */ + __u8 pn_msg_id; /* message type */ + union { + struct { + __u8 pn_submsg_id; /* message subtype */ + __u8 pn_data[5]; + } base; + struct { + __u16 pn_e_res_id; /* extended resource ID */ + __u8 pn_e_submsg_id; /* message subtype */ + __u8 pn_e_data[3]; + } ext; + } pn_msg_u; +}; +#define PN_COMMON_MESSAGE 0xF0 +#define PN_PREFIX 0xE0 /* resource for extended messages */ +#define pn_submsg_id pn_msg_u.base.pn_submsg_id +#define pn_e_submsg_id pn_msg_u.ext.pn_e_submsg_id +#define pn_e_res_id pn_msg_u.ext.pn_e_res_id +#define pn_data pn_msg_u.base.pn_data +#define pn_e_data pn_msg_u.ext.pn_e_data + +/* data for unreachable errors */ +#define PN_COMM_SERVICE_NOT_IDENTIFIED_RESP 0x01 +#define PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP 0x14 +#define pn_orig_msg_id pn_data[0] +#define pn_status pn_data[1] +#define pn_e_orig_msg_id pn_e_data[0] +#define pn_e_status pn_e_data[1] + /* Phonet socket address structure */ struct sockaddr_pn { sa_family_t spn_family; -- cgit v1.2.3 From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Sep 2008 21:28:11 -0700 Subject: net: network device name ifalias support This patch add support for keeping an additional character alias associated with an network interface. This is useful for maintaining the SNMP ifAlias value which is a user defined value. Routers use this to hold information like which circuit or line it is connected to. It is just an arbitrary text label on the network device. There are two exposed interfaces with this patch, the value can be read/written either via netlink or sysfs. This could be maintained just by the snmp daemon, but it is more generally useful for other management tools, and the kernel is good place to act as an agreed upon interface to store it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if.h | 1 + include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if.h b/include/linux/if.h index 5c9d1fa93fe..65246846c84 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -24,6 +24,7 @@ #include /* for "__user" et al */ #define IFNAMSIZ 16 +#define IFALIASZ 256 #include /* Standard interface flags (netdevice->flags). */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 84c3492ae5c..f9032c88716 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,6 +79,7 @@ enum IFLA_LINKINFO, #define IFLA_LINKINFO IFLA_LINKINFO IFLA_NET_NS_PID, + IFLA_IFALIAS, __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 488c56e649b..d675df08b94 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -471,6 +471,8 @@ struct net_device char name[IFNAMSIZ]; /* device name hash chain */ struct hlist_node name_hlist; + /* snmp alias */ + char *ifalias; /* * I/O specific fields @@ -1224,6 +1226,7 @@ extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); -- cgit v1.2.3 From 1d4a31dde95af56edac4dee268249a29a21fa7c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 21:57:21 -0700 Subject: net: Fix bus in SKB queue splicing interfaces. Handle the case of head being non-empty, by adding list->qlen to head->qlen instead of using direct assignment. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 027b06170b4..4a144e8d053 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -738,7 +738,7 @@ static inline void skb_queue_splice(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -754,7 +754,7 @@ static inline void skb_queue_splice_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } @@ -769,7 +769,7 @@ static inline void skb_queue_splice_tail(const struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; } } @@ -786,7 +786,7 @@ static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); - head->qlen = list->qlen; + head->qlen += list->qlen; __skb_queue_head_init(list); } } -- cgit v1.2.3 From fc7ebb212d3e51d1188948d975aa93dbb0f58b25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:34:07 -0700 Subject: net: Add skb_queue_is_last(). Several bits of code want to know "is this the last SKB in a queue", and all of them implement this by hand. Provide an common interface to make this check. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4a144e8d053..3a5838da160 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -472,6 +472,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (struct sk_buff *)list; } +/** + * skb_queue_is_last - check if skb is the last entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the last buffer on the list. + */ +static inline bool skb_queue_is_last(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->next == (struct sk_buff *) list); +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.3 From 249c8b42c7e5e6f33d0ff983041f08278b137e53 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:44:42 -0700 Subject: net: Add skb_queue_next(). A lot of code wants to iterate over an SKB queue at the top level using it's own control structure and iterator scheme. Provide skb_queue_next(), which is only valid to invoke if skb_queue_is_last() returns false. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a5838da160..d2f1778877d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -485,6 +485,24 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_next - return the next packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the next packet in @list after @skb. It is only valid to + * call this if skb_queue_is_last() evaluates to false. + */ +static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_last(list, skb)); + return skb->next; +} + /** * skb_get - reference buffer * @skb: buffer to reference -- cgit v1.2.3 From 1164f52a244204830c7625b3c22812781996d7b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:49:44 -0700 Subject: net: Add skb_queue_walk_from() and skb_queue_walk_from_safe(). These will be used by TCP write queue handling and elsewhere. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d2f1778877d..a19ea43fea0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1571,6 +1571,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) +#define skb_queue_walk_from(queue, skb) \ + for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb = skb->next) + +#define skb_queue_walk_from_safe(queue, skb, tmp) \ + for (tmp = skb->next; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->next) + #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ -- cgit v1.2.3 From c32a162fd420fe8dfb049db941b2438061047fcc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 22 Sep 2008 13:57:43 -0700 Subject: smb.h: do not include linux/time.h in userspace linux/time.h conflicts with time.h from glibc It breaks building smbmount from samba. It's regression introduced by commit 76308da (" smb.h: uses struct timespec but didn't include linux/time.h"). Signed-off-by: Kirill A. Shutemov Cc: [2.6.26.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smb.h b/include/linux/smb.h index caa43b2370c..82fefddc598 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -11,7 +11,9 @@ #include #include +#ifdef __KERNEL__ #include +#endif enum smb_protocol { SMB_PROTOCOL_NONE, -- cgit v1.2.3 From faa312da9cd0b044bdc84483162c6ee10b9c83c0 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 04:18:43 +0800 Subject: lcd: allow lcd device to handle mode change events Some LCD panels are capable of different resolutions, and is allowed to change at run-time, so to make "struct lcd_device" to be able to handle mode change events here. Signed-off-by: Eric Miao Acked-by: Krzysztof Helt Signed-off-by: Russell King --- include/linux/lcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 173febac665..c67fecafff9 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -11,6 +11,7 @@ #include #include #include +#include /* Notes on locking: * @@ -45,6 +46,8 @@ struct lcd_ops { int (*get_contrast)(struct lcd_device *); /* Set LCD panel contrast */ int (*set_contrast)(struct lcd_device *, int contrast); + /* Set LCD panel mode (resolutions ...) */ + int (*set_mode)(struct lcd_device *, struct fb_videomode *); /* Check if given framebuffer device is the one LCD is bound to; return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */ int (*check_fb)(struct lcd_device *, struct fb_info *); -- cgit v1.2.3 From b18250a8f66050bd2a52287cd543fb93100e8ee0 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 04:21:44 +0800 Subject: lcd: add SPI-based LCD and backlight driver for SHARP corgi/spitz The driver is based on different source files including corgi_ssp.c, corgi_lcd.c and corgi_bl.c, previously authored by Richard Purdie and many others. The LCD and Backlight device actually share the same SPI device, so they are made into this single driver. Signed-off-by: Eric Miao Cc: Richard Purdie Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/spi/corgi_lcd.h (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h new file mode 100644 index 00000000000..3c53ac26c8d --- /dev/null +++ b/include/linux/spi/corgi_lcd.h @@ -0,0 +1,16 @@ +#ifndef __LINUX_SPI_CORGI_LCD_H +#define __LINUX_SPI_CORGI_LCD_H + +#define CORGI_LCD_MODE_QVGA 1 +#define CORGI_LCD_MODE_VGA 2 + +struct corgi_lcd_platform_data { + int init_mode; + int max_intensity; + int default_intensity; + + void (*notify)(int intensity); + void (*kick_battery)(void); +}; + +#endif /* __LINUX_SPI_CORGI_LCD_H */ -- cgit v1.2.3 From bfdcaa3b6899bbfc6ba633aff3f5f2422486c8c1 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 29 Aug 2008 05:57:20 +0800 Subject: lcd: add corgibl_limit_intensity() to corgi_lcd This is not generic enough, added here for backward compatibility. And make this an individual commit so future revert will be a bit easier. Signed-off-by: Eric Miao Cc: Richard Purdie Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h index 3c53ac26c8d..b6161aae275 100644 --- a/include/linux/spi/corgi_lcd.h +++ b/include/linux/spi/corgi_lcd.h @@ -8,6 +8,7 @@ struct corgi_lcd_platform_data { int init_mode; int max_intensity; int default_intensity; + int limit_mask; void (*notify)(int intensity); void (*kick_battery)(void); -- cgit v1.2.3 From 79617deeebb9cf089e2bc2aad19743b1209043f6 Mon Sep 17 00:00:00 2001 From: YanBo Date: Mon, 22 Sep 2008 13:30:32 +0800 Subject: mac80211: mesh portal functionality support Currently the mesh code doesn't support bridging mesh point interfaces with wired ethernet or AP to construct an MPP or MAP. This patch adds code to support the "6 address frame format packet" functionality to mesh point interfaces. Now the mesh network can be used as backhaul for end to end communication. Signed-off-by: Li YanBo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index abc1abc63bf..14126bc3664 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -471,6 +471,11 @@ struct ieee80211s_hdr { u8 eaddr3[6]; } __attribute__ ((packed)); +/* Mesh flags */ +#define MESH_FLAGS_AE_A4 0x1 +#define MESH_FLAGS_AE_A5_A6 0x2 +#define MESH_FLAGS_PS_DEEP 0x4 + /** * struct ieee80211_quiet_ie * -- cgit v1.2.3 From 040dec3b37e4b9ec15b359bf5744f1ceba39fe3e Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Fri, 12 Sep 2008 06:55:14 -0700 Subject: netxen: add pci ids Define old and new pci vendor and device ids. Signed-off-by: Dhananjay Phadke Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6f4276d461c..a65b082a888 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2247,6 +2247,16 @@ #define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009 +#define PCI_VENDOR_ID_NETXEN 0x4040 +#define PCI_DEVICE_ID_NX2031_10GXSR 0x0001 +#define PCI_DEVICE_ID_NX2031_10GCX4 0x0002 +#define PCI_DEVICE_ID_NX2031_4GCU 0x0003 +#define PCI_DEVICE_ID_NX2031_IMEZ 0x0004 +#define PCI_DEVICE_ID_NX2031_HMEZ 0x0005 +#define PCI_DEVICE_ID_NX2031_XG_MGMT 0x0024 +#define PCI_DEVICE_ID_NX2031_XG_MGMT2 0x0025 +#define PCI_DEVICE_ID_NX3031 0x0100 + #define PCI_VENDOR_ID_AKS 0x416c #define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 -- cgit v1.2.3 From b4f151ff899362fec952c45d166252c9912c041f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Sep 2008 17:48:26 +0100 Subject: MN10300: Move asm-arm/cnt32_to_63.h to include/linux/ Move asm-arm/cnt32_to_63.h to include/linux/ so that MN10300 can make use of it too. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cnt32_to_63.h | 80 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 include/linux/cnt32_to_63.h (limited to 'include/linux') diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h new file mode 100644 index 00000000000..8c0f9505b48 --- /dev/null +++ b/include/linux/cnt32_to_63.h @@ -0,0 +1,80 @@ +/* + * Extend a 32-bit counter to 63 bits + * + * Author: Nicolas Pitre + * Created: December 3, 2006 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __LINUX_CNT32_TO_63_H__ +#define __LINUX_CNT32_TO_63_H__ + +#include +#include +#include + +/* this is used only to give gcc a clue about good code generation */ +union cnt32_to_63 { + struct { +#if defined(__LITTLE_ENDIAN) + u32 lo, hi; +#elif defined(__BIG_ENDIAN) + u32 hi, lo; +#endif + }; + u64 val; +}; + + +/** + * cnt32_to_63 - Expand a 32-bit counter to a 63-bit counter + * @cnt_lo: The low part of the counter + * + * Many hardware clock counters are only 32 bits wide and therefore have + * a relatively short period making wrap-arounds rather frequent. This + * is a problem when implementing sched_clock() for example, where a 64-bit + * non-wrapping monotonic value is expected to be returned. + * + * To overcome that limitation, let's extend a 32-bit counter to 63 bits + * in a completely lock free fashion. Bits 0 to 31 of the clock are provided + * by the hardware while bits 32 to 62 are stored in memory. The top bit in + * memory is used to synchronize with the hardware clock half-period. When + * the top bit of both counters (hardware and in memory) differ then the + * memory is updated with a new value, incrementing it when the hardware + * counter wraps around. + * + * Because a word store in memory is atomic then the incremented value will + * always be in synch with the top bit indicating to any potential concurrent + * reader if the value in memory is up to date or not with regards to the + * needed increment. And any race in updating the value in memory is harmless + * as the same value would simply be stored more than once. + * + * The only restriction for the algorithm to work properly is that this + * code must be executed at least once per each half period of the 32-bit + * counter to properly update the state bit in memory. This is usually not a + * problem in practice, but if it is then a kernel timer could be scheduled + * to manage for this code to be executed often enough. + * + * Note that the top bit (bit 63) in the returned value should be considered + * as garbage. It is not cleared here because callers are likely to use a + * multiplier on the returned value which can get rid of the top bit + * implicitly by making the multiplier even, therefore saving on a runtime + * clear-bit instruction. Otherwise caller must remember to clear the top + * bit explicitly. + */ +#define cnt32_to_63(cnt_lo) \ +({ \ + static volatile u32 __m_cnt_hi; \ + union cnt32_to_63 __x; \ + __x.hi = __m_cnt_hi; \ + __x.lo = (cnt_lo); \ + if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \ + __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \ + __x.val; \ +}) + +#endif -- cgit v1.2.3 From ff7a4c7130c0ad97d55f7ab3f0a35fbc1f41b376 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Sun, 7 Sep 2008 11:30:06 +0800 Subject: [ARM] corgi_lcd: use GPIO API for BACKLIGHT_ON and BACKLIGHT_CONT Signed-off-by: Eric Miao Signed-off-by: Russell King --- include/linux/spi/corgi_lcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h index b6161aae275..6692b3418cc 100644 --- a/include/linux/spi/corgi_lcd.h +++ b/include/linux/spi/corgi_lcd.h @@ -10,6 +10,9 @@ struct corgi_lcd_platform_data { int default_intensity; int limit_mask; + int gpio_backlight_on; /* -1 if n/a */ + int gpio_backlight_cont; /* -1 if n/a */ + void (*notify)(int intensity); void (*kick_battery)(void); }; -- cgit v1.2.3 From 82ef04fb4c82542b3eda81cca461f0594ce9cd0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:40 +0900 Subject: libata: make SCR access ops per-link Logically, SCR access ops should take @link; however, there was no compelling reason to convert all SCR access ops when adding @link abstraction as there's one-to-one mapping between a port and a non-PMP link. However, that assumption won't hold anymore with the scheduled addition of slave link. Make SCR access ops per-link. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 225bfc5bd9e..ffd622fa319 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -772,8 +772,8 @@ struct ata_port_operations { /* * Optional features */ - int (*scr_read)(struct ata_port *ap, unsigned int sc_reg, u32 *val); - int (*scr_write)(struct ata_port *ap, unsigned int sc_reg, u32 val); + int (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val); + int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); int (*enable_pm)(struct ata_port *ap, enum link_pm policy); -- cgit v1.2.3 From aadffb682cc5572f48cc24883681db65530bd284 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:41 +0900 Subject: libata: reimplement link iterator Implement __ata_port_next_link() and reimplement __ata_port_for_each_link() and ata_port_for_each_link() using it. This removes relatively large inlined code and makes iteration easier to extend. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index ffd622fa319..3eaca347ce2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1265,34 +1265,17 @@ static inline int ata_link_active(struct ata_link *link) return ata_tag_valid(link->active_tag) || link->sactive; } -static inline struct ata_link *ata_port_first_link(struct ata_port *ap) -{ - if (sata_pmp_attached(ap)) - return ap->pmp_link; - return &ap->link; -} - -static inline struct ata_link *ata_port_next_link(struct ata_link *link) -{ - struct ata_port *ap = link->ap; - - if (ata_is_host_link(link)) { - if (!sata_pmp_attached(ap)) - return NULL; - return ap->pmp_link; - } - - if (++link < ap->nr_pmp_links + ap->pmp_link) - return link; - return NULL; -} +extern struct ata_link *__ata_port_next_link(struct ata_port *ap, + struct ata_link *link, + bool dev_only); -#define __ata_port_for_each_link(lk, ap) \ - for ((lk) = &(ap)->link; (lk); (lk) = ata_port_next_link(lk)) +#define __ata_port_for_each_link(link, ap) \ + for ((link) = __ata_port_next_link((ap), NULL, false); (link); \ + (link) = __ata_port_next_link((ap), (link), false)) #define ata_port_for_each_link(link, ap) \ - for ((link) = ata_port_first_link(ap); (link); \ - (link) = ata_port_next_link(link)) + for ((link) = __ata_port_next_link((ap), NULL, true); (link); \ + (link) = __ata_port_next_link((ap), (link), true)) #define ata_link_for_each_dev(dev, link) \ for ((dev) = (link)->device; \ -- cgit v1.2.3 From b5b3fa386b8f96c7fa92e507e5deddc2637924b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:42 +0900 Subject: libata: misc updates to prepare for slave link * Add ATA_EH_ALL_ACTIONS. * Make sata_link_{on|off}_line() return bool instead of int. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3eaca347ce2..0c7e6f3c28e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -321,6 +321,8 @@ enum { ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | + ATA_EH_ENABLE_LINK | ATA_EH_LPM, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -920,8 +922,8 @@ extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); -extern int ata_link_online(struct ata_link *link); -extern int ata_link_offline(struct ata_link *link); +extern bool ata_link_online(struct ata_link *link); +extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); -- cgit v1.2.3 From b1c72916abbdd0a55015c87358536ca0ebaf6735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 31 Jul 2008 17:02:43 +0900 Subject: libata: implement slave_link Explanation taken from the comment of ata_slave_link_init(). In libata, a port contains links and a link contains devices. There is single host link but if a PMP is attached to it, there can be multiple fan-out links. On SATA, there's usually a single device connected to a link but PATA and SATA controllers emulating TF based interface can have two - master and slave. However, there are a few controllers which don't fit into this abstraction too well - SATA controllers which emulate TF interface with both master and slave devices but also have separate SCR register sets for each device. These controllers need separate links for physical link handling (e.g. onlineness, link speed) but should be treated like a traditional M/S controller for everything else (e.g. command issue, softreset). slave_link is libata's way of handling this class of controllers without impacting core layer too much. For anything other than physical link handling, the default host link is used for both master and slave. For physical link handling, separate @ap->slave_link is used. All dirty details are implemented inside libata core layer. From LLD's POV, the only difference is that prereset, hardreset and postreset are called once more for the slave link, so the reset sequence looks like the following. prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) -> softreset(M) -> postreset(M) -> postreset(S) Note that softreset is called only for the master. Softreset resets both M/S by definition, so SRST on master should handle both (the standard method will work just fine). As slave_link excludes PMP support and only code paths which deal with the attributes of physical link are affected, all the changes are localized to libata.h, libata-core.c and libata-eh.c. * ata_is_host_link() updated so that slave_link is considered as host link too. * iterator extended to iterate over the slave_link when using the underbarred version. * force param handling updated such that devno 16 is mapped to the slave link/device. * ata_link_on/offline() updated to return the combined result from master and slave link. ata_phys_link_on/offline() are the direct versions. * EH autopsy and report are performed separately for master slave links. Reset is udpated to implement the above described reset sequence. Except for reset update, most changes are minor, many of them just modifying dev->link to ata_dev_phys_link(dev) or using phys online test instead. After this update, LLDs can take full advantage of per-dev SCR registers by simply turning on slave link. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0c7e6f3c28e..244ff601559 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -690,7 +690,8 @@ struct ata_port { unsigned int qc_active; int nr_active_links; /* #links with active qcs */ - struct ata_link link; /* host default link */ + struct ata_link link; /* host default link */ + struct ata_link *slave_link; /* see ata_slave_link_init() */ int nr_pmp_links; /* nr of available PMP links */ struct ata_link *pmp_link; /* array of PMP links */ @@ -897,6 +898,7 @@ extern void ata_port_disable(struct ata_port *); extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); +extern int ata_slave_link_init(struct ata_port *ap); extern int ata_host_start(struct ata_host *host); extern int ata_host_register(struct ata_host *host, struct scsi_host_template *sht); @@ -1136,7 +1138,7 @@ static inline bool sata_pmp_attached(struct ata_port *ap) static inline int ata_is_host_link(const struct ata_link *link) { - return link == &link->ap->link; + return link == &link->ap->link || link == link->ap->slave_link; } #else /* CONFIG_SATA_PMP */ static inline bool sata_pmp_supported(struct ata_port *ap) @@ -1169,7 +1171,7 @@ static inline int sata_srst_pmp(struct ata_link *link) printk("%sata%u: "fmt, lv, (ap)->print_id , ##args) #define ata_link_printk(link, lv, fmt, args...) do { \ - if (sata_pmp_attached((link)->ap)) \ + if (sata_pmp_attached((link)->ap) || (link)->ap->slave_link) \ printk("%sata%u.%02u: "fmt, lv, (link)->ap->print_id, \ (link)->pmp , ##args); \ else \ -- cgit v1.2.3 From ea6ce53cd5d005455ec0a3cc1d45d3af0cb90919 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Fri, 19 Sep 2008 23:46:01 +0200 Subject: [libata] Introduce ata_id_has_unload() Add a function to check an ATA device's id for head unload support as specified in ATA-7. Signed-off-by: Elias Oltmanns Signed-off-by: Jeff Garzik --- include/linux/ata.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d718c16..a26ebd25bac 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -667,6 +667,15 @@ static inline int ata_id_has_dword_io(const u16 *id) return 0; } +static inline int ata_id_has_unload(const u16 *id) +{ + if (ata_id_major_version(id) >= 7 && + (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 && + id[ATA_ID_CFSSE] & (1 << 13)) + return 1; + return 0; +} + static inline int ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command -- cgit v1.2.3 From 45fabbb77bd95adff7a80bde1c7a0ace1075fde6 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Sun, 21 Sep 2008 11:54:08 +0200 Subject: libata: Implement disk shock protection support On user request (through sysfs), the IDLE IMMEDIATE command with UNLOAD FEATURE as specified in ATA-7 is issued to the device and processing of the request queue is stopped thereafter until the specified timeout expires or user space asks to resume normal operation. This is supposed to prevent the heads of a hard drive from accidentally crashing onto the platter when a heavy shock is anticipated (like a falling laptop expected to hit the floor). In fact, the whole port stops processing commands until the timeout has expired in order to avoid any resets due to failed commands on another device. Signed-off-by: Elias Oltmanns Signed-off-by: Jeff Garzik --- include/linux/libata.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 244ff601559..1f44cfb847e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -146,6 +146,7 @@ enum { ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ + ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -244,6 +245,7 @@ enum { ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ ATA_TMOUT_INTERNAL_QUICK = 5000, + ATA_TMOUT_MAX_PARK = 30000, /* FIXME: GoVault needs 2s but we can't afford that without * parallel probing. 800ms is enough for iVDR disk @@ -319,8 +321,9 @@ enum { ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_LPM = (1 << 4), /* link power management action */ + ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK | ATA_EH_LPM, @@ -454,6 +457,7 @@ enum link_pm { MEDIUM_POWER, }; extern struct device_attribute dev_attr_link_power_management_policy; +extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; @@ -566,6 +570,7 @@ struct ata_device { /* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */ u64 n_sectors; /* size of device, if ATA */ unsigned int class; /* ATA_DEV_xxx */ + unsigned long unpark_deadline; u8 pio_mode; u8 dma_mode; @@ -623,6 +628,7 @@ struct ata_eh_context { [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; + unsigned int unloaded_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; /* timestamp for the last reset attempt or success */ @@ -712,6 +718,7 @@ struct ata_port { struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; + struct completion park_req_pending; pm_message_t pm_mesg; int *pm_result; @@ -1102,6 +1109,7 @@ extern void ata_std_error_handler(struct ata_port *ap); */ extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; +extern struct device_attribute *ata_common_sdev_attrs[]; #define ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ @@ -1116,7 +1124,8 @@ extern const struct ata_port_operations sata_port_ops; .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ - .bios_param = ata_std_bios_param + .bios_param = ata_std_bios_param, \ + .sdev_attrs = ata_common_sdev_attrs #define ATA_NCQ_SHT(drv_name) \ ATA_BASE_SHT(drv_name), \ -- cgit v1.2.3 From 6866e7bc83f13a1bc6de59099930e9db1ab0042f Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 22 Sep 2008 14:47:13 -0700 Subject: libata: reorder ata_device to remove 8 bytes of padding on 64 bits reduce size by 8 bytes from 1160 to 1152 allowing it to fit in 1 fewer cachelines. Signed-off-by: Richard Kennedy Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 1f44cfb847e..947cf84e555 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -560,8 +560,8 @@ struct ata_ering { struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned long flags; /* ATA_DFLAG_xxx */ unsigned int horkage; /* List of broken features */ + unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; -- cgit v1.2.3 From b00c1a99e7758f794923c61e5cd55268d61c9469 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:44:46 +0200 Subject: hrtimer: mark migration state Impact: during migration active hrtimers can be seen as inactive The migration code removes the hrtimers from the queues of the dead CPU and sets the state temporary to INACTIVE. The enqueue code sets it to ACTIVE/PENDING again. Prevent that the wrong state can be seen by using a separate migration state bit. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cb..bdd88df1b4e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -67,9 +67,10 @@ enum hrtimer_cb_mode { * 0x02 callback function running * 0x04 callback pending (high resolution mode) * - * Special case: + * Special cases: * 0x03 callback function running and enqueued * (was requeued on another CPU) + * 0x09 timer was migrated on CPU hotunplug * The "callback function running and enqueued" status is only possible on * SMP. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer @@ -87,6 +88,7 @@ enum hrtimer_cb_mode { #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 #define HRTIMER_STATE_PENDING 0x04 +#define HRTIMER_STATE_MIGRATE 0x08 /** * struct hrtimer - the basic hrtimer structure -- cgit v1.2.3 From ccc7dadf736639da86f3e0c86832c11a66fc8221 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:47:42 +0200 Subject: hrtimer: prevent migration of per CPU hrtimers Impact: per CPU hrtimers can be migrated from a dead CPU The hrtimer code has no knowledge about per CPU timers, but we need to prevent the migration of such timers and warn when such a timer is active at migration time. Explicitely mark the timers as per CPU and use a more understandable mode descriptor for the interrupts safe unlocked callback mode, which is used by hrtimer_sleeper and the scheduler code. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bdd88df1b4e..2f245fe63bd 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -47,14 +47,22 @@ enum hrtimer_restart { * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and * does not restart the timer - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context - * Special mode for tick emultation + * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context + * Special mode for tick emulation and + * scheduler timer. Such timers are per + * cpu and not allowed to be migrated on + * cpu unplug. + * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context + * with timer->base lock unlocked + * used for timers which call wakeup to + * avoid lock order problems with rq->lock */ enum hrtimer_cb_mode { HRTIMER_CB_SOFTIRQ, HRTIMER_CB_IRQSAFE, HRTIMER_CB_IRQSAFE_NO_RESTART, - HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, + HRTIMER_CB_IRQSAFE_PERCPU, + HRTIMER_CB_IRQSAFE_UNLOCKED, }; /* -- cgit v1.2.3 From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:22:14 -0700 Subject: netdev: use const for some name functions dev_change_name and netdev_drivername should use const char on parameters that are read-only input values. The strcpy to newname is not needed since newname is not used later in function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d675df08b94..9cfd20be8b7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1225,7 +1225,7 @@ extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); -extern int dev_change_name(struct net_device *, char *); +extern int dev_change_name(struct net_device *, const char *); extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); @@ -1670,7 +1670,7 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); -extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); +extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); -- cgit v1.2.3 From a57334e95e4fb132acca05bdc0efa2f9dda194af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 30 Sep 2008 02:53:18 -0700 Subject: Phonet: declare headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09399b..f431e40725d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -126,6 +126,7 @@ header-y += pci_regs.h header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h +header-y += phonet.h header-y += pkt_cls.h header-y += pkt_sched.h header-y += posix_types.h @@ -232,6 +233,7 @@ unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_link.h +unifdef-y += if_phonet.h unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_tr.h -- cgit v1.2.3 From 1c50b728c3e734150b8a4a8310ce3e01bc5c70be Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Sep 2008 11:06:46 -0400 Subject: rcu: add rcu_read_lock_sched() / rcu_read_unlock_sched() Add rcu_read_lock_sched() and rcu_read_unlock_sched() to rcupdate.h to match the recently added write-side call_rcu_sched() and rcu_barrier_sched(). They also match the no-so-recently-added synchronize_sched(). It will help following matching use of the update/read lock primitives. Those new read lock will replace preempt_disable()/enable() used in pair with RCU-classic synchronization. Signed-off-by: Mathieu Desnoyers Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e8b4039cfb2..86f1f5e43e3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -132,6 +132,26 @@ struct rcu_head { */ #define rcu_read_unlock_bh() __rcu_read_unlock_bh() +/** + * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * + * Should be used with either + * - synchronize_sched() + * or + * - call_rcu_sched() and rcu_barrier_sched() + * on the write-side to insure proper synchronization. + */ +#define rcu_read_lock_sched() preempt_disable() + +/* + * rcu_read_unlock_sched - marks the end of a RCU-classic critical section + * + * See rcu_read_lock_sched for more information. + */ +#define rcu_read_unlock_sched() preempt_enable() + + + /** * rcu_dereference - fetch an RCU-protected pointer in an * RCU read-side critical section. This pointer may later -- cgit v1.2.3 From 24268245d8ba9270152b2281666099ddc8ca389d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Oct 2008 08:59:40 +0200 Subject: x86: add PCI IDs for AMD Barcelona PCI devices Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2886b0eb53e..c114103af98 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -497,6 +497,11 @@ #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 +#define PCI_DEVICE_ID_AMD_10H_NB_HT 0x1200 +#define PCI_DEVICE_ID_AMD_10H_NB_MAP 0x1201 +#define PCI_DEVICE_ID_AMD_10H_NB_DRAM 0x1202 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 +#define PCI_DEVICE_ID_AMD_10H_NB_LINK 0x1204 #define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300 #define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301 #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 -- cgit v1.2.3 From 6e50e8a2136f1a90de251c653226ded447c5c915 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Wed, 1 Oct 2008 01:30:19 -0700 Subject: phonet: Protect if_phonet.h against multiple inclusions. From: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index 7e989216ec1..d70034bcec0 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -5,14 +5,15 @@ * * Copyright (C) 2008 Nokia Corporation. All rights reserved. */ +#ifndef LINUX_IF_PHONET_H +#define LINUX_IF_PHONET_H -#define PHONET_HEADER_LEN 8 /* Phonet header length */ - -#define PHONET_MIN_MTU 6 -/* 6 bytes header + 65535 bytes payload */ -#define PHONET_MAX_MTU 65541 +#define PHONET_MIN_MTU 6 /* pn_length = 0 */ +#define PHONET_MAX_MTU 65541 /* pn_length = 0xffff */ #define PHONET_DEV_MTU PHONET_MAX_MTU #ifdef __KERNEL__ extern struct header_ops phonet_header_ops; #endif + +#endif -- cgit v1.2.3 From 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 02:33:12 -0700 Subject: net: add skb_recycle_check() to enable netdriver skb recycling This patch adds skb_recycle_check(), which can be used by a network driver after transmitting an skb to check whether this skb can be recycled as a receive buffer. skb_recycle_check() checks that the skb is not shared or cloned, and that it is linear and its head portion large enough (as determined by the driver) to be recycled as a receive buffer. If these conditions are met, it does any necessary reference count dropping and cleans up the skbuff as if it just came from __alloc_skb(). Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a19ea43fea0..720b688c22b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -383,6 +383,8 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } +extern int skb_recycle_check(struct sk_buff *skb, int skb_size); + extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); -- cgit v1.2.3 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cbba7760545..9ff1b54908f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[7]; + long args[6]; }; struct netlink_notify -- cgit v1.2.3 From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:30:02 -0700 Subject: ipv4: Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 4065313cd7e..db458beef19 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -75,6 +75,7 @@ struct in_addr { #define IP_IPSEC_POLICY 16 #define IP_XFRM_POLICY 17 #define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS -- cgit v1.2.3 From 4e9687d9c843dc34d368358a36f5f1610e4fbab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Va=C5=A1ut?= Date: Thu, 11 Sep 2008 19:37:32 +0100 Subject: [ARM] 5248/1: wm97xx generic battery driver This patch adds generic battery driver for wm97xx chips. Signed-off-by: Marek Vasut Acked-by: Anton Vorontsov Acked-by: Mark Brown Signed-off-by: Russell King --- include/linux/wm97xx_batt.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/wm97xx_batt.h (limited to 'include/linux') diff --git a/include/linux/wm97xx_batt.h b/include/linux/wm97xx_batt.h new file mode 100644 index 00000000000..9681d1ab0e4 --- /dev/null +++ b/include/linux/wm97xx_batt.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_WM97XX_BAT_H +#define _LINUX_WM97XX_BAT_H + +#include + +struct wm97xx_batt_info { + int batt_aux; + int temp_aux; + int charge_gpio; + int min_voltage; + int max_voltage; + int batt_div; + int batt_mult; + int temp_div; + int temp_mult; + int batt_tech; + char *batt_name; +}; + +#ifdef CONFIG_BATTERY_WM97XX +void __init wm97xx_bat_set_pdata(struct wm97xx_batt_info *data); +#else +static inline void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data) {} +#endif + +#endif -- cgit v1.2.3 From 4b19de6d1cb07c8bcb6778e771f9cfd5bcfdfd3e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 2 Oct 2008 14:50:16 -0700 Subject: mm: tiny-shmem nommu fix The previous patch db203d53d474aa068984e409d807628f5841da1b ("mm: tiny-shmem fix lock ordering: mmap_sem vs i_mutex") to fix the lock ordering in tiny-shmem breaks shared anonymous and IPC memory on NOMMU architectures because it was using the expanding truncate to signal ramfs to allocate a physically contiguous RAM backing the inode (otherwise it is unusable for "memory mapping" it to userspace). However do_truncate is what caused the lock ordering error, due to it taking i_mutex. In this case, we can actually just call ramfs directly to allocate memory for the mapping, rather than go via truncate. Acked-by: David Howells Acked-by: Hugh Dickins Signed-off-by: Nick Piggin Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ramfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index b160fb18e8d..37aaf2b3986 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -6,6 +6,7 @@ extern int ramfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); #ifndef CONFIG_MMU +extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, -- cgit v1.2.3 From 2133b5d7ff531bc15a923db4a6a50bf96c561be9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Oct 2008 16:06:39 -0700 Subject: rcu: RCU-based detection of stalled CPUs for Classic RCU This patch adds stalled-CPU detection to Classic RCU. This capability is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which defaults disabled. This is a debugging feature to detect infinite loops in kernel code, not something that non-kernel-hackers would be expected to care about. This feature can detect looping CPUs in !PREEMPT builds and looping CPUs with preemption disabled in PREEMPT builds. This is essentially a port of this functionality from the treercu patch, replacing the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4). The changes from the patch in tip/core/rcu include making the config variable name match that in treercu, changing from seconds to jiffies to avoid spurious warnings, and printing a boot message when this feature is enabled. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 29bf528c7dc..5f89b62e698 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -40,15 +40,21 @@ #include #include +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ long pending; /* Number of the last pending batch */ -#ifdef CONFIG_DEBUG_RCU_STALL - unsigned long gp_check; /* Time grace period should end, in seconds. */ -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started in jiffies. */ + unsigned long jiffies_stall; + /* Time at which to check for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ int signaled; -- cgit v1.2.3 From 3c9f3681d0b4af09c1cbf04f92fdfb72bd81ad7b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 31 Aug 2008 10:13:54 -0500 Subject: [SCSI] lib: add generic helper to print sizes rounded to the correct SI range This patch adds the ability to print sizes in either units of 10^3 (SI) or 2^10 (Binary) units. It rounds up to three significant figures and can be used for either memory or storage capacities. Oh, and I'm fully aware that 64 bits is only 16EiB ... the Zetta and Yotta units are added for future proofing against the day we have 128 bit computers ... [fujita.tomonori@lab.ntt.co.jp: fix missed unsigned long long cast] Signed-off-by: James Bottomley --- include/linux/string_helpers.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/string_helpers.h (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h new file mode 100644 index 00000000000..a3eb2f65b65 --- /dev/null +++ b/include/linux/string_helpers.h @@ -0,0 +1,16 @@ +#ifndef _LINUX_STRING_HELPERS_H_ +#define _LINUX_STRING_HELPERS_H_ + +#include + +/* Descriptions of the types of units to + * print in */ +enum string_size_units { + STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ + STRING_UNITS_2, /* use binary powers of 2^10 */ +}; + +int string_get_size(u64 size, enum string_size_units units, + char *buf, int len); + +#endif -- cgit v1.2.3 From c4b929b85bdb64afacbbf6453b1f2bf7e14c9e89 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 8 Oct 2008 19:44:18 -0400 Subject: vfs: vfs-level fiemap interface Basic vfs-level fiemap infrastructure, which sets up a new ->fiemap inode operation. Userspace can get extent information on a file via fiemap ioctl. As input, the fiemap ioctl takes a struct fiemap which includes an array of struct fiemap_extent (fm_extents). Size of the extent array is passed as fm_extent_count and number of extents returned will be written into fm_mapped_extents. Offset and length fields on the fiemap structure (fm_start, fm_length) describe a logical range which will be searched for extents. All extents returned will at least partially contain this range. The actual extent offsets and ranges returned will be unmodified from their offset and range on-disk. The fiemap ioctl returns '0' on success. On error, -1 is returned and errno is set. If errno is equal to EBADR, then fm_flags will contain those flags which were passed in which the kernel did not understand. On all other errors, the contents of fm_extents is undefined. As fiemap evolved, there have been many authors of the vfs patch. As far as I can tell, the list includes: Kalpak Shah Andreas Dilger Eric Sandeen Mark Fasheh Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" Cc: Michael Kerrisk Cc: linux-api@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org --- include/linux/fiemap.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 18 ++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 include/linux/fiemap.h (limited to 'include/linux') diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 00000000000..671decbd2ae --- /dev/null +++ b/include/linux/fiemap.h @@ -0,0 +1,64 @@ +/* + * FS_IOC_FIEMAP ioctl infrastructure. + * + * Some portions copyright (C) 2007 Cluster File Systems, Inc + * + * Authors: Mark Fasheh + * Kalpak Shah + * Andreas Dilger + */ + +#ifndef _LINUX_FIEMAP_H +#define _LINUX_FIEMAP_H + +struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent from the beginning of the file */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent from the beginning of the disk */ + __u64 fe_length; /* length in bytes for this extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; +}; + +struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace wants (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ +}; + +#define FIEMAP_MAX_OFFSET (~0ULL) + +#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ +#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ + +#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ +#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ +#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. + * Sets EXTENT_UNKNOWN. */ +#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read + * while fs is unmounted */ +#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. + * Sets EXTENT_NO_BYPASS. */ +#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be + * block aligned. */ +#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but + * no data (i.e. zero). */ +#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively + * support extents. Result + * merged for efficiency. */ + +#endif /* _LINUX_FIEMAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..194fb237a30 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -231,6 +231,7 @@ extern int dir_notify_enable; #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) #define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) #define FS_IOC32_GETFLAGS _IOR('f', 1, int) #define FS_IOC32_SETFLAGS _IOW('f', 2, int) #define FS_IOC32_GETVERSION _IOR('v', 1, int) @@ -291,6 +292,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -1178,6 +1180,20 @@ extern void dentry_unhash(struct dentry *dentry); */ extern int file_permission(struct file *, int); +/* + * VFS FS_IOC_FIEMAP helper definitions. + */ +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent + * array */ +}; +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + /* * File types * @@ -1287,6 +1303,8 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); }; struct seq_file; -- cgit v1.2.3 From 68c9d702bb72f367f3b148963ec6cf5e07ff7f65 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 3 Oct 2008 17:32:43 -0400 Subject: generic block based fiemap implementation Any block based fs (this patch includes ext3) just has to declare its own fiemap() function and then call this generic function with its own get_block_t. This works well for block based filesystems that will map multiple contiguous blocks at one time, but will work for filesystems that only map one block at a time, you will just end up with an "extent" for each block. One gotcha is this will not play nicely where there is hole+data after the EOF. This function will assume its hit the end of the data as soon as it hits a hole after the EOF, so if there is any data past that it will not pick that up. AFAIK no block based fs does this anyway, but its in the comments of the function anyway just in case. Signed-off-by: Josef Bacik Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org --- include/linux/ext3_fs.h | 2 ++ include/linux/fs.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 80171ee89a2..8120fa1bc23 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); extern void ext3_set_aops(struct inode *inode); +extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, diff --git a/include/linux/fs.h b/include/linux/fs.h index 194fb237a30..385c9a197df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1998,6 +1998,9 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); +extern int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block); extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); -- cgit v1.2.3 From 897312bd240357c88ce906633703c324c6f0a5cd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 3 Oct 2008 15:23:41 -0700 Subject: include/linux/stacktrace.h: declare struct task_struct include/linux/stacktrace.h:13: warning: 'struct task_struct' declared inside parameter list (This might be a hard error on sparc64, which uses this header and has -Werror) Reported-by: "Randy.Dunlap" Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stacktrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 5da9794b2d7..b106fd8e0d5 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -1,6 +1,8 @@ #ifndef __LINUX_STACKTRACE_H #define __LINUX_STACKTRACE_H +struct task_struct; + #ifdef CONFIG_STACKTRACE struct stack_trace { unsigned int nr_entries, max_entries; -- cgit v1.2.3 From f20f258603ebc5da91e76884cf0c0d7ac9804b1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 5 Oct 2008 18:23:27 +0200 Subject: ide-cd: temporary tray close fix This one fixes http://bugzilla.kernel.org/show_bug.cgi?id=11602. A more generic fix for drives which cannot autoclose tray will follow. Signed-off-by: Borislav Petkov Cc: Jens Axboe [bart: add an extra parentheses for consistency with the rest of kernel code] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1524829f73f..6514db8fd2e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -366,7 +366,9 @@ enum { /* Currently on a filemark */ IDE_AFLAG_FILEMARK = (1 << 25), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26) + IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + + IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), }; struct ide_drive_s { -- cgit v1.2.3 From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:13 -0700 Subject: Phonet: Pipe End Point for Phonet Pipes protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This protocol provides some connection handling and negotiated congestion control. Nokia cellular modems use it for bulk transfers. It provides packet boundaries (hence SOCK_SEQPACKET). Congestion control is per packet rather per byte, so we do not re-use the generic socket memory accounting. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 3a027f588a4..f9218524207 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -27,7 +27,9 @@ #define PN_PROTO_TRANSPORT 0 /* Phonet datagram socket */ #define PN_PROTO_PHONET 1 -#define PHONET_NPROTO 2 +/* Phonet pipe */ +#define PN_PROTO_PIPE 2 +#define PHONET_NPROTO 3 #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 -- cgit v1.2.3 From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:16:16 -0700 Subject: Phonet: implement GPRS virtual interface over PEP socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 8 ++++++++ include/linux/socket.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index f9218524207..c9609f9aeda 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -31,9 +31,17 @@ #define PN_PROTO_PIPE 2 #define PHONET_NPROTO 3 +/* Socket options for SOL_PNPIPE level */ +#define PNPIPE_ENCAP 1 +#define PNPIPE_IFINDEX 2 + #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* Values for PNPIPE_ENCAP option */ +#define PNPIPE_ENCAP_NONE 0 +#define PNPIPE_ENCAP_IP 1 + /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) diff --git a/include/linux/socket.h b/include/linux/socket.h index 818ca33bf79..20fc4bbfca4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -297,6 +297,7 @@ struct ucred { #define SOL_RXRPC 272 #define SOL_PPPOL2TP 273 #define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 13 ++++++++++++- include/linux/xfrm.h | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 700725ddcaa..01b262959f2 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -226,6 +226,15 @@ struct sadb_x_sec_ctx { } __attribute__((packed)); /* sizeof(struct sadb_sec_ctx) = 8 */ +/* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ +struct sadb_x_kmaddress { + uint16_t sadb_x_kmaddress_len; + uint16_t sadb_x_kmaddress_exttype; + uint32_t sadb_x_kmaddress_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmaddress) == 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -346,7 +355,9 @@ struct sadb_x_sec_ctx { #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 #define SADB_X_EXT_SEC_CTX 24 -#define SADB_EXT_MAX 24 +/* Used with MIGRATE to pass @ to IKE for negotiation */ +#define SADB_X_EXT_KMADDRESS 25 +#define SADB_EXT_MAX 25 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fb0c215a305..4bc1e6b86cb 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -279,6 +279,7 @@ enum xfrm_attr_type_t { XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -415,6 +416,15 @@ struct xfrm_user_report { struct xfrm_selector sel; }; +/* Used by MIGRATE to pass addresses IKE should use to perform + * SA negotiation with the peer */ +struct xfrm_user_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + __u32 reserved; + __u16 family; +}; + struct xfrm_user_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; -- cgit v1.2.3 From 1e19b16a30c34c042f1eaa23db4c99bfad1dac0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Oct 2008 14:15:24 +0200 Subject: AMD IOMMU: use iommu_device_max_index, fix include/linux/iommu-helper.h has no header guards, which breaks sparc64 build. Add them. Signed-off-by: Thomas Gleixner Cc: Joerg Roedel Cc: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 786539e432d..a6d0586e2bf 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,6 @@ +#ifndef _LINUX_IOMMU_HELPER_H +#define _LINUX_IOMMU_HELPER_H + static inline unsigned long iommu_device_max_index(unsigned long size, unsigned long offset, u64 dma_mask) @@ -19,3 +22,5 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); + +#endif -- cgit v1.2.3 From 654bed16cf86a9ef94495d9e6131b7ff7840a3dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Oct 2008 14:22:33 -0700 Subject: net: packet split receive api Add some packet-split receive hooks. For one this allows to do NUMA node affine page allocs. Later on these hooks will be extended to do emergency reserve allocations for fragments. Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 720b688c22b..2725f4e5a9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -968,6 +968,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } +extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size); + #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) @@ -1382,6 +1385,26 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); + +/** + * netdev_alloc_page - allocate a page for ps-rx on a specific device + * @dev: network device to receive on + * + * Allocate a new page node local to the specified device. + * + * %NULL is returned if there is no free memory. + */ +static inline struct page *netdev_alloc_page(struct net_device *dev) +{ + return __netdev_alloc_page(dev, GFP_ATOMIC); +} + +static inline void netdev_free_page(struct net_device *dev, struct page *page) +{ + __free_page(page); +} + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check -- cgit v1.2.3 From 33f5f57eeb0c6386fdd85f9c690dc8d700ba7928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 7 Oct 2008 14:43:06 -0700 Subject: tcp: kill pointless urg_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It all started from me noticing that this urgent check in tcp_clean_rtx_queue is unnecessarily inside the loop. Then I took a longer look to it and found out that the users of urg_mode can trivially do without, well almost, there was one gotcha. Bonus: those funny people who use urg with >= 2^31 write_seq - snd_una could now rejoice too (that's the only purpose for the between being there, otherwise a simple compare would have done the thing). Not that I assume that the rest of the tcp code happily lives with such mind-boggling numbers :-). Alas, it turned out to be impossible to set wmem to such numbers anyway, yes I really tried a big sendfile after setting some wmem but nothing happened :-). ...Tcp_wmem is int and so is sk_sndbuf... So I hacked a bit variable to long and found out that it seems to work... :-) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 76729062829..fe77e1499ab 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -312,8 +312,11 @@ struct tcp_sock { u32 retrans_out; /* Retransmitted packets out */ u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 urg_mode; /* In urgent mode */ u8 ecn_flags; /* ECN status bits. */ + u8 reordering; /* Packet reordering metric. */ + u32 snd_up; /* Urgent pointer */ + + u8 keepalive_probes; /* num of allowed keep alive probes */ /* * Options received (usually on last packet, some only on SYN packets). */ @@ -361,8 +364,6 @@ struct tcp_sock { u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u8 reordering; /* Packet reordering metric. */ - u8 keepalive_probes; /* num of allowed keep alive probes */ u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ @@ -374,8 +375,6 @@ struct tcp_sock { u32 total_retrans; /* Total retransmits for entire connection */ u32 urg_seq; /* Seq of received urgent pointer */ - u32 snd_up; /* Urgent pointer */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ -- cgit v1.2.3 From b8bae41ed6a53cce56c50811a91cd963e3187d1c Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 7 Oct 2008 15:34:37 -0700 Subject: ipv4: add mc_count to in_device. This patch add mc_count to struct in_device and updates increment/decrement/initilaize of this field in IPv4 and in IPv6. - Also printing the vfs /proc entry (/proc/net/igmp) is adjusted to use the new mc_count. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c6f51ad52d5..06fcdb45106 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -25,6 +25,7 @@ struct in_device struct in_ifaddr *ifa_list; /* IP ifaddr chain */ rwlock_t mc_list_lock; struct ip_mc_list *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; -- cgit v1.2.3 From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Use unsigned types for hooknum and pf vars and (try to) consistently use u_int8_t for the L3 family. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 30 ++++++++++++++++-------------- include/linux/netfilter/x_tables.h | 28 ++++++++++++++-------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0c5eb7ed8b3..8c83d2e23bd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -92,8 +92,8 @@ struct nf_hook_ops /* User fills in from here down. */ nf_hookfn *hook; struct module *owner; - int pf; - int hooknum; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; @@ -102,7 +102,7 @@ struct nf_sockopt_ops { struct list_head list; - int pf; + u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; @@ -140,7 +140,7 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -151,7 +151,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -167,7 +167,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -212,14 +212,14 @@ __ret;}) NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ -int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -int compat_nf_setsockopt(struct sock *sk, int pf, int optval, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int compat_nf_getsockopt(struct sock *sk, int pf, int optval, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); /* Call this before modifying an existing packet: ensures it is @@ -292,7 +292,7 @@ extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #ifdef CONFIG_NF_NAT_NEEDED void (*decodefn)(struct sk_buff *, struct flowi *); @@ -315,7 +315,7 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -324,7 +324,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, { return okfn(skb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -332,7 +332,9 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, } struct flowi; static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) +{ +} #endif /*CONFIG_NETFILTER*/ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2326296b6f2..6989b22716e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -292,7 +292,7 @@ struct xt_table /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - int af; /* address/protocol family */ + u_int8_t af; /* address/protocol family */ }; #include @@ -346,19 +346,19 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, struct xt_table_info *newinfo, int *error); -extern struct xt_match *xt_find_match(int af, const char *name, u8 revision); -extern struct xt_target *xt_find_target(int af, const char *name, u8 revision); -extern struct xt_target *xt_request_find_target(int af, const char *name, +extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); -extern int xt_find_revision(int af, const char *name, u8 revision, int target, - int *err); +extern int xt_find_revision(u8 af, const char *name, u8 revision, + int target, int *err); -extern struct xt_table *xt_find_table_lock(struct net *net, int af, +extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); extern void xt_table_unlock(struct xt_table *t); -extern int xt_proto_init(struct net *net, int af); -extern void xt_proto_fini(struct net *net, int af); +extern int xt_proto_init(struct net *net, u_int8_t af); +extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); @@ -423,12 +423,12 @@ struct compat_xt_counters_info #define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ & ~(__alignof__(struct compat_xt_counters)-1)) -extern void xt_compat_lock(int af); -extern void xt_compat_unlock(int af); +extern void xt_compat_lock(u_int8_t af); +extern void xt_compat_unlock(u_int8_t af); -extern int xt_compat_add_offset(int af, unsigned int offset, short delta); -extern void xt_compat_flush_offsets(int af); -extern short xt_compat_calc_jump(int af, unsigned int offset); +extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); +extern void xt_compat_flush_offsets(u_int8_t af); +extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, -- cgit v1.2.3 From e948b20a71a06a740c925d6ea22b59b4e17cfa0c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: rename ipt_recent to xt_recent Like with other modules (such as ipt_state), ipt_recent.h is changed to forward definitions to (IOW include) xt_recent.h, and xt_recent.c is changed to use the new constant names. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_recent.h | 26 ++++++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_recent.h | 28 +++++++++++----------------- 3 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 include/linux/netfilter/xt_recent.h (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 3aff513d12c..5a8af875bce 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -32,6 +32,7 @@ header-y += xt_owner.h header-y += xt_pkttype.h header-y += xt_rateest.h header-y += xt_realm.h +header-y += xt_recent.h header-y += xt_sctp.h header-y += xt_state.h header-y += xt_statistic.h diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h new file mode 100644 index 00000000000..5cfeb81c679 --- /dev/null +++ b/include/linux/netfilter/xt_recent.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_NETFILTER_XT_RECENT_H +#define _LINUX_NETFILTER_XT_RECENT_H 1 + +enum { + XT_RECENT_CHECK = 1 << 0, + XT_RECENT_SET = 1 << 1, + XT_RECENT_UPDATE = 1 << 2, + XT_RECENT_REMOVE = 1 << 3, + XT_RECENT_TTL = 1 << 4, + + XT_RECENT_SOURCE = 0, + XT_RECENT_DEST = 1, + + XT_RECENT_NAME_LEN = 200, +}; + +struct xt_recent_mtinfo { + u_int32_t seconds; + u_int32_t hit_count; + u_int8_t check_set; + u_int8_t invert; + char name[XT_RECENT_NAME_LEN]; + u_int8_t side; +}; + +#endif /* _LINUX_NETFILTER_XT_RECENT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h index 6508a459265..d636cca133c 100644 --- a/include/linux/netfilter_ipv4/ipt_recent.h +++ b/include/linux/netfilter_ipv4/ipt_recent.h @@ -1,27 +1,21 @@ #ifndef _IPT_RECENT_H #define _IPT_RECENT_H -#define RECENT_NAME "ipt_recent" -#define RECENT_VER "v0.3.1" +#include -#define IPT_RECENT_CHECK 1 -#define IPT_RECENT_SET 2 -#define IPT_RECENT_UPDATE 4 -#define IPT_RECENT_REMOVE 8 -#define IPT_RECENT_TTL 16 +#define ipt_recent_info xt_recent_mtinfo -#define IPT_RECENT_SOURCE 0 -#define IPT_RECENT_DEST 1 +enum { + IPT_RECENT_CHECK = XT_RECENT_CHECK, + IPT_RECENT_SET = XT_RECENT_SET, + IPT_RECENT_UPDATE = XT_RECENT_UPDATE, + IPT_RECENT_REMOVE = XT_RECENT_REMOVE, + IPT_RECENT_TTL = XT_RECENT_TTL, -#define IPT_RECENT_NAME_LEN 200 + IPT_RECENT_SOURCE = XT_RECENT_SOURCE, + IPT_RECENT_DEST = XT_RECENT_DEST, -struct ipt_recent_info { - u_int32_t seconds; - u_int32_t hit_count; - u_int8_t check_set; - u_int8_t invert; - char name[IPT_RECENT_NAME_LEN]; - u_int8_t side; + IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN, }; #endif /*_IPT_RECENT_H*/ -- cgit v1.2.3 From 7e9c6eeb136a46dfd941852803b3a9dd78939b69 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Introduce NFPROTO_* constants The netfilter subsystem only supports a handful of protocols (much less than PF_*) and even non-PF protocols like ARP and pseudo-protocols like PF_BRIDGE. By creating NFPROTO_*, we can earn a few memory savings on arrays that previously were always PF_MAX-sized and keep the pseudo-protocols to ourselves. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 8c83d2e23bd..bf3afb0844f 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -52,6 +52,16 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, + NFPROTO_DECNET = 12, + NFPROTO_NUMPROTO, +}; + union nf_inet_addr { __u32 all[4]; __be32 ip; @@ -138,7 +148,7 @@ extern struct ctl_path nf_net_netfilter_sysctl_path[]; extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; #endif /* CONFIG_SYSCTL */ -extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -247,7 +257,7 @@ struct nf_afinfo { int route_key_size; }; -extern const struct nf_afinfo *nf_afinfo[NPROTO]; +extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) { return rcu_dereference(nf_afinfo[family]); -- cgit v1.2.3 From 48dc7865aa3db9404aedc8677d9daf8f8f469ab0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:01 +0200 Subject: netfilter: netns: remove nf_*_net() wrappers Now that dev_net() exists, the usefullness of them is even less. Also they're a big problem in resolving circular header dependencies necessary for NOTRACK-in-netns patch. See below. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 53 ----------------------------------------------- 1 file changed, 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf3afb0844f..48cfe51bfdd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -5,13 +5,11 @@ #include #include #include -#include #include #include #include #include #include -#include #endif #include #include @@ -355,56 +353,5 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *); static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif -static inline struct net *nf_pre_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_in_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_forward_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - BUG_ON(in->nd_net != out->nd_net); - return in->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_local_out_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - -static inline struct net *nf_post_routing_net(const struct net_device *in, - const struct net_device *out) -{ -#ifdef CONFIG_NET_NS - return out->nd_net; -#else - return &init_net; -#endif -} - #endif /*__KERNEL__*/ #endif /*__LINUX_NETFILTER_H*/ -- cgit v1.2.3 From 3bb0d1c00f86b13bb184193a8f0189ddd6f0459f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nf_conntrack: GRE conntracking in netns * make keymap list per-netns * per-netns keymal lock (not strictly necessary) * flush keymap at netns stop and module unload. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_proto_gre.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 535e4219d2b..2a10efda17f 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -extern void nf_ct_gre_keymap_flush(void); +extern void nf_ct_gre_keymap_flush(struct net *net); extern void nf_nat_need_gre(void); #endif /* __KERNEL__ */ -- cgit v1.2.3 From e84392707e10301b93121e1b74e2823db50cdf9e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables TPROXY target The TPROXY target implements redirection of non-local TCP/UDP traffic to local sockets. Additionally, it's possible to manipulate the packet mark if and only if a socket has been found. (We need this because we cannot use multiple targets in the same iptables rule.) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_TPROXY.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/netfilter/xt_TPROXY.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h new file mode 100644 index 00000000000..152e8f97132 --- /dev/null +++ b/include/linux/netfilter/xt_TPROXY.h @@ -0,0 +1,14 @@ +#ifndef _XT_TPROXY_H_target +#define _XT_TPROXY_H_target + +/* TPROXY target is capable of marking the packet to perform + * redirection. We can get rid of that whenever we get support for + * mutliple targets in the same rule. */ +struct xt_tproxy_target_info { + u_int32_t mark_mask; + u_int32_t mark_value; + __be32 laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H_target */ -- cgit v1.2.3 From 18219d3f7d6a5bc43825a41e0763158efbdb80d3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: ebtables: do centralized size checking Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 892f5b7771c..fd085af8962 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -215,6 +215,7 @@ struct ebt_match int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); + unsigned int matchsize; struct module *me; }; @@ -229,6 +230,7 @@ struct ebt_watcher int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; @@ -244,6 +246,7 @@ struct ebt_target int (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int targetsize; struct module *me; }; -- cgit v1.2.3 From 19eda879a136889110c692dec4c2ab59e0e43cef Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of check functions for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index fd085af8962..5f71719b7a2 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -211,8 +211,7 @@ struct ebt_match int (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; @@ -226,8 +225,7 @@ struct ebt_watcher void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; @@ -242,8 +240,7 @@ struct ebt_target int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); - /* 0 == let it in */ - int (*check)(const char *tablename, unsigned int hookmask, + bool (*check)(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; -- cgit v1.2.3 From 8cc784eec6676b58e7f60419c88179aaa97bf71c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: change return types of match functions for ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 5f71719b7a2..f9fda2c442a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -207,8 +207,7 @@ struct ebt_match { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* 0 == it matches */ - int (*match)(const struct sk_buff *skb, const struct net_device *in, + bool (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.3 From 0ac6ab1f7915fc820ca0cf8f597290dbb249edcc Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:13 +0200 Subject: netfilter: Change return types of targets/watchers for Ebtables extensions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f9fda2c442a..097432b94c5 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -221,7 +221,7 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - void (*watcher)(const struct sk_buff *skb, unsigned int hooknr, + unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *watcherdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, @@ -235,8 +235,8 @@ struct ebt_target { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - /* returns one of the standard verdicts */ - int (*target)(struct sk_buff *skb, unsigned int hooknr, + /* returns one of the standard EBT_* verdicts */ + unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); bool (*check)(const char *tablename, unsigned int hookmask, -- cgit v1.2.3 From 001a18d369f4813ed792629ff4a9a6ade2a4a031 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:14 +0200 Subject: netfilter: add dummy members to Ebtables code to ease transition to Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 097432b94c5..82f854bf37e 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -214,6 +214,8 @@ struct ebt_match const struct ebt_entry *e, void *matchdata, unsigned int datalen); void (*destroy)(void *matchdata, unsigned int datalen); unsigned int matchsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -228,6 +230,8 @@ struct ebt_watcher const struct ebt_entry *e, void *watcherdata, unsigned int datalen); void (*destroy)(void *watcherdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; @@ -243,6 +247,8 @@ struct ebt_target const struct ebt_entry *e, void *targetdata, unsigned int datalen); void (*destroy)(void *targetdata, unsigned int datalen); unsigned int targetsize; + u_int8_t revision; + u_int8_t family; struct module *me; }; -- cgit v1.2.3 From 2d06d4a5cc107046508d860a0b47dbc43b829b79 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: change Ebtables function signatures to match Xtables's Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 82f854bf37e..f20a57da7a2 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -31,6 +31,9 @@ * The 4 lsb are more than enough to store the verdict. */ #define EBT_VERDICT_BITS 0x0000000F +struct xt_match; +struct xt_target; + struct ebt_counter { uint64_t pcnt; @@ -208,11 +211,13 @@ struct ebt_match struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; bool (*match)(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchdata, - unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *matchdata, unsigned int datalen); - void (*destroy)(void *matchdata, unsigned int datalen); + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_match *match, void *matchinfo); unsigned int matchsize; u_int8_t revision; u_int8_t family; @@ -223,12 +228,14 @@ struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; - unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *watcherdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *watcherdata, unsigned int datalen); - void (*destroy)(void *watcherdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; @@ -240,12 +247,14 @@ struct ebt_target struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard EBT_* verdicts */ - unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *targetdata, unsigned int datalen); - bool (*check)(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *targetdata, unsigned int datalen); - void (*destroy)(void *targetdata, unsigned int datalen); + unsigned int (*target)(struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int hook_num, const struct xt_target *target, + const void *targinfo); + bool (*checkentry)(const char *table, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask); + void (*destroy)(const struct xt_target *target, void *targinfo); unsigned int targetsize; u_int8_t revision; u_int8_t family; -- cgit v1.2.3 From 043ef46c7690bfdbd5b012e15812a14a19ca5604 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:15 +0200 Subject: netfilter: move Ebtables to use Xtables Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index f20a57da7a2..d3f9243b9d9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -124,7 +124,7 @@ struct ebt_entry_match { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_match *match; + struct xt_match *match; } u; /* size of data */ unsigned int match_size; @@ -135,7 +135,7 @@ struct ebt_entry_watcher { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_watcher *watcher; + struct xt_target *watcher; } u; /* size of data */ unsigned int watcher_size; @@ -146,7 +146,7 @@ struct ebt_entry_target { union { char name[EBT_FUNCTION_MAXNAMELEN]; - struct ebt_target *target; + struct xt_target *target; } u; /* size of data */ unsigned int target_size; -- cgit v1.2.3 From 66bff35b722956cc2423f55fcf1b69cefa24ef8b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:16 +0200 Subject: netfilter: remove unused Ebtables functions Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d3f9243b9d9..568a690f6a6 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -302,12 +302,6 @@ struct ebt_table ~(__alignof__(struct ebt_replace)-1)) extern int ebt_register_table(struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); -extern int ebt_register_match(struct ebt_match *match); -extern void ebt_unregister_match(struct ebt_match *match); -extern int ebt_register_watcher(struct ebt_watcher *watcher); -extern void ebt_unregister_watcher(struct ebt_watcher *watcher); -extern int ebt_register_target(struct ebt_target *target); -extern void ebt_unregister_target(struct ebt_target *target); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); -- cgit v1.2.3 From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:17 +0200 Subject: netfilter: xtables: do centralized checkentry call (1/2) It used to be that {ip,ip6,etc}_tables called extension->checkentry themselves, but this can be moved into the xtables core. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6989b22716e..85aa42785a5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -330,10 +330,12 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *matchinfo); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto); + unsigned short proto, int inv_proto, + const void *entry, void *targinfo); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, -- cgit v1.2.3 From f7108a20dee44e5bb037f9e48f6a207b42e6ae1c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (1/6) The function signatures for Xtables extensions have grown over time. It involves a lot of typing/replication, and also a bit of stack space even if they are not used. Realize an NFWS2008 idea and pack them into structs. The skb remains outside of the struct so gcc can continue to apply its optimizations. This patch does this for match extensions' match functions. A few ambiguities have also been addressed. The "offset" parameter for example has been renamed to "fragoff" (there are so many different offsets already) and "protoff" to "thoff" (there is more than just one protocol here, so clarify). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 85aa42785a5..bcd40ec8325 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -173,6 +173,26 @@ struct xt_counters_info #include +/** + * struct xt_match_param - parameters for match extensions' match functions + * + * @in: input netdevice + * @out: output netdevice + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @fragoff: packet is a fragment, this is the data offset + * @thoff: position of transport header relative to skb->data + * @hotdrop: drop packet if we had inspection problems + */ +struct xt_match_param { + const struct net_device *in, *out; + const struct xt_match *match; + const void *matchinfo; + int fragoff; + unsigned int thoff; + bool *hotdrop; +}; + struct xt_match { struct list_head list; @@ -185,13 +205,7 @@ struct xt_match non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From 9b4fce7a3508a9776534188b6065b206a9608ccf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:18 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (2/6) This patch does this for match extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index bcd40ec8325..763a704ce83 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -193,6 +193,25 @@ struct xt_match_param { bool *hotdrop; }; +/** + * struct xt_mtchk_param - parameters for match extensions' + * checkentry functions + * + * @table: table the rule is tried to be inserted into + * @entryinfo: the family-specific rule data + * (struct ipt_ip, ip6t_ip, ebt_entry) + * @match: struct xt_match through which this function was invoked + * @matchinfo: per-match data + * @hook_mask: via which hooks the new rule is reachable + */ +struct xt_mtchk_param { + const char *table; + const void *entryinfo; + const struct xt_match *match; + void *matchinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -208,12 +227,7 @@ struct xt_match const struct xt_match_param *); /* Called when user tries to insert an entry of this type. */ - /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); @@ -342,10 +356,8 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(const struct xt_match *match, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *matchinfo); +extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto, -- cgit v1.2.3 From 6be3d8598e883fb632edf059ba2f8d1b9f4da138 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (3/6) This patch does this for match extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 763a704ce83..c79c8838014 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -212,6 +212,12 @@ struct xt_mtchk_param { unsigned int hook_mask; }; +/* Match destructor parameters */ +struct xt_mtdtor_param { + const struct xt_match *match; + void *matchinfo; +}; + struct xt_match { struct list_head list; @@ -230,7 +236,7 @@ struct xt_match bool (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo); + void (*destroy)(const struct xt_mtdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.3 From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (4/6) This patch does this for target extensions' target functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c79c8838014..46d0cb1ad34 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -218,6 +218,22 @@ struct xt_mtdtor_param { void *matchinfo; }; +/** + * struct xt_target_param - parameters for target extensions' target functions + * + * @hooknum: hook through which this target was invoked + * @target: struct xt_target through which this function was invoked + * @targinfo: per-target data + * + * Other fields see above. + */ +struct xt_target_param { + const struct net_device *in, *out; + unsigned int hooknum; + const struct xt_target *target; + const void *targinfo; +}; + struct xt_match { struct list_head list; @@ -269,11 +285,7 @@ struct xt_target must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo); + const struct xt_target_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be -- cgit v1.2.3 From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (5/6) This patch does this for target extensions' checkentry functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 29 ++++++++++++++++++++--------- include/linux/netfilter_bridge/ebtables.h | 4 ++-- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 46d0cb1ad34..8daeb496ba7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -234,6 +234,23 @@ struct xt_target_param { const void *targinfo; }; +/** + * struct xt_tgchk_param - parameters for target extensions' + * checkentry functions + * + * @entryinfo: the family-specific rule data + * (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry) + * + * Other fields see above. + */ +struct xt_tgchk_param { + const char *table; + void *entryinfo; + const struct xt_target *target; + void *targinfo; + unsigned int hook_mask; +}; + struct xt_match { struct list_head list; @@ -291,11 +308,7 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - bool (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); @@ -376,10 +389,8 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(const struct xt_target *target, unsigned short family, - unsigned int size, const char *table, unsigned int hook, - unsigned short proto, int inv_proto, - const void *entry, void *targinfo); +extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, + unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, struct xt_table *table, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 568a690f6a6..d45e29cd1cf 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -310,9 +310,9 @@ extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ -#define BASE_CHAIN (hookmask & (1 << NF_BR_NUMHOOKS)) +#define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) /* Clear the bit in the hook mask that tells if the rule is on a base chain */ -#define CLEAR_BASE_CHAIN_BIT (hookmask &= ~(1 << NF_BR_NUMHOOKS)) +#define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS)) /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) -- cgit v1.2.3 From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:19 +0200 Subject: netfilter: xtables: move extension arguments into compound structure (6/6) This patch does this for target extensions' destroy functions. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8daeb496ba7..e3b3b669a14 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,6 +251,12 @@ struct xt_tgchk_param { unsigned int hook_mask; }; +/* Target destructor parameters */ +struct xt_tgdtor_param { + const struct xt_target *target; + void *targinfo; +}; + struct xt_match { struct list_head list; @@ -311,7 +317,7 @@ struct xt_target bool (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo); + void (*destroy)(const struct xt_tgdtor_param *); /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); -- cgit v1.2.3 From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:20 +0200 Subject: netfilter: xtables: provide invoked family value to extensions By passing in the family through which extensions were invoked, a bit of data space can be reclaimed. The "family" member will be added to the parameter structures and the check functions be adjusted. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e3b3b669a14..be41b609c88 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -183,6 +183,8 @@ struct xt_counters_info * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * @hotdrop: drop packet if we had inspection problems + * @family: Actual NFPROTO_* through which the function is invoked + * (helpful when match->family == NFPROTO_UNSPEC) */ struct xt_match_param { const struct net_device *in, *out; @@ -191,6 +193,7 @@ struct xt_match_param { int fragoff; unsigned int thoff; bool *hotdrop; + u_int8_t family; }; /** @@ -210,12 +213,14 @@ struct xt_mtchk_param { const struct xt_match *match; void *matchinfo; unsigned int hook_mask; + u_int8_t family; }; /* Match destructor parameters */ struct xt_mtdtor_param { const struct xt_match *match; void *matchinfo; + u_int8_t family; }; /** @@ -232,6 +237,7 @@ struct xt_target_param { unsigned int hooknum; const struct xt_target *target; const void *targinfo; + u_int8_t family; }; /** @@ -249,12 +255,14 @@ struct xt_tgchk_param { const struct xt_target *target; void *targinfo; unsigned int hook_mask; + u_int8_t family; }; /* Target destructor parameters */ struct xt_tgdtor_param { const struct xt_target *target; void *targinfo; + u_int8_t family; }; struct xt_match @@ -393,9 +401,9 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family, +extern int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family, +extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, -- cgit v1.2.3 From 18ee49ddb0d242ed1d0e273038d5e4f6de7379d3 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 15:41:33 +0000 Subject: phylib: rename mii_bus::dev to mii_bus::parent In preparation of giving mii_bus objects a device tree presence of their own, rename struct mii_bus's ->dev argument to ->parent, since having a 'struct device *dev' that points to our parent device conflicts with introducing a 'struct device dev' representing our own device. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5f170f5b1a3..87499bdedc6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -99,7 +99,7 @@ struct mii_bus { */ struct mutex mdio_lock; - struct device *dev; + struct device *parent; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; -- cgit v1.2.3 From 298cf9beb9679522de995e249eccbd82f7c51999 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:29:57 -0700 Subject: phylib: move to dynamic allocation of struct mii_bus This patch introduces mdiobus_alloc() and mdiobus_free(), and makes all mdio bus drivers use these functions to allocate their struct mii_bus'es dynamically. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller Acked-by: Andy Fleming --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 87499bdedc6..ca4a83c4c53 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -408,8 +408,10 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +struct mii_bus *mdiobus_alloc(void); int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); void phy_sanitize_settings(struct phy_device *phydev); -- cgit v1.2.3 From 46abc02175b3c246dd5141d878f565a8725060c9 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 8 Oct 2008 16:33:40 -0700 Subject: phylib: give mdio buses a device tree presence Introduce the mdio_bus class, and give each 'struct mii_bus' its own 'struct device', so that mii_bus objects are represented in the device tree and can be found by querying the device tree. Signed-off-by: Lennert Buytenhek Acked-by: Andy Fleming Signed-off-by: David S. Miller --- include/linux/phy.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index ca4a83c4c53..891f27f9137 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -100,6 +100,13 @@ struct mii_bus { struct mutex mdio_lock; struct device *parent; + enum { + MDIOBUS_ALLOCATED = 1, + MDIOBUS_REGISTERED, + MDIOBUS_UNREGISTERED, + MDIOBUS_RELEASED, + } state; + struct device dev; /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; @@ -113,6 +120,7 @@ struct mii_bus { */ int *irq; }; +#define to_mii_bus(d) container_of(d, struct mii_bus, dev) #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 -- cgit v1.2.3 From 2e888103295f47b8fcbf7e9bb8c5da97dd2ecd76 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 29 Sep 2008 17:12:35 +0000 Subject: phylib: add mdiobus_{read,write} Add mdiobus_{read,write} routines to allow direct reading/writing of registers on an mii bus without having to go through the PHY abstraction, and make phy_{read,write} use these primitives. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/phy.h | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 891f27f9137..77c4ed60b98 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -122,6 +122,15 @@ struct mii_bus { }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) +struct mii_bus *mdiobus_alloc(void); +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void mdiobus_free(struct mii_bus *bus); +struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); +int mdiobus_read(struct mii_bus *bus, int addr, u16 regnum); +int mdiobus_write(struct mii_bus *bus, int addr, u16 regnum, u16 val); + + #define PHY_INTERRUPT_DISABLED 0x0 #define PHY_INTERRUPT_ENABLED 0x80000000 @@ -399,8 +408,35 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; -int phy_read(struct phy_device *phydev, u16 regnum); -int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +/** + * phy_read - Convenience function for reading a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to read + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_read(struct phy_device *phydev, u16 regnum) +{ + return mdiobus_read(phydev->bus, phydev->addr, regnum); +} + +/** + * phy_write - Convenience function for writing a given PHY register + * @phydev: the phy_device struct + * @regnum: register number to write + * @val: value to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + return mdiobus_write(phydev->bus, phydev->addr, regnum, val); +} + int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); @@ -416,12 +452,6 @@ void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); -struct mii_bus *mdiobus_alloc(void); -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void mdiobus_free(struct mii_bus *bus); -struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); - void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); int phy_enable_interrupts(struct phy_device *phydev); -- cgit v1.2.3 From 91da11f870f00a3322b81c73042291d7f0be5a17 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:44:02 +0000 Subject: net: Distributed Switch Architecture protocol support Distributed Switch Architecture is a protocol for managing hardware switch chips. It consists of a set of MII management registers and commands to configure the switch, and an ethernet header format to signal which of the ports of the switch a packet was received from or is intended to be sent to. The switches that this driver supports are typically embedded in access points and routers, and a typical setup with a DSA switch looks something like this: +-----------+ +-----------+ | | RGMII | | | +-------+ +------ 1000baseT MDI ("WAN") | | | 6-port +------ 1000baseT MDI ("LAN1") | CPU | | ethernet +------ 1000baseT MDI ("LAN2") | |MIImgmt| switch +------ 1000baseT MDI ("LAN3") | +-------+ w/5 PHYs +------ 1000baseT MDI ("LAN4") | | | | +-----------+ +-----------+ The switch driver presents each port on the switch as a separate network interface to Linux, polls the switch to maintain software link state of those ports, forwards MII management interface accesses to those network interfaces (e.g. as done by ethtool) to the switch, and exposes the switch's hardware statistics counters via the appropriate Linux kernel interfaces. This initial patch supports the MII management interface register layout of the Marvell 88E6123, 88E6161 and 88E6165 switch chips, and supports the "Ethertype DSA" packet tagging format. (There is no officially registered ethertype for the Ethertype DSA packet format, so we just grab a random one. The ethertype to use is programmed into the switch, and the switch driver uses the value of ETH_P_EDSA for this, so this define can be changed at any time in the future if the one we chose is allocated to another protocol or if Ethertype DSA gets its own officially registered ethertype, and everything will continue to work.) Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Byron Bradley Tested-by: Tim Ellis Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 723a1c5fbc6..2140aacb633 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -77,6 +77,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* * Non DIX types. Won't clash for 1500 types. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cfd20be8b7..794eeb4b346 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -607,6 +607,9 @@ struct net_device /* Protocol specific pointers */ +#ifdef CONFIG_NET_DSA + void *dsa_ptr; /* dsa specific data */ +#endif void *atalk_ptr; /* AppleTalk link */ void *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ -- cgit v1.2.3 From cf85d08fdf4548ee46657ccfb7f9949a85145db5 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:45:02 +0000 Subject: dsa: add support for original DSA tagging format Most of the DSA switches currently in the field do not support the Ethertype DSA tagging format that one of the previous patches added support for, but only the original DSA tagging format. The original DSA tagging format carries the same information as the Ethertype DSA tagging format, but with the difference that it does not have an ethertype field. In other words, when receiving a packet that is tagged with an original DSA tag, there is no way of telling in eth_type_trans() that this packet is in fact a DSA-tagged packet. This patch adds a hook into eth_type_trans() which is only compiled in if support for a switch chip that doesn't support Ethertype DSA is selected, and which checks whether there is a DSA switch driver instance attached to this network device which uses the old tag format. If so, it sets the protocol field to ETH_P_DSA without looking at the packet, so that the packet ends up in the right place. Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 2140aacb633..32b9dcda68c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -101,6 +101,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794eeb4b346..97f0c64c152 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,7 @@ #include #include +#include struct vlan_group; struct ethtool_ops; @@ -801,6 +802,16 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } +static inline bool netdev_uses_dsa_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_DSA + if (dev->dsa_ptr != NULL) + return dsa_uses_dsa_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device -- cgit v1.2.3 From 396138f03f4521c55ecc3a5dd75d4c56e6323244 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:46:07 +0000 Subject: dsa: add support for Trailer tagging format This adds support for the Trailer switch tagging format. This is another tagging that doesn't explicitly mark tagged packets with a distinct ethertype, so that we need to add a similar hack in the receive path as for the Original DSA tagging format. Signed-off-by: Lennert Buytenhek Tested-by: Byron Bradley Tested-by: Tim Ellis Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 32b9dcda68c..a0099e98b5c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -102,6 +102,7 @@ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ +#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97f0c64c152..d3ea3de70a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -812,6 +812,16 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +static inline bool netdev_uses_trailer_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_TRAILER + if (dev->dsa_ptr != NULL) + return dsa_uses_trailer_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device -- cgit v1.2.3 From 7a67f63b3233ff28e753854fe27891c44f8588ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2008 11:17:12 +0200 Subject: block: add bio_has_data() to detect whether a bio carries data or not Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 0933a14e641..9e93c929947 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -445,6 +445,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +/* + * Check whether this bio carries any data or not. A NULL bio is allowed. + */ +static inline int bio_has_data(struct bio *bio) +{ + return bio && bio->bi_io_vec != NULL; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- cgit v1.2.3 From a9c701e594669dd49fed448c27c64f20cfacc8a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2008 11:04:44 +0200 Subject: block: use bio_has_data() to check for data carrying bio Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9e93c929947..dbeb66f813a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -185,7 +185,7 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { -- cgit v1.2.3 From d628eaef310533767ce68664873869c2d7f78f09 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 9 Aug 2008 16:22:17 +0100 Subject: Fix up comments about matching flags between bio and rq Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- include/linux/blkdev.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index dbeb66f813a..17f1fbdb31b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -150,8 +150,8 @@ struct bio { * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately */ -#define BIO_RW 0 -#define BIO_RW_AHEAD 1 +#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ +#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53ea933cf60..e0ba018f5e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -84,7 +84,7 @@ enum { }; /* - * request type modified bits. first three bits match BIO_RW* bits, important + * request type modified bits. first two bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ -- cgit v1.2.3 From fb2dce862d9f9a68e6b9374579056ec9eca02a63 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:01:53 +0100 Subject: Add 'discard' request handling Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io functio, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi and Jens Axboe Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 ++++++-- include/linux/blkdev.h | 16 ++++++++++++++++ include/linux/fs.h | 3 ++- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 17f1fbdb31b..1fdfc5621c8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -149,6 +149,8 @@ struct bio { * bit 2 -- barrier * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * bit 5 -- metadata request + * bit 6 -- discard sectors */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -156,6 +158,7 @@ struct bio { #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 #define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -186,13 +189,14 @@ struct bio { #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) static inline unsigned int bio_cur_sectors(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len >> 9; - - return 0; + else /* dataless requests such as discard */ + return bio->bi_size >> 9; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0ba018f5e8..26ececbbebe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -89,6 +89,7 @@ enum { enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ @@ -111,6 +112,7 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -307,6 +310,7 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; + prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -546,6 +550,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ @@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); @@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + unsigned nr_sects); + +static inline int sb_issue_discard(struct super_block *sb, + sector_t block, unsigned nr_blocks) +{ + block <<= (sb->s_blocksize_bits - 9); + nr_blocks <<= (sb->s_blocksize_bits - 9); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); +} /* * command filter functions diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..eb013131913 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -86,7 +86,8 @@ extern int dir_notify_enable; #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3 From eae9acd13a8d14b50c00a961fa959606f34bbd92 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:25 +0100 Subject: Support 'discard sectors' operation in translation layer support core Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/mtd/blktrans.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 26ececbbebe..727886d25c4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -81,6 +81,7 @@ enum { */ REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 310e6160641..8b4aa0523db 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -41,6 +41,8 @@ struct mtd_blktrans_ops { unsigned long block, char *buffer); int (*writesect)(struct mtd_blktrans_dev *dev, unsigned long block, char *buffer); + int (*discard)(struct mtd_blktrans_dev *dev, + unsigned long block, unsigned nr_blocks); /* Block layer ioctls */ int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo); -- cgit v1.2.3 From 27b29e86bf3d4b3cf6641a0efd78ed11a9b633b2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 Aug 2008 11:21:57 +0100 Subject: blktrace: support discard requests Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d084b8d227a..27da2cc682e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -21,6 +21,7 @@ enum blktrace_cat { BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ + BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -195,6 +196,9 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (likely(!bt)) return; + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); -- cgit v1.2.3 From d30a2605be9d5132d95944916e8f578fcfe4f976 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 11 Aug 2008 15:58:42 +0100 Subject: Add BLKDISCARD ioctl to allow userspace to discard sectors We may well want mkfs tools to use this to mark the whole device as unwanted before they format it, for example. The ioctl takes a pair of uint64_ts, which are start offset and length in _bytes_. Although at the moment it might make sense for them both to be in 512-byte sectors, I don't want to limit the ABI to that. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index eb013131913..88358ca6af2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -223,6 +223,7 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From e17fc0a1ccf88f6d4dcb363729f3141b0958c325 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 9 Aug 2008 16:42:20 +0100 Subject: Allow elevators to sort/merge discard requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit But blkdev_issue_discard() still emits requests which are interpreted as soft barriers, because naïve callers might otherwise issue subsequent writes to those same sectors, which might cross on the queue (if they're reallocated quickly enough). Callers still _can_ issue non-barrier discard requests, but they have to take care of queue ordering for themselves. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 5 +++-- include/linux/fs.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1fdfc5621c8..33c3947d61e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -188,8 +188,8 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 727886d25c4..e9eb35c9bf2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -541,7 +541,7 @@ enum { #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) +#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) @@ -598,7 +598,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ + (blk_discard_rq(rq) || blk_fs_request((rq)))) /* * q->prep_rq_fn return values diff --git a/include/linux/fs.h b/include/linux/fs.h index 88358ca6af2..860689f541b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,7 +87,8 @@ extern int dir_notify_enable; #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) -#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) +#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) +#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3 From 1a8e2bddd5c29008f311613e75925fecbf522c5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Aug 2008 12:35:09 +0100 Subject: Kill REQ_TYPE_FLUSH It was only used by ps3disk, and it should probably have been REQ_TYPE_LINUX_BLOCK + REQ_LB_OP_FLUSH. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9eb35c9bf2..f131776f029 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_FLUSH, /* flush request */ REQ_TYPE_SPECIAL, /* driver defined type */ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { * */ enum { - /* - * just examples for now - */ REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_FLUSH = 0x41, /* flush request */ REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; -- cgit v1.2.3 From 766ca4428d1239a970926856c447310c9c191af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Thu, 14 Aug 2008 09:59:13 +0200 Subject: virtio_blk: use a wrapper function to access io context information of IO requests struct request has an ioprio member but it is never updated because currently bios do not hold io context information. The implication of this is that virtio_blk ends up passing useless information to the backend driver. That said, some IO schedulers such as CFQ do store io context information in struct request, but use private members for that, which means that that information cannot be directly accessed in a IO scheduler-independent way. This patch adds a function to obtain the ioprio of a request. We should avoid accessing ioprio directly and use this function instead, so that its users do not have to care about future changes in block layer structures or what the currently active IO controller is. This patch does not introduce any functional changes but paves the way for future clean-ups and enhancements. Signed-off-by: Fernando Luis Vazquez Cao Acked-by: Rusty Russell Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f131776f029..490ce458b03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -232,6 +232,11 @@ struct request { struct request *next_rq; }; +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + /* * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. -- cgit v1.2.3 From b8b3e16cfe6435d961f6aaebcfd52a1ff2a988c5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:15:19 +0200 Subject: block: drop virtual merging accounting Remove virtual merge accounting. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- include/linux/bio.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 33c3947d61e..894d16ce002 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -26,21 +26,8 @@ #ifdef CONFIG_BLOCK -/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ #include -#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) -#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) -#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) -#else -#define BIOVEC_VIRT_START_SIZE(x) 0 -#define BIOVEC_VIRT_OVERSIZE(x) 0 -#endif - -#ifndef BIO_VMERGE_BOUNDARY -#define BIO_VMERGE_BOUNDARY 0 -#endif - #define BIO_DEBUG #ifdef BIO_DEBUG @@ -240,8 +227,6 @@ static inline void *bio_data(struct bio *bio) ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) #endif -#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ -- cgit v1.2.3 From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- include/linux/bio.h | 16 +--------------- include/linux/blkdev.h | 7 ------- 2 files changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 894d16ce002..dfc3556d311 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -77,21 +77,8 @@ struct bio { */ unsigned short bi_phys_segments; - /* Number of segments after physical and DMA remapping - * hardware coalescing is performed. - */ - unsigned short bi_hw_segments; - unsigned int bi_size; /* residual I/O count */ - /* - * To keep track of the max hw size, we account for the - * sizes of the first and last virtually mergeable segments - * in this bio - */ - unsigned int bi_hw_front_size; - unsigned int bi_hw_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -113,7 +100,7 @@ struct bio { #define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ #define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ @@ -324,7 +311,6 @@ extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone(struct bio *, gfp_t); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 490ce458b03..1adb03827bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -189,13 +189,6 @@ struct request { */ unsigned short nr_phys_segments; - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - unsigned short ioprio; void *special; -- cgit v1.2.3 From 5b99c2ffa980528a197f26c7d876cceeccce8dd5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:56:11 +0200 Subject: block: make bi_phys_segments an unsigned int instead of short raid5 can overflow with more than 255 stripes, and we can increase it to an int for free on both 32 and 64-bit archs due to the padding. Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index dfc3556d311..2c0c09034fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -75,7 +75,7 @@ struct bio { /* Number of segments in this BIO after * physical address coalescing is performed. */ - unsigned short bi_phys_segments; + unsigned int bi_phys_segments; unsigned int bi_size; /* residual I/O count */ -- cgit v1.2.3 From a1ed5b0cffe4b16a93a6a3390e8cee0fbef94f86 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:16 +0200 Subject: klist: don't iterate over deleted entries A klist entry is kept on the list till all its current iterations are finished; however, a new iteration after deletion also iterates over deleted entries as long as their reference count stays above zero. This causes problems for cases where there are users which iterate over the list while synchronized against list manipulations and natuarally expect already deleted entries to not show up during iteration. This patch implements dead flag which gets set on deletion so that iteration can skip already deleted entries. The dead flag piggy backs on the lowest bit of knode->n_klist and only visible to klist implementation proper. While at it, drop klist_iter->i_head as it's redundant and doesn't offer anything in semantics or performance wise as klist_iter->i_klist is dereferenced on every iteration anyway. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/klist.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index 06c338ef7f1..8ea98db223e 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -38,7 +38,7 @@ extern void klist_init(struct klist *k, void (*get)(struct klist_node *), void (*put)(struct klist_node *)); struct klist_node { - struct klist *n_klist; + void *n_klist; /* never access directly */ struct list_head n_node; struct kref n_ref; struct completion n_removed; @@ -57,7 +57,6 @@ extern int klist_node_attached(struct klist_node *n); struct klist_iter { struct klist *i_klist; - struct list_head *i_head; struct klist_node *i_cur; }; -- cgit v1.2.3 From 5a3ceb861663040f9ef0176df4aaa494bba5e352 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:19 +0200 Subject: driver-core: use klist for class device list and implement iterator Iterating over entries using callback usually isn't too fun especially when the entry being iterated over can't be manipulated freely. This patch converts class->p->class_devices to klist and implements class device iterator so that the users can freely build their own control structure. The users are also free to call back into class code without worrying about locking. class_for_each_device() and class_find_device() are converted to use the new iterators, so their users don't have to worry about locking anymore either. Note: This depends on klist-dont-iterate-over-deleted-entries patch because class_intf->add/remove_dev() depends on proper synchronization with device removal. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/device.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4d8372d135d..246937c9cbc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -199,6 +199,11 @@ struct class { struct class_private *p; }; +struct class_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; + extern struct kobject *sysfs_dev_block_kobj; extern struct kobject *sysfs_dev_char_kobj; extern int __must_check __class_register(struct class *class, @@ -213,6 +218,13 @@ extern void class_unregister(struct class *class); __class_register(class, &__key); \ }) +extern void class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); +extern struct device *class_dev_iter_next(struct class_dev_iter *iter); +extern void class_dev_iter_exit(struct class_dev_iter *iter); + extern int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); @@ -396,7 +408,7 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct list_head node; + struct klist_node knode_class; struct class *class; dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ -- cgit v1.2.3 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. * implment part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index be4f5e5bfe0..c64e659c984 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -116,7 +116,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -145,14 +145,21 @@ struct gendisk { #endif }; +static inline struct gendisk *part_to_disk(struct hd_struct *part) +{ + if (likely(part)) + return dev_to_disk((part)->dev.parent); + return NULL; +} + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *get_part(struct gendisk *gendiskp, - sector_t sector) +static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, + sector_t sector) { struct hd_struct *part; int i; -- cgit v1.2.3 From cf771cb5a7b716f3f9e532fd42a1e3a0a75adec5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:09 +0200 Subject: block: make variable and argument names more consistent In hd_struct, @partno is used to denote partition number and a number of other places use @part to denote hd_struct. Functions use @part and @index instead. This causes confusion and makes it difficult to use consistent variable names for hd_struct. Always use @partno if a variable represents partition number. Also, print out functions use @f or @part for seq_file argument. Use @seqf uniformly instead. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c64e659c984..d1723c0a860 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -365,7 +365,7 @@ extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *part); +extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -534,8 +534,8 @@ struct unixware_disklabel { #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -extern dev_t blk_lookup_devt(const char *name, int part); -extern char *disk_name (struct gendisk *hd, int part, char *buf); +extern dev_t blk_lookup_devt(const char *name, int partno); +extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); @@ -553,16 +553,16 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int index) +static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) { - return bdget(MKDEV(disk->major, disk->first_minor) + index); + return bdget(MKDEV(disk->major, disk->first_minor) + partno); } #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int part) +static inline dev_t blk_lookup_devt(const char *name, int partno) { dev_t devt = MKDEV(0, 0); return devt; -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d1723c0a860..0ff75329199 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -111,10 +111,14 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + char disk_name[32]; /* name of major driver */ struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; @@ -152,6 +156,21 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline int disk_max_parts(struct gendisk *disk) +{ + return disk->minors - 1; +} + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk->dev.devt; +} + +static inline dev_t part_devt(struct hd_struct *part) +{ + return part->dev.devt; +} + /* * Macros to operate on percpu disk statistics: * @@ -163,7 +182,7 @@ static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, { struct hd_struct *part; int i; - for (i = 0; i < gendiskp->minors - 1; i++) { + for (i = 0; i < disk_max_parts(gendiskp); i++) { part = gendiskp->part[i]; if (part && part->start_sect <= sector && sector < part->start_sect + part->nr_sects) @@ -366,6 +385,7 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -553,11 +573,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + partno); -} - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 53 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0ff75329199..7fbba19e076 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_BLOCK @@ -100,6 +101,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif + struct rcu_head rcu_head; }; #define GENHD_FL_REMOVABLE 1 @@ -120,7 +122,14 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor - 1] */ + + /* Array of pointers to partitions indexed by partno - 1. + * Protected with matching bdev lock but stat and other + * non-critical accesses use RCU. Always access through + * helpers. + */ + struct hd_struct **__part; + struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part) return part->dev.devt; } +extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); + +static inline void disk_put_part(struct hd_struct *part) +{ + if (likely(part)) + put_device(&part->dev); +} + +/* + * Smarter partition iterator without context limits. + */ +#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ +#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ + +struct disk_part_iter { + struct gendisk *disk; + struct hd_struct *part; + int idx; + unsigned int flags; +}; + +extern void disk_part_iter_init(struct disk_part_iter *piter, + struct gendisk *disk, unsigned int flags); +extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +extern void disk_part_iter_exit(struct disk_part_iter *piter); + +extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, + sector_t sector); + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, - sector_t sector) -{ - struct hd_struct *part; - int i; - for (i = 0; i < disk_max_parts(gendiskp); i++) { - part = gendiskp->part[i]; - if (part && part->start_sect <= sector - && sector < part->start_sect + part->nr_sects) - return part; - } - return NULL; -} #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- include/linux/genhd.h | 139 +++++++++++++++++++++----------------------------- 1 file changed, 57 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7fbba19e076..ac8a901f200 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -209,16 +209,24 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -/* +/* * Macros to operate on percpu disk statistics: * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. */ - #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) #define disk_stat_read(gendiskp, field) \ ({ \ @@ -229,7 +237,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, res; \ }) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ int i; for_each_possible_cpu(i) @@ -237,14 +246,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { sizeof(struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +#define part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr(part->dkstats, cpu)->field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) \ @@ -264,10 +273,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - + #else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); 0; }) +#define disk_stat_unlock() rcu_read_unlock() + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) @@ -275,14 +287,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ +#define part_stat_add(cpu, part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) (part->dkstats.field) @@ -294,63 +306,26 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #endif /* CONFIG_SMP */ -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) +#define disk_stat_dec(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, -1) +#define disk_stat_inc(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, 1) +#define disk_stat_sub(cpu, gendiskp, field, subnd) \ + disk_stat_add(cpu, gendiskp, field, -subnd) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +#define all_stat_dec(cpu, gendiskp, field, sector) \ + all_stat_add(cpu, gendiskp, field, -1, sector) +#define all_stat_inc(cpu, gendiskp, part, field, sector) \ + all_stat_add(cpu, gendiskp, part, field, 1, sector) +#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ + all_stat_add(cpu, gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -401,8 +376,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void disk_round_stats(int cpu, struct gendisk *disk); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.3 From bcce3de1be61e424deef35d1e86e86a35c4b6e65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:22 +0900 Subject: block: implement extended dev numbers Implement extended device numbers. A block driver can tell block layer that it wants to use extended device numbers. After the usual minor space is used up, block layer automatically allocates devt's from EXT_BLOCK_MAJOR. Currently only one major number is allocated for this but as the allocation is strictly on-demand, ~1mil minor space under it should suffice unless the system actually has more than ~1mil partitions and if that ever happens adding more majors to the extended devt area is easy. Due to internal implementation issues, the first partition can't be allocated on the extended area. In other words, genhd->minors should at least be 1. This limitation will be lifted by later changes. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++++--- include/linux/major.h | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ac8a901f200..6fc53242406 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,13 +113,15 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { - /* major, first_minor and minors are input parameters only, - * don't use directly. Use disk_devt() and disk_max_parts(). + /* major, first_minor, minors and ext_minors are input + * parameters only, don't use directly. Use disk_devt() and + * disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + int ext_minors; /* number of extended dynamic minors */ char disk_name[32]; /* name of major driver */ @@ -167,7 +169,7 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors - 1; + return disk->minors + disk->ext_minors - 1; } static inline dev_t disk_devt(struct gendisk *disk) @@ -554,6 +556,8 @@ struct unixware_disklabel { #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 +extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +extern void blk_free_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); @@ -564,6 +568,9 @@ extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); +extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minrs, + int node_id); +extern struct gendisk *alloc_disk_ext(int minors, int ext_minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, diff --git a/include/linux/major.h b/include/linux/major.h index 53d5fafd85c..88249452b93 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -170,4 +170,6 @@ #define VIOTAPE_MAJOR 230 +#define BLOCK_EXT_MAJOR 259 + #endif -- cgit v1.2.3 From 1f0142905d4812966831613847db38a66da29eb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:23 +0900 Subject: block: adjust formatting for large minors and add ext_range sysfs attr With extended minors and the soon-to-follow debug feature, large minor numbers for block devices will be common. This patch does the followings to make printouts pretty. * Adapt print formats such that large minors don't break the formatting. * For extended MAJ:MIN, %02x%02x for MAJ:MIN used in printk_all_partitions() doesn't cut it anymore. Update it such that %03x:%05x is used if either MAJ or MIN doesn't fit in %02x. * Implement ext_range sysfs attribute which shows total minors the device can use including both conventional minor space and the extended one. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 860689f541b..02a9fb5a830 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1685,6 +1685,7 @@ extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_HASH_SIZE 255 -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc53242406..e4e18c509ac 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,9 +15,11 @@ #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&((disk)->__dev)) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -88,7 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -139,7 +141,7 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; + struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -163,7 +165,7 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) - return dev_to_disk((part)->dev.parent); + return dev_to_disk(part_to_dev(part)->parent); return NULL; } @@ -174,12 +176,12 @@ static inline int disk_max_parts(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk->dev.devt; + return disk_to_dev(disk)->devt; } static inline dev_t part_devt(struct hd_struct *part) { - return part->dev.devt; + return part_to_dev(part)->devt; } extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); @@ -187,7 +189,7 @@ extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) { if (likely(part)) - put_device(&part->dev); + put_device(part_to_dev(part)); } /* -- cgit v1.2.3 From b5d0b9df0ba5d9a044f3a21e7544f53d90bd1465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:06:42 +0200 Subject: block: introduce partition 0 genhd and partition code handled disk and partitions separately. All information about the whole disk was in struct genhd and partitions in struct hd_struct. However, the whole disk (part0) and other partitions have a lot in common and the data structures end up having good number of common fields and thus separate code paths doing the same thing. Also, the partition array was indexed by partno - 1 which gets pretty confusing at times. This patch introduces partition 0 and makes the partition array indexed by partno. Following patches will unify the handling of disk and parts piece-by-piece. This patch also implements disk_partitionable() which tests whether a disk is partitionable. With coming dynamic partition array change, the most common usage of disk_max_parts() will be testing whether a disk is partitionable and the number of max partitions will become much less important. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e4e18c509ac..9e866a2aee5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -127,12 +127,13 @@ struct gendisk { char disk_name[32]; /* name of major driver */ - /* Array of pointers to partitions indexed by partno - 1. + /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other * non-critical accesses use RCU. Always access through * helpers. */ struct hd_struct **__part; + struct hd_struct part0; struct block_device_operations *fops; struct request_queue *queue; @@ -171,7 +172,12 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors + disk->ext_minors - 1; + return disk->minors + disk->ext_minors; +} + +static inline bool disk_partitionable(struct gendisk *disk) +{ + return disk_max_parts(disk) > 1; } static inline dev_t disk_devt(struct gendisk *disk) @@ -197,6 +203,7 @@ static inline void disk_put_part(struct hd_struct *part) */ #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ +#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ struct disk_part_iter { struct gendisk *disk; -- cgit v1.2.3 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e866a2aee5..1cf828148ec 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -138,7 +138,6 @@ struct gendisk { struct block_device_operations *fops; struct request_queue *queue; void *private_data; - sector_t capacity; int flags; struct device *driverfs_dev; // FIXME: remove @@ -411,11 +410,11 @@ static inline sector_t get_start_sect(struct block_device *bdev) } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->capacity; + return disk->part0.nr_sects; } static inline void set_capacity(struct gendisk *disk, sector_t size) { - disk->capacity = size; + disk->part0.nr_sects = size; } #ifdef CONFIG_SOLARIS_X86_PARTITION -- cgit v1.2.3 From 548b10eb2959c96cef6fc29fc96e0931eeb53bc5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Aug 2008 09:01:47 +0200 Subject: block: move __dev from disk to part0 Move disk->__dev to part0->__dev. This simplifies bdget_disk() and lookup_devt() and allows common sysfs attributes to be unified. part_to_disk() is updated to handle part0 -> disk. Updated to include a fix from Bartlomiej Zolnierkiewicz , he writes: "part0 is a "special" partition and doesn't need to have capacity set - this fixes regression caused by "block: move __dev from disk to part0" commit." Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1cf828148ec..ff293ec8b3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -16,9 +16,9 @@ #ifdef CONFIG_BLOCK #define kobj_to_dev(k) container_of((k), struct device, kobj) -#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&((disk)->__dev)) +#define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; @@ -141,7 +141,6 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -164,8 +163,12 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { - if (likely(part)) - return dev_to_disk(part_to_dev(part)->parent); + if (likely(part)) { + if (part->partno) + return dev_to_disk(part_to_dev(part)->parent); + else + return dev_to_disk(part_to_dev(part)); + } return NULL; } -- cgit v1.2.3 From e56105214943ce5f0901d20e972a7cfd0d1d0656 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:09 +0900 Subject: block: unify sysfs size node handling Now that capacity and __dev are moved to part0, part0 and others can share the same method. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ff293ec8b3f..9cb8380cf0e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -591,6 +591,9 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); +extern ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf); + #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From b7db9956e57c8151b930d5e5fe5c766e6aad3ff7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:10 +0900 Subject: block: move policy from disk to part0 Move disk->policy to part0->policy. Implement and use get_disk_ro(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9cb8380cf0e..4411bdd671d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,7 +145,6 @@ struct gendisk { struct kobject *slave_dir; struct timer_rand_state *random; - int policy; atomic_t sync_io; /* RAID */ unsigned long stamp; @@ -403,6 +402,11 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0.policy; +} + /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk); extern void rand_initialize_disk(struct gendisk *disk); -- cgit v1.2.3 From 4c46501d1659475dc6c89554af6ce7fe6ecf615c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:11 +0900 Subject: block: move holder_dir from disk to part0 Move disk->holder_dir to part0->holder_dir. Kill now mostly superflous bdev_get_holder(). While at it, kill superflous kobject_get/put() around holder_dir, slave_dir and cmd_filter creation and collapse disk_sysfs_add_subdirs() into register_disk(). These serve no purpose but obfuscating the code. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4411bdd671d..2c0e1b597ab 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -141,7 +141,6 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct kobject *holder_dir; struct kobject *slave_dir; struct timer_rand_state *random; -- cgit v1.2.3 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differenciate common handling. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c0e1b597ab..45a3682b5d8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -412,7 +412,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; + return bdev->bd_part->start_sect; } static inline sector_t get_capacity(struct gendisk *disk) { -- cgit v1.2.3 From eddb2e26b5ee3c5da68ba4bf1921ba20e2097bff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:13 +0900 Subject: block: kill GENHD_FL_FAIL and use part0->make_it_fail GENHD_FL_FAIL for disk is what make_it_fail is for parts. Kill it and use part0->make_it_fail. Sysfs node handling is unified too. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 45a3682b5d8..3d15b42dc35 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -112,7 +112,6 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_FAIL 64 struct gendisk { /* major, first_minor, minors and ext_minors are input @@ -596,6 +595,13 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +#ifdef CONFIG_FAIL_MAKE_REQUEST +extern ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +#endif /* CONFIG_FAIL_MAKE_REQUEST */ #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 159 ++++++++++++++------------------------------------ 1 file changed, 45 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d15b42dc35..c90e1b4fbe5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,13 +145,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; @@ -232,46 +225,18 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, * internal use only. */ #ifdef CONFIG_SMP -#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) - -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr(part->dkstats, cpu)->field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -284,109 +249,73 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -#else /* !CONFIG_SMP */ -#define disk_stat_lock() ({ rcu_read_lock(); 0; }) -#define disk_stat_unlock() rcu_read_unlock() - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define part_stat_add(cpu, part, field, addnd) \ - (part->dkstats.field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ - -#define disk_stat_dec(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, -1) -#define disk_stat_inc(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, 1) -#define disk_stat_sub(cpu, gendiskp, field, subnd) \ - disk_stat_add(cpu, gendiskp, field, -subnd) - -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define all_stat_dec(cpu, gendiskp, field, sector) \ - all_stat_add(cpu, gendiskp, field, -1, sector) -#define all_stat_inc(cpu, gendiskp, part, field, sector) \ - all_stat_add(cpu, gendiskp, part, field, 1, sector) -#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ - all_stat_add(cpu, gendiskp, part, field, -subnd, sector) +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); } -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; -} +#endif /* CONFIG_SMP */ -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(int cpu, struct gendisk *disk); extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ @@ -595,6 +524,8 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From 540eed5637b766bb1e881ef744c42617760b4815 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:15 +0900 Subject: block: make partition array dynamic disk->__part used to be statically allocated to the maximum possible number of partitions. This patch makes partition array allocation dynamic. The added overhead is minimal as only real change is one memory dereference changed to RCU one. This saves both a bit of memory and cpu cycles iterating through unoccupied slots and makes increasing partition limit easier. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c90e1b4fbe5..ecf649c3dee 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,6 +113,21 @@ struct hd_struct { #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; + +struct disk_part_tbl { + struct rcu_head rcu_head; + int len; + struct hd_struct *part[]; +}; + struct gendisk { /* major, first_minor, minors and ext_minors are input * parameters only, don't use directly. Use disk_devt() and @@ -131,7 +146,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct hd_struct **__part; + struct disk_part_tbl *part_tbl; struct hd_struct part0; struct block_device_operations *fops; @@ -149,6 +164,7 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; #endif + int node_id; }; static inline struct gendisk *part_to_disk(struct hd_struct *part) @@ -503,6 +519,7 @@ extern void blk_free_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); +extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); -- cgit v1.2.3 From 689d6fac40b41c7bf154f362deaf442548e4dc81 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:16 +0900 Subject: block: replace @ext_minors with GENHD_FL_EXT_DEVT With previous changes, it's meaningless to limit the number of partitions. Replace @ext_minors with GENHD_FL_EXT_DEVT such that setting the flag allows the disk to have maximum number of allowed partitions (only limited by the number of entries in parsed_partitions as determined by MAX_PART constant). This kills not-too-pretty alloc_disk_ext[_node]() functions and makes @minors parameter to alloc_disk[_node]() unnecessary. The parameter is left alone to avoid disturbing the users. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ecf649c3dee..04524c213de 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -58,6 +58,8 @@ enum { UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ }; +#define DISK_MAX_PARTS 256 + #include #include #include @@ -112,6 +114,7 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 +#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) @@ -129,15 +132,13 @@ struct disk_part_tbl { }; struct gendisk { - /* major, first_minor, minors and ext_minors are input - * parameters only, don't use directly. Use disk_devt() and - * disk_max_parts(). + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - int ext_minors; /* number of extended dynamic minors */ char disk_name[32]; /* name of major driver */ @@ -180,7 +181,9 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors + disk->ext_minors; + if (disk->flags & GENHD_FL_EXT_DEVT) + return DISK_MAX_PARTS; + return disk->minors; } static inline bool disk_partitionable(struct gendisk *disk) @@ -527,9 +530,6 @@ extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); -extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minrs, - int node_id); -extern struct gendisk *alloc_disk_ext(int minors, int ext_minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, -- cgit v1.2.3 From 3e1a7ff8a0a7b948f2684930166954f9e8e776fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:17 +0900 Subject: block: allow disk to have extended device number Now that disk and partition handlings are mostly unified, it's easy to allow disk to have extended device number. This patch makes add_disk() use extended device number if disk->minors is zero. Both sd and ide-disk are updated to use this. * sd_format_disk_name() is implemented which can generically determine the drive name. This removes disk number restriction stemming from limited device names. * If sd index goes over SD_MAX_DISKS (which can be increased now BTW), sd simply doesn't initialize minors letting block layer choose extended device number. * If CONFIG_DEBUG_EXT_DEVT is set, both sd and ide-disk always set minors to 0 and use extended device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 04524c213de..206cdf96c3a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -59,6 +59,7 @@ enum { }; #define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 #include #include @@ -140,7 +141,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - char disk_name[32]; /* name of major driver */ + char disk_name[DISK_NAME_LEN]; /* name of major driver */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other -- cgit v1.2.3 From 18887ad910e56066233a07fd3cfb2fa11338b782 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Jul 2008 13:08:45 +0200 Subject: block: make kblockd_schedule_work() take the queue as parameter Preparatory patch for checking queuing affinity. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1adb03827bd..10aa46c8f17 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,7 +912,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ -- cgit v1.2.3 From c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Sep 2008 20:26:01 +0200 Subject: block: add support for IO CPU affinity This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ include/linux/blkdev.h | 5 ++++- include/linux/elevator.h | 8 ++++---- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c0c09034fd..13aba20edb2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -81,6 +81,8 @@ struct bio { unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + unsigned int bi_comp_cpu; /* completion CPU */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; @@ -105,6 +107,7 @@ struct bio { #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -342,6 +345,14 @@ void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); extern unsigned int bvec_nr_vecs(unsigned short idx); +/* + * Allow queuer to specify a completion CPU for this bio + */ +static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) +{ + bio->bi_comp_cpu = cpu; +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 10aa46c8f17..93204bf7b29 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -139,7 +140,8 @@ enum rq_flag_bits { */ struct request { struct list_head queuelist; - struct list_head donelist; + struct call_single_data csd; + int cpu; struct request_queue *q; @@ -420,6 +422,7 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ static inline int queue_is_locked(struct request_queue *q) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 639624b55fb..bb791c311a5 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -173,15 +173,15 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* - * Hack to reuse the donelist list_head as the fifo time holder while + * Hack to reuse the csd.list list_head as the fifo time holder while * the request is in the io scheduler. Saves an unsigned long in rq. */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) -#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) +#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->donelist); \ + INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) /* -- cgit v1.2.3 From ab780f1ece0dc8d5e8e8e85435acc5e4747ccda3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Aug 2008 10:25:02 +0200 Subject: block: inherit CPU completion on bio->rq and rq->rq merges Somewhat incomplete, as we do allow merges of requests and bios that have different completion CPUs given. This is done on the assumption that a larger IO is still more beneficial than CPU locality. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 93204bf7b29..12df8efeef1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -545,6 +545,7 @@ enum { #define blk_pm_request(rq) \ (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) +#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) -- cgit v1.2.3 From a3bce90edd8f6cafe3f63b1a943800792e830178 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:05 +0900 Subject: block: add gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov Currently, blk_rq_map_user and blk_rq_map_user_iov always do GFP_KERNEL allocation. This adds gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov so sg can use it (sg always does GFP_ATOMIC allocation). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +++++---- include/linux/blkdev.h | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 13aba20edb2..200b185c3e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -325,11 +325,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, - unsigned long, unsigned int, int); + unsigned long, unsigned int, int, gfp_t); struct sg_iovec; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int); + struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -337,9 +337,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); +extern struct bio *bio_copy_user(struct request_queue *, unsigned long, + unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, - int, int); + int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12df8efeef1..00e388d0e22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -710,11 +710,12 @@ extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); -extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); +extern int blk_rq_map_user(struct request_queue *, struct request *, + void __user *, unsigned long, gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int); + struct sg_iovec *, int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3 From 152e283fdfea0cd11e297d982378b55937842dde Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:06 +0900 Subject: block: introduce struct rq_map_data to use reserved pages This patch introduces struct rq_map_data to enable bio_copy_use_iov() use reserved pages. Currently, bio_copy_user_iov allocates bounce pages but drivers/scsi/sg.c wants to allocate pages by itself and use them. struct rq_map_data can be used to pass allocated pages to bio_copy_user_iov. The current users of bio_copy_user_iov simply passes NULL (they don't want to use pre-allocated pages). Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 +++++--- include/linux/blkdev.h | 12 ++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 200b185c3e8..bc386cd5e99 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; +struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, struct sg_iovec *, int, int, gfp_t); @@ -337,9 +338,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, - unsigned int, int, gfp_t); -extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, +extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, + unsigned long, unsigned int, int, gfp_t); +extern struct bio *bio_copy_user_iov(struct request_queue *, + struct rq_map_data *, struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00e388d0e22..358ac423ed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -642,6 +642,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_MMU */ +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; +}; + struct req_iterator { int i; struct bio *bio; @@ -711,11 +717,13 @@ extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, - void __user *, unsigned long, gfp_t); + struct rq_map_data *, void __user *, unsigned long, + gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int, gfp_t); + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3 From 879040742cf09f2360a9ac41846288707e4e567c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 15:05:58 +0900 Subject: block: add blk_rq_aligned helper function This adds blk_rq_aligned helper function to see if alignment and padding requirement is satisfied for DMA transfer. This also converts blk_rq_map_kern and __blk_rq_map_user to use the helper function. Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 358ac423ed2..9c254926042 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -899,6 +899,13 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } +static inline int blk_rq_aligned(struct request_queue *q, void *addr, + unsigned int len) +{ + unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + return !((unsigned long)addr & alignment) && !(len & alignment); +} + /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { -- cgit v1.2.3 From 818827669d85b84241696ffef2de485db46b0b5e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 16:20:19 +0900 Subject: block: make blk_rq_map_user take a NULL user-space buffer This patch changes blk_rq_map_user to accept a NULL user-space buffer with a READ command if rq_map_data is not NULL. Thus a caller can pass page frames to lk_rq_map_user to just set up a request and bios with page frames propely. bio_uncopy_user (called via blk_rq_unmap_user) doesn't copy data to user space with such request. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index bc386cd5e99..7af373f253d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -108,6 +108,7 @@ struct bio { #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.3 From 0c002c2f74e10baa9021d3ecc50585c6eafea568 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:20 -0600 Subject: Wrapper for lower-level revalidate_disk routines. This is a wrapper for the lower-level revalidate_disk call-backs such as sd_revalidate_disk(). It allows us to perform pre and post operations when calling them. We will use this wrapper in a later patch to adjust block device sizes after an online resize (a _post_ operation). Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 02a9fb5a830..d63461f9798 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,7 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); -- cgit v1.2.3 From c3279d1454cdfed02a557d789d8a6d08ab4cbe70 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:25 -0600 Subject: Adjust block device size after an online resize of a disk. The revalidate_disk routine now checks if a disk has been resized by comparing the gendisk capacity to the bdev inode size. If they are different (usually because the disk has been resized underneath the kernel) the bdev inode size is adjusted to match the capacity. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d63461f9798..32477e8872d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,8 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern void check_disk_size_change(struct gendisk *disk, + struct block_device *bdev); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); -- cgit v1.2.3 From 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:55:09 -0700 Subject: block: unify request timeout handling Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c254926042..067f28b8007 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -147,6 +147,7 @@ struct request { unsigned int cmd_flags; enum rq_cmd_type_bits cmd_type; + unsigned long atomic_flags; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! @@ -214,6 +215,8 @@ struct request { void *data; void *sense; + unsigned long deadline; + struct list_head timeout_list; unsigned int timeout; int retries; @@ -266,6 +269,14 @@ typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +enum blk_eh_timer_return { + BLK_EH_NOT_HANDLED, + BLK_EH_HANDLED, + BLK_EH_RESET_TIMER, +}; + +typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); + enum blk_queue_state { Queue_down, Queue_up, @@ -311,6 +322,7 @@ struct request_queue merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; + rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; /* @@ -386,6 +398,10 @@ struct request_queue unsigned int nr_sorted; unsigned int in_flight; + unsigned int rq_timeout; + struct timer_list timeout; + struct list_head timeout_list; + /* * sg stuff */ @@ -770,6 +786,8 @@ extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); +extern void __blk_complete_request(struct request *); +extern void blk_abort_request(struct request *); /* * blk_end_request() takes bytes instead of sectors as a complete size. @@ -811,6 +829,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); +extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); -- cgit v1.2.3 From 11914a53d2ec2974a565311af327b8983d8c820d Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Sat, 13 Sep 2008 20:31:27 +0200 Subject: block: Add interface to abort queued requests Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/blktrace_api.h | 2 ++ include/linux/elevator.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 067f28b8007..37781d6fe04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -788,6 +788,7 @@ extern int blk_end_request_callback(struct request *rq, int error, extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); +extern void blk_abort_queue(struct request_queue *); /* * blk_end_request() takes bytes instead of sectors as a complete size. diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 27da2cc682e..dcaf2452ed1 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -48,6 +48,7 @@ enum blktrace_act { __BLK_TA_SPLIT, /* bio was split */ __BLK_TA_BOUNCE, /* bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ + __BLK_TA_ABORT, /* request aborted */ }; /* @@ -78,6 +79,7 @@ enum blktrace_notify { #define BLK_TA_SPLIT (__BLK_TA_SPLIT) #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index bb791c311a5..92f6f634e3e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -112,6 +112,7 @@ extern struct request *elv_latter_request(struct request_queue *, struct request extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); +extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); -- cgit v1.2.3 From 3e6053d76dcbd92b2f9f4ad5ece9bce83149523e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 11 Sep 2008 10:57:55 +0200 Subject: block: adjust blkdev_issue_discard for swap Two mods to blkdev_issue_discard(), thinking ahead to its use on swap: 1. Add gfp_mask argument, so swap allocation can use it where GFP_KERNEL might deadlock but GFP_NOIO is safe. 2. Enlarge nr_sects argument from unsigned to sector_t: unsigned long is enough to cover a whole swap area, but sector_t suits any partition. Change sb_issue_discard()'s nr_blocks to sector_t too; but no need seen for a gfp_mask there, just pass GFP_KERNEL down to blkdev_issue_discard(). Signed-off-by: Hugh Dickins Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 37781d6fe04..b47767c72ce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -873,15 +874,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); -extern int blkdev_issue_discard(struct block_device *, sector_t sector, - unsigned nr_sects); +extern int blkdev_issue_discard(struct block_device *, + sector_t sector, sector_t nr_sects, gfp_t); static inline int sb_issue_discard(struct super_block *sb, - sector_t block, unsigned nr_blocks) + sector_t block, sector_t nr_blocks) { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); } /* -- cgit v1.2.3 From 0a0d96b03a1f3bfd6bc3ea08008699e8e59fccd9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Sep 2008 13:17:37 +0200 Subject: block: add bio_kmalloc() Not all callers need (or want!) the mempool backing guarentee, it essentially means that you can only use bio_alloc() for short allocations and not for preallocating some bio's at setup or init time. So add bio_kmalloc() which does the same thing as bio_alloc(), except it just uses kmalloc() as the backing instead of the bio mempools. Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7af373f253d..6520ee1a3f6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ extern struct bio_set *bioset_create(int, int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_kmalloc(gfp_t, int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); -- cgit v1.2.3 From 581d4e28d9195aa8b2231383dbabc288988d615e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:56:33 -0700 Subject: block: add fault injection mechanism for faking request timeouts Only works for the generic request timer handling. Allows one to sporadically ignore request completions, thus exercising the timeout handling. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b47767c72ce..e34999d14c1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -440,6 +440,7 @@ struct request_queue #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.3 From 9c02f2b02e29a2244e36c6e1f246080d8afc6cff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 18 Sep 2008 09:31:53 -0700 Subject: block: cleanup some of the integrity stuff in blkdev.h Don't put functions that are only used in fs/bio-integrity.c in blkdev.h, it's much cleaner to just keep it in there. Also kill completely unused bdev_get_tag_size() Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e34999d14c1..e23b838825b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1004,47 +1004,6 @@ extern int blk_integrity_compare(struct block_device *, struct block_device *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); -static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) -{ - if (bi) - return bi->tuple_size; - - return 0; -} - -static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) -{ - return bdev->bd_disk->integrity; -} - -static inline unsigned int bdev_get_tag_size(struct block_device *bdev) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi) - return bi->tag_size; - - return 0; -} - -static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi == NULL) - return 0; - - if (rw == READ && bi->verify_fn != NULL && - (bi->flags & INTEGRITY_FLAG_READ)) - return 1; - - if (rw == WRITE && bi->generate_fn != NULL && - (bi->flags & INTEGRITY_FLAG_WRITE)) - return 1; - - return 0; -} - static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1058,8 +1017,6 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_integrity_rq(rq) (0) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) -#define bdev_get_integrity(a) (0) -#define bdev_get_tag_size(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.3 From 32fab448e5e86694beade415e750363538ea5f49 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:45:09 -0400 Subject: block: add request update interface This patch adds blk_update_request(), which updates struct request with completing its data part, but doesn't complete the struct request itself. Though it looks like end_that_request_first() of older kernels, blk_update_request() should be used only by request stacking drivers. Request-based dm will use it in bio->bi_end_io callback to update the original request when a data part of a cloned request completes. Followings are additional background information of why request-based dm needs this interface. - Request stacking drivers can't use blk_end_request() directly from the lower driver's completion context (bio->bi_end_io or rq->end_io), because some device drivers (e.g. ide) may try to complete their request with queue lock held, and it may cause deadlock. See below for detailed description of possible deadlock: - To solve that, request-based dm offloads the completion of cloned struct request to softirq context (i.e. using blk_complete_request() from rq->end_io). - Though it is possible to use the same solution from bio->bi_end_io, it will delay the notification of bio completion to the original submitter. Also, it will cause inefficient partial completion, because the lower driver can't perform the cloned request anymore and request-based dm needs to requeue and redispatch it to the lower driver again later. That's not good. - So request-based dm needs blk_update_request() to perform the bio completion in the lower driver's completion context, which is more efficient. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23b838825b..e82a84c9f37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -791,6 +791,8 @@ extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); +extern void blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); /* * blk_end_request() takes bytes instead of sectors as a complete size. -- cgit v1.2.3 From 82124d60354846623a4b94af335717a5e142a074 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:45:38 -0400 Subject: block: add request submission interface This patch adds blk_insert_cloned_request(), a generic request submission interface for request stacking drivers. Request-based dm will use it to submit their clones to underlying devices. blk_rq_check_limits() is also added because it is possible that the lower queue has stronger limitations than the upper queue if multiple drivers are stacking at request-level. Not only for blk_insert_cloned_request()'s internal use, the function will be used by request-based dm when the queue limitation is modified (e.g. by replacing dm's table). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e82a84c9f37..964c246bc27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -693,6 +693,9 @@ extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); +extern int blk_insert_cloned_request(struct request_queue *q, + struct request *rq); extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); -- cgit v1.2.3 From 4ee5eaf4516a60f8ef64d3c246c64c6be0cf8c3a Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 18 Sep 2008 10:46:13 -0400 Subject: block: add a queue flag for request stacking support This patch adds a queue flag to indicate the block device can be used for request stacking. Request stacking drivers need to stack their devices on top of only devices of which q->request_fn is functional. Since bio stacking drivers (e.g. md, loop) basically initialize their queue using blk_alloc_queue() and don't set q->request_fn, the check of (q->request_fn == NULL) looks enough for that purpose. However, dm will become both types of stacking driver (bio-based and request-based). And dm will always set q->request_fn even if the dm device is bio-based of which q->request_fn is not functional actually. So we need something else to distinguish the type of the device. Adding a queue flag is a solution for that. The reason why dm always sets q->request_fn is to keep the compatibility of dm user-space tools. Currently, all dm user-space tools are using bio-based dm without specifying the type of the dm device they use. To use request-based dm without changing such tools, the kernel must decide the type of the dm device automatically. The automatic type decision can't be done at the device creation time and needs to be deferred until such tools load a mapping table, since the actual type is decided by dm target type included in the mapping table. So a dm device has to be initialized using blk_init_queue() so that we can load either type of table. Then, all queue stuffs are set (e.g. q->request_fn) and we have no element to distinguish that it is bio-based or request-based, even after a table is loaded and the type of the device is decided. By the way, some stuffs of the queue (e.g. request_list, elevator) are needless when the dm device is used as bio-based. But the memory size is not so large (about 20[KB] per queue on ia64), so I hope the memory loss can be acceptable for bio-based dm users. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 964c246bc27..86f77ef127f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -441,6 +441,7 @@ struct request_queue #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ static inline int queue_is_locked(struct request_queue *q) { @@ -547,6 +548,8 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) +#define blk_queue_stackable(q) \ + test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) -- cgit v1.2.3 From 9e49184c82e9ec3ab4d45f9ea5a17ccaf43869f0 Mon Sep 17 00:00:00 2001 From: Keith Wansbrough Date: Mon, 22 Sep 2008 14:57:17 -0700 Subject: floppy: support arbitrary first-sector numbers The current floppy_struct allows floppies to number sectors starting from 0 or 1. This patch allows arbitrary first-sector numbers - for example, 0xC1 for Amstrad CPC disks. This extends the existing 1-bit field (FD_ZEROBASED, bit 2 of stretch) to 8 bits (FD_SECTMASK, bits 2 to 9). Currently 0x00 denotes a first sector number of 1, and 0x01 denotes a first sector number of 0. We extend this by interpreting FD_SECTMASK as the first sector number with the LSB flipped. Signed-off-by: Keith Wansbrough Cc: Alain Knaff Cc: Michael Kerrisk Cc: Karel Zak Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/fd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fd.h b/include/linux/fd.h index b6bd41d2b46..f5d194af07a 100644 --- a/include/linux/fd.h +++ b/include/linux/fd.h @@ -15,10 +15,16 @@ struct floppy_struct { sect, /* sectors per track */ head, /* nr of heads */ track, /* nr of tracks */ - stretch; /* !=0 means double track steps */ + stretch; /* bit 0 !=0 means double track steps */ + /* bit 1 != 0 means swap sides */ + /* bits 2..9 give the first sector */ + /* number (the LSB is flipped) */ #define FD_STRETCH 1 #define FD_SWAPSIDES 2 #define FD_ZEROBASED 4 +#define FD_SECTBASEMASK 0x3FC +#define FD_MKSECTBASE(s) (((s) ^ 1) << 2) +#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) unsigned char gap, /* gap1 size */ -- cgit v1.2.3 From a68bbddba486020c9c74825ce90c4c1ec463e0e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:03:33 +0200 Subject: block: add queue flag for SSD/non-rotational devices We don't want to idle in AS/CFQ if the device doesn't have a seek penalty. So add a QUEUE_FLAG_NONROT to indicate a non-rotational device, low level drivers should set this flag upon discovery of an SSD or similar device type. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86f77ef127f..0cf3e619fb2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,6 +442,7 @@ struct request_queue #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ static inline int queue_is_locked(struct request_queue *q) { @@ -547,6 +548,7 @@ enum { #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) +#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.3 From 8bff7c6b0f63c7ee9c5e3a076338d74125b8debb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:05:10 +0200 Subject: libata: set queue SSD flag for SSD devices SSD devices should give an RPM setting of 1 in word 217 of the ID page. If we see such a device, tell the block layer about it. Signed-off-by: Jens Axboe --- include/linux/ata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d718c16..c1c8b4a4ba2 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -88,6 +88,7 @@ enum { ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, + ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), ATA_ID_SERNO_LEN = 20, @@ -691,6 +692,11 @@ static inline int ata_id_is_cfa(const u16 *id) return 0; } +static inline int ata_id_is_ssd(const u16 *id) +{ + return id[ATA_ID_ROT_SPEED] == 0x01; +} + static inline int ata_drive_40wire(const u16 *dev_id) { if (ata_id_is_sata(dev_id)) -- cgit v1.2.3 From c0ddffa84a7d12da9943a94d04dadbfb1883b904 Mon Sep 17 00:00:00 2001 From: Sven Schuetz Date: Fri, 26 Sep 2008 10:58:02 +0200 Subject: include blktrace_api.h in headers_install This header file is of interest for user space programming, i.e. for tools that process blktrace data. We would like to use it for a tool on-top of blktrace which processes data provided by blktrace. For this purpose, it would be helpful if the blktrace API would make it to /usr/include/linux. The git tree for the blktrace tools comes with its own copy of this header file. I didn't manage to replace that copy with the file generated by the patch below yet. A few more cleanups would be needed. For example, the blktrace ioctl numbers, which are currently defined in usr/include/fs.h, might need to be moved. Should be feasible, though. Signed-off-by: Sven Schuetz Signed-off-by: Martin Peschke Signed-off-by: Jens Axboe --- include/linux/Kbuild | 1 + include/linux/blktrace_api.h | 58 ++++++++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09399b..31474e89c59 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -180,6 +180,7 @@ unifdef-y += audit.h unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h +unifdef-y += blktrace_api.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index dcaf2452ed1..a2a7d0ca275 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -1,8 +1,10 @@ #ifndef BLKTRACE_H #define BLKTRACE_H +#ifdef __KERNEL__ #include #include +#endif /* * Trace categories @@ -92,17 +94,17 @@ enum blktrace_notify { * The trace itself */ struct blk_io_trace { - u32 magic; /* MAGIC << 8 | version */ - u32 sequence; /* event number */ - u64 time; /* in microseconds */ - u64 sector; /* disk offset */ - u32 bytes; /* transfer length */ - u32 action; /* what happened */ - u32 pid; /* who did it */ - u32 device; /* device number */ - u32 cpu; /* on what cpu did it happen */ - u16 error; /* completion error */ - u16 pdu_len; /* length of data after this trace */ + __u32 magic; /* MAGIC << 8 | version */ + __u32 sequence; /* event number */ + __u64 time; /* in microseconds */ + __u64 sector; /* disk offset */ + __u32 bytes; /* transfer length */ + __u32 action; /* what happened */ + __u32 pid; /* who did it */ + __u32 device; /* device number */ + __u32 cpu; /* on what cpu did it happen */ + __u16 error; /* completion error */ + __u16 pdu_len; /* length of data after this trace */ }; /* @@ -120,6 +122,25 @@ enum { Blktrace_stopped, }; +/* + * User setup structure passed with BLKTRACESTART + */ +struct blk_user_trace_setup { +#ifdef __KERNEL__ + char name[BDEVNAME_SIZE]; /* output */ +#else + char name[32]; /* output */ +#endif + __u16 act_mask; /* input */ + __u32 buf_size; /* input */ + __u32 buf_nr; /* input */ + __u64 start_lba; + __u64 end_lba; + __u32 pid; +}; + +#ifdef __KERNEL__ +#if defined(CONFIG_BLK_DEV_IO_TRACE) struct blk_trace { int trace_state; struct rchan *rchan; @@ -136,21 +157,6 @@ struct blk_trace { atomic_t dropped; }; -/* - * User setup structure passed with BLKTRACESTART - */ -struct blk_user_trace_setup { - char name[BDEVNAME_SIZE]; /* output */ - u16 act_mask; /* input */ - u32 buf_size; /* input */ - u32 buf_nr; /* input */ - u64 start_lba; - u64 end_lba; - u32 pid; -}; - -#ifdef __KERNEL__ -#if defined(CONFIG_BLK_DEV_IO_TRACE) extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); -- cgit v1.2.3 From ef9e3facdf1fe1228721a7c295a76d1b7a0e57ec Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 16:12:15 +0200 Subject: block: add lld busy state exporting interface This patch adds an new interface, blk_lld_busy(), to check lld's busy state from the block layer. blk_lld_busy() calls down into low-level drivers for the checking if the drivers set q->lld_busy_fn() using blk_queue_lld_busy(). This resolves a performance problem on request stacking devices below. Some drivers like scsi mid layer stop dispatching request when they detect busy state on its low-level device like host/target/device. It allows other requests to stay in the I/O scheduler's queue for a chance of merging. Request stacking drivers like request-based dm should follow the same logic. However, there is no generic interface for the stacked device to check if the underlying device(s) are busy. If the request stacking driver dispatches and submits requests to the busy underlying device, the requests will stay in the underlying device's queue without a chance of merging. This causes performance problem on burst I/O load. With this patch, busy state of the underlying device is exported via q->lld_busy_fn(). So the request stacking driver can check it and stop dispatching requests if busy. The underlying device driver must return the busy state appropriately: 1: when the device driver can't process requests immediately. 0: when the device driver can process requests immediately, including abnormal situations where the device driver needs to kill all requests. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0cf3e619fb2..9e0ee1a8254 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,6 +269,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +typedef int (lld_busy_fn) (struct request_queue *q); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -325,6 +326,7 @@ struct request_queue softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; + lld_busy_fn *lld_busy_fn; /* * Dispatch queue sorting @@ -699,6 +701,7 @@ extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); +extern int blk_lld_busy(struct request_queue *q); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_plug_device(struct request_queue *); @@ -835,6 +838,7 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); +extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); -- cgit v1.2.3 From 0497b345e7d067109e0dd9bf9f4978a6847ee13b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 1 Oct 2008 16:16:25 +0200 Subject: blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure Define as 32, which is is what BDEVNAME_SIZE is/was as well. This keeps the user interface the same and gets rid of the difference between kernel and user api here. Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index a2a7d0ca275..3a31eb50616 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -122,15 +122,13 @@ enum { Blktrace_stopped, }; +#define BLKTRACE_BDEV_SIZE 32 + /* * User setup structure passed with BLKTRACESTART */ struct blk_user_trace_setup { -#ifdef __KERNEL__ - char name[BDEVNAME_SIZE]; /* output */ -#else - char name[32]; /* output */ -#endif + char name[BLKTRACE_BDEV_SIZE]; /* output */ __u16 act_mask; /* input */ __u32 buf_size; /* input */ __u32 buf_nr; /* input */ -- cgit v1.2.3 From d00e29fd99dd63d1c51917604e35dee824ed567f Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:14:46 -0400 Subject: block: remove end_{queued|dequeued}_request() This patch removes end_queued_request() and end_dequeued_request(), which are no longer used. As a results, users of __end_request() became only end_request(). So the actual code in __end_request() is moved to end_request() and __end_request() is removed. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e0ee1a8254..bfc18e497c7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -793,8 +793,6 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern void end_queued_request(struct request *, int); -extern void end_dequeued_request(struct request *, int); extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); -- cgit v1.2.3 From 8deaf7210728c453295dc1cb2a5b66c68183ac85 Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Thu, 2 Oct 2008 12:46:53 +0200 Subject: bio.h: Remove unused conditional code The whole bio_integrity() definition is inside an #ifdef CONFIG_BLK_DEV_INTEGRITY, there's no need for the conditional code. Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6520ee1a3f6..98c2d057065 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -457,14 +457,7 @@ static inline int bio_has_data(struct bio *bio) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -static inline int bio_integrity(struct bio *bio) -{ -#if defined(CONFIG_BLK_DEV_INTEGRITY) - return bio->bi_integrity != NULL; -#else - return 0; -#endif -} +#define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -- cgit v1.2.3 From b04accc425d52ca59699290661e0dfd09b0feeeb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Oct 2008 12:53:22 +0200 Subject: block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1 We need bdev_get_integrity() to support the pending md/dm patches. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bfc18e497c7..bc693f5c388 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1016,6 +1016,12 @@ extern int blk_integrity_compare(struct block_device *, struct block_device *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); +static inline +struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +{ + return bdev->bd_disk->integrity; +} + static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1029,6 +1035,7 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_integrity_rq(rq) (0) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) +#define bdev_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.3 From 74aa8c2cc010035a7eef2b4ca4d6430e0dae206a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 03:38:37 -0400 Subject: block: Introduce integrity data ownership flag A filesystem might supply its own integrity metadata. Introduce a flag that indicates whether the filesystem or the block layer owns the integrity buffer. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 98c2d057065..d86d39d490e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -109,6 +109,7 @@ struct bio { #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.3 From ad7fce93147d32ae53d25d9ea1a8ba31a239deee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 03:38:39 -0400 Subject: block: Switch blk_integrity_compare from bdev to gendisk The DM and MD integrity support now depends on being able to use gendisks instead of block_devices when comparing integrity profiles. Change function parameters accordingly. Also update comparison logic so that two NULL profiles are a valid configuration. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc693f5c388..00d340b0f75 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1012,7 +1012,7 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); -extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); -- cgit v1.2.3 From b02739b01c5309d74a59859f2ce92c931d1f1955 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 2 Oct 2008 18:47:49 +0200 Subject: block: gendisk integrity wrapper This is a wrapper for accessing a gendisk's integrity bits. It allows the integrity support in MD to be compiled with BLK_DEV_INTEGRITY off. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00d340b0f75..a92d9e4ea96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1022,6 +1022,11 @@ struct blk_integrity *bdev_get_integrity(struct block_device *bdev) return bdev->bd_disk->integrity; } +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + return disk->integrity; +} + static inline int blk_integrity_rq(struct request *rq) { if (rq->bio == NULL) @@ -1036,6 +1041,7 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) #define bdev_get_integrity(a) (0) +#define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); -- cgit v1.2.3 From ad3316bf4eeb53c89164f759767f911072b56203 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 22:42:53 -0400 Subject: block: Find bio sector offset given idx and offset Helper function to find the sector offset in a bio given bvec index and page offset. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d86d39d490e..fe12d0f9eba 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); +extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; -- cgit v1.2.3 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9eba..fb97221d7c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.3 From af5639424008ffe96f89b059bea1aec15e0115a9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Oct 2008 09:01:10 +0200 Subject: block: add some comments around the bio read-write flags Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fb97221d7c3..ff5b4cf9e2d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -123,13 +123,23 @@ struct bio { /* * bio bi_rw flags * - * bit 0 -- read (not set) or write (set) + * bit 0 -- data direction + * If not set, bio is a read from device. If set, it's a write to device. * bit 1 -- rw-ahead when set * bit 2 -- barrier + * Insert a serialization point in the IO queue, forcing previously + * submitted IO to be completed before this oen is issued. * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * Note that this does NOT indicate that the IO itself is sync, just + * that the block layer will not postpone issue of this IO by plugging. * bit 5 -- metadata request + * Used for tracing to differentiate metadata and data IO. May also + * get some preferential treatment in the IO scheduler * bit 6 -- discard sectors + * Informs the lower level device that this range of sectors is no longer + * used by the file system and may thus be freed by the device. Used + * for flash based storage. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ -- cgit v1.2.3 From a5d8c3483a6e19aca95ef6a2c5890e33bfa5b293 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Oct 2008 11:35:51 +0200 Subject: sched debug: add name to sched_domain sysctl entries add /proc/sys/kernel/sched_domain/cpu0/domain0/name, to make it easier to see which specific scheduler domain remained at that entry. Since we process the scheduler domain tree and simplify it, it's not always immediately clear during debugging which domain came from where. depends on CONFIG_SCHED_DEBUG=y. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8e699b5585..5d0819ee442 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -824,6 +824,9 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif }; extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, -- cgit v1.2.3 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6fd5668aa57..1ee608fd7b7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -187,7 +187,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); +extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, + unsigned int cpu); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -226,7 +227,9 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ - unsigned int (*getavg) (unsigned int cpu); + unsigned int (*getavg) (struct cpufreq_policy *policy, + unsigned int cpu); + int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); -- cgit v1.2.3 From 8083e4ad970e4eb567e31037060cdd4ba346f0c0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:11 -0700 Subject: [CPUFREQ][5/6] cpufreq: Changes to get_cpu_idle_time_us(), used by ondemand governor export get_cpu_idle_time_us() for it to be used in ondemand governor. Last update time can be current time when the CPU is currently non-idle, accounting for the busy time since last idle. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/tick.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 8cf8cfe2cc9..98921a3e1aa 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -126,7 +126,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } static inline void tick_nohz_stop_idle(int cpu) { } -static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ #endif -- cgit v1.2.3 From c19e654ddbe3831252f61e76a74d661e1a755530 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 11:59:55 -0700 Subject: gre: Add netlink interface This patch adds a netlink interface that will eventually displace the existing ioctl interface. It utilises the elegant rtnl_link_ops mechanism. This also means that user-space no longer needs to rely on the tunnel interface being of type GRE to identify GRE tunnels. The identification can now occur using rtnl_link_ops. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index d4efe401470..aeab2cb32a9 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -2,6 +2,7 @@ #define _IF_TUNNEL_H_ #include +#include #define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) @@ -47,4 +48,22 @@ struct ip_tunnel_prl { /* PRL flags */ #define PRL_DEFAULT 0x0001 +enum +{ + IFLA_GRE_UNSPEC, + IFLA_GRE_LINK, + IFLA_GRE_IFLAGS, + IFLA_GRE_OFLAGS, + IFLA_GRE_IKEY, + IFLA_GRE_OKEY, + IFLA_GRE_LOCAL, + IFLA_GRE_REMOTE, + IFLA_GRE_TTL, + IFLA_GRE_TOS, + IFLA_GRE_PMTUDISC, + __IFLA_GRE_MAX, +}; + +#define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) + #endif /* _IF_TUNNEL_H_ */ -- cgit v1.2.3 From e1a8000228e16212c93b23cfbed4d622e2ec7a6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 12:00:17 -0700 Subject: gre: Add Transparent Ethernet Bridging This patch adds support for Ethernet over GRE encapsulation. This is exposed to user-space with a new link type of "gretap" instead of "gre". It will create an ARPHRD_ETHER device in lieu of the usual ARPHRD_IPGRE. Note that to preserver backwards compatibility all Transparent Ethernet Bridging packets are passed to an ARPHRD_IPGRE tunnel if its key matches and there is no ARPHRD_ETHER device whose key matches more closely. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index a0099e98b5c..bf1a53b2682 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -56,6 +56,7 @@ #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ #define ETH_P_CUST 0x6006 /* DEC Customer use */ #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ +#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */ #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ #define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ -- cgit v1.2.3 From 82b1519b345d61dcfae526e3fcb08128f39f9bcc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:09 +0100 Subject: dm: export struct dm_dev Split struct dm_dev in two and publish the part that other targets need in include/linux/device-mapper.h. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a90222e3297..178390de195 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -13,7 +13,6 @@ struct dm_target; struct dm_table; -struct dm_dev; struct mapped_device; struct bio_vec; @@ -84,6 +83,12 @@ void dm_error(const char *message); */ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); +struct dm_dev { + struct block_device *bdev; + int mode; + char name[16]; +}; + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. -- cgit v1.2.3 From 89343da077ad564ed130c46e5ea6a79388410fa5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:10 +0100 Subject: dm: publish dm_get_mapinfo Publish dm_get_mapinfo in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 178390de195..6b80961650f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -207,6 +207,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); int dm_noflush_suspending(struct dm_target *ti); +union map_info *dm_get_mapinfo(struct bio *bio); /* * Geometry functions. -- cgit v1.2.3 From ea0ec640940c2ae3a8d71af3249fccf06a9997a3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:11 +0100 Subject: dm: publish dm_table_unplug_all Publish dm_table_unplug_all in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6b80961650f..e462c7f3b39 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,11 @@ int dm_table_add_target(struct dm_table *t, const char *type, */ int dm_table_complete(struct dm_table *t); +/* + * Unplug all devices in a table. + */ +void dm_table_unplug_all(struct dm_table *t); + /* * Table reference counting. */ -- cgit v1.2.3 From 54160904260fa764ba6e2dc738770be30fdf9553 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:12 +0100 Subject: dm: publish dm_vcalloc Publish dm_vcalloc in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e462c7f3b39..08d783592b7 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -267,6 +267,11 @@ void dm_table_event(struct dm_table *t); */ int dm_swap_table(struct mapped_device *md, struct dm_table *t); +/* + * A wrapper around vmalloc. + */ +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); + /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ -- cgit v1.2.3 From 4dde4492d850a4c9bcaa92e5bd7f4eebe3e2f5ab Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:19 +0200 Subject: ide: make drive->id an union (take 2) Make drive->id an unnamed union so id can be accessed either by using 'u16 *id' or 'struct hd_driveid *driveid'. Then convert all existing drive->id users accordingly (using 'u16 *id' when possible). This is an intermediate step to make ide 'struct hd_driveid'-free. While at it: - Add missing KERN_CONTs in it821x.c. - Use ATA_ID_WORDS and ATA_ID_*_LEN defines. - Remove unnecessary checks for drive->id. - s/drive_table/table/ in ide_in_drive_list(). - Cleanup ide_config_drive_speed() a bit. - s/drive1/dev1/ & s/drive0/dev0/ in ide_undecoded_slave(). v2: Fix typo in drivers/ide/ppc/pmac.c. (From Stephen Rothwell) There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 6514db8fd2e..0c85aff3edf 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -380,7 +380,11 @@ struct ide_drive_s { struct request *rq; /* current request */ struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ - struct hd_driveid *id; /* drive model identification info */ + union { + /* identification info */ + struct hd_driveid *driveid; + u16 *id; + }; #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_settings_s *settings;/* /proc/ide/ drive settings */ @@ -920,7 +924,7 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -extern void ide_fix_driveid(struct hd_driveid *); +void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); @@ -1240,7 +1244,7 @@ struct drive_list_entry { const char *id_firmware; }; -int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); +int ide_in_drive_list(u16 *, const struct drive_list_entry *); #ifdef CONFIG_BLK_DEV_IDEDMA int __ide_dma_bad_drive(ide_drive_t *); @@ -1347,12 +1351,13 @@ const char *ide_xfer_verbose(u8 mode); extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); -static inline int ide_dev_has_iordy(struct hd_driveid *id) +static inline int ide_dev_has_iordy(u16 *id) { - return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0; + return ((id[ATA_ID_FIELD_VALID] & 2) && + (((struct hd_driveid *)id)->capability & 8)) ? 1 : 0; } -static inline int ide_dev_is_sata(struct hd_driveid *id) +static inline int ide_dev_is_sata(u16 *id) { /* * See if word 93 is 0 AND drive is at least ATA-5 compatible @@ -1360,7 +1365,7 @@ static inline int ide_dev_is_sata(struct hd_driveid *id) * this trick allows us to filter out the reserved values of * 0x0000 and 0xffff along with the earlier ATA revisions... */ - if (id->hw_config == 0 && (short)id->major_rev_num >= 0x0020) + if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) return 1; return 0; } @@ -1437,11 +1442,11 @@ extern struct bus_type ide_bus_type; extern struct class *ide_port_class; /* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */ -#define ide_id_has_flush_cache(id) ((id)->cfs_enable_2 & 0x3000) +#define ide_id_has_flush_cache(id) ((id)[ATA_ID_CFS_ENABLE_2] & 0x3000) /* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ #define ide_id_has_flush_cache_ext(id) \ - (((id)->cfs_enable_2 & 0x2400) == 0x2400) + (((id)[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400) static inline void ide_dump_identify(u8 *id) { -- cgit v1.2.3 From 48fb2688aa67baba373531cc4ed2d9e695983c3f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:19 +0200 Subject: ide: remove drive->driveid * Factor out HDIO_[OBSOLETE,GET]_IDENTITY ioctls handling to ide_get_identity_ioctl(). * Use temporary buffer in ide_get_identity_ioctl() instead of accessing drive->id directly. * Add ide_id_to_hd_driveid() inline to convert raw id into struct hd_driveid format (needed on big-endian). * Use ide_id_to_hd_driveid() in ide_get_identity_ioctl(), cleanup ide_fix_driveid() and switch ide to use use raw id. * Remove no longer needed drive->driveid. This leaves us with 3 users of struct hd_driveid in tree: - arch/um/drivers/ubd_kern.c - drivers/block/xsysace.c - drivers/usb/storage/isd200.c While at it: * Use ata_id_u{32,64}() and ata_id_has_{dma,lba,iordy}() macros. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 0c85aff3edf..e887927e00e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -380,11 +380,7 @@ struct ide_drive_s { struct request *rq; /* current request */ struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ - union { - /* identification info */ - struct hd_driveid *driveid; - u16 *id; - }; + u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_settings_s *settings;/* /proc/ide/ drive settings */ @@ -1353,8 +1349,7 @@ extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); static inline int ide_dev_has_iordy(u16 *id) { - return ((id[ATA_ID_FIELD_VALID] & 2) && - (((struct hd_driveid *)id)->capability & 8)) ? 1 : 0; + return ((id[ATA_ID_FIELD_VALID] & 2) && ata_id_has_iordy(id)) ? 1 : 0; } static inline int ide_dev_is_sata(u16 *id) -- cgit v1.2.3 From 3a7d24841ad794ae64c90d7d00d62a83741912aa Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:21 +0200 Subject: ide: use ATA_* defines instead of *_STAT and *_ERR ones * ERR_STAT -> ATA_ERR * INDEX_STAT -> ATA_IDX * ECC_STAT -> ATA_CORR * DRQ_STAT -> ATA_DRQ * SEEK_STAT -> ATA_DSC * WRERR_STAT -> ATA_DF * READY_STAT -> ATA_DRDY * BUSY_STAT -> ATA_BUSY * MARK_ERR -> ATA_AMNF * TRK0_ERR -> ATA_TRK0NF * ABRT_ERR -> ATA_ABORTED * MCR_ERR -> ATA_MCR * ID_ERR -> ATA_IDNF * MC_ERR -> ATA_MC * ECC_ERR -> ATA_UNC * ICRC_ERR -> ATA_ICRC * BBD_ERR -> ATA_BBK Also: * ILI_ERR -> ATAPI_ILI * EOM_ERR -> ATAPI_EOM * LFS_ERR -> ATAPI_LFS * CD -> ATAPI_COD * IO -> ATAPI_IO Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e887927e00e..d963c1929c8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -87,12 +87,13 @@ struct ide_io_ports { }; #define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) -#define BAD_R_STAT (BUSY_STAT | ERR_STAT) -#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT) -#define BAD_STAT (BAD_R_STAT | DRQ_STAT) -#define DRIVE_READY (READY_STAT | SEEK_STAT) -#define BAD_CRC (ABRT_ERR | ICRC_ERR) +#define BAD_R_STAT (ATA_BUSY | ATA_ERR) +#define BAD_W_STAT (BAD_R_STAT | ATA_DF) +#define BAD_STAT (BAD_R_STAT | ATA_DRQ) +#define DRIVE_READY (ATA_DRDY | ATA_DSC) + +#define BAD_CRC (ATA_ABORTED | ATA_ICRC) #define SATA_NR_PORTS (3) /* 16 possible ?? */ @@ -438,8 +439,8 @@ struct ide_drive_s { u8 mult_req; /* requested multiple sector setting */ u8 tune_req; /* requested drive tuning setting */ u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ - u8 bad_wstat; /* used for ignoring WRERR_STAT */ - u8 nowerr; /* used for ignoring WRERR_STAT */ + u8 bad_wstat; /* used for ignoring ATA_DF */ + u8 nowerr; /* used for ignoring ATA_DF */ u8 sect0; /* offset of first sector for DM6:DDO */ u8 head; /* "real" number of heads */ u8 sect; /* "real" sectors per track */ -- cgit v1.2.3 From 3c619ffd48d7fdb3b17f0df67c4eb4b0bd80e253 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:22 +0200 Subject: ide: remove no longer needed ide_drive_t fields Remove ->remap_0_to_1 and ->sect0 (they are always zero now). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d963c1929c8..ca0b132de1a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -418,7 +418,6 @@ struct ide_drive_s { unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */ unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ unsigned nodma : 1; /* disallow DMA */ - unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ @@ -441,7 +440,6 @@ struct ide_drive_s { u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ u8 bad_wstat; /* used for ignoring ATA_DF */ u8 nowerr; /* used for ignoring ATA_DF */ - u8 sect0; /* offset of first sector for DM6:DDO */ u8 head; /* "real" number of heads */ u8 sect; /* "real" sectors per track */ u8 bios_head; /* BIOS/fdisk/LILO number of heads */ -- cgit v1.2.3 From b163f46d5ecf48d883ce156e5e5a21a1a9a125c7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:23 +0200 Subject: ide: enhance ide_busy_sleep() * Make ide_busy_sleep() take timeout value as a parameter and also allow use of AltStatus Register if requested with altstatus parameter. Update existing users accordingly. * Convert ide_driveid_update() and actual_try_to_identify() to use ide_busy_sleep(). There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ca0b132de1a..2ad8548135d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -923,6 +923,8 @@ void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); +int ide_busy_sleep(ide_hwif_t *, unsigned long, int); + int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -- cgit v1.2.3 From a2cdee5a9a93360165d0576bbc7e9ccb3127afee Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:24 +0200 Subject: ide: remove IDE_CHIPSET_* macros They just obfuscate the code. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 2ad8548135d..a9206c463d7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -473,10 +473,6 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) -#define IDE_CHIPSET_PCI_MASK \ - ((1<> (c)) & 1) - struct ide_task_s; struct ide_port_info; -- cgit v1.2.3 From 7e59ea21aab1a91ca31bc64c7d3035ebdbd336d1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:26 +0200 Subject: ide: check drive->present in ide_get_paired_drive() * Change ide_get_paired_drive() to return NULL if peer device is not present and update all users accordingly. While at it: * ide_get_paired_drive() -> ide_get_pair_dev() * Use ide_get_pair_dev() in cs5530.c, sc1200.c and via82cxxx.c. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a9206c463d7..1d9716a95dc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1450,10 +1450,10 @@ static inline int hwif_to_node(ide_hwif_t *hwif) return hwif->dev ? dev_to_node(hwif->dev) : -1; } -static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive) +static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; - return &hwif->drives[(drive->dn ^ 1) & 1]; + return peer->present ? peer : NULL; } #endif /* _IDE_H */ -- cgit v1.2.3 From 3ceca727fe3a38dd8d7a3adf938fefda83eee8af Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:27 +0200 Subject: ide: include only when needed * Include directly in instead of through . * Include only when needed. Cc: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1d9716a95dc..a7f980d2fe5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 263138a0ad6e38de7f6526b7de037ed4511308ef Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:27 +0200 Subject: ide: preparations for /proc/ide/hd*/settings rework After rework settings will be no longer created dynamically for each device so we need to make some fixups first. * Use set_[ksettings,unmaskirq]() as a set function for ["keepsettings","unmaskirq"] setting. * Allow writes to ["io_32bit","unmaskirq"] settings also when drive->no_[io_32bit,unmask] is set (this is checked later inside set_[io_32bit,unmaskirq]() anywyay and keeps consistency with the corresponding HDIO_SET_[32BIT,UNMASKINTR] ioctls). * Use max possible multi sectors value (16) as an allowed max for "multcount" setting. set_multcount() set function checks against device's max possbile value anyway and it makes the proc setting consistent with the corresponding HDIO_SET_MULTCOUNT ioctl. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a7f980d2fe5..ad09e7c81ae 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -688,7 +688,9 @@ typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; int set_io_32bit(ide_drive_t *, int); +int set_ksettings(ide_drive_t *, int); int set_pio_mode(ide_drive_t *, int); +int set_unmaskirq(ide_drive_t *, int); int set_using_dma(ide_drive_t *, int); /* ATAPI packet command flags */ -- cgit v1.2.3 From 8185d5aa93e0a5c111adc4952a5b87193a68ae5b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:28 +0200 Subject: ide: /proc/ide/hd*/settings rework * Add struct ide_devset, S_* flags, *DEVSET() & ide*_devset_*() macros. * Add 'const struct ide_devset **settings' to ide_driver_t. * Use 'const struct ide_devset **settings' in ide_drive_t instead of 'struct ide_settings_s *settings'. Then convert core code and device drivers to use struct ide_devset and co.: - device settings are no longer allocated dynamically for each device but instead there is an unique struct ide_devset instance per setting - device driver keeps the pointer to the table of pointers to its settings in ide_driver_t.settings - generic settings are kept in ide_generic_setting[] - ide_proc_[un]register_driver(), ide_find_setting_by_name(), ide_{read,write}_setting() and proc_ide_{read,write}_settings() are updated accordingly - ide*_add_settings() are removed * Remove no longer used __ide_add_setting(), ide_add_setting(), __ide_remove_setting() and auto_remove_settings(). * Remove no longer used TYPE_*, SETTING_*, ide_procset_t and ide_settings_t. * ->keep_settings, ->using_dma, ->unmask, ->noflush, ->dsc_overlap, ->nice1, ->addressing, ->wcache and ->nowerr ide_drive_t fields can now be bitfield flags. While at it: * Rename ide_find_setting_by_name() to ide_find_setting(). * Rename write_wcache() to set_wcache(). There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 110 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ad09e7c81ae..4667ec8aeeb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -304,8 +304,8 @@ typedef enum { ide_started, /* a drive operation was started, handler was set */ } ide_startstop_t; +struct ide_devset; struct ide_driver_s; -struct ide_settings_s; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -384,7 +384,7 @@ struct ide_drive_s { u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ - struct ide_settings_s *settings;/* /proc/ide/ drive settings */ + const struct ide_devset **settings; /* /proc/ide/ drive settings */ #endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ @@ -396,16 +396,16 @@ struct ide_drive_s { special_t special; /* special action flags */ select_t select; /* basic drive/head select reg value */ - u8 keep_settings; /* restore settings after drive reset */ - u8 using_dma; /* disk is using dma for read/write */ u8 retry_pio; /* retrying dma capable host in pio */ u8 state; /* retry state */ u8 waiting_for_dma; /* dma currently in progress */ - u8 unmask; /* okay to unmask other irqs */ - u8 noflush; /* don't attempt flushes */ - u8 dsc_overlap; /* DSC overlap */ - u8 nice1; /* give potential excess bandwidth */ + unsigned keep_settings : 1; /* restore settings after drive reset */ + unsigned using_dma : 1; /* disk is using dma for read/write */ + unsigned unmask : 1; /* okay to unmask other irqs */ + unsigned noflush : 1; /* don't attempt flushes */ + unsigned dsc_overlap : 1; /* DSC overlap */ + unsigned nice1 : 1; /* give potential excess bandwidth */ unsigned present : 1; /* drive is physically present */ unsigned dead : 1; /* device ejected hint */ unsigned id_read : 1; /* 1=id read from disk 0 = synthetic */ @@ -423,14 +423,15 @@ struct ide_drive_s { unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ unsigned post_reset : 1; unsigned udma33_warned : 1; + unsigned addressing : 2; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ + unsigned wcache : 1; /* status of write cache */ + unsigned nowerr : 1; /* used for ignoring ATA_DF */ - u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ u8 dn; /* now wide spread use */ - u8 wcache; /* status of write cache */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ u8 ready_stat; /* min status value for drive ready */ @@ -439,7 +440,6 @@ struct ide_drive_s { u8 tune_req; /* requested drive tuning setting */ u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ u8 bad_wstat; /* used for ignoring ATA_DF */ - u8 nowerr; /* used for ignoring ATA_DF */ u8 head; /* "real" number of heads */ u8 sect; /* "real" sectors per track */ u8 bios_head; /* BIOS/fdisk/LILO number of heads */ @@ -687,12 +687,29 @@ typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; +int get_io_32bit(ide_drive_t *); int set_io_32bit(ide_drive_t *, int); +int get_ksettings(ide_drive_t *); int set_ksettings(ide_drive_t *, int); int set_pio_mode(ide_drive_t *, int); +int get_unmaskirq(ide_drive_t *); int set_unmaskirq(ide_drive_t *, int); +int get_using_dma(ide_drive_t *); int set_using_dma(ide_drive_t *, int); +#define ide_devset_get(name, field) \ +int get_##name(ide_drive_t *drive) \ +{ \ + return drive->field; \ +} + +#define ide_devset_set(name, field) \ +int set_##name(ide_drive_t *drive, int arg) \ +{ \ + drive->field = arg; \ + return 0; \ +} + /* ATAPI packet command flags */ enum { /* set when an error is considered normal - no retry (ide-tape) */ @@ -757,30 +774,53 @@ struct ide_atapi_pc { * configurable drive settings */ -#define TYPE_INT 0 -#define TYPE_BYTE 1 -#define TYPE_SHORT 2 +#define S_READ (1 << 0) +#define S_WRITE (1 << 1) +#define S_RW (S_READ | S_WRITE) +#define S_NOLOCK (1 << 2) -#define SETTING_READ (1 << 0) -#define SETTING_WRITE (1 << 1) -#define SETTING_RW (SETTING_READ | SETTING_WRITE) +struct ide_devset { + const char *name; + unsigned int flags; + int min, max; + int (*get)(ide_drive_t *); + int (*set)(ide_drive_t *, int); + int (*mulf)(ide_drive_t *); + int (*divf)(ide_drive_t *); +}; -typedef int (ide_procset_t)(ide_drive_t *, int); -typedef struct ide_settings_s { - char *name; - int rw; - int data_type; - int min; - int max; - int mul_factor; - int div_factor; - void *data; - ide_procset_t *set; - int auto_remove; - struct ide_settings_s *next; -} ide_settings_t; - -int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set); +#define __DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) { \ + .name = __stringify(_name), \ + .flags = _flags, \ + .min = _min, \ + .max = _max, \ + .get = _get, \ + .set = _set, \ + .mulf = _mulf, \ + .divf = _divf, \ +} + +#define __IDE_DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) \ +static const struct ide_devset ide_devset_##_name = \ + __DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) + +#define IDE_DEVSET(_name, _flags, _min, _max, _get, _set) \ +__IDE_DEVSET(_name, _flags, _min, _max, _get, _set, NULL, NULL) + +#define ide_devset_rw_nolock(_name, _min, _max, _func) \ +IDE_DEVSET(_name, S_RW | S_NOLOCK, _min, _max, get_##_func, set_##_func) + +#define ide_devset_w_nolock(_name, _min, _max, _func) \ +IDE_DEVSET(_name, S_WRITE | S_NOLOCK, _min, _max, NULL, set_##_func) + +#define ide_devset_rw(_name, _min, _max, _field) \ +static ide_devset_get(_name, _field); \ +static ide_devset_set(_name, _field); \ +IDE_DEVSET(_name, S_RW, _min, _max, get_##_name, set_##_name) + +#define ide_devset_r(_name, _min, _max, _field) \ +ide_devset_get(_name, _field) \ +IDE_DEVSET(_name, S_READ, _min, _max, get_##_name, NULL) /* * /proc/ide interface @@ -801,8 +841,6 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); -void ide_add_generic_settings(ide_drive_t *); - read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -830,7 +868,6 @@ static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } -static inline void ide_add_generic_settings(ide_drive_t *drive) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -887,6 +924,7 @@ struct ide_driver_s { void (*shutdown)(ide_drive_t *); #ifdef CONFIG_IDE_PROC_FS ide_proc_entry_t *proc; + const struct ide_devset **settings; #endif }; -- cgit v1.2.3 From 151a670186a0f8441798f90c8701647adb7a1589 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:28 +0200 Subject: ide: remove SECTOR_WORDS define Just use SECTOR_SIZE instead. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4667ec8aeeb..4444b0884e5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -126,7 +126,7 @@ struct ide_io_ports { #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ #define SECTOR_SIZE 512 -#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */ + #define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) /* -- cgit v1.2.3 From ebc6be520673f65aef188abde43972f9cd2162e9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:29 +0200 Subject: ide: remove read-only ->atapi_overlap field from ide_drive_t Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4444b0884e5..9cb935f2e7c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -415,7 +415,6 @@ struct ide_drive_s { unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */ unsigned no_unmask : 1; /* disallow setting unmask bit */ unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */ - unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */ unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ unsigned nodma : 1; /* disallow DMA */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ -- cgit v1.2.3 From 02d599a365d04658bc9ea71762ed17c895079927 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:29 +0200 Subject: ide: remove ->supports_dsc_overlap field from ide_driver_t * Use drive->media and drive->scsi to check if ->dsc_overlap can be set by HDIO_SET_NICE ioctl in generic_ide_ioctl(). * Remove unused ->supports_dsc_overlap field from ide_driver_t. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9cb935f2e7c..e5622bb5a4a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -912,7 +912,6 @@ enum { struct ide_driver_s { const char *version; u8 media; - unsigned supports_dsc_overlap : 1; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); -- cgit v1.2.3 From 5d5870f0a26e2304c4a82592870c5bc88017f7c9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:29 +0200 Subject: ide: ide_dev_has_iordy() -> ata_id_has_iordy() * Remove (id[ATA_ID_FIELD_VALID] & 2) check from ide_dev_has_iordy() (it is for validity of words 64-70, IORDY is in word 49). * ide_dev_has_iordy() -> ata_id_has_iordy() Cc: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e5622bb5a4a..27829c13bef 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1382,11 +1382,6 @@ const char *ide_xfer_verbose(u8 mode); extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); -static inline int ide_dev_has_iordy(u16 *id) -{ - return ((id[ATA_ID_FIELD_VALID] & 2) && ata_id_has_iordy(id)) ? 1 : 0; -} - static inline int ide_dev_is_sata(u16 *id) { /* -- cgit v1.2.3 From 367d7e78dd48cf6ad35182a99d97abb5486e040e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:30 +0200 Subject: ide: ide_dev_is_sata() -> ata_id_is_sata() * Use optimized ATA version check from Sergei in ata_id_is_sata(). * ide_dev_is_sata() -> ata_id_is_sata() Cc: Jeff Garzik Cc: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 10 +++++++++- include/linux/ide.h | 13 ------------- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index be00973d1a8..d28aad991c7 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -645,7 +645,15 @@ static inline unsigned int ata_id_major_version(const u16 *id) static inline int ata_id_is_sata(const u16 *id) { - return ata_id_major_version(id) >= 5 && id[ATA_ID_HW_CONFIG] == 0; + /* + * See if word 93 is 0 AND drive is at least ATA-5 compatible + * verifying that word 80 by casting it to a signed type -- + * this trick allows us to filter out the reserved values of + * 0x0000 and 0xffff along with the earlier ATA revisions... + */ + if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) + return 1; + return 0; } static inline int ata_id_has_tpm(const u16 *id) diff --git a/include/linux/ide.h b/include/linux/ide.h index 27829c13bef..87b5b5d3953 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1382,19 +1382,6 @@ const char *ide_xfer_verbose(u8 mode); extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); -static inline int ide_dev_is_sata(u16 *id) -{ - /* - * See if word 93 is 0 AND drive is at least ATA-5 compatible - * verifying that word 80 by casting it to a signed type -- - * this trick allows us to filter out the reserved values of - * 0x0000 and 0xffff along with the earlier ATA revisions... - */ - if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) - return 1; - return 0; -} - u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); -- cgit v1.2.3 From 942dcd85bf8edf38cdc3745306ca250684d99a61 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:30 +0200 Subject: ide: idedisk_supports_lba48() -> ata_id_lba48_enabled() * Add ata_id_lba48_enabled() inline helper to . * idedisk_supports_lba48() -> ata_id_lba48_enabled() The latter one also checks validity of words 83 & 86. Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index d28aad991c7..8162257b474 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -574,6 +574,15 @@ static inline int ata_id_has_lba48(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 10); } +static inline int ata_id_lba48_enabled(const u16 *id) +{ + if (ata_id_has_lba48(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); +} + static inline int ata_id_hpa_enabled(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ -- cgit v1.2.3 From 1a4e4d4d2cceb72be70526a485914abd638c7de1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:30 +0200 Subject: ide: check only for CACHE FLUSH command support in ide_id_has_flush_cache() All devices supporting CACHE FLUSH EXT command should also support CACHE FLUSH command so it is sufficient to check only for CACHE FLUSH in ide_id_has_flush_cache(). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 87b5b5d3953..6e22cd20dd8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1453,8 +1453,8 @@ extern struct mutex ide_cfg_mtx; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; -/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */ -#define ide_id_has_flush_cache(id) ((id)[ATA_ID_CFS_ENABLE_2] & 0x3000) +/* check if CACHE FLUSH command is supported (as defined in ATA-6) */ +#define ide_id_has_flush_cache(id) ((id)[ATA_ID_CFS_ENABLE_2] & 0x1000) /* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ #define ide_id_has_flush_cache_ext(id) \ -- cgit v1.2.3 From 4b58f17d7c45a8e5f4acda641bec388398b9c0fa Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:30 +0200 Subject: ide: ide_id_has_flush_cache() -> ata_id_flush_enabled() * Add ata_id_flush_enabled() inline helper to . * ide_id_has_flush_cache() -> ata_id_flush_enabled() The latter one also checks if the command is marked as supported in word 83 and validity of words 83 & 86. Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 9 +++++++++ include/linux/ide.h | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 8162257b474..921cf0fc337 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -558,6 +558,15 @@ static inline int ata_id_has_flush(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 12); } +static inline int ata_id_flush_enabled(const u16 *id) +{ + if (ata_id_has_flush(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); +} + static inline int ata_id_has_flush_ext(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) diff --git a/include/linux/ide.h b/include/linux/ide.h index 6e22cd20dd8..d2213d7cc4c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1453,9 +1453,6 @@ extern struct mutex ide_cfg_mtx; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; -/* check if CACHE FLUSH command is supported (as defined in ATA-6) */ -#define ide_id_has_flush_cache(id) ((id)[ATA_ID_CFS_ENABLE_2] & 0x1000) - /* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ #define ide_id_has_flush_cache_ext(id) \ (((id)[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400) -- cgit v1.2.3 From ff2779b568e70822e0ef2cc7afeeefbe7c607652 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:31 +0200 Subject: ide: ide_id_has_flush_cache_ext() -> ata_id_flush_ext_enabled() * Add ata_id_flush_ext_enabled() inline helper to . * ide_id_has_flush_cache_ext() -> ata_id_flush_ext_enabled() The latter one also checks if the command is marked as supported in word 83 and validity of words 83 & 86. Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 13 +++++++++++++ include/linux/ide.h | 4 ---- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 921cf0fc337..81d9adeb819 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -574,6 +574,19 @@ static inline int ata_id_has_flush_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 13); } +static inline int ata_id_flush_ext_enabled(const u16 *id) +{ + if (ata_id_has_flush_ext(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + /* + * some Maxtor disks have bit 13 defined incorrectly + * so check bit 10 too + */ + return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; +} + static inline int ata_id_has_lba48(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) diff --git a/include/linux/ide.h b/include/linux/ide.h index d2213d7cc4c..432eb98f7fe 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1453,10 +1453,6 @@ extern struct mutex ide_cfg_mtx; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; -/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ -#define ide_id_has_flush_cache_ext(id) \ - (((id)[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400) - static inline void ide_dump_identify(u8 *id) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0); -- cgit v1.2.3 From 93734a234447a3c091f76d76f7351af9d4dde518 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:31 +0200 Subject: ide: ide_id_to_hd_driveid() -> ata_id_to_hd_driveid() Rename ide_id_to_hd_driveid() to ata_id_to_hd_driveid() and move it to . Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 81d9adeb819..4c3f5007003 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -30,6 +30,7 @@ #define __LINUX_ATA_H__ #include +#include /* defines only for the constants which don't work well as enums */ #define ATA_DMA_BOUNDARY 0xffffUL @@ -781,6 +782,26 @@ static inline int atapi_id_dmadir(const u16 *dev_id) return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); } +static inline void ata_id_to_hd_driveid(u16 *id) +{ +#ifdef __BIG_ENDIAN + /* accessed in struct hd_driveid as 8-bit values */ + id[ATA_ID_MAX_MULTSECT] = __cpu_to_le16(id[ATA_ID_MAX_MULTSECT]); + id[ATA_ID_CAPABILITY] = __cpu_to_le16(id[ATA_ID_CAPABILITY]); + id[ATA_ID_OLD_PIO_MODES] = __cpu_to_le16(id[ATA_ID_OLD_PIO_MODES]); + id[ATA_ID_OLD_DMA_MODES] = __cpu_to_le16(id[ATA_ID_OLD_DMA_MODES]); + id[ATA_ID_MULTSECT] = __cpu_to_le16(id[ATA_ID_MULTSECT]); + + /* as 32-bit values */ + *(u32 *)&id[ATA_ID_LBA_CAPACITY] = ata_id_u32(id, ATA_ID_LBA_CAPACITY); + *(u32 *)&id[ATA_ID_SPG] = ata_id_u32(id, ATA_ID_SPG); + + /* as 64-bit value */ + *(u64 *)&id[ATA_ID_LBA_CAPACITY_2] = + ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); +#endif +} + static inline int is_multi_taskfile(struct ata_taskfile *tf) { return (tf->command == ATA_CMD_READ_MULTI) || -- cgit v1.2.3 From a02227c9774b3bff08c7f557d06247e0a03ac435 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:31 +0200 Subject: ide: lba_capacity_is_ok() -> ata_id_is_lba_capacity_ok() Rename lba_capacity_is_ok() to ata_id_is_lba_capacity_ok() and move it to (remove needless parens while at it). Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 4c3f5007003..a53318b8cbd 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -782,6 +782,56 @@ static inline int atapi_id_dmadir(const u16 *dev_id) return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); } +/* + * ata_id_is_lba_capacity_ok() performs a sanity check on + * the claimed LBA capacity value for the device. + * + * Returns 1 if LBA capacity looks sensible, 0 otherwise. + * + * It is called only once for each device. + */ +static inline int ata_id_is_lba_capacity_ok(u16 *id) +{ + unsigned long lba_sects, chs_sects, head, tail; + + /* No non-LBA info .. so valid! */ + if (id[ATA_ID_CYLS] == 0) + return 1; + + lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY); + + /* + * The ATA spec tells large drives to return + * C/H/S = 16383/16/63 independent of their size. + * Some drives can be jumpered to use 15 heads instead of 16. + * Some drives can be jumpered to use 4092 cyls instead of 16383. + */ + if ((id[ATA_ID_CYLS] == 16383 || + (id[ATA_ID_CYLS] == 4092 && id[ATA_ID_CUR_CYLS] == 16383)) && + id[ATA_ID_SECTORS] == 63 && + (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) && + (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS])) + return 1; + + chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS]; + + /* perform a rough sanity check on lba_sects: within 10% is OK */ + if (lba_sects - chs_sects < chs_sects/10) + return 1; + + /* some drives have the word order reversed */ + head = (lba_sects >> 16) & 0xffff; + tail = lba_sects & 0xffff; + lba_sects = head | (tail << 16); + + if (lba_sects - chs_sects < chs_sects/10) { + *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects); + return 1; /* LBA capacity is (now) good */ + } + + return 0; /* LBA capacity value may be bad */ +} + static inline void ata_id_to_hd_driveid(u16 *id) { #ifdef __BIG_ENDIAN -- cgit v1.2.3 From feb22b7f8e62b1b987a3a1dbad95af767a1df832 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:32 +0200 Subject: ide: add proper PCI PM support (v2) * Keep pointer to ->init_chipset method also in struct ide_host and set it in ide_host_alloc_all(). * Add ide_pci_suspend() and ide_pci_resume() helpers (default ->suspend and ->resume implementations). * ->init_chipset can no longer be marked __devinit. * Add proper PCI PM support to IDE PCI host drivers (rz1000.c and tc86c001.c are skipped for now since they need to be converted from using ->init_hwif to use ->init_chipset instead). v2: * Cleanup CONFIG_PM #ifdef-s per akpm's suggestion. Cc: Andrew Morton Cc: "Rafael J. Wysocki" Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 432eb98f7fe..2c5d83ddaef 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_BLK_DEV_IDEACPI #include #endif @@ -639,6 +640,7 @@ struct ide_host { ide_hwif_t *ports[MAX_HWIFS]; unsigned int n_ports; struct device *dev[2]; + unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; }; @@ -1264,6 +1266,14 @@ int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); void ide_pci_remove(struct pci_dev *); +#ifdef CONFIG_PM +int ide_pci_suspend(struct pci_dev *, pm_message_t); +int ide_pci_resume(struct pci_dev *); +#else +#define ide_pci_suspend NULL +#define ide_pci_resume NULL +#endif + void ide_map_sg(ide_drive_t *, struct request *); void ide_init_sg_cmd(ide_drive_t *, struct request *); -- cgit v1.2.3 From 9232c14bff36d65de254f34386c00b732c5b6099 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:33 +0200 Subject: ide: remove ->bus_state field from ide_hwif_t It is always set to BUSSTATE_ON. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 2c5d83ddaef..42f39781af8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -562,7 +562,6 @@ typedef struct hwif_s { u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ u8 channel; /* for dual-port chips: 0=primary, 1=secondary */ - u8 bus_state; /* power state of the IDE bus */ u32 host_flags; -- cgit v1.2.3 From aa7687738af3332470e02ac1060f6c046d83c9a3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:33 +0200 Subject: ide: add ide_setting_ioctl() helper * Add struct ide_ioctl_devset representing ioctl device setting. * Add ide_setting_ioctl() helper for matching given ioctl and its parameters against table of ioctl device settings. * Convert ide_setting_ioctl() and idedisk_ioctl() to use ide_setting_ioctl(). * Un-export ide_setting_mtx. While at it: * {get,set}_lba_addressing() -> {get,set}_addressing() There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 42f39781af8..64624b9b645 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -932,7 +932,19 @@ struct ide_driver_s { int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); -int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); +struct ide_ioctl_devset { + unsigned int get_ioctl; + unsigned int set_ioctl; + + int (*get)(ide_drive_t *); + int (*set)(ide_drive_t *, int); +}; + +int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int, + unsigned long, const struct ide_ioctl_devset *); + +int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, + unsigned, unsigned long); extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v1.2.3 From 05236ea6df7419f0f37cf9603cfee265cfce5832 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:33 +0200 Subject: ide: move ioctls handling to ide-ioctls.c * Move ioctls handling to ide-ioctls.c (except HDIO_DRIVE_TASKFILE for now). * Make ide_{cmd,task}() static. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 64624b9b645..40102bd50a7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1128,8 +1128,6 @@ int ide_raw_taskfile(ide_drive_t *, ide_task_t *, u8 *, u16); int ide_no_data_taskfile(ide_drive_t *, ide_task_t *); int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); -int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); -int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); -- cgit v1.2.3 From 51509eec34debffec3c6f481f7371c9aeb6c63c1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:34 +0200 Subject: ide: add ide_check_atapi_device() helper * Add ide_check_atapi_device() to ide-atapi.c and convert ide-{floppy,tape}.c to use it instead of ide*_identify_device(). While at it: * Add DRV_NAME defines to ide-{floppy,tape}.c. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 40102bd50a7..e63ff63d1f0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1109,6 +1109,8 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +int ide_check_atapi_device(ide_drive_t *, const char *); + ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), -- cgit v1.2.3 From acaa0f5f675ccf6b8a3a11a933419068b1ea1f46 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:36 +0200 Subject: ide: add ide_io_buffers() helper * Make ->io_buffers method return number of bytes transferred. * Use ide_end_request() instead of idefloppy_end_request() in ide_floppy_io_buffers() and then move the call out to ide_pc_intr(). * Add ide_io_buffers() helper and convert ide-{floppy,scsi}.c to use it instead of ide*_io_buffers(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index e63ff63d1f0..03dc2157a2b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1099,6 +1099,8 @@ void ide_tf_read(ide_drive_t *, ide_task_t *); void ide_input_data(ide_drive_t *, struct request *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct request *, void *, unsigned int); +int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); + extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); @@ -1115,7 +1117,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), - void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, + int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int)); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); -- cgit v1.2.3 From 7bf7420a318978cd6042e5a5da34b7cfa18ae559 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:37 +0200 Subject: ide: add ide_init_pc() helper * Add IDE_PC_BUFFER_SIZE define. * Add ide_init_pc() and convert ide-{floppy,tape}.c to use it instead of ide*_init_pc(). * Remove no longer used IDE*_PC_BUFFER_SIZE and ide*_init_pc(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 03dc2157a2b..bba2f73b99a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -724,6 +724,12 @@ enum { PC_FLAG_TIMEDOUT = (1 << 7), }; +/* + * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. + * This is used for several packet commands (not for READ/WRITE commands). + */ +#define IDE_PC_BUFFER_SIZE 256 + struct ide_atapi_pc { /* actual packet bytes */ u8 c[12]; @@ -753,7 +759,7 @@ struct ide_atapi_pc { * those are more or less driver-specific and some of them are subject * to change/removal later. */ - u8 pc_buf[256]; + u8 pc_buf[IDE_PC_BUFFER_SIZE]; /* idetape only */ struct idetape_bh *bh; @@ -1113,6 +1119,8 @@ void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); int ide_check_atapi_device(ide_drive_t *, const char *); +void ide_init_pc(struct ide_atapi_pc *); + ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), -- cgit v1.2.3 From 7645c1514c7d34ebdf3ea0e8ee3a935c08abceb2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:37 +0200 Subject: ide: add ide_queue_pc_head() helper * Move REQ_IDETAPE_* enums to . * Add ide_queue_pc_head() and convert ide-{floppy,tape}.c to use it instead of ide*_queue_pc_head(). * Remove no longer used ide*_queue_pc_head(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index bba2f73b99a..1bd49784e4d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1121,6 +1121,22 @@ int ide_check_atapi_device(ide_drive_t *, const char *); void ide_init_pc(struct ide_atapi_pc *); +/* + * Special requests for ide-tape block device strategy routine. + * + * In order to service a character device command, we add special requests to + * the tail of our block device request queue and wait for their completion. + */ +enum { + REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */ + REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ + REQ_IDETAPE_READ = (1 << 2), + REQ_IDETAPE_WRITE = (1 << 3), +}; + +void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, + struct request *); + ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), -- cgit v1.2.3 From 2ac07d920604eeee8966d52e70161f9b31fe90a3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:38 +0200 Subject: ide: add ide_queue_pc_tail() helper * Add ide_queue_pc_tail() and convert ide-{floppy,tape}.c to use it instead of ide*_queue_pc_tail(). * Remove no longer used ide*_queue_pc_tail(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1bd49784e4d..b5be2368c96 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1136,6 +1136,7 @@ enum { void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, struct request *); +int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, -- cgit v1.2.3 From 49cac39e71bd6bbcf934c6ba837e21503902c088 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:38 +0200 Subject: ide-floppy: ->{srfp,wp} -> IDE_AFLAG_{SRFP,WP} Add IDE_AFLAG_{SRFP,WP} drive->atapi_flags and use them instead of ->{srfp,wp} struct ide_floppy_obj fields. There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index b5be2368c96..cc41a885688 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -356,21 +356,25 @@ enum { IDE_AFLAG_CLIK_DRIVE = (1 << 19), /* Requires BH algorithm for packets */ IDE_AFLAG_ZIP_DRIVE = (1 << 20), + /* Write protect */ + IDE_AFLAG_WP = (1 << 21), + /* Supports format progress report */ + IDE_AFLAG_SRFP = (1 << 22), /* ide-tape */ - IDE_AFLAG_IGNORE_DSC = (1 << 21), + IDE_AFLAG_IGNORE_DSC = (1 << 23), /* 0 When the tape position is unknown */ - IDE_AFLAG_ADDRESS_VALID = (1 << 22), + IDE_AFLAG_ADDRESS_VALID = (1 << 24), /* Device already opened */ - IDE_AFLAG_BUSY = (1 << 23), + IDE_AFLAG_BUSY = (1 << 25), /* Attempt to auto-detect the current user block size */ - IDE_AFLAG_DETECT_BS = (1 << 24), + IDE_AFLAG_DETECT_BS = (1 << 26), /* Currently on a filemark */ - IDE_AFLAG_FILEMARK = (1 << 25), + IDE_AFLAG_FILEMARK = (1 << 27), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + IDE_AFLAG_MEDIUM_PRESENT = (1 << 28), - IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), + IDE_AFLAG_NO_AUTOCLOSE = (1 << 29), }; struct ide_drive_s { -- cgit v1.2.3 From 0578042db3191e1ac76b53d213f2a691c3e1eaed Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:38 +0200 Subject: ide: add ide_set_media_lock() helper * Set IDE_AFLAG_NO_DOORLOCK in idetape_get_mode_sense_result(), check it in ide_tape_set_media_lock() and cleanup idetape_create_prevent_cmd(). * Set IDE_AFLAG_NO_DOORLOCK in ide_floppy_create_read_capacity_cmd() and check it instead of IDE_AFLAG_CLIK_DRIVE in ide_floppy_set_media_lock(). * Add ide_set_media_lock() helper and convert ide-{floppy,tape}.c to use it. * Remove no longer used ide*_create_prevent_cmd()/ide*_set_media_lock(). * Update comment in accordingly. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index cc41a885688..ac067a3c1be 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -317,10 +317,10 @@ struct ide_acpi_hwif_link; enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), IDE_AFLAG_MEDIA_CHANGED = (1 << 1), - - /* ide-cd */ /* Drive cannot lock the door. */ IDE_AFLAG_NO_DOORLOCK = (1 << 2), + + /* ide-cd */ /* Drive cannot eject the disc. */ IDE_AFLAG_NO_EJECT = (1 << 3), /* Drive is a pre ATAPI 1.2 drive. */ @@ -1142,6 +1142,8 @@ void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, struct request *); int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); +int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); + ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), -- cgit v1.2.3 From 0c8a6c7aead1d3be85ce53e3aaacd52e38ede03e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:39 +0200 Subject: ide: add ide_do_start_stop() helper * Add ide_do_start_stop() helper and convert ide-{floppy,tape}.c to use it. * Remove no longer used idefloppy_create_start_stop_cmd() and idetape_create_load_unload_cmd(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ac067a3c1be..be79122b943 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1142,6 +1142,7 @@ void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, struct request *); int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); +int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, -- cgit v1.2.3 From de699ad595fb45022d1b049ed91ffd06fdd16c13 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:39 +0200 Subject: ide: add ide_do_test_unit_ready() helper * Add ide_do_test_unit_ready() helper and convert ide-{floppy,tape}.c to use it. * Remove no longer used idetape_create_test_unit_ready_cmd(). There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index be79122b943..55098eed3b2 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1142,6 +1142,7 @@ void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, struct request *); int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); +int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); -- cgit v1.2.3 From d6e2955a6b82d2312b5ff885ce13c8ab54d59d96 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Oct 2008 22:39:39 +0200 Subject: ide: move IDE{FLOPPY,TAPE}_WAIT_CMD defines to While at it: * IDE{FLOPPY,TAPE}_WAIT_CMD -> WAIT_{FLOPPY,TAPE}_CMD * Use enum for WAIT_* defines. There should be no functional changes caused by this patch. Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 55098eed3b2..a9eced25acc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -133,12 +133,28 @@ struct ide_io_ports { /* * Timeouts for various operations: */ -#define WAIT_DRQ (HZ/10) /* 100msec - spec allows up to 20ms */ -#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */ -#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ -#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ -#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ -#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ +enum { + /* spec allows up to 20ms */ + WAIT_DRQ = HZ / 10, /* 100ms */ + /* some laptops are very slow */ + WAIT_READY = 5 * HZ, /* 5s */ + /* should be less than 3ms (?), if all ATAPI CD is closed at boot */ + WAIT_PIDENTIFY = 10 * HZ, /* 10s */ + /* worst case when spinning up */ + WAIT_WORSTCASE = 30 * HZ, /* 30s */ + /* maximum wait for an IRQ to happen */ + WAIT_CMD = 10 * HZ, /* 10s */ + /* Some drives require a longer IRQ timeout. */ + WAIT_FLOPPY_CMD = 50 * HZ, /* 50s */ + /* + * Some drives (for example, Seagate STT3401A Travan) require a very + * long timeout, because they don't return an interrupt or clear their + * BSY bit until after the command completes (even retension commands). + */ + WAIT_TAPE_CMD = 900 * HZ, /* 900s */ + /* minimum sleep time */ + WAIT_MIN_SLEEP = HZ / 50, /* 20ms */ +}; /* * Op codes for special requests to be handled by ide_special_rq(). -- cgit v1.2.3 From 92f1f8fd8040e7b50a67a850a935509bb01201bb Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Fri, 10 Oct 2008 22:39:40 +0200 Subject: ide: Remove ide_spin_wait_hwgroup() and use special requests instead Use a special request for serialisation purposes and get rid of the awkward ide_spin_wait_hwgroup(). This also involves converting the ide_devset structure so it can be shared by the /proc and the ioctl code. Signed-off-by: Elias Oltmanns [bart: use rq->cmd[] directly] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 138 +++++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a9eced25acc..a9d82d6e6bd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -161,6 +161,7 @@ enum { * Values should be in the range of 0x20 to 0x3f. */ #define REQ_DRIVE_RESET 0x20 +#define REQ_DEVSET_EXEC 0x21 /* * Check for an interrupt and acknowledge the interrupt status @@ -405,7 +406,7 @@ struct ide_drive_s { u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ - const struct ide_devset **settings; /* /proc/ide/ drive settings */ + const struct ide_proc_devset *settings; /* /proc/ide/ drive settings */ #endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ @@ -707,29 +708,62 @@ typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; -int get_io_32bit(ide_drive_t *); -int set_io_32bit(ide_drive_t *, int); -int get_ksettings(ide_drive_t *); -int set_ksettings(ide_drive_t *, int); -int set_pio_mode(ide_drive_t *, int); -int get_unmaskirq(ide_drive_t *); -int set_unmaskirq(ide_drive_t *, int); -int get_using_dma(ide_drive_t *); -int set_using_dma(ide_drive_t *, int); +/* + * configurable drive settings + */ + +#define DS_SYNC (1 << 0) + +struct ide_devset { + int (*get)(ide_drive_t *); + int (*set)(ide_drive_t *, int); + unsigned int flags; +}; + +#define __DEVSET(_flags, _get, _set) { \ + .flags = _flags, \ + .get = _get, \ + .set = _set, \ +} #define ide_devset_get(name, field) \ -int get_##name(ide_drive_t *drive) \ +static int get_##name(ide_drive_t *drive) \ { \ return drive->field; \ } #define ide_devset_set(name, field) \ -int set_##name(ide_drive_t *drive, int arg) \ +static int set_##name(ide_drive_t *drive, int arg) \ { \ drive->field = arg; \ return 0; \ } +#define __IDE_DEVSET(_name, _flags, _get, _set) \ +const struct ide_devset ide_devset_##_name = \ + __DEVSET(_flags, _get, _set) + +#define IDE_DEVSET(_name, _flags, _get, _set) \ +static __IDE_DEVSET(_name, _flags, _get, _set) + +#define ide_devset_rw(_name, _func) \ +IDE_DEVSET(_name, 0, get_##_func, set_##_func) + +#define ide_devset_w(_name, _func) \ +IDE_DEVSET(_name, 0, NULL, set_##_func) + +#define ide_devset_rw_sync(_name, _func) \ +IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func) + +#define ide_decl_devset(_name) \ +extern const struct ide_devset ide_devset_##_name + +ide_decl_devset(io_32bit); +ide_decl_devset(keepsettings); +ide_decl_devset(pio_mode); +ide_decl_devset(unmaskirq); +ide_decl_devset(using_dma); + /* ATAPI packet command flags */ enum { /* set when an error is considered normal - no retry (ide-tape) */ @@ -797,60 +831,34 @@ struct ide_atapi_pc { #ifdef CONFIG_IDE_PROC_FS /* - * configurable drive settings + * /proc/ide interface */ -#define S_READ (1 << 0) -#define S_WRITE (1 << 1) -#define S_RW (S_READ | S_WRITE) -#define S_NOLOCK (1 << 2) - -struct ide_devset { - const char *name; - unsigned int flags; - int min, max; - int (*get)(ide_drive_t *); - int (*set)(ide_drive_t *, int); - int (*mulf)(ide_drive_t *); - int (*divf)(ide_drive_t *); +#define ide_devset_rw_field(_name, _field) \ +ide_devset_get(_name, _field); \ +ide_devset_set(_name, _field); \ +IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name) + +struct ide_proc_devset { + const char *name; + const struct ide_devset *setting; + int min, max; + int (*mulf)(ide_drive_t *); + int (*divf)(ide_drive_t *); }; -#define __DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) { \ - .name = __stringify(_name), \ - .flags = _flags, \ - .min = _min, \ - .max = _max, \ - .get = _get, \ - .set = _set, \ - .mulf = _mulf, \ - .divf = _divf, \ +#define __IDE_PROC_DEVSET(_name, _min, _max, _mulf, _divf) { \ + .name = __stringify(_name), \ + .setting = &ide_devset_##_name, \ + .min = _min, \ + .max = _max, \ + .mulf = _mulf, \ + .divf = _divf, \ } -#define __IDE_DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) \ -static const struct ide_devset ide_devset_##_name = \ - __DEVSET(_name, _flags, _min, _max, _get, _set, _mulf, _divf) - -#define IDE_DEVSET(_name, _flags, _min, _max, _get, _set) \ -__IDE_DEVSET(_name, _flags, _min, _max, _get, _set, NULL, NULL) - -#define ide_devset_rw_nolock(_name, _min, _max, _func) \ -IDE_DEVSET(_name, S_RW | S_NOLOCK, _min, _max, get_##_func, set_##_func) +#define IDE_PROC_DEVSET(_name, _min, _max) \ +__IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) -#define ide_devset_w_nolock(_name, _min, _max, _func) \ -IDE_DEVSET(_name, S_WRITE | S_NOLOCK, _min, _max, NULL, set_##_func) - -#define ide_devset_rw(_name, _min, _max, _field) \ -static ide_devset_get(_name, _field); \ -static ide_devset_set(_name, _field); \ -IDE_DEVSET(_name, S_RW, _min, _max, get_##_name, set_##_name) - -#define ide_devset_r(_name, _min, _max, _field) \ -ide_devset_get(_name, _field) \ -IDE_DEVSET(_name, S_READ, _min, _max, get_##_name, NULL) - -/* - * /proc/ide interface - */ typedef struct { const char *name; mode_t mode; @@ -948,8 +956,8 @@ struct ide_driver_s { void (*resume)(ide_drive_t *); void (*shutdown)(ide_drive_t *); #ifdef CONFIG_IDE_PROC_FS - ide_proc_entry_t *proc; - const struct ide_devset **settings; + ide_proc_entry_t *proc; + const struct ide_proc_devset *settings; #endif }; @@ -961,9 +969,7 @@ void ide_device_put(ide_drive_t *); struct ide_ioctl_devset { unsigned int get_ioctl; unsigned int set_ioctl; - - int (*get)(ide_drive_t *); - int (*set)(ide_drive_t *, int); + const struct ide_devset *setting; }; int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int, @@ -1002,6 +1008,9 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); +extern int ide_devset_execute(ide_drive_t *drive, + const struct ide_devset *setting, int arg); + extern void ide_do_drive_cmd(ide_drive_t *, struct request *); extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); @@ -1191,7 +1200,6 @@ extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout); extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); -extern int ide_spin_wait_hwgroup(ide_drive_t *); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); extern void do_ide_request(struct request_queue *); -- cgit v1.2.3