aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/i386/xen/enlighten.c4
-rw-r--r--arch/powerpc/kernel/time.c8
-rw-r--r--arch/powerpc/kernel/vdso.c12
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c4
-rw-r--r--arch/um/include/kern_util.h2
-rw-r--r--arch/um/kernel/irq.c7
-rw-r--r--arch/um/os-Linux/file.c3
-rw-r--r--arch/um/os-Linux/signal.c4
-rw-r--r--arch/x86_64/mm/fault.c7
-rw-r--r--drivers/base/core.c29
-rw-r--r--drivers/block/DAC960.c1
-rw-r--r--drivers/char/agp/agp.h3
-rw-r--r--drivers/char/agp/intel-agp.c2
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c3
-rw-r--r--drivers/char/mspec.c69
-rw-r--r--drivers/media/video/usbvision/usbvision-cards.c1
-rw-r--r--drivers/mtd/nand/cafe_nand.c3
-rw-r--r--drivers/rtc/rtc-ds1553.c2
-rw-r--r--drivers/rtc/rtc-ds1742.c2
-rw-r--r--drivers/video/intelfb/intelfbhw.c2
-rw-r--r--fs/ext3/namei.c73
-rw-r--r--fs/ext4/namei.c73
-rw-r--r--fs/nfs/super.c2
-rw-r--r--include/asm-powerpc/time.h5
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/user_namespace.h2
-rw-r--r--init/Kconfig1
-rw-r--r--init/do_mounts_initrd.c4
-rw-r--r--kernel/user.c45
-rw-r--r--kernel/user_namespace.c4
-rw-r--r--kernel/utsname.c2
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/mempolicy.c79
34 files changed, 375 insertions, 94 deletions
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index f0c37511d8d..f01bfcd4bde 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -623,8 +623,8 @@ static unsigned long xen_read_cr2_direct(void)
static void xen_write_cr4(unsigned long cr4)
{
- /* never allow TSC to be disabled */
- native_write_cr4(cr4 & ~X86_CR4_TSD);
+ /* Just ignore cr4 changes; Xen doesn't allow us to do
+ anything anyway. */
}
static unsigned long xen_read_cr3(void)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 727a6699f2f..c627cf86d1e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -239,7 +239,7 @@ static void snapshot_tb_and_purr(void *data)
struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
local_irq_save(flags);
- p->tb = mftb();
+ p->tb = get_tb_or_rtc();
p->purr = mfspr(SPRN_PURR);
wmb();
p->initialized = 1;
@@ -317,7 +317,7 @@ static void snapshot_purr(void)
*/
void snapshot_timebase(void)
{
- __get_cpu_var(last_jiffy) = get_tb();
+ __get_cpu_var(last_jiffy) = get_tb_or_rtc();
snapshot_purr();
}
@@ -684,6 +684,8 @@ void timer_interrupt(struct pt_regs * regs)
write_seqlock(&xtime_lock);
tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
+ if (__USE_RTC() && tb_next_jiffy >= 1000000000)
+ tb_next_jiffy -= 1000000000;
if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
tb_last_jiffy = tb_next_jiffy;
do_timer(1);
@@ -977,7 +979,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
- boot_tb = get_tb();
+ boot_tb = get_tb_or_rtc();
tm = get_boot_time();
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index cef01e4e898..213fa31ac53 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -98,6 +98,18 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_USE_TB, 0,
"__kernel_gettimeofday", NULL
},
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_clock_gettime", NULL
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_clock_getres", NULL
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_get_tbfreq", NULL
+ },
};
/*
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c784edd40ea..5bebe7fbe05 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -579,7 +579,7 @@ static struct spu *find_victim(struct spu_context *ctx)
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
struct spu_context *tmp = spu->ctx;
- if (tmp->prio > ctx->prio &&
+ if (tmp && tmp->prio > ctx->prio &&
(!victim || tmp->prio > victim->prio))
victim = spu->ctx;
}
@@ -611,9 +611,9 @@ static struct spu *find_victim(struct spu_context *ctx)
mutex_lock(&cbe_spu_info[node].list_mutex);
cbe_spu_info[node].nr_active--;
+ spu_unbind_context(spu, victim);
mutex_unlock(&cbe_spu_info[node].list_mutex);
- spu_unbind_context(spu, victim);
victim->stats.invol_ctx_switch++;
spu->stats.invol_ctx_switch++;
mutex_unlock(&victim->state_mutex);
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index 8d7f7c1cb9c..6c2be26f1d7 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -117,7 +117,7 @@ extern void sigio_handler(int sig, union uml_pt_regs *regs);
extern void copy_sc(union uml_pt_regs *regs, void *from);
-unsigned long to_irq_stack(int sig, unsigned long *mask_out);
+extern unsigned long to_irq_stack(unsigned long *mask_out);
unsigned long from_irq_stack(int nested);
#endif
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9870febdbea..cf0dd9cf8c4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -518,13 +518,13 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler)
static unsigned long pending_mask;
-unsigned long to_irq_stack(int sig, unsigned long *mask_out)
+unsigned long to_irq_stack(unsigned long *mask_out)
{
struct thread_info *ti;
unsigned long mask, old;
int nested;
- mask = xchg(&pending_mask, 1 << sig);
+ mask = xchg(&pending_mask, *mask_out);
if(mask != 0){
/* If any interrupts come in at this point, we want to
* make sure that their bits aren't lost by our
@@ -534,7 +534,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out)
* and pending_mask contains a bit for each interrupt
* that came in.
*/
- old = 1 << sig;
+ old = *mask_out;
do {
old |= mask;
mask = xchg(&pending_mask, old);
@@ -550,6 +550,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out)
task = cpu_tasks[ti->cpu].task;
tti = task_thread_info(task);
+
*ti = *tti;
ti->real_thread = tti;
task->stack = ti;
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 6f92f732d25..c3ecc2a84e0 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -320,7 +320,8 @@ int os_file_size(char *file, unsigned long long *size_out)
}
if(S_ISBLK(buf.ust_mode)){
- int fd, blocks;
+ int fd;
+ long blocks;
fd = os_open_file(file, of_read(OPENFLAGS()), 0);
if(fd < 0){
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 18e5c8b67eb..b98f7ea2d2f 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -119,7 +119,7 @@ void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
void handle_signal(int sig, struct sigcontext *sc)
{
- unsigned long pending = 0;
+ unsigned long pending = 1UL << sig;
do {
int nested, bail;
@@ -134,7 +134,7 @@ void handle_signal(int sig, struct sigcontext *sc)
* have to return, and the upper handler will deal
* with this interrupt.
*/
- bail = to_irq_stack(sig, &pending);
+ bail = to_irq_stack(&pending);
if(bail)
return;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 327c9f2fa62..54816adb8e9 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -374,6 +374,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (unlikely(in_atomic() || !mm))
goto bad_area_nosemaphore;
+ /*
+ * User-mode registers count as a user access even for any
+ * potential system fault or CPU buglet.
+ */
+ if (user_mode_vm(regs))
+ error_code |= PF_USER;
+
again:
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
diff --git a/drivers/base/core.c b/drivers/base/core.c
index e6738bcbe5a..6de33d7a29b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -679,14 +679,26 @@ static int device_add_class_symlinks(struct device *dev)
goto out_subsys;
}
if (dev->parent) {
- error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
- "device");
- if (error)
- goto out_busid;
#ifdef CONFIG_SYSFS_DEPRECATED
{
- char * class_name = make_class_name(dev->class->name,
- &dev->kobj);
+ struct device *parent = dev->parent;
+ char *class_name;
+
+ /*
+ * In old sysfs stacked class devices had 'device'
+ * link pointing to real device instead of parent
+ */
+ while (parent->class && !parent->bus && parent->parent)
+ parent = parent->parent;
+
+ error = sysfs_create_link(&dev->kobj,
+ &parent->kobj,
+ "device");
+ if (error)
+ goto out_busid;
+
+ class_name = make_class_name(dev->class->name,
+ &dev->kobj);
if (class_name)
error = sysfs_create_link(&dev->parent->kobj,
&dev->kobj, class_name);
@@ -694,6 +706,11 @@ static int device_add_class_symlinks(struct device *dev)
if (error)
goto out_device;
}
+#else
+ error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
+ "device");
+ if (error)
+ goto out_busid;
#endif
}
return 0;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 504a95d888b..84d6aa500e2 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -31,6 +31,7 @@
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/blkpg.h>
+#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/mm.h>
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 35ab1a9f8e8..8955e7ff759 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -176,7 +176,7 @@ struct agp_bridge_data {
#define I830_GMCH_MEM_MASK 0x1
#define I830_GMCH_MEM_64M 0x1
#define I830_GMCH_MEM_128M 0
-#define I830_GMCH_GMS_MASK 0xF0
+#define I830_GMCH_GMS_MASK 0x70
#define I830_GMCH_GMS_DISABLED 0x00
#define I830_GMCH_GMS_LOCAL 0x10
#define I830_GMCH_GMS_STOLEN_512 0x20
@@ -190,6 +190,7 @@ struct agp_bridge_data {
#define INTEL_I830_ERRSTS 0x92
/* Intel 855GM/852GM registers */
+#define I855_GMCH_GMS_MASK 0xF0
#define I855_GMCH_GMS_STOLEN_0M 0x0
#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4)
#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4)
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 7c69bf259ca..a5d0e95a227 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -511,7 +511,7 @@ static void intel_i830_init_gtt_entries(void)
*/
if (IS_G33)
size = 0;
- switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+ switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
case I855_GMCH_GMS_STOLEN_1M:
gtt_entries = MB(1) - KB(size);
break;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 9b07f785106..dd441ff4af5 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2215,7 +2215,8 @@ static int ipmi_pci_resume(struct pci_dev *pdev)
static struct pci_device_id ipmi_pci_devices[] = {
{ PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) },
- { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }
+ { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) },
+ { 0, }
};
MODULE_DEVICE_TABLE(pci, ipmi_pci_devices);
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index c08a4152ee8..049a46cc9f8 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -67,7 +67,7 @@
/*
* Page types allocated by the device.
*/
-enum {
+enum mspec_page_type {
MSPEC_FETCHOP = 1,
MSPEC_CACHED,
MSPEC_UNCACHED
@@ -83,15 +83,25 @@ static int is_sn2;
* One of these structures is allocated when an mspec region is mmaped. The
* structure is pointed to by the vma->vm_private_data field in the vma struct.
* This structure is used to record the addresses of the mspec pages.
+ * This structure is shared by all vma's that are split off from the
+ * original vma when split_vma()'s are done.
+ *
+ * The refcnt is incremented atomically because mm->mmap_sem does not
+ * protect in fork case where multiple tasks share the vma_data.
*/
struct vma_data {
atomic_t refcnt; /* Number of vmas sharing the data. */
- spinlock_t lock; /* Serialize access to the vma. */
+ spinlock_t lock; /* Serialize access to this structure. */
int count; /* Number of pages allocated. */
- int type; /* Type of pages allocated. */
+ enum mspec_page_type type; /* Type of pages allocated. */
+ int flags; /* See VMD_xxx below. */
+ unsigned long vm_start; /* Original (unsplit) base. */
+ unsigned long vm_end; /* Original (unsplit) end. */
unsigned long maddr[0]; /* Array of MSPEC addresses. */
};
+#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
+
/* used on shub2 to clear FOP cache in the HUB */
static unsigned long scratch_page[MAX_NUMNODES];
#define SH2_AMO_CACHE_ENTRIES 4
@@ -129,8 +139,8 @@ mspec_zero_block(unsigned long addr, int len)
* mspec_open
*
* Called when a device mapping is created by a means other than mmap
- * (via fork, etc.). Increments the reference count on the underlying
- * mspec data so it is not freed prematurely.
+ * (via fork, munmap, etc.). Increments the reference count on the
+ * underlying mspec data so it is not freed prematurely.
*/
static void
mspec_open(struct vm_area_struct *vma)
@@ -151,34 +161,44 @@ static void
mspec_close(struct vm_area_struct *vma)
{
struct vma_data *vdata;
- int i, pages, result, vdata_size;
+ int index, last_index, result;
+ unsigned long my_page;
vdata = vma->vm_private_data;
- if (!atomic_dec_and_test(&vdata->refcnt))
- return;
- pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
- for (i = 0; i < pages; i++) {
- if (vdata->maddr[i] == 0)
+ BUG_ON(vma->vm_start < vdata->vm_start || vma->vm_end > vdata->vm_end);
+
+ spin_lock(&vdata->lock);
+ index = (vma->vm_start - vdata->vm_start) >> PAGE_SHIFT;
+ last_index = (vma->vm_end - vdata->vm_start) >> PAGE_SHIFT;
+ for (; index < last_index; index++) {
+ if (vdata->maddr[index] == 0)
continue;
/*
* Clear the page before sticking it back
* into the pool.
*/
- result = mspec_zero_block(vdata->maddr[i], PAGE_SIZE);
+ my_page = vdata->maddr[index];
+ vdata->maddr[index] = 0;
+ spin_unlock(&vdata->lock);
+ result = mspec_zero_block(my_page, PAGE_SIZE);
if (!result)
- uncached_free_page(vdata->maddr[i]);
+ uncached_free_page(my_page);
else
printk(KERN_WARNING "mspec_close(): "
"failed to zero page %i\n",
result);
+ spin_lock(&vdata->lock);
}
+ spin_unlock(&vdata->lock);
- if (vdata_size <= PAGE_SIZE)
- kfree(vdata);
- else
+ if (!atomic_dec_and_test(&vdata->refcnt))
+ return;
+
+ if (vdata->flags & VMD_VMALLOCED)
vfree(vdata);
+ else
+ kfree(vdata);
}
@@ -195,7 +215,8 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address)
int index;
struct vma_data *vdata = vma->vm_private_data;
- index = (address - vma->vm_start) >> PAGE_SHIFT;
+ BUG_ON(address < vdata->vm_start || address >= vdata->vm_end);
+ index = (address - vdata->vm_start) >> PAGE_SHIFT;
maddr = (volatile unsigned long) vdata->maddr[index];
if (maddr == 0) {
maddr = uncached_alloc_page(numa_node_id());
@@ -237,10 +258,11 @@ static struct vm_operations_struct mspec_vm_ops = {
* underlying pages.
*/
static int
-mspec_mmap(struct file *file, struct vm_area_struct *vma, int type)
+mspec_mmap(struct file *file, struct vm_area_struct *vma,
+ enum mspec_page_type type)
{
struct vma_data *vdata;
- int pages, vdata_size;
+ int pages, vdata_size, flags = 0;
if (vma->vm_pgoff != 0)
return -EINVAL;
@@ -255,12 +277,17 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, int type)
vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
if (vdata_size <= PAGE_SIZE)
vdata = kmalloc(vdata_size, GFP_KERNEL);
- else
+ else {
vdata = vmalloc(vdata_size);
+ flags = VMD_VMALLOCED;
+ }
if (!vdata)
return -ENOMEM;
memset(vdata, 0, vdata_size);
+ vdata->vm_start = vma->vm_start;
+ vdata->vm_end = vma->vm_end;
+ vdata->flags = flags;
vdata->type = type;
spin_lock_init(&vdata->lock);
vdata->refcnt = ATOMIC_INIT(1);
diff --git a/drivers/media/video/usbvision/usbvision-cards.c b/drivers/media/video/usbvision/usbvision-cards.c
index 380564cd331..f09eb102731 100644
--- a/drivers/media/video/usbvision/usbvision-cards.c
+++ b/drivers/media/video/usbvision/usbvision-cards.c
@@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] = {
{ USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL },
{ USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM },
{ USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV },
+ { }, /* terminate list */
};
MODULE_DEVICE_TABLE (usb, usbvision_table);
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index cff969d05d4..6f32a35eb10 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -816,7 +816,8 @@ static void __devexit cafe_nand_remove(struct pci_dev *pdev)
}
static struct pci_device_id cafe_nand_tbl[] = {
- { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 }
+ { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 },
+ { 0, }
};
MODULE_DEVICE_TABLE(pci, cafe_nand_tbl);
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 46da5714932..5ab3492817d 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -61,7 +61,7 @@
struct rtc_plat_data {
struct rtc_device *rtc;
void __iomem *ioaddr;
- unsigned long baseaddr;
+ resource_size_t baseaddr;
unsigned long last_jiffies;
int irq;
unsigned int irqen;
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index b2e5481ba3b..67291b0f828 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -55,7 +55,7 @@ struct rtc_plat_data {
void __iomem *ioaddr_rtc;
size_t size_nvram;
size_t size;
- unsigned long baseaddr;
+ resource_size_t baseaddr;
unsigned long last_jiffies;
};
diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c
index b21d0dec928..6a47682d861 100644
--- a/drivers/video/intelfb/intelfbhw.c
+++ b/drivers/video/intelfb/intelfbhw.c
@@ -1352,7 +1352,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
/* turn off PLL */
tmp = INREG(dpll_reg);
- dpll_reg &= ~DPLL_VCO_ENABLE;
+ tmp &= ~DPLL_VCO_ENABLE;
OUTREG(dpll_reg, tmp);
/* Set PLL parameters */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b817..c1fa1908dba 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext3_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext3_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af7..5fdb862e71c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext4_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext4_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f1..b878528b64c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
unregister_shrinker(&acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
- nfs_unregister_sysctl();
#endif
+ nfs_unregister_sysctl();
unregister_filesystem(&nfs_fs_type);
}
diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h
index d7f5ddfbaac..c104c15c662 100644
--- a/include/asm-powerpc/time.h
+++ b/include/asm-powerpc/time.h
@@ -149,6 +149,11 @@ static inline u64 get_tb(void)
}
#endif /* !CONFIG_PPC64 */
+static inline u64 get_tb_or_rtc(void)
+{
+ return __USE_RTC() ? get_rtc() : get_tb();
+}
+
static inline void set_tb(unsigned int upper, unsigned int lower)
{
mtspr(SPRN_TBWL, 0);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5bdd656e88c..a020eb2d4e2 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
extern struct mempolicy default_policy;
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
- unsigned long addr, gfp_t gfp_flags);
+ unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
extern unsigned slab_node(struct mempolicy *policy);
extern enum zone_type policy_zone;
@@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
#define set_cpuset_being_rebound(x) do {} while (0)
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
- unsigned long addr, gfp_t gfp_flags)
+ unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
{
return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f4e324ed2e4..5445eaec690 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -593,7 +593,7 @@ struct user_struct {
#endif
/* Hash table maintenance information */
- struct list_head uidhash_list;
+ struct hlist_node uidhash_node;
uid_t uid;
};
@@ -1472,6 +1472,7 @@ static inline struct user_struct *get_uid(struct user_struct *u)
}
extern void free_uid(struct user_struct *);
extern void switch_uid(struct user_struct *);
+extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 1101b0ce878..b5f41d4c2ee 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -11,7 +11,7 @@
struct user_namespace {
struct kref kref;
- struct list_head uidhash_table[UIDHASH_SZ];
+ struct hlist_head uidhash_table[UIDHASH_SZ];
struct user_struct *root_user;
};
diff --git a/init/Kconfig b/init/Kconfig
index 96b54595f1d..d54d0cadcc0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -488,6 +488,7 @@ config SIGNALFD
config TIMERFD
bool "Enable timerfd() system call" if EMBEDDED
select ANON_INODES
+ depends on BROKEN
default y
help
Enable the timerfd() system call that allows to receive timer
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a6b4c0c08e1..fd4fc12d262 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -57,8 +57,10 @@ static void __init handle_initrd(void)
pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
if (pid > 0)
- while (pid != sys_wait4(-1, NULL, 0, NULL))
+ while (pid != sys_wait4(-1, NULL, 0, NULL)) {
+ try_to_freeze();
yield();
+ }
/* move initrd to rootfs' /old */
sys_fchdir(old_fd);
diff --git a/kernel/user.c b/kernel/user.c
index e7d11cef699..9ca2848fc35 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,25 +55,22 @@ struct user_struct root_user = {
/*
* These routines must be called with the uidhash spinlock held!
*/
-static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent)
+static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
{
- list_add(&up->uidhash_list, hashent);
+ hlist_add_head(&up->uidhash_node, hashent);
}
static inline void uid_hash_remove(struct user_struct *up)
{
- list_del(&up->uidhash_list);
+ hlist_del_init(&up->uidhash_node);
}
-static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
+static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
- struct list_head *up;
-
- list_for_each(up, hashent) {
- struct user_struct *user;
-
- user = list_entry(up, struct user_struct, uidhash_list);
+ struct user_struct *user;
+ struct hlist_node *h;
+ hlist_for_each_entry(user, h, hashent, uidhash_node) {
if(user->uid == uid) {
atomic_inc(&user->__count);
return user;
@@ -122,7 +119,7 @@ void free_uid(struct user_struct *up)
struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
{
- struct list_head *hashent = uidhashentry(ns, uid);
+ struct hlist_head *hashent = uidhashentry(ns, uid);
struct user_struct *up;
spin_lock_irq(&uidhash_lock);
@@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user)
suid_keys(current);
}
+void release_uids(struct user_namespace *ns)
+{
+ int i;
+ unsigned long flags;
+ struct hlist_head *head;
+ struct hlist_node *nd;
+
+ spin_lock_irqsave(&uidhash_lock, flags);
+ /*
+ * collapse the chains so that the user_struct-s will
+ * be still alive, but not in hashes. subsequent free_uid()
+ * will free them.
+ */
+ for (i = 0; i < UIDHASH_SZ; i++) {
+ head = ns->uidhash_table + i;
+ while (!hlist_empty(head)) {
+ nd = head->first;
+ hlist_del_init(nd);
+ }
+ }
+ spin_unlock_irqrestore(&uidhash_lock, flags);
+
+ free_uid(ns->root_user);
+}
static int __init uid_cache_init(void)
{
@@ -211,7 +232,7 @@ static int __init uid_cache_init(void)
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
for(n = 0; n < UIDHASH_SZ; ++n)
- INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
+ INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);
/* Insert the root user immediately (init already runs as root) */
spin_lock_irq(&uidhash_lock);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 85af9422ea6..7af90fc4f0f 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
kref_init(&ns->kref);
for (n = 0; n < UIDHASH_SZ; ++n)
- INIT_LIST_HEAD(ns->uidhash_table + n);
+ INIT_HLIST_HEAD(ns->uidhash_table + n);
/* Insert new root user. */
ns->root_user = alloc_uid(ns, 0);
@@ -81,7 +81,7 @@ void free_user_ns(struct kref *kref)
struct user_namespace *ns;
ns = container_of(kref, struct user_namespace, kref);
- free_uid(ns->root_user);
+ release_uids(ns);
kfree(ns);
}
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 9d8180a0f0d..816d7b24fa0 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
if (!ns)
return ERR_PTR(-ENOMEM);
+ down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ up_read(&uts_sem);
kref_init(&ns->kref);
return ns;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de4cf458d6e..84c795ee2d6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
{
int nid;
struct page *page = NULL;
+ struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address,
- htlb_alloc_mask);
+ htlb_alloc_mask, &mpol);
struct zone **z;
for (z = zonelist->zones; *z; z++) {
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
break;
}
}
+ mpol_free(mpol); /* unref if mpol !NULL */
return page;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bb54b88c3d5..3d6ac9505d0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
#endif
-/* Return effective policy for a VMA */
+/*
+ * get_vma_policy(@task, @vma, @addr)
+ * @task - task for fallback if vma policy == default
+ * @vma - virtual memory area whose policy is sought
+ * @addr - address in @vma for shared policy lookup
+ *
+ * Returns effective policy for a VMA at specified address.
+ * Falls back to @task or system default policy, as necessary.
+ * Returned policy has extra reference count if shared, vma,
+ * or some other task's policy [show_numa_maps() can pass
+ * @task != current]. It is the caller's responsibility to
+ * free the reference in these cases.
+ */
static struct mempolicy * get_vma_policy(struct task_struct *task,
struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = task->mempolicy;
+ int shared_pol = 0;
if (vma) {
- if (vma->vm_ops && vma->vm_ops->get_policy)
+ if (vma->vm_ops && vma->vm_ops->get_policy) {
pol = vma->vm_ops->get_policy(vma, addr);
- else if (vma->vm_policy &&
+ shared_pol = 1; /* if pol non-NULL, add ref below */
+ } else if (vma->vm_policy &&
vma->vm_policy->policy != MPOL_DEFAULT)
pol = vma->vm_policy;
}
if (!pol)
pol = &default_policy;
+ else if (!shared_pol && pol != current->mempolicy)
+ mpol_get(pol); /* vma or other task's policy */
return pol;
}
@@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
}
#ifdef CONFIG_HUGETLBFS
-/* Return a zonelist suitable for a huge page allocation. */
+/*
+ * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
+ * @vma = virtual memory area whose policy is sought
+ * @addr = address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags = for requested zone
+ * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
+ *
+ * Returns a zonelist suitable for a huge page allocation.
+ * If the effective policy is 'BIND, returns pointer to policy's zonelist.
+ * If it is also a policy for which get_vma_policy() returns an extra
+ * reference, we must hold that reference until after allocation.
+ * In that case, return policy via @mpol so hugetlb allocation can drop
+ * the reference. For non-'BIND referenced policies, we can/do drop the
+ * reference here, so the caller doesn't need to know about the special case
+ * for default and current task policy.
+ */
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
- gfp_t gfp_flags)
+ gfp_t gfp_flags, struct mempolicy **mpol)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
+ struct zonelist *zl;
+ *mpol = NULL; /* probably no unref needed */
if (pol->policy == MPOL_INTERLEAVE) {
unsigned nid;
nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+ __mpol_free(pol); /* finished with pol */
return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
}
- return zonelist_policy(GFP_HIGHUSER, pol);
+
+ zl = zonelist_policy(GFP_HIGHUSER, pol);
+ if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
+ if (pol->policy != MPOL_BIND)
+ __mpol_free(pol); /* finished with pol */
+ else
+ *mpol = pol; /* unref needed after allocation */
+ }
+ return zl;
}
#endif
@@ -1264,6 +1306,7 @@ struct page *
alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
+ struct zonelist *zl;
cpuset_update_task_memory_state();
@@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
return alloc_page_interleave(gfp, 0, nid);
}
- return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+ zl = zonelist_policy(gfp, pol);
+ if (pol != &default_policy && pol != current->mempolicy) {
+ /*
+ * slow path: ref counted policy -- shared or vma
+ */
+ struct page *page = __alloc_pages(gfp, 0, zl);
+ __mpol_free(pol);
+ return page;
+ }
+ /*
+ * fast path: default or task policy
+ */
+ return __alloc_pages(gfp, 0, zl);
}
/**
@@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v)
struct numa_maps *md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
+ struct mempolicy *pol;
int n;
char buffer[50];
@@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v)
if (!md)
return 0;
- mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(priv->task, vma, vma->vm_start));
+ pol = get_vma_policy(priv->task, vma, vma->vm_start);
+ mpol_to_str(buffer, sizeof(buffer), pol);
+ /*
+ * unref shared or other task's mempolicy
+ */
+ if (pol != &default_policy && pol != current->mempolicy)
+ __mpol_free(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);