diff options
Diffstat (limited to 'drivers/misc')
33 files changed, 884 insertions, 5 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 2191c8d896a..0d0d625fece 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -311,6 +311,22 @@ config TI_DAC7512 This driver can also be built as a module. If so, the module will be calles ti_dac7512. +config VMWARE_BALLOON + tristate "VMware Balloon Driver" + depends on X86 + help + This is VMware physical memory management driver which acts + like a "balloon" that can be inflated to reclaim physical pages + by reserving them in the guest and invalidating them in the + monitor, freeing up the underlying machine pages so they can + be allocated to other guests. The balloon can also be deflated + to allow the guest to use more physical memory. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmware_balloon. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 27c48435541..7b6f7eefdf8 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/ obj-y += eeprom/ obj-y += cb710/ +obj-$(CONFIG_VMWARE_BALLOON) += vmware_balloon.o diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index 558bf3f2c27..4afffe610f9 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/spinlock.h> #include <linux/atmel-ssc.h> +#include <linux/slab.h> /* Serialize access to ssc_list and user count */ static DEFINE_SPINLOCK(user_lock); diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c index 6aa5294dfec..0f3fb4f03bd 100644 --- a/drivers/misc/atmel_pwm.c +++ b/drivers/misc/atmel_pwm.c @@ -1,6 +1,7 @@ #include <linux/module.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/slab.h> #include <linux/io.h> #include <linux/interrupt.h> #include <linux/platform_device.h> diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index 05dc8a31f28..3891124001f 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -6,6 +6,7 @@ #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include <linux/slab.h> /* Number of bytes to reserve for the iomem resource */ #define ATMEL_TC_IOMEM_SIZE 256 diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index b5346b4db91..ed090e77c9c 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/idr.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/c2port.h> @@ -912,8 +913,8 @@ struct c2port_device *c2port_device_register(char *name, c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, "c2port%d", id); - if (unlikely(!c2dev->dev)) { - ret = -ENOMEM; + if (unlikely(IS_ERR(c2dev->dev))) { + ret = PTR_ERR(c2dev->dev); goto error_device_create; } dev_set_drvdata(c2dev->dev, c2dev); diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c index b14eab0f2ba..efec4139c3f 100644 --- a/drivers/misc/cb710/core.c +++ b/drivers/misc/cb710/core.c @@ -9,11 +9,11 @@ */ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/slab.h> #include <linux/pci.h> #include <linux/spinlock.h> #include <linux/idr.h> #include <linux/cb710.h> +#include <linux/gfp.h> static DEFINE_IDA(cb710_ida); static DEFINE_SPINLOCK(cb710_ida_lock); diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c index 02358d086e0..fcb3b8e30c5 100644 --- a/drivers/misc/cb710/debug.c +++ b/drivers/misc/cb710/debug.c @@ -10,7 +10,6 @@ #include <linux/cb710.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/slab.h> #define CB710_REG_COUNT 0x80 diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c index 8110460558f..9bec24db4d4 100644 --- a/drivers/misc/cs5535-mfgpt.c +++ b/drivers/misc/cs5535-mfgpt.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/cs5535.h> +#include <linux/slab.h> #define DRV_NAME "cs5535-mfgpt" #define MFGPT_BAR 2 diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c index f3ee4a1abb7..9197cfc5501 100644 --- a/drivers/misc/ds1682.c +++ b/drivers/misc/ds1682.c @@ -33,7 +33,6 @@ #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/i2c.h> #include <linux/string.h> #include <linux/list.h> diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 1eac626e710..48c84a58163 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -27,6 +27,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/slab.h> static LIST_HEAD(container_list); static DEFINE_MUTEX(container_list_lock); diff --git a/drivers/misc/ep93xx_pwm.c b/drivers/misc/ep93xx_pwm.c index ba4694169d7..46b3439673e 100644 --- a/drivers/misc/ep93xx_pwm.c +++ b/drivers/misc/ep93xx_pwm.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/slab.h> #include <linux/clk.h> #include <linux/err.h> #include <linux/io.h> diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index a92a3a742b4..98ad0120aa9 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -25,6 +25,7 @@ #include <linux/io.h> #include <linux/wait.h> #include <linux/poll.h> +#include <linux/slab.h> #include "hpilo.h" static struct class *ilo_class; diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index e2031739aa2..5c766b4fb23 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -23,6 +23,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c index 572d41ffc18..76bfda1ffaa 100644 --- a/drivers/misc/ibmasm/event.c +++ b/drivers/misc/ibmasm/event.c @@ -23,6 +23,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index aecf40ecb3a..8844a3f4538 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -75,6 +75,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> #include "ibmasm.h" diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c index dc14b0b9cbf..a234d965243 100644 --- a/drivers/misc/ibmasm/module.c +++ b/drivers/misc/ibmasm/module.c @@ -52,6 +52,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/slab.h> #include "ibmasm.h" #include "lowlevel.h" #include "remote.h" diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c index 395a4ea64e9..152e9d93eec 100644 --- a/drivers/misc/ics932s401.c +++ b/drivers/misc/ics932s401.c @@ -26,6 +26,7 @@ #include <linux/mutex.h> #include <linux/delay.h> #include <linux/log2.h> +#include <linux/slab.h> /* Addresses to scan */ static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index 09dcb699e66..193206602d8 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -30,6 +30,7 @@ #include <linux/pci.h> #include <linux/ioc4.h> #include <linux/ktime.h> +#include <linux/slab.h> #include <linux/mutex.h> #include <linux/time.h> #include <asm/io.h> diff --git a/drivers/misc/iwmc3200top/debugfs.c b/drivers/misc/iwmc3200top/debugfs.c index 0c8ea0a1c8a..e9eda471f6e 100644 --- a/drivers/misc/iwmc3200top/debugfs.c +++ b/drivers/misc/iwmc3200top/debugfs.c @@ -25,6 +25,7 @@ */ #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/mmc/sdio_func.h> diff --git a/drivers/misc/iwmc3200top/fw-download.c b/drivers/misc/iwmc3200top/fw-download.c index 9dbaeb574e6..e27afde6e99 100644 --- a/drivers/misc/iwmc3200top/fw-download.c +++ b/drivers/misc/iwmc3200top/fw-download.c @@ -26,6 +26,7 @@ #include <linux/firmware.h> #include <linux/mmc/sdio_func.h> +#include <linux/slab.h> #include <asm/unaligned.h> #include "iwmc3200top.h" diff --git a/drivers/misc/iwmc3200top/log.c b/drivers/misc/iwmc3200top/log.c index d569279698f..a36a55a49ca 100644 --- a/drivers/misc/iwmc3200top/log.c +++ b/drivers/misc/iwmc3200top/log.c @@ -26,6 +26,7 @@ #include <linux/kernel.h> #include <linux/mmc/sdio_func.h> +#include <linux/slab.h> #include <linux/ctype.h> #include "fw-msg.h" #include "iwmc3200top.h" diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c index 3b7292a5cea..c73cef2c3c5 100644 --- a/drivers/misc/iwmc3200top/main.c +++ b/drivers/misc/iwmc3200top/main.c @@ -25,6 +25,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/debugfs.h> diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index fcb6ec1af17..72450237a0f 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -295,6 +295,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) /* On x86 a breakpoint stop requires it to be decremented */ if (addr + 1 == kgdbts_regs.ip) offset = -1; +#elif defined(CONFIG_SUPERH) + /* On SUPERH a breakpoint stop requires it to be decremented */ + if (addr + 2 == kgdbts_regs.pc) + offset = -2; #endif if (strcmp(arg, "silent") && instruction_pointer(&kgdbts_regs) + offset != addr) { @@ -305,6 +309,8 @@ static int check_and_rewind_pc(char *put_str, char *arg) #ifdef CONFIG_X86 /* On x86 adjust the instruction pointer if needed */ kgdbts_regs.ip += offset; +#elif defined(CONFIG_SUPERH) + kgdbts_regs.pc += offset; #endif return 0; } diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 4a0648301fd..31a991161f0 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -40,6 +40,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/hrtimer.h> +#include <linux/slab.h> #include <scsi/scsi_cmnd.h> #include <linux/debugfs.h> diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 779aa8ebe4c..75ee0d3f6f4 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -21,6 +21,7 @@ #include <linux/poll.h> #include <linux/interrupt.h> #include <linux/cdev.h> +#include <linux/slab.h> #include <linux/phantom.h> #include <linux/sched.h> #include <linux/smp_lock.h> diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 832ed4c88cf..8d082b46426 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -44,6 +44,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/sysctl.h> #include <linux/device.h> #include <linux/delay.h> diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 9a6268c89fd..d551f09ccb7 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -17,6 +17,7 @@ #include <linux/device.h> #include <linux/hardirq.h> +#include <linux/slab.h> #include "xpc.h" #include <asm/uv/uv_hub.h> diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c index 8b70e03f939..7d71c04fc93 100644 --- a/drivers/misc/sgi-xp/xpc_sn2.c +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -14,6 +14,7 @@ */ #include <linux/delay.h> +#include <linux/slab.h> #include <asm/uncached.h> #include <asm/sn/mspec.h> #include <asm/sn/sn_sal.h> diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 8725d5e8ab0..1f59ee2226c 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -19,6 +19,7 @@ #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> +#include <linux/slab.h> #include <asm/uv/uv_hub.h> #if defined CONFIG_X86_64 #include <asm/uv/bios.h> diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 57b152f8d1b..ee5109a3cd9 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -20,6 +20,7 @@ * */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 98bcba521da..5f6852dff40 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -10,6 +10,7 @@ */ #include <linux/tifm.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/idr.h> diff --git a/drivers/misc/vmware_balloon.c b/drivers/misc/vmware_balloon.c new file mode 100644 index 00000000000..e7161c4e379 --- /dev/null +++ b/drivers/misc/vmware_balloon.c @@ -0,0 +1,832 @@ +/* + * VMware Balloon driver. + * + * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Maintained by: Dmitry Torokhov <dtor@vmware.com> + */ + +/* + * This is VMware physical memory management driver for Linux. The driver + * acts like a "balloon" that can be inflated to reclaim physical pages by + * reserving them in the guest and invalidating them in the monitor, + * freeing up the underlying machine pages so they can be allocated to + * other guests. The balloon can also be deflated to allow the guest to + * use more physical memory. Higher level policies can control the sizes + * of balloons in VMs in order to manage physical memory resources. + */ + +//#define DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/workqueue.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <asm/vmware.h> + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); +MODULE_VERSION("1.2.1.0-K"); +MODULE_ALIAS("dmi:*:svnVMware*:*"); +MODULE_ALIAS("vmware_vmmemctl"); +MODULE_LICENSE("GPL"); + +/* + * Various constants controlling rate of inflaint/deflating balloon, + * measured in pages. + */ + +/* + * Rate of allocating memory when there is no memory pressure + * (driver performs non-sleeping allocations). + */ +#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U + +/* + * Rates of memory allocaton when guest experiences memory pressure + * (driver performs sleeping allocations). + */ +#define VMW_BALLOON_RATE_ALLOC_MIN 512U +#define VMW_BALLOON_RATE_ALLOC_MAX 2048U +#define VMW_BALLOON_RATE_ALLOC_INC 16U + +/* + * Rates for releasing pages while deflating balloon. + */ +#define VMW_BALLOON_RATE_FREE_MIN 512U +#define VMW_BALLOON_RATE_FREE_MAX 16384U +#define VMW_BALLOON_RATE_FREE_INC 16U + +/* + * When guest is under memory pressure, use a reduced page allocation + * rate for next several cycles. + */ +#define VMW_BALLOON_SLOW_CYCLES 4 + +/* + * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't + * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use + * __GFP_NOWARN, to suppress page allocation failure warnings. + */ +#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) + +/* + * Use GFP_HIGHUSER when executing in a separate kernel thread + * context and allocation can sleep. This is less stressful to + * the guest memory system, since it allows the thread to block + * while memory is reclaimed, and won't take pages from emergency + * low-memory pools. + */ +#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) + +/* Maximum number of page allocations without yielding processor */ +#define VMW_BALLOON_YIELD_THRESHOLD 1024 + + +/* + * Hypervisor communication port definitions. + */ +#define VMW_BALLOON_HV_PORT 0x5670 +#define VMW_BALLOON_HV_MAGIC 0x456c6d6f +#define VMW_BALLOON_PROTOCOL_VERSION 2 +#define VMW_BALLOON_GUEST_ID 1 /* Linux */ + +#define VMW_BALLOON_CMD_START 0 +#define VMW_BALLOON_CMD_GET_TARGET 1 +#define VMW_BALLOON_CMD_LOCK 2 +#define VMW_BALLOON_CMD_UNLOCK 3 +#define VMW_BALLOON_CMD_GUEST_ID 4 + +/* error codes */ +#define VMW_BALLOON_SUCCESS 0 +#define VMW_BALLOON_FAILURE -1 +#define VMW_BALLOON_ERROR_CMD_INVALID 1 +#define VMW_BALLOON_ERROR_PPN_INVALID 2 +#define VMW_BALLOON_ERROR_PPN_LOCKED 3 +#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 +#define VMW_BALLOON_ERROR_PPN_PINNED 5 +#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 +#define VMW_BALLOON_ERROR_RESET 7 +#define VMW_BALLOON_ERROR_BUSY 8 + +#define VMWARE_BALLOON_CMD(cmd, data, result) \ +({ \ + unsigned long __stat, __dummy1, __dummy2; \ + __asm__ __volatile__ ("inl (%%dx)" : \ + "=a"(__stat), \ + "=c"(__dummy1), \ + "=d"(__dummy2), \ + "=b"(result) : \ + "0"(VMW_BALLOON_HV_MAGIC), \ + "1"(VMW_BALLOON_CMD_##cmd), \ + "2"(VMW_BALLOON_HV_PORT), \ + "3"(data) : \ + "memory"); \ + result &= -1UL; \ + __stat & -1UL; \ +}) + +#ifdef CONFIG_DEBUG_FS +struct vmballoon_stats { + unsigned int timer; + + /* allocation statustics */ + unsigned int alloc; + unsigned int alloc_fail; + unsigned int sleep_alloc; + unsigned int sleep_alloc_fail; + unsigned int refused_alloc; + unsigned int refused_free; + unsigned int free; + + /* monitor operations */ + unsigned int lock; + unsigned int lock_fail; + unsigned int unlock; + unsigned int unlock_fail; + unsigned int target; + unsigned int target_fail; + unsigned int start; + unsigned int start_fail; + unsigned int guest_type; + unsigned int guest_type_fail; +}; + +#define STATS_INC(stat) (stat)++ +#else +#define STATS_INC(stat) +#endif + +struct vmballoon { + + /* list of reserved physical pages */ + struct list_head pages; + + /* transient list of non-balloonable pages */ + struct list_head refused_pages; + + /* balloon size in pages */ + unsigned int size; + unsigned int target; + + /* reset flag */ + bool reset_required; + + /* adjustment rates (pages per second) */ + unsigned int rate_alloc; + unsigned int rate_free; + + /* slowdown page allocations for next few cycles */ + unsigned int slow_allocation_cycles; + +#ifdef CONFIG_DEBUG_FS + /* statistics */ + struct vmballoon_stats stats; + + /* debugfs file exporting statistics */ + struct dentry *dbg_entry; +#endif + + struct sysinfo sysinfo; + + struct delayed_work dwork; +}; + +static struct vmballoon balloon; +static struct workqueue_struct *vmballoon_wq; + +/* + * Send "start" command to the host, communicating supported version + * of the protocol. + */ +static bool vmballoon_send_start(struct vmballoon *b) +{ + unsigned long status, dummy; + + STATS_INC(b->stats.start); + + status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy); + if (status == VMW_BALLOON_SUCCESS) + return true; + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.start_fail); + return false; +} + +static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) +{ + switch (status) { + case VMW_BALLOON_SUCCESS: + return true; + + case VMW_BALLOON_ERROR_RESET: + b->reset_required = true; + /* fall through */ + + default: + return false; + } +} + +/* + * Communicate guest type to the host so that it can adjust ballooning + * algorithm to the one most appropriate for the guest. This command + * is normally issued after sending "start" command and is part of + * standard reset sequence. + */ +static bool vmballoon_send_guest_id(struct vmballoon *b) +{ + unsigned long status, dummy; + + status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy); + + STATS_INC(b->stats.guest_type); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.guest_type_fail); + return false; +} + +/* + * Retrieve desired balloon size from the host. + */ +static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) +{ + unsigned long status; + unsigned long target; + unsigned long limit; + u32 limit32; + + /* + * si_meminfo() is cheap. Moreover, we want to provide dynamic + * max balloon size later. So let us call si_meminfo() every + * iteration. + */ + si_meminfo(&b->sysinfo); + limit = b->sysinfo.totalram; + + /* Ensure limit fits in 32-bits */ + limit32 = (u32)limit; + if (limit != limit32) + return false; + + /* update stats */ + STATS_INC(b->stats.target); + + status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target); + if (vmballoon_check_status(b, status)) { + *new_target = target; + return true; + } + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.target_fail); + return false; +} + +/* + * Notify the host about allocated page so that host can use it without + * fear that guest will need it. Host may reject some pages, we need to + * check the return value and maybe submit a different page. + */ +static bool vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn) +{ + unsigned long status, dummy; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return false; + + STATS_INC(b->stats.lock); + + status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy); + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail); + return false; +} + +/* + * Notify the host that guest intends to release given page back into + * the pool of available (to the guest) pages. + */ +static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) +{ + unsigned long status, dummy; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return false; + + STATS_INC(b->stats.unlock); + + status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy); + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail); + return false; +} + +/* + * Quickly release all pages allocated for the balloon. This function is + * called when host decides to "reset" balloon for one reason or another. + * Unlike normal "deflate" we do not (shall not) notify host of the pages + * being released. + */ +static void vmballoon_pop(struct vmballoon *b) +{ + struct page *page, *next; + unsigned int count = 0; + + list_for_each_entry_safe(page, next, &b->pages, lru) { + list_del(&page->lru); + __free_page(page); + STATS_INC(b->stats.free); + b->size--; + + if (++count >= b->rate_free) { + count = 0; + cond_resched(); + } + } +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. + */ +static void vmballoon_reset(struct vmballoon *b) +{ + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (vmballoon_send_start(b)) { + b->reset_required = false; + if (!vmballoon_send_guest_id(b)) + pr_err("failed to send guest ID to the host\n"); + } +} + +/* + * Allocate (or reserve) a page for the balloon and notify the host. If host + * refuses the page put it on "refuse" list and allocate another one until host + * is satisfied. "Refused" pages are released at the end of inflation cycle + * (when we allocate b->rate_alloc pages). + */ +static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) +{ + struct page *page; + gfp_t flags; + bool locked = false; + + do { + if (!can_sleep) + STATS_INC(b->stats.alloc); + else + STATS_INC(b->stats.sleep_alloc); + + flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP; + page = alloc_page(flags); + if (!page) { + if (!can_sleep) + STATS_INC(b->stats.alloc_fail); + else + STATS_INC(b->stats.sleep_alloc_fail); + return -ENOMEM; + } + + /* inform monitor */ + locked = vmballoon_send_lock_page(b, page_to_pfn(page)); + if (!locked) { + if (b->reset_required) { + __free_page(page); + return -EIO; + } + + /* place on list of non-balloonable pages, retry allocation */ + list_add(&page->lru, &b->refused_pages); + STATS_INC(b->stats.refused_alloc); + } + } while (!locked); + + /* track allocated page */ + list_add(&page->lru, &b->pages); + + /* update balloon size */ + b->size++; + + return 0; +} + +/* + * Release the page allocated for the balloon. Note that we first notify + * the host so it can make sure the page will be available for the guest + * to use, if needed. + */ +static int vmballoon_release_page(struct vmballoon *b, struct page *page) +{ + if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) + return -EIO; + + list_del(&page->lru); + + /* deallocate page */ + __free_page(page); + STATS_INC(b->stats.free); + + /* update balloon size */ + b->size--; + + return 0; +} + +/* + * Release pages that were allocated while attempting to inflate the + * balloon but were refused by the host for one reason or another. + */ +static void vmballoon_release_refused_pages(struct vmballoon *b) +{ + struct page *page, *next; + + list_for_each_entry_safe(page, next, &b->refused_pages, lru) { + list_del(&page->lru); + __free_page(page); + STATS_INC(b->stats.refused_free); + } +} + +/* + * Inflate the balloon towards its target size. Note that we try to limit + * the rate of allocation to make sure we are not choking the rest of the + * system. + */ +static void vmballoon_inflate(struct vmballoon *b) +{ + unsigned int goal; + unsigned int rate; + unsigned int i; + unsigned int allocations = 0; + int error = 0; + bool alloc_can_sleep = false; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* + * First try NOSLEEP page allocations to inflate balloon. + * + * If we do not throttle nosleep allocations, we can drain all + * free pages in the guest quickly (if the balloon target is high). + * As a side-effect, draining free pages helps to inform (force) + * the guest to start swapping if balloon target is not met yet, + * which is a desired behavior. However, balloon driver can consume + * all available CPU cycles if too many pages are allocated in a + * second. Therefore, we throttle nosleep allocations even when + * the guest is not under memory pressure. OTOH, if we have already + * predicted that the guest is under memory pressure, then we + * slowdown page allocations considerably. + */ + + goal = b->target - b->size; + /* + * Start with no sleep allocation rate which may be higher + * than sleeping allocation rate. + */ + rate = b->slow_allocation_cycles ? + b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX; + + pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n", + __func__, goal, rate, b->rate_alloc); + + for (i = 0; i < goal; i++) { + + error = vmballoon_reserve_page(b, alloc_can_sleep); + if (error) { + if (error != -ENOMEM) { + /* + * Not a page allocation failure, stop this + * cycle. Maybe we'll get new target from + * the host soon. + */ + break; + } + + if (alloc_can_sleep) { + /* + * CANSLEEP page allocation failed, so guest + * is under severe memory pressure. Quickly + * decrease allocation rate. + */ + b->rate_alloc = max(b->rate_alloc / 2, + VMW_BALLOON_RATE_ALLOC_MIN); + break; + } + + /* + * NOSLEEP page allocation failed, so the guest is + * under memory pressure. Let us slow down page + * allocations for next few cycles so that the guest + * gets out of memory pressure. Also, if we already + * allocated b->rate_alloc pages, let's pause, + * otherwise switch to sleeping allocations. + */ + b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES; + + if (i >= b->rate_alloc) + break; + + alloc_can_sleep = true; + /* Lower rate for sleeping allocations. */ + rate = b->rate_alloc; + } + + if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) { + cond_resched(); + allocations = 0; + } + + if (i >= rate) { + /* We allocated enough pages, let's take a break. */ + break; + } + } + + /* + * We reached our goal without failures so try increasing + * allocation rate. + */ + if (error == 0 && i >= b->rate_alloc) { + unsigned int mult = i / b->rate_alloc; + + b->rate_alloc = + min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC, + VMW_BALLOON_RATE_ALLOC_MAX); + } + + vmballoon_release_refused_pages(b); +} + +/* + * Decrease the size of the balloon allowing guest to use more memory. + */ +static void vmballoon_deflate(struct vmballoon *b) +{ + struct page *page, *next; + unsigned int i = 0; + unsigned int goal; + int error; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* limit deallocation rate */ + goal = min(b->size - b->target, b->rate_free); + + pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free); + + /* free pages to reach target */ + list_for_each_entry_safe(page, next, &b->pages, lru) { + error = vmballoon_release_page(b, page); + if (error) { + /* quickly decrease rate in case of error */ + b->rate_free = max(b->rate_free / 2, + VMW_BALLOON_RATE_FREE_MIN); + return; + } + + if (++i >= goal) + break; + } + + /* slowly increase rate if there were no errors */ + b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC, + VMW_BALLOON_RATE_FREE_MAX); +} + +/* + * Balloon work function: reset protocol, if needed, get the new size and + * adjust balloon as needed. Repeat in 1 sec. + */ +static void vmballoon_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); + unsigned int target; + + STATS_INC(b->stats.timer); + + if (b->reset_required) + vmballoon_reset(b); + + if (b->slow_allocation_cycles > 0) + b->slow_allocation_cycles--; + + if (vmballoon_send_get_target(b, &target)) { + /* update target, adjust size */ + b->target = target; + + if (b->size < target) + vmballoon_inflate(b); + else if (b->size > target) + vmballoon_deflate(b); + } + + queue_delayed_work(vmballoon_wq, dwork, round_jiffies_relative(HZ)); +} + +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +static int vmballoon_debug_show(struct seq_file *f, void *offset) +{ + struct vmballoon *b = f->private; + struct vmballoon_stats *stats = &b->stats; + + /* format size info */ + seq_printf(f, + "target: %8d pages\n" + "current: %8d pages\n", + b->target, b->size); + + /* format rate info */ + seq_printf(f, + "rateNoSleepAlloc: %8d pages/sec\n" + "rateSleepAlloc: %8d pages/sec\n" + "rateFree: %8d pages/sec\n", + VMW_BALLOON_NOSLEEP_ALLOC_MAX, + b->rate_alloc, b->rate_free); + + seq_printf(f, + "\n" + "timer: %8u\n" + "start: %8u (%4u failed)\n" + "guestType: %8u (%4u failed)\n" + "lock: %8u (%4u failed)\n" + "unlock: %8u (%4u failed)\n" + "target: %8u (%4u failed)\n" + "primNoSleepAlloc: %8u (%4u failed)\n" + "primCanSleepAlloc: %8u (%4u failed)\n" + "primFree: %8u\n" + "errAlloc: %8u\n" + "errFree: %8u\n", + stats->timer, + stats->start, stats->start_fail, + stats->guest_type, stats->guest_type_fail, + stats->lock, stats->lock_fail, + stats->unlock, stats->unlock_fail, + stats->target, stats->target_fail, + stats->alloc, stats->alloc_fail, + stats->sleep_alloc, stats->sleep_alloc_fail, + stats->free, + stats->refused_alloc, stats->refused_free); + + return 0; +} + +static int vmballoon_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vmballoon_debug_show, inode->i_private); +} + +static const struct file_operations vmballoon_debug_fops = { + .owner = THIS_MODULE, + .open = vmballoon_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vmballoon_debugfs_init(struct vmballoon *b) +{ + int error; + + b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, + &vmballoon_debug_fops); + if (IS_ERR(b->dbg_entry)) { + error = PTR_ERR(b->dbg_entry); + pr_err("failed to create debugfs entry, error: %d\n", error); + return error; + } + + return 0; +} + +static void __exit vmballoon_debugfs_exit(struct vmballoon *b) +{ + debugfs_remove(b->dbg_entry); +} + +#else + +static inline int vmballoon_debugfs_init(struct vmballoon *b) +{ + return 0; +} + +static inline void vmballoon_debugfs_exit(struct vmballoon *b) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +static int __init vmballoon_init(void) +{ + int error; + + /* + * Check if we are running on VMware's hypervisor and bail out + * if we are not. + */ + if (!vmware_platform()) + return -ENODEV; + + vmballoon_wq = create_freezeable_workqueue("vmmemctl"); + if (!vmballoon_wq) { + pr_err("failed to create workqueue\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&balloon.pages); + INIT_LIST_HEAD(&balloon.refused_pages); + + /* initialize rates */ + balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX; + balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX; + + INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); + + /* + * Start balloon. + */ + if (!vmballoon_send_start(&balloon)) { + pr_err("failed to send start command to the host\n"); + error = -EIO; + goto fail; + } + + if (!vmballoon_send_guest_id(&balloon)) { + pr_err("failed to send guest ID to the host\n"); + error = -EIO; + goto fail; + } + + error = vmballoon_debugfs_init(&balloon); + if (error) + goto fail; + + queue_delayed_work(vmballoon_wq, &balloon.dwork, 0); + + return 0; + +fail: + destroy_workqueue(vmballoon_wq); + return error; +} +module_init(vmballoon_init); + +static void __exit vmballoon_exit(void) +{ + cancel_delayed_work_sync(&balloon.dwork); + destroy_workqueue(vmballoon_wq); + + vmballoon_debugfs_exit(&balloon); + + /* + * Deallocate all reserved memory, and reset connection with monitor. + * Reset connection before deallocating memory to avoid potential for + * additional spurious resets from guest touching deallocated pages. + */ + vmballoon_send_start(&balloon); + vmballoon_pop(&balloon); +} +module_exit(vmballoon_exit); |