Diffstat (limited to 'drivers')
-rw-r--r-- drivers/Kconfig | 2
-rw-r--r-- drivers/Makefile | 1
-rw-r--r-- drivers/ata/libata-core.c | 10
-rw-r--r-- drivers/ata/libata-scsi.c | 2
-rw-r--r-- drivers/base/memory.c | 9
-rw-r--r-- drivers/block/DAC960.c | 3
-rw-r--r-- drivers/block/Kconfig | 6
-rw-r--r-- drivers/block/Makefile | 2
-rw-r--r-- drivers/block/cciss.c | 4
-rw-r--r-- drivers/block/cpqarray.c | 4
-rw-r--r-- drivers/block/cryptoloop.c | 12
-rw-r--r-- drivers/block/lguest_blk.c | 421
-rw-r--r-- drivers/block/sunvdc.c | 1
-rw-r--r-- drivers/block/sx8.c | 2
-rw-r--r-- drivers/block/ub.c | 11
-rw-r--r-- drivers/block/viodasd.c | 2
-rw-r--r-- drivers/block/virtio_blk.c | 308
-rw-r--r-- drivers/bluetooth/Kconfig | 35
-rw-r--r-- drivers/bluetooth/Makefile | 4
-rw-r--r-- drivers/bluetooth/bluecard_cs.c | 5
-rw-r--r-- drivers/bluetooth/bpa10x.c | 624
-rw-r--r-- drivers/bluetooth/bt3c_cs.c | 5
-rw-r--r-- drivers/bluetooth/btsdio.c | 406
-rw-r--r-- drivers/bluetooth/btuart_cs.c | 5
-rw-r--r-- drivers/bluetooth/btusb.c | 564
-rw-r--r-- drivers/bluetooth/dtl1_cs.c | 5
-rw-r--r-- drivers/bluetooth/hci_bcsp.c | 3
-rw-r--r-- drivers/bluetooth/hci_ldisc.c | 8
-rw-r--r-- drivers/bluetooth/hci_ll.c | 531
-rw-r--r-- drivers/bluetooth/hci_uart.h | 8
-rw-r--r-- drivers/char/Kconfig | 4
-rw-r--r-- drivers/char/Makefile | 2
-rw-r--r-- drivers/char/cyclades.c | 2
-rw-r--r-- drivers/char/hvc_lguest.c | 177
-rw-r--r-- drivers/char/virtio_console.c | 225
-rw-r--r-- drivers/firewire/fw-ohci.c | 13
-rw-r--r-- drivers/ide/cris/ide-cris.c | 4
-rw-r--r-- drivers/ide/ide-probe.c | 5
-rw-r--r-- drivers/ide/ide-taskfile.c | 2
-rw-r--r-- drivers/ide/mips/au1xxx-ide.c | 6
-rw-r--r-- drivers/ieee1394/dma.c | 4
-rw-r--r-- drivers/ieee1394/sbp2.c | 2
-rw-r--r-- drivers/infiniband/core/cma.c | 160
-rw-r--r-- drivers/infiniband/core/umem.c | 11
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 8
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h | 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_hca.c | 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_main.c | 20
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mrmw.c | 63
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c | 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_dma.c | 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 16
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cq.c | 53
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_doorbell.h | 13
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_eq.c | 21
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_memfree.c | 24
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_qp.c | 45
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_srq.c | 11
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 15
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 114
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 52
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4
-rw-r--r-- drivers/infiniband/ulp/iser/iser_memory.c | 8
-rw-r--r-- drivers/input/keyboard/bf54x-keys.c | 1
-rw-r--r-- drivers/input/mouse/appletouch.c | 25
-rw-r--r-- drivers/input/serio/i8042.c | 4
-rw-r--r-- drivers/input/serio/i8042.h | 22
-rw-r--r-- drivers/input/touchscreen/Kconfig | 6
-rw-r--r-- drivers/input/touchscreen/usbtouchscreen.c | 36
-rw-r--r-- drivers/kvm/Kconfig | 4
-rw-r--r-- drivers/kvm/kvm_main.c | 37
-rw-r--r-- drivers/kvm/lapic.c | 38
-rw-r--r-- drivers/kvm/mmu.c | 3
-rw-r--r-- drivers/kvm/vmx.c | 16
-rw-r--r-- drivers/kvm/x86_emulate.c | 77
-rw-r--r-- drivers/lguest/Kconfig | 13
-rw-r--r-- drivers/lguest/Makefile | 10
-rw-r--r-- drivers/lguest/core.c | 568
-rw-r--r-- drivers/lguest/hypercalls.c | 177
-rw-r--r-- drivers/lguest/interrupts_and_traps.c | 125
-rw-r--r-- drivers/lguest/io.c | 626
-rw-r--r-- drivers/lguest/lg.h | 189
-rw-r--r-- drivers/lguest/lguest.c | 1108
-rw-r--r-- drivers/lguest/lguest_asm.S | 93
-rw-r--r-- drivers/lguest/lguest_bus.c | 218
-rw-r--r-- drivers/lguest/lguest_device.c | 373
-rw-r--r-- drivers/lguest/lguest_user.c | 138
-rw-r--r-- drivers/lguest/page_tables.c | 250
-rw-r--r-- drivers/lguest/segments.c | 28
-rw-r--r-- drivers/lguest/x86/core.c | 577
-rw-r--r-- drivers/lguest/x86/switcher_32.S (renamed from drivers/lguest/switcher.S) | 7
-rw-r--r-- drivers/md/bitmap.c | 2
-rw-r--r-- drivers/md/dm-crypt.c | 21
-rw-r--r-- drivers/md/raid5.c | 17
-rw-r--r-- drivers/media/common/ir-keymaps.c | 70
-rw-r--r-- drivers/media/common/saa7146_core.c | 3
-rw-r--r-- drivers/media/dvb/cinergyT2/cinergyT2.c | 42
-rw-r--r-- drivers/media/dvb/dvb-core/dvb_ca_en50221.c | 2
-rw-r--r-- drivers/media/dvb/dvb-usb/dib0700_devices.c | 2
-rw-r--r-- drivers/media/radio/miropcm20-radio.c | 1
-rw-r--r-- drivers/media/radio/radio-gemtek.c | 1
-rw-r--r-- drivers/media/video/arv.c | 1
-rw-r--r-- drivers/media/video/bt8xx/bttv-driver.c | 3
-rw-r--r-- drivers/media/video/bw-qcam.c | 1
-rw-r--r-- drivers/media/video/c-qcam.c | 1
-rw-r--r-- drivers/media/video/cpia.c | 5
-rw-r--r-- drivers/media/video/cpia2/cpia2_v4l.c | 5
-rw-r--r-- drivers/media/video/cx23885/cx23885-core.c | 6
-rw-r--r-- drivers/media/video/cx88/cx88-alsa.c | 86
-rw-r--r-- drivers/media/video/cx88/cx88-blackbird.c | 57
-rw-r--r-- drivers/media/video/cx88/cx88-dvb.c | 3
-rw-r--r-- drivers/media/video/cx88/cx88-mpeg.c | 133
-rw-r--r-- drivers/media/video/cx88/cx88-video.c | 1
-rw-r--r-- drivers/media/video/cx88/cx88-vp3054-i2c.c | 16
-rw-r--r-- drivers/media/video/cx88/cx88.h | 24
-rw-r--r-- drivers/media/video/em28xx/em28xx-core.c | 3
-rw-r--r-- drivers/media/video/em28xx/em28xx-video.c | 2
-rw-r--r-- drivers/media/video/et61x251/et61x251_core.c | 1
-rw-r--r-- drivers/media/video/ir-kbd-i2c.c | 1
-rw-r--r-- drivers/media/video/ivtv/ivtv-driver.c | 11
-rw-r--r-- drivers/media/video/ivtv/ivtv-driver.h | 1
-rw-r--r-- drivers/media/video/ivtv/ivtv-fileops.c | 8
-rw-r--r-- drivers/media/video/ivtv/ivtv-ioctl.c | 13
-rw-r--r-- drivers/media/video/ivtv/ivtv-streams.c | 116
-rw-r--r-- drivers/media/video/ivtv/ivtv-streams.h | 1
-rw-r--r-- drivers/media/video/ivtv/ivtv-udma.c | 4
-rw-r--r-- drivers/media/video/ivtv/ivtv-yuv.c | 160
-rw-r--r-- drivers/media/video/ivtv/ivtv-yuv.h | 1
-rw-r--r-- drivers/media/video/ivtv/ivtvfb.c | 92
-rw-r--r-- drivers/media/video/meye.c | 1
-rw-r--r-- drivers/media/video/ov511.c | 1
-rw-r--r-- drivers/media/video/planb.c | 1
-rw-r--r-- drivers/media/video/pms.c | 1
-rw-r--r-- drivers/media/video/pvrusb2/pvrusb2-encoder.c | 6
-rw-r--r-- drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h | 11
-rw-r--r-- drivers/media/video/pvrusb2/pvrusb2-hdw.c | 3
-rw-r--r-- drivers/media/video/pvrusb2/pvrusb2-v4l2.c | 3
-rw-r--r-- drivers/media/video/pwc/pwc-if.c | 1
-rw-r--r-- drivers/media/video/saa7134/saa6752hs.c | 111
-rw-r--r-- drivers/media/video/saa7134/saa7134-core.c | 44
-rw-r--r-- drivers/media/video/saa7134/saa7134-empress.c | 12
-rw-r--r-- drivers/media/video/saa7134/saa7134-input.c | 29
-rw-r--r-- drivers/media/video/saa7134/saa7134-tvaudio.c | 32
-rw-r--r-- drivers/media/video/saa7134/saa7134-video.c | 28
-rw-r--r-- drivers/media/video/saa7134/saa7134.h | 7
-rw-r--r-- drivers/media/video/se401.c | 1
-rw-r--r-- drivers/media/video/sn9c102/sn9c102_core.c | 1
-rw-r--r-- drivers/media/video/stradis.c | 1
-rw-r--r-- drivers/media/video/stv680.c | 1
-rw-r--r-- drivers/media/video/tuner-core.c | 2
-rw-r--r-- drivers/media/video/usbvideo/usbvideo.c | 1
-rw-r--r-- drivers/media/video/usbvideo/vicam.c | 1
-rw-r--r-- drivers/media/video/usbvision/usbvision-video.c | 3
-rw-r--r-- drivers/media/video/v4l2-common.c | 2
-rw-r--r-- drivers/media/video/videobuf-core.c | 2
-rw-r--r-- drivers/media/video/videobuf-dma-sg.c | 9
-rw-r--r-- drivers/media/video/videocodec.c | 4
-rw-r--r-- drivers/media/video/videodev.c | 42
-rw-r--r-- drivers/media/video/vivi.c | 1
-rw-r--r-- drivers/media/video/w9966.c | 1
-rw-r--r-- drivers/media/video/w9968cf.c | 1
-rw-r--r-- drivers/media/video/zc0301/zc0301_core.c | 1
-rw-r--r-- drivers/media/video/zoran_card.c | 10
-rw-r--r-- drivers/media/video/zoran_driver.c | 2
-rw-r--r-- drivers/mmc/card/queue.c | 15
-rw-r--r-- drivers/mmc/host/at91_mci.c | 8
-rw-r--r-- drivers/mmc/host/au1xmmc.c | 11
-rw-r--r-- drivers/mmc/host/imxmmc.c | 2
-rw-r--r-- drivers/mmc/host/mmc_spi.c | 8
-rw-r--r-- drivers/mmc/host/mmci.h | 2
-rw-r--r-- drivers/mmc/host/omap.c | 4
-rw-r--r-- drivers/mmc/host/sdhci.c | 3
-rw-r--r-- drivers/mmc/host/tifm_sd.c | 8
-rw-r--r-- drivers/mmc/host/wbsd.c | 6
-rw-r--r-- drivers/mtd/chips/cfi_cmdset_0001.c | 146
-rw-r--r-- drivers/mtd/nand/Kconfig | 2
-rw-r--r-- drivers/mtd/nand/diskonchip.c | 4
-rw-r--r-- drivers/mtd/nand/nand_base.c | 6
-rw-r--r-- drivers/mtd/nand/nand_ecc.c | 2
-rw-r--r-- drivers/mtd/nand/nandsim.c | 2
-rw-r--r-- drivers/mtd/nand/s3c2410.c | 14
-rw-r--r-- drivers/mtd/onenand/onenand_sim.c | 50
-rw-r--r-- drivers/net/Kconfig | 6
-rw-r--r-- drivers/net/Makefile | 2
-rw-r--r-- drivers/net/cpmac.c | 2
-rw-r--r-- drivers/net/fec.c | 24
-rw-r--r-- drivers/net/lguest_net.c | 555
-rw-r--r-- drivers/net/mlx4/fw.c | 2
-rw-r--r-- drivers/net/mlx4/icm.c | 14
-rw-r--r-- drivers/net/mv643xx_eth.c | 1
-rw-r--r-- drivers/net/niu.c | 34
-rw-r--r-- drivers/net/ppp_mppe.c | 6
-rw-r--r-- drivers/net/r8169.c | 406
-rw-r--r-- drivers/net/tg3.c | 95
-rw-r--r-- drivers/net/tg3.h | 11
-rw-r--r-- drivers/net/virtio_net.c | 435
-rw-r--r-- drivers/parisc/ccio-dma.c | 1
-rw-r--r-- drivers/parisc/lba_pci.c | 51
-rw-r--r-- drivers/parisc/pdc_stable.c | 11
-rw-r--r-- drivers/parisc/sba_iommu.c | 5
-rw-r--r-- drivers/parisc/superio.c | 4
-rw-r--r-- drivers/pci/Makefile | 3
-rw-r--r-- drivers/pci/dmar.c | 329
-rw-r--r-- drivers/pci/intel-iommu.c | 2271
-rw-r--r-- drivers/pci/intel-iommu.h | 325
-rw-r--r-- drivers/pci/iova.c | 394
-rw-r--r-- drivers/pci/iova.h | 63
-rw-r--r-- drivers/pci/pci.h | 1
-rw-r--r-- drivers/pci/probe.c | 14
-rw-r--r-- drivers/pci/search.c | 34
-rw-r--r-- drivers/power/apm_power.c | 141
-rw-r--r-- drivers/s390/char/raw3270.c | 26
-rw-r--r-- drivers/s390/char/tape_class.c | 19
-rw-r--r-- drivers/s390/char/tape_class.h | 4
-rw-r--r-- drivers/s390/char/vmlogrdr.c | 15
-rw-r--r-- drivers/s390/cio/chp.c | 12
-rw-r--r-- drivers/s390/cio/css.c | 9
-rw-r--r-- drivers/s390/scsi/zfcp_aux.c | 1
-rw-r--r-- drivers/s390/scsi/zfcp_def.h | 4
-rw-r--r-- drivers/s390/scsi/zfcp_erp.c | 10
-rw-r--r-- drivers/sbus/char/vfc_dev.c | 2
-rw-r--r-- drivers/scsi/3w-9xxx.c | 4
-rw-r--r-- drivers/scsi/3w-xxxx.c | 2
-rw-r--r-- drivers/scsi/NCR5380.c | 6
-rw-r--r-- drivers/scsi/NCR53C9x.c | 4
-rw-r--r-- drivers/scsi/NCR53c406a.c | 6
-rw-r--r-- drivers/scsi/aacraid/aachba.c | 2
-rw-r--r-- drivers/scsi/aha152x.c | 2
-rw-r--r-- drivers/scsi/aha1542.c | 8
-rw-r--r-- drivers/scsi/arcmsr/arcmsr_hba.c | 4
-rw-r--r-- drivers/scsi/atari_NCR5380.c | 6
-rw-r--r-- drivers/scsi/eata_pio.c | 4
-rw-r--r-- drivers/scsi/fd_mcs.c | 6
-rw-r--r-- drivers/scsi/fdomain.c | 7
-rw-r--r-- drivers/scsi/gdth.c | 6
-rw-r--r-- drivers/scsi/ibmmca.c | 2
-rw-r--r-- drivers/scsi/ide-scsi.c | 12
-rw-r--r-- drivers/scsi/imm.c | 8
-rw-r--r-- drivers/scsi/in2000.c | 4
-rw-r--r-- drivers/scsi/ipr.c | 19
-rw-r--r-- drivers/scsi/ips.c | 6
-rw-r--r-- drivers/scsi/iscsi_tcp.c | 15
-rw-r--r-- drivers/scsi/megaraid.c | 8
-rw-r--r-- drivers/scsi/megaraid/megaraid_mbox.c | 12
-rw-r--r-- drivers/scsi/oktagon_esp.c | 6
-rw-r--r-- drivers/scsi/osst.c | 32
-rw-r--r-- drivers/scsi/pcmcia/nsp_cs.h | 2
-rw-r--r-- drivers/scsi/pcmcia/sym53c500_cs.c | 6
-rw-r--r-- drivers/scsi/ppa.c | 7
-rw-r--r-- drivers/scsi/ps3rom.c | 6
-rw-r--r-- drivers/scsi/qlogicfas408.c | 2
-rw-r--r-- drivers/scsi/scsi_debug.c | 4
-rw-r--r-- drivers/scsi/scsi_lib.c | 13
-rw-r--r-- drivers/scsi/seagate.c | 8
-rw-r--r-- drivers/scsi/sg.c | 30
-rw-r--r-- drivers/scsi/st.c | 8
-rw-r--r-- drivers/scsi/sun3_NCR5380.c | 3
-rw-r--r-- drivers/scsi/sym53c416.c | 2
-rw-r--r-- drivers/scsi/tmscsim.c | 5
-rw-r--r-- drivers/scsi/ultrastor.c | 2
-rw-r--r-- drivers/scsi/wd33c93.c | 6
-rw-r--r-- drivers/scsi/wd7000.c | 2
-rw-r--r-- drivers/serial/Kconfig | 2
-rw-r--r-- drivers/serial/mcf.c | 653
-rw-r--r-- drivers/usb/core/message.c | 8
-rw-r--r-- drivers/usb/image/microtek.c | 5
-rw-r--r-- drivers/usb/misc/usbtest.c | 4
-rw-r--r-- drivers/usb/storage/protocol.c | 2
-rw-r--r-- drivers/virtio/Kconfig | 8
-rw-r--r-- drivers/virtio/Makefile | 2
-rw-r--r-- drivers/virtio/config.c | 13
-rw-r--r-- drivers/virtio/virtio.c | 189
-rw-r--r-- drivers/virtio/virtio_ring.c | 313
-rw-r--r-- drivers/watchdog/mpc5200_wdt.c | 3
275 files changed, 10799 insertions, 6497 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 34f40ea0ba6..f4076d9e990 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -94,5 +94,5 @@ source "drivers/kvm/Kconfig"
source "drivers/uio/Kconfig"
-source "drivers/lguest/Kconfig"
+source "drivers/virtio/Kconfig"
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index cfe38ffff28..560496b4330 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -91,3 +91,4 @@ obj-$(CONFIG_HID) += hid/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
+obj-$(CONFIG_VIRTIO) += virtio/
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 629eadbd0ec..69092bce1ad 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4296,7 +4296,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc)
sg_last(sg, qc->orig_n_elem)->length += qc->pad_len;
if (pad_buf) {
struct scatterlist *psg = &qc->pad_sgent;
- void *addr = kmap_atomic(psg->page, KM_IRQ0);
+ void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
memcpy(addr + psg->offset, pad_buf, qc->pad_len);
kunmap_atomic(addr, KM_IRQ0);
}
@@ -4686,11 +4686,11 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
* data in this function or read data in ata_sg_clean.
*/
offset = lsg->offset + lsg->length - qc->pad_len;
- psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT);
+ sg_set_page(psg, nth_page(sg_page(lsg), offset >> PAGE_SHIFT));
psg->offset = offset_in_page(offset);
if (qc->tf.flags & ATA_TFLAG_WRITE) {
- void *addr = kmap_atomic(psg->page, KM_IRQ0);
+ void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
memcpy(pad_buf, addr + psg->offset, qc->pad_len);
kunmap_atomic(addr, KM_IRQ0);
}
@@ -4836,7 +4836,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
if (qc->curbytes == qc->nbytes - qc->sect_size)
ap->hsm_task_state = HSM_ST_LAST;
- page = qc->cursg->page;
+ page = sg_page(qc->cursg);
offset = qc->cursg->offset + qc->cursg_ofs;
/* get the current page and offset */
@@ -4988,7 +4988,7 @@ next_sg:
sg = qc->cursg;
- page = sg->page;
+ page = sg_page(sg);
offset = sg->offset + qc->cursg_ofs;
/* get the current page and offset */
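The hunks above are typical of the tree-wide conversion this commit performs: with chained scatterlists, the low bits of the scatterlist's page pointer double as chain and end markers, so drivers can no longer dereference sg->page directly. A minimal sketch of the accessor pattern, assuming the two-argument sg_set_page() visible in these hunks; fill_entry() and map_entry() are hypothetical helpers, not code from this commit:

/*
 * sg_set_page() stores the page without disturbing the chain/end
 * marker bits, and sg_page() strips those bits on read.
 */
#include <linux/scatterlist.h>
#include <linux/highmem.h>

static void fill_entry(struct scatterlist *sg, struct page *page,
		       unsigned int offset, unsigned int len)
{
	sg_set_page(sg, page);
	sg->offset = offset;
	sg->length = len;
}

static void *map_entry(struct scatterlist *sg)
{
	/* Same mapping idiom as the libata hunks above. */
	return kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
}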
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9fbb39cd0f5..5b758b9ad0b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1544,7 +1544,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out)
struct scatterlist *sg = scsi_sglist(cmd);
if (sg) {
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
buflen = sg->length;
} else {
buf = NULL;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c41d0728efe..7868707c7ed 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf)
return len;
}
-static inline int memory_notify(unsigned long val, void *v)
+int memory_notify(unsigned long val, void *v)
{
return blocking_notifier_call_chain(&memory_chain, val, v);
}
@@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
break;
case MEM_OFFLINE:
mem->state = MEM_GOING_OFFLINE;
- memory_notify(MEM_GOING_OFFLINE, NULL);
start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
ret = remove_memory(start_paddr,
PAGES_PER_SECTION << PAGE_SHIFT);
@@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
mem->state = old_state;
break;
}
- memory_notify(MEM_MAPPING_INVALID, NULL);
break;
default:
printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
@@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
WARN_ON(1);
ret = -EINVAL;
}
- /*
- * For now, only notify on successful memory operations
- */
- if (!ret)
- memory_notify(action, NULL);
return ret;
}
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 84d6aa500e2..9030c373ce6 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -44,6 +44,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/random.h>
+#include <linux/scatterlist.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include "DAC960.h"
@@ -345,6 +346,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
Command->V1.ScatterGatherList =
(DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU;
Command->V1.ScatterGatherListDMA = ScatterGatherDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit);
} else {
Command->cmd_sglist = Command->V2.ScatterList;
Command->V2.ScatterGatherList =
@@ -353,6 +355,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
Command->V2.RequestSense =
(DAC960_SCSI_RequestSense_T *)RequestSenseCPU;
Command->V2.RequestSenseDMA = RequestSenseDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit);
}
}
return true;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index ce4b1e484e6..4d0119ea9e3 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -425,4 +425,10 @@ config XEN_BLKDEV_FRONTEND
block device driver. It communicates with a back-end driver
in another domain which drives the actual block device.
+config VIRTIO_BLK
+ tristate "Virtio block driver (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && VIRTIO
+ ---help---
+ This is the virtual block driver for lguest. Say Y or M.
+
endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 014e72121b5..7691505a2e1 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -25,10 +25,10 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
obj-$(CONFIG_VIODASD) += viodasd.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
-obj-$(CONFIG_LGUEST_BLOCK) += lguest_blk.o
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 7c2cfde08f1..5a6fe17fc63 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2610,7 +2610,7 @@ static void do_cciss_request(struct request_queue *q)
(int)creq->nr_sectors);
#endif /* CCISS_DEBUG */
- memset(tmp_sg, 0, sizeof(tmp_sg));
+ sg_init_table(tmp_sg, MAXSGENTRIES);
seg = blk_rq_map_sg(q, creq, tmp_sg);
/* get the DMA records for the setup */
@@ -2621,7 +2621,7 @@ static void do_cciss_request(struct request_queue *q)
for (i = 0; i < seg; i++) {
c->SG[i].Len = tmp_sg[i].length;
- temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page,
+ temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
tmp_sg[i].offset,
tmp_sg[i].length, dir);
c->SG[i].Addr.lower = temp64.val32.lower;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 568603d3043..c8132d95879 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -37,6 +37,7 @@
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
+#include <linux/scatterlist.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -918,6 +919,7 @@ queue_next:
DBGPX(
printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
);
+ sg_init_table(tmp_sg, SG_MAX);
seg = blk_rq_map_sg(q, creq, tmp_sg);
/* Now do all the DMA Mappings */
@@ -929,7 +931,7 @@ DBGPX(
{
c->req.sg[i].size = tmp_sg[i].length;
c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev,
- tmp_sg[i].page,
+ sg_page(&tmp_sg[i]),
tmp_sg[i].offset,
tmp_sg[i].length, dir);
}
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 40535036e89..1b58b010797 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -26,6 +26,7 @@
#include <linux/crypto.h>
#include <linux/blkdev.h>
#include <linux/loop.h>
+#include <linux/scatterlist.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -119,14 +120,17 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
.tfm = tfm,
.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
};
- struct scatterlist sg_out = { NULL, };
- struct scatterlist sg_in = { NULL, };
+ struct scatterlist sg_out;
+ struct scatterlist sg_in;
encdec_cbc_t encdecfunc;
struct page *in_page, *out_page;
unsigned in_offs, out_offs;
int err;
+ sg_init_table(&sg_out, 1);
+ sg_init_table(&sg_in, 1);
+
if (cmd == READ) {
in_page = raw_page;
in_offs = raw_off;
@@ -146,11 +150,11 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
u32 iv[4] = { 0, };
iv[0] = cpu_to_le32(IV & 0xffffffff);
- sg_in.page = in_page;
+ sg_set_page(&sg_in, in_page);
sg_in.offset = in_offs;
sg_in.length = sz;
- sg_out.page = out_page;
+ sg_set_page(&sg_out, out_page);
sg_out.offset = out_offs;
sg_out.length = sz;
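The cryptoloop conversion above also shows the second half of the new contract: a scatterlist table must be initialized with sg_init_table() before any entry is filled in, since that call both zeroes the table and sets the end marker that sg chaining depends on. A hedged sketch, where NSEGS and prepare() are hypothetical rather than symbols from this commit:

#include <linux/scatterlist.h>

#define NSEGS 16	/* hypothetical segment limit */

static void prepare(struct scatterlist sg[NSEGS], void *buf, unsigned int len)
{
	sg_init_table(sg, NSEGS);	/* zero entries, mark sg[NSEGS-1] as last */
	sg_set_buf(&sg[0], buf, len);	/* then fill individual entries */
}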
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c
deleted file mode 100644
index fa8e42341b8..00000000000
--- a/drivers/block/lguest_blk.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*D:400
- * The Guest block driver
- *
- * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
- * The mechanism is simple: we place the information about the request in the
- * device page, then use SEND_DMA (containing the data for a write, or an empty
- * "ping" DMA for a read).
- :*/
-/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-//#define DEBUG
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/lguest_bus.h>
-
-static char next_block_index = 'a';
-
-/*D:420 Here is the structure which holds all the information we need about
- * each Guest block device.
- *
- * I'm sure at this stage, you're wondering "hey, where was the adventure I was
- * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
- * my blog". I think Real adventures have boring bits, too, and you're in the
- * middle of one. But it gets better. Just not quite yet. */
-struct blockdev
-{
- /* The block queue infrastructure wants a spinlock: it is held while it
- * calls our block request function. We grab it in our interrupt
- * handler so the responses don't mess with new requests. */
- spinlock_t lock;
-
- /* The disk structure registered with kernel. */
- struct gendisk *disk;
-
- /* The major device number for this disk, and the interrupt. We only
- * really keep them here for completeness; we'd need them if we
- * supported device unplugging. */
- int major;
- int irq;
-
- /* The physical address of this device's memory page */
- unsigned long phys_addr;
- /* The mapped memory page for convenient access. */
- struct lguest_block_page *lb_page;
-
- /* We only have a single request outstanding at a time: this is it. */
- struct lguest_dma dma;
- struct request *req;
-};
-
-/*D:495 We originally used end_request() throughout the driver, but it turns
- * out that end_request() is deprecated, and doesn't actually end the request
- * (which seems like a good reason to deprecate it!). It simply ends the first
- * bio. So if we had 3 bios in a "struct request" we would do all 3,
- * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
- * work as we needed to do.
- *
- * This reinforced to me that I do not understand the block layer.
- *
- * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
- * request. This improved disk speed by 130%. */
-static void end_entire_request(struct request *req, int uptodate)
-{
- if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
- BUG();
- add_disk_randomness(req->rq_disk);
- blkdev_dequeue_request(req);
- end_that_request_last(req, uptodate);
-}
-
-/* I'm told there are only two stories in the world worth telling: love and
- * hate. So there used to be a love scene here like this:
- *
- * Launcher: We could make beautiful I/O together, you and I.
- * Guest: My, that's a big disk!
- *
- * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
-
-/*D:490 This is the interrupt handler, called when a block read or write has
- * been completed for us. */
-static irqreturn_t lgb_irq(int irq, void *_bd)
-{
- /* We handed our "struct blockdev" as the argument to request_irq(), so
- * it is passed through to us here. This tells us which device we're
- * dealing with in case we have more than one. */
- struct blockdev *bd = _bd;
- unsigned long flags;
-
- /* We weren't doing anything? Strange, but could happen if we shared
- * interrupts (we don't!). */
- if (!bd->req) {
- pr_debug("No work!\n");
- return IRQ_NONE;
- }
-
- /* Not done yet? That's equally strange. */
- if (!bd->lb_page->result) {
- pr_debug("No result!\n");
- return IRQ_NONE;
- }
-
- /* We have to grab the lock before ending the request. */
- spin_lock_irqsave(&bd->lock, flags);
- /* "result" is 1 for success, 2 for failure: end_entire_request() wants
- * to know whether this succeeded or not. */
- end_entire_request(bd->req, bd->lb_page->result == 1);
- /* Clear out request, it's done. */
- bd->req = NULL;
- /* Reset incoming DMA for next time. */
- bd->dma.used_len = 0;
- /* Ready for more reads or writes */
- blk_start_queue(bd->disk->queue);
- spin_unlock_irqrestore(&bd->lock, flags);
-
- /* The interrupt was for us, we dealt with it. */
- return IRQ_HANDLED;
-}
-
-/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
- * each of which contains "struct bio_vec"s, each of which contains a page, an
- * offset and a length.
- *
- * Fortunately there are iterators to help us walk through the "struct
- * request". Even more fortunately, there were plenty of places to steal the
- * code from. We pack the "struct request" into our "struct lguest_dma" and
- * return the total length. */
-static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
-{
- unsigned int i = 0, len = 0;
- struct req_iterator iter;
- struct bio_vec *bvec;
-
- rq_for_each_segment(bvec, req, iter) {
- /* We told the block layer not to give us too many. */
- BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
- /* If we had a zero-length segment, it would look like
- * the end of the data referred to by the "struct
- * lguest_dma", so make sure that doesn't happen. */
- BUG_ON(!bvec->bv_len);
- /* Convert page & offset to a physical address */
- dma->addr[i] = page_to_phys(bvec->bv_page)
- + bvec->bv_offset;
- dma->len[i] = bvec->bv_len;
- len += bvec->bv_len;
- i++;
- }
- /* If the array isn't full, we mark the end with a 0 length */
- if (i < LGUEST_MAX_DMA_SECTIONS)
- dma->len[i] = 0;
- return len;
-}
-
-/* This creates an empty DMA, useful for prodding the Host without sending data
- * (ie. when we want to do a read) */
-static void empty_dma(struct lguest_dma *dma)
-{
- dma->len[0] = 0;
-}
-
-/*D:470 Setting up a request is fairly easy: */
-static void setup_req(struct blockdev *bd,
- int type, struct request *req, struct lguest_dma *dma)
-{
- /* The type is 1 (write) or 0 (read). */
- bd->lb_page->type = type;
- /* The sector on disk where the read or write starts. */
- bd->lb_page->sector = req->sector;
- /* The result is initialized to 0 (unfinished). */
- bd->lb_page->result = 0;
- /* The current request (so we can end it in the interrupt handler). */
- bd->req = req;
- /* The number of bytes: returned as a side-effect of req_to_dma(),
- * which packs the block layer's "struct request" into our "struct
- * lguest_dma" */
- bd->lb_page->bytes = req_to_dma(req, dma);
-}
-
-/*D:450 Write is pretty straightforward: we pack the request into a "struct
- * lguest_dma", then use SEND_DMA to send the request. */
-static void do_write(struct blockdev *bd, struct request *req)
-{
- struct lguest_dma send;
-
- pr_debug("lgb: WRITE sector %li\n", (long)req->sector);
- setup_req(bd, 1, req, &send);
-
- lguest_send_dma(bd->phys_addr, &send);
-}
-
-/* Read is similar to write, except we pack the request into our receive
- * "struct lguest_dma" and send through an empty DMA just to tell the Host that
- * there's a request pending. */
-static void do_read(struct blockdev *bd, struct request *req)
-{
- struct lguest_dma ping;
-
- pr_debug("lgb: READ sector %li\n", (long)req->sector);
- setup_req(bd, 0, req, &bd->dma);
-
- empty_dma(&ping);
- lguest_send_dma(bd->phys_addr, &ping);
-}
-
-/*D:440 This is where requests come in: we get handed the request queue and are
- * expected to pull a "struct request" off it until we've finished them or
- * we're waiting for a reply: */
-static void do_lgb_request(struct request_queue *q)
-{
- struct blockdev *bd;
- struct request *req;
-
-again:
- /* This sometimes returns NULL even on the very first time around. I
- * wonder if it's something to do with letting elves handle the request
- * queue... */
- req = elv_next_request(q);
- if (!req)
- return;
-
- /* We attached the struct blockdev to the disk: get it back */
- bd = req->rq_disk->private_data;
- /* Sometimes we get repeated requests after blk_stop_queue(), but we
- * can only handle one at a time. */
- if (bd->req)
- return;
-
- /* We only do reads and writes: no tricky business! */
- if (!blk_fs_request(req)) {
- pr_debug("Got non-command 0x%08x\n", req->cmd_type);
- req->errors++;
- end_entire_request(req, 0);
- goto again;
- }
-
- if (rq_data_dir(req) == WRITE)
- do_write(bd, req);
- else
- do_read(bd, req);
-
- /* We've put out the request, so stop any more coming in until we get
- * an interrupt, which takes us to lgb_irq() to re-enable the queue. */
- blk_stop_queue(q);
-}
-
-/*D:430 This is the "struct block_device_operations" we attach to the disk at
- * the end of lguestblk_probe(). It doesn't seem to want much. */
-static struct block_device_operations lguestblk_fops = {
- .owner = THIS_MODULE,
-};
-
-/*D:425 Setting up a disk device seems to involve a lot of code. I'm not sure
- * quite why. I do know that the IDE code sent two or three of the maintainers
- * insane, perhaps this is the fringe of the same disease?
- *
- * As in the console code, the probe function gets handed the generic
- * lguest_device from lguest_bus.c: */
-static int lguestblk_probe(struct lguest_device *lgdev)
-{
- struct blockdev *bd;
- int err;
- int irqflags = IRQF_SHARED;
-
- /* First we allocate our own "struct blockdev" and initialize the easy
- * fields. */
- bd = kmalloc(sizeof(*bd), GFP_KERNEL);
- if (!bd)
- return -ENOMEM;
-
- spin_lock_init(&bd->lock);
- bd->irq = lgdev_irq(lgdev);
- bd->req = NULL;
- bd->dma.used_len = 0;
- bd->dma.len[0] = 0;
- /* The descriptor in the lguest_devices array provided by the Host
- * gives the Guest the physical page number of the device's page. */
- bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);
-
- /* We use lguest_map() to get a pointer to the device page */
- bd->lb_page = lguest_map(bd->phys_addr, 1);
- if (!bd->lb_page) {
- err = -ENOMEM;
- goto out_free_bd;
- }
-
- /* We need a major device number: 0 means "assign one dynamically". */
- bd->major = register_blkdev(0, "lguestblk");
- if (bd->major < 0) {
- err = bd->major;
- goto out_unmap;
- }
-
- /* This allocates a "struct gendisk" where we pack all the information
- * about the disk which the rest of Linux sees. The argument is the
- * number of minor devices desired: we need one minor for the main
- * disk, and one for each partition. Of course, we can't possibly know
- * how many partitions are on the disk (add_disk does that).
- */
- bd->disk = alloc_disk(16);
- if (!bd->disk) {
- err = -ENOMEM;
- goto out_unregister_blkdev;
- }
-
- /* Every disk needs a queue for requests to come in: we set up the
- * queue with a callback function (the core of our driver) and the lock
- * to use. */
- bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock);
- if (!bd->disk->queue) {
- err = -ENOMEM;
- goto out_put_disk;
- }
-
- /* We can only handle a certain number of pointers in our SEND_DMA
- * call, so we set that with blk_queue_max_hw_segments(). This is not
- * to be confused with blk_queue_max_phys_segments() of course! I
- * know, who could possibly confuse the two?
- *
- * Well, it's simple to tell them apart: this one seems to work and the
- * other one didn't. */
- blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS);
-
- /* Due to technical limitations of our Host (and simple coding) we
- * can't have a single buffer which crosses a page boundary. Tell it
- * here. This means that our maximum request size is 16
- * (LGUEST_MAX_DMA_SECTIONS) pages. */
- blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1);
-
- /* We name our disk: this becomes the device name when udev does its
- * magic thing and creates the device node, such as /dev/lgba.
- * next_block_index is a global which starts at 'a'. Unfortunately
- * this simple increment logic means that the 27th disk will be called
- * "/dev/lgb{". In that case, I recommend having at least 29 disks, so
- * your /dev directory will be balanced. */
- sprintf(bd->disk->disk_name, "lgb%c", next_block_index++);
-
- /* We look to the device descriptor again to see if this device's
- * interrupts are expected to be random. If they are, we tell the irq
- * subsystem. At the moment this bit is always set. */
- if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
- irqflags |= IRQF_SAMPLE_RANDOM;
-
- /* Now we have the name and irqflags, we can request the interrupt; we
- * give it the "struct blockdev" we have set up to pass to lgb_irq()
- * when there is an interrupt. */
- err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd);
- if (err)
- goto out_cleanup_queue;
-
- /* We bind our one-entry DMA pool to the key for this block device so
- * the Host can reply to our requests. The key is equal to the
- * physical address of the device's page, which is conveniently
- * unique. */
- err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
- if (err)
- goto out_free_irq;
-
- /* We finish our disk initialization and add the disk to the system. */
- bd->disk->major = bd->major;
- bd->disk->first_minor = 0;
- bd->disk->private_data = bd;
- bd->disk->fops = &lguestblk_fops;
- /* This is initialized to the disk size by the Launcher. */
- set_capacity(bd->disk, bd->lb_page->num_sectors);
- add_disk(bd->disk);
-
- printk(KERN_INFO "%s: device %i at major %d\n",
- bd->disk->disk_name, lgdev->index, bd->major);
-
- /* We don't need to keep the "struct blockdev" around, but if we ever
- * implemented device removal, we'd need this. */
- lgdev->private = bd;
- return 0;
-
-out_free_irq:
- free_irq(bd->irq, bd);
-out_cleanup_queue:
- blk_cleanup_queue(bd->disk->queue);
-out_put_disk:
- put_disk(bd->disk);
-out_unregister_blkdev:
- unregister_blkdev(bd->major, "lguestblk");
-out_unmap:
- lguest_unmap(bd->lb_page);
-out_free_bd:
- kfree(bd);
- return err;
-}
-
-/*D:410 The boilerplate code for registering the lguest block driver is just
- * like the console: */
-static struct lguest_driver lguestblk_drv = {
- .name = "lguestblk",
- .owner = THIS_MODULE,
- .device_type = LGUEST_DEVICE_T_BLOCK,
- .probe = lguestblk_probe,
-};
-
-static __init int lguestblk_init(void)
-{
- return register_lguest_driver(&lguestblk_drv);
-}
-module_init(lguestblk_init);
-
-MODULE_DESCRIPTION("Lguest block driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 317a790c153..7276f7d207c 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -388,6 +388,7 @@ static int __send_request(struct request *req)
op = VD_OP_BWRITE;
}
+ sg_init_table(sg, port->ring_cookies);
nsg = blk_rq_map_sg(req->q, req, sg);
len = 0;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 402209fec59..52dc5e13171 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -27,6 +27,7 @@
#include <linux/hdreg.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>
+#include <linux/scatterlist.h>
#include <asm/io.h>
#include <asm/uaccess.h>
@@ -522,6 +523,7 @@ static struct carm_request *carm_get_request(struct carm_host *host)
host->n_msgs++;
assert(host->n_msgs <= CARM_MAX_REQ);
+ sg_init_table(crq->sg, CARM_MAX_REQ_SG);
return crq;
}
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index c57dd2b3a0c..14143f2c484 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -25,6 +25,7 @@
#include <linux/usb_usual.h>
#include <linux/blkdev.h>
#include <linux/timer.h>
+#include <linux/scatterlist.h>
#include <scsi/scsi.h>
#define DRV_NAME "ub"
@@ -656,6 +657,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
if ((cmd = ub_get_cmd(lun)) == NULL)
return -1;
memset(cmd, 0, sizeof(struct ub_scsi_cmd));
+ sg_init_table(cmd->sgv, UB_MAX_REQ_SG);
blkdev_dequeue_request(rq);
@@ -1309,9 +1311,8 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
else
pipe = sc->send_bulk_pipe;
sc->last_pipe = pipe;
- usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe,
- page_address(sg->page) + sg->offset, sg->length,
- ub_urb_complete, sc);
+ usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg),
+ sg->length, ub_urb_complete, sc);
sc->work_urb.actual_length = 0;
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
@@ -1427,7 +1428,7 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
scmd->state = UB_CMDST_INIT;
scmd->nsg = 1;
sg = &scmd->sgv[0];
- sg->page = virt_to_page(sc->top_sense);
+ sg_set_page(sg, virt_to_page(sc->top_sense));
sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
sg->length = UB_SENSE_SIZE;
scmd->len = UB_SENSE_SIZE;
@@ -1863,7 +1864,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
cmd->state = UB_CMDST_INIT;
cmd->nsg = 1;
sg = &cmd->sgv[0];
- sg->page = virt_to_page(p);
+ sg_set_page(sg, virt_to_page(p));
sg->offset = (unsigned long)p & (PAGE_SIZE-1);
sg->length = 8;
cmd->len = 8;
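The usb_fill_bulk_urb() hunk above also replaces the open-coded page_address(sg->page) + sg->offset with sg_virt(). Roughly what that helper expands to, sketched here (valid only when the entry references a permanently mapped lowmem page; sg_virt_sketch() is illustrative, not the kernel's copy):

#include <linux/scatterlist.h>
#include <linux/mm.h>

static inline void *sg_virt_sketch(struct scatterlist *sg)
{
	return page_address(sg_page(sg)) + sg->offset;
}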
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index e824b672e05..ab5d404faa1 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -41,6 +41,7 @@
#include <linux/dma-mapping.h>
#include <linux/completion.h>
#include <linux/device.h>
+#include <linux/scatterlist.h>
#include <asm/uaccess.h>
#include <asm/vio.h>
@@ -270,6 +271,7 @@ static int send_request(struct request *req)
d = req->rq_disk->private_data;
/* Now build the scatter-gather list */
+ sg_init_table(sg, VIOMAXBLOCKDMA);
nsg = blk_rq_map_sg(req->q, req, sg);
nsg = dma_map_sg(d->dev, sg, nsg, direction);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
new file mode 100644
index 00000000000..a901eee64ba
--- /dev/null
+++ b/drivers/block/virtio_blk.c
@@ -0,0 +1,308 @@
+//#define DEBUG
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/virtio.h>
+#include <linux/virtio_blk.h>
+
+static unsigned char virtblk_index = 'a';
+struct virtio_blk
+{
+ spinlock_t lock;
+
+ struct virtio_device *vdev;
+ struct virtqueue *vq;
+
+ /* The disk structure for the kernel. */
+ struct gendisk *disk;
+
+ /* Request tracking. */
+ struct list_head reqs;
+
+ mempool_t *pool;
+
+ /* Scatterlist: can be too big for stack. */
+ struct scatterlist sg[3+MAX_PHYS_SEGMENTS];
+};
+
+struct virtblk_req
+{
+ struct list_head list;
+ struct request *req;
+ struct virtio_blk_outhdr out_hdr;
+ struct virtio_blk_inhdr in_hdr;
+};
+
+static bool blk_done(struct virtqueue *vq)
+{
+ struct virtio_blk *vblk = vq->vdev->priv;
+ struct virtblk_req *vbr;
+ unsigned int len;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vblk->lock, flags);
+ while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
+ int uptodate;
+ switch (vbr->in_hdr.status) {
+ case VIRTIO_BLK_S_OK:
+ uptodate = 1;
+ break;
+ case VIRTIO_BLK_S_UNSUPP:
+ uptodate = -ENOTTY;
+ break;
+ default:
+ uptodate = 0;
+ break;
+ }
+
+ end_dequeued_request(vbr->req, uptodate);
+ list_del(&vbr->list);
+ mempool_free(vbr, vblk->pool);
+ }
+ /* In case queue is stopped waiting for more buffers. */
+ blk_start_queue(vblk->disk->queue);
+ spin_unlock_irqrestore(&vblk->lock, flags);
+ return true;
+}
+
+static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
+ struct request *req)
+{
+ unsigned long num, out, in;
+ struct virtblk_req *vbr;
+
+ vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+ if (!vbr)
+ /* When another request finishes we'll try again. */
+ return false;
+
+ vbr->req = req;
+ if (blk_fs_request(vbr->req)) {
+ vbr->out_hdr.type = 0;
+ vbr->out_hdr.sector = vbr->req->sector;
+ vbr->out_hdr.ioprio = vbr->req->ioprio;
+ } else if (blk_pc_request(vbr->req)) {
+ vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
+ vbr->out_hdr.sector = 0;
+ vbr->out_hdr.ioprio = vbr->req->ioprio;
+ } else {
+ /* We don't put anything else in the queue. */
+ BUG();
+ }
+
+ if (blk_barrier_rq(vbr->req))
+ vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
+
+ /* We have to zero this, otherwise blk_rq_map_sg gets upset. */
+ memset(vblk->sg, 0, sizeof(vblk->sg));
+ sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
+ num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
+ sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+
+ if (rq_data_dir(vbr->req) == WRITE) {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+ out = 1 + num;
+ in = 1;
+ } else {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+ out = 1;
+ in = 1 + num;
+ }
+
+ if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+ mempool_free(vbr, vblk->pool);
+ return false;
+ }
+
+ list_add_tail(&vbr->list, &vblk->reqs);
+ return true;
+}
+
+static void do_virtblk_request(struct request_queue *q)
+{
+ struct virtio_blk *vblk = NULL;
+ struct request *req;
+ unsigned int issued = 0;
+
+ while ((req = elv_next_request(q)) != NULL) {
+ vblk = req->rq_disk->private_data;
+ BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg));
+
+ /* If this request fails, stop queue and wait for something to
+ finish to restart it. */
+ if (!do_req(q, vblk, req)) {
+ blk_stop_queue(q);
+ break;
+ }
+ blkdev_dequeue_request(req);
+ issued++;
+ }
+
+ if (issued)
+ vblk->vq->vq_ops->kick(vblk->vq);
+}
+
+static int virtblk_ioctl(struct inode *inode, struct file *filp,
+ unsigned cmd, unsigned long data)
+{
+ return scsi_cmd_ioctl(filp, inode->i_bdev->bd_disk->queue,
+ inode->i_bdev->bd_disk, cmd,
+ (void __user *)data);
+}
+
+static struct block_device_operations virtblk_fops = {
+ .ioctl = virtblk_ioctl,
+ .owner = THIS_MODULE,
+};
+
+static int virtblk_probe(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk;
+ int err, major;
+ void *token;
+ unsigned int len;
+ u64 cap;
+ u32 v;
+
+ vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
+ if (!vblk) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&vblk->reqs);
+ spin_lock_init(&vblk->lock);
+ vblk->vdev = vdev;
+
+ /* We expect one virtqueue, for output. */
+ vblk->vq = vdev->config->find_vq(vdev, blk_done);
+ if (IS_ERR(vblk->vq)) {
+ err = PTR_ERR(vblk->vq);
+ goto out_free_vblk;
+ }
+
+ vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+ if (!vblk->pool) {
+ err = -ENOMEM;
+ goto out_free_vq;
+ }
+
+ major = register_blkdev(0, "virtblk");
+ if (major < 0) {
+ err = major;
+ goto out_mempool;
+ }
+
+ /* FIXME: How many partitions? How long is a piece of string? */
+ vblk->disk = alloc_disk(1 << 4);
+ if (!vblk->disk) {
+ err = -ENOMEM;
+ goto out_unregister_blkdev;
+ }
+
+ vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ if (!vblk->disk->queue) {
+ err = -ENOMEM;
+ goto out_put_disk;
+ }
+
+ sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++);
+ vblk->disk->major = major;
+ vblk->disk->first_minor = 0;
+ vblk->disk->private_data = vblk;
+ vblk->disk->fops = &virtblk_fops;
+
+ /* If barriers are supported, tell block layer that queue is ordered */
+ token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len);
+ if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER))
+ blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+
+ err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap);
+ if (err) {
+ dev_err(&vdev->dev, "Bad/missing capacity in config\n");
+ goto out_put_disk;
+ }
+
+ /* If capacity is too big, truncate with warning. */
+ if ((sector_t)cap != cap) {
+ dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+ (unsigned long long)cap);
+ cap = (sector_t)-1;
+ }
+ set_capacity(vblk->disk, cap);
+
+ err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v);
+ if (!err)
+ blk_queue_max_segment_size(vblk->disk->queue, v);
+ else if (err != -ENOENT) {
+ dev_err(&vdev->dev, "Bad SIZE_MAX in config\n");
+ goto out_put_disk;
+ }
+
+ err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v);
+ if (!err)
+ blk_queue_max_hw_segments(vblk->disk->queue, v);
+ else if (err != -ENOENT) {
+ dev_err(&vdev->dev, "Bad SEG_MAX in config\n");
+ goto out_put_disk;
+ }
+
+ add_disk(vblk->disk);
+ return 0;
+
+out_put_disk:
+ put_disk(vblk->disk);
+out_unregister_blkdev:
+ unregister_blkdev(major, "virtblk");
+out_mempool:
+ mempool_destroy(vblk->pool);
+out_free_vq:
+ vdev->config->del_vq(vblk->vq);
+out_free_vblk:
+ kfree(vblk);
+out:
+ return err;
+}
+
+static void virtblk_remove(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk = vdev->priv;
+ int major = vblk->disk->major;
+
+ BUG_ON(!list_empty(&vblk->reqs));
+ blk_cleanup_queue(vblk->disk->queue);
+ put_disk(vblk->disk);
+ unregister_blkdev(major, "virtblk");
+ mempool_destroy(vblk->pool);
+ kfree(vblk);
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct virtio_driver virtio_blk = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtblk_probe,
+ .remove = __devexit_p(virtblk_remove),
+};
+
+static int __init init(void)
+{
+ return register_virtio_driver(&virtio_blk);
+}
+
+static void __exit fini(void)
+{
+ unregister_virtio_driver(&virtio_blk);
+}
+module_init(init);
+module_exit(fini);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio block driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index b9fbe6e7f9a..075598e1c50 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -22,6 +22,30 @@ config BT_HCIUSB_SCO
Say Y here to compile support for SCO over HCI USB.
+config BT_HCIBTUSB
+ tristate "HCI USB driver (alternate version)"
+ depends on USB && EXPERIMENTAL && BT_HCIUSB=n
+ help
+ Bluetooth HCI USB driver.
+ This driver is required if you want to use Bluetooth devices with
+ USB interface.
+
+ This driver is still experimental and has no SCO support.
+
+ Say Y here to compile support for Bluetooth USB devices into the
+ kernel or say M to compile it as module (btusb).
+
+config BT_HCIBTSDIO
+ tristate "HCI SDIO driver"
+ depends on MMC
+ help
+ Bluetooth HCI SDIO driver.
+ This driver is required if you want to use Bluetooth device with
+ SDIO interface.
+
+ Say Y here to compile support for Bluetooth SDIO devices into the
+ kernel or say M to compile it as module (btsdio).
+
config BT_HCIUART
tristate "HCI UART driver"
help
@@ -55,6 +79,17 @@ config BT_HCIUART_BCSP
Say Y here to compile support for HCI BCSP protocol.
+config BT_HCIUART_LL
+ bool "HCILL protocol support"
+ depends on BT_HCIUART
+ help
+ HCILL (HCI Low Level) is a serial protocol for communication
+ between Bluetooth device and host. This protocol is required for
+ serial Bluetooth devices that are based on Texas Instruments'
+ BRF chips.
+
+ Say Y here to compile support for HCILL protocol.
+
config BT_HCIBCM203X
tristate "HCI BCM203x USB driver"
depends on USB
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 08c10e178e0..77444afbf10 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -13,7 +13,11 @@ obj-$(CONFIG_BT_HCIBT3C) += bt3c_cs.o
obj-$(CONFIG_BT_HCIBLUECARD) += bluecard_cs.o
obj-$(CONFIG_BT_HCIBTUART) += btuart_cs.o
+obj-$(CONFIG_BT_HCIBTUSB) += btusb.o
+obj-$(CONFIG_BT_HCIBTSDIO) += btsdio.o
+
hci_uart-y := hci_ldisc.o
hci_uart-$(CONFIG_BT_HCIUART_H4) += hci_h4.o
hci_uart-$(CONFIG_BT_HCIUART_BCSP) += hci_bcsp.o
+hci_uart-$(CONFIG_BT_HCIUART_LL) += hci_ll.o
hci_uart-objs := $(hci_uart-y)
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 851de4d5b7d..bcf57927b7a 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -503,10 +503,7 @@ static irqreturn_t bluecard_interrupt(int irq, void *dev_inst)
unsigned int iobase;
unsigned char reg;
- if (!info || !info->hdev) {
- BT_ERR("Call of irq %d for unknown device", irq);
- return IRQ_NONE;
- }
+ BUG_ON(!info->hdev);
if (!test_bit(CARD_READY, &(info->hw_state)))
return IRQ_HANDLED;
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index e8ebd5d3de8..1375b5345a0 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -2,7 +2,7 @@
*
* Digianswer Bluetooth USB driver
*
- * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2007 Marcel Holtmann <marcel@holtmann.org>
*
*
* This program is free software; you can redistribute it and/or modify
@@ -21,13 +21,14 @@
*
*/
-#include <linux/module.h>
-
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/sched.h>
#include <linux/errno.h>
+#include <linux/skbuff.h>
#include <linux/usb.h>
@@ -39,7 +40,7 @@
#define BT_DBG(D...)
#endif
-#define VERSION "0.8"
+#define VERSION "0.9"
static int ignore = 0;
@@ -52,393 +53,285 @@ static struct usb_device_id bpa10x_table[] = {
MODULE_DEVICE_TABLE(usb, bpa10x_table);
-#define BPA10X_CMD_EP 0x00
-#define BPA10X_EVT_EP 0x81
-#define BPA10X_TX_EP 0x02
-#define BPA10X_RX_EP 0x82
-
-#define BPA10X_CMD_BUF_SIZE 252
-#define BPA10X_EVT_BUF_SIZE 16
-#define BPA10X_TX_BUF_SIZE 384
-#define BPA10X_RX_BUF_SIZE 384
-
struct bpa10x_data {
- struct hci_dev *hdev;
- struct usb_device *udev;
+ struct hci_dev *hdev;
+ struct usb_device *udev;
- rwlock_t lock;
+ struct usb_anchor tx_anchor;
+ struct usb_anchor rx_anchor;
- struct sk_buff_head cmd_queue;
- struct urb *cmd_urb;
- struct urb *evt_urb;
- struct sk_buff *evt_skb;
- unsigned int evt_len;
-
- struct sk_buff_head tx_queue;
- struct urb *tx_urb;
- struct urb *rx_urb;
+ struct sk_buff *rx_skb[2];
};
-#define HCI_VENDOR_HDR_SIZE 5
+#define HCI_VENDOR_HDR_SIZE 5
struct hci_vendor_hdr {
- __u8 type;
- __le16 snum;
- __le16 dlen;
+ __u8 type;
+ __le16 snum;
+ __le16 dlen;
} __attribute__ ((packed));
-static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int count)
+static int bpa10x_recv(struct hci_dev *hdev, int queue, void *buf, int count)
{
- struct hci_acl_hdr *ah;
- struct hci_sco_hdr *sh;
- struct hci_vendor_hdr *vh;
- struct sk_buff *skb;
- int len;
+ struct bpa10x_data *data = hdev->driver_data;
+
+ BT_DBG("%s queue %d buffer %p count %d", hdev->name,
+ queue, buf, count);
+
+ if (queue < 0 || queue > 1)
+ return -EILSEQ;
+
+ hdev->stat.byte_rx += count;
while (count) {
- switch (*buf++) {
- case HCI_ACLDATA_PKT:
- ah = (struct hci_acl_hdr *) buf;
- len = HCI_ACL_HDR_SIZE + __le16_to_cpu(ah->dlen);
- skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (skb) {
- memcpy(skb_put(skb, len), buf, len);
- skb->dev = (void *) data->hdev;
- bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
- hci_recv_frame(skb);
- }
- break;
+ struct sk_buff *skb = data->rx_skb[queue];
+ struct { __u8 type; int expect; } *scb;
+ int type, len = 0;
- case HCI_SCODATA_PKT:
- sh = (struct hci_sco_hdr *) buf;
- len = HCI_SCO_HDR_SIZE + sh->dlen;
- skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (skb) {
- memcpy(skb_put(skb, len), buf, len);
- skb->dev = (void *) data->hdev;
- bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
- hci_recv_frame(skb);
+ if (!skb) {
+ /* Start of the frame */
+
+ type = *((__u8 *) buf);
+ count--; buf++;
+
+ switch (type) {
+ case HCI_EVENT_PKT:
+ if (count >= HCI_EVENT_HDR_SIZE) {
+ struct hci_event_hdr *h = buf;
+ len = HCI_EVENT_HDR_SIZE + h->plen;
+ } else
+ return -EILSEQ;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ if (count >= HCI_ACL_HDR_SIZE) {
+ struct hci_acl_hdr *h = buf;
+ len = HCI_ACL_HDR_SIZE +
+ __le16_to_cpu(h->dlen);
+ } else
+ return -EILSEQ;
+ break;
+
+ case HCI_SCODATA_PKT:
+ if (count >= HCI_SCO_HDR_SIZE) {
+ struct hci_sco_hdr *h = buf;
+ len = HCI_SCO_HDR_SIZE + h->dlen;
+ } else
+ return -EILSEQ;
+ break;
+
+ case HCI_VENDOR_PKT:
+ if (count >= HCI_VENDOR_HDR_SIZE) {
+ struct hci_vendor_hdr *h = buf;
+ len = HCI_VENDOR_HDR_SIZE +
+ __le16_to_cpu(h->dlen);
+ } else
+ return -EILSEQ;
+ break;
}
- break;
- case HCI_VENDOR_PKT:
- vh = (struct hci_vendor_hdr *) buf;
- len = HCI_VENDOR_HDR_SIZE + __le16_to_cpu(vh->dlen);
skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (skb) {
- memcpy(skb_put(skb, len), buf, len);
- skb->dev = (void *) data->hdev;
- bt_cb(skb)->pkt_type = HCI_VENDOR_PKT;
- hci_recv_frame(skb);
+ if (!skb) {
+ BT_ERR("%s no memory for packet", hdev->name);
+ return -ENOMEM;
}
- break;
-
- default:
- len = count - 1;
- break;
- }
- buf += len;
- count -= (len + 1);
- }
-}
-
-static int bpa10x_recv_event(struct bpa10x_data *data, unsigned char *buf, int size)
-{
- BT_DBG("data %p buf %p size %d", data, buf, size);
+ skb->dev = (void *) hdev;
- if (data->evt_skb) {
- struct sk_buff *skb = data->evt_skb;
+ data->rx_skb[queue] = skb;
- memcpy(skb_put(skb, size), buf, size);
+ scb = (void *) skb->cb;
+ scb->type = type;
+ scb->expect = len;
+ } else {
+ /* Continuation */
- if (skb->len == data->evt_len) {
- data->evt_skb = NULL;
- data->evt_len = 0;
- hci_recv_frame(skb);
- }
- } else {
- struct sk_buff *skb;
- struct hci_event_hdr *hdr;
- unsigned char pkt_type;
- int pkt_len = 0;
-
- if (size < HCI_EVENT_HDR_SIZE + 1) {
- BT_ERR("%s event packet block with size %d is too short",
- data->hdev->name, size);
- return -EILSEQ;
+ scb = (void *) skb->cb;
+ len = scb->expect;
}
- pkt_type = *buf++;
- size--;
-
- if (pkt_type != HCI_EVENT_PKT) {
- BT_ERR("%s unexpected event packet start byte 0x%02x",
- data->hdev->name, pkt_type);
- return -EPROTO;
- }
+ len = min(len, count);
- hdr = (struct hci_event_hdr *) buf;
- pkt_len = HCI_EVENT_HDR_SIZE + hdr->plen;
+ memcpy(skb_put(skb, len), buf, len);
- skb = bt_skb_alloc(pkt_len, GFP_ATOMIC);
- if (!skb) {
- BT_ERR("%s no memory for new event packet",
- data->hdev->name);
- return -ENOMEM;
- }
+ scb->expect -= len;
- skb->dev = (void *) data->hdev;
- bt_cb(skb)->pkt_type = pkt_type;
+ if (scb->expect == 0) {
+ /* Complete frame */
- memcpy(skb_put(skb, size), buf, size);
+ data->rx_skb[queue] = NULL;
- if (pkt_len == size) {
+ bt_cb(skb)->pkt_type = scb->type;
hci_recv_frame(skb);
- } else {
- data->evt_skb = skb;
- data->evt_len = pkt_len;
}
+
+ count -= len; buf += len;
}
return 0;
}
-static void bpa10x_wakeup(struct bpa10x_data *data)
+static void bpa10x_tx_complete(struct urb *urb)
{
- struct urb *urb;
- struct sk_buff *skb;
- int err;
+ struct sk_buff *skb = urb->context;
+ struct hci_dev *hdev = (struct hci_dev *) skb->dev;
- BT_DBG("data %p", data);
+ BT_DBG("%s urb %p status %d count %d", hdev->name,
+ urb, urb->status, urb->actual_length);
- urb = data->cmd_urb;
- if (urb->status == -EINPROGRESS)
- skb = NULL;
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (!urb->status)
+ hdev->stat.byte_tx += urb->transfer_buffer_length;
else
- skb = skb_dequeue(&data->cmd_queue);
+ hdev->stat.err_tx++;
- if (skb) {
- struct usb_ctrlrequest *cr;
+done:
+ kfree(urb->setup_packet);
- if (skb->len > BPA10X_CMD_BUF_SIZE) {
- BT_ERR("%s command packet with size %d is too big",
- data->hdev->name, skb->len);
- kfree_skb(skb);
- return;
- }
+ kfree_skb(skb);
+}
+
+static void bpa10x_rx_complete(struct urb *urb)
+{
+ struct hci_dev *hdev = urb->context;
+ struct bpa10x_data *data = hdev->driver_data;
+ int err;
- cr = (struct usb_ctrlrequest *) urb->setup_packet;
- cr->wLength = __cpu_to_le16(skb->len);
+ BT_DBG("%s urb %p status %d count %d", hdev->name,
+ urb, urb->status, urb->actual_length);
- skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
- urb->transfer_buffer_length = skb->len;
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return;
- err = usb_submit_urb(urb, GFP_ATOMIC);
- if (err < 0 && err != -ENODEV) {
- BT_ERR("%s submit failed for command urb %p with error %d",
- data->hdev->name, urb, err);
- skb_queue_head(&data->cmd_queue, skb);
- } else
- kfree_skb(skb);
+ if (urb->status == 0) {
+ if (bpa10x_recv(hdev, usb_pipebulk(urb->pipe),
+ urb->transfer_buffer,
+ urb->actual_length) < 0) {
+ BT_ERR("%s corrupted event packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
}
- urb = data->tx_urb;
- if (urb->status == -EINPROGRESS)
- skb = NULL;
- else
- skb = skb_dequeue(&data->tx_queue);
-
- if (skb) {
- skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
- urb->transfer_buffer_length = skb->len;
-
- err = usb_submit_urb(urb, GFP_ATOMIC);
- if (err < 0 && err != -ENODEV) {
- BT_ERR("%s submit failed for command urb %p with error %d",
- data->hdev->name, urb, err);
- skb_queue_head(&data->tx_queue, skb);
- } else
- kfree_skb(skb);
+ usb_anchor_urb(urb, &data->rx_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
}
}
-static void bpa10x_complete(struct urb *urb)
+static inline int bpa10x_submit_intr_urb(struct hci_dev *hdev)
{
- struct bpa10x_data *data = urb->context;
- unsigned char *buf = urb->transfer_buffer;
- int err, count = urb->actual_length;
+ struct bpa10x_data *data = hdev->driver_data;
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size = 16;
- BT_DBG("data %p urb %p buf %p count %d", data, urb, buf, count);
+ BT_DBG("%s", hdev->name);
- read_lock(&data->lock);
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb)
+ return -ENOMEM;
- if (!test_bit(HCI_RUNNING, &data->hdev->flags))
- goto unlock;
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
- if (urb->status < 0 || !count)
- goto resubmit;
+ pipe = usb_rcvintpipe(data->udev, 0x81);
- if (usb_pipein(urb->pipe)) {
- data->hdev->stat.byte_rx += count;
+ usb_fill_int_urb(urb, data->udev, pipe, buf, size,
+ bpa10x_rx_complete, hdev, 1);
- if (usb_pipetype(urb->pipe) == PIPE_INTERRUPT)
- bpa10x_recv_event(data, buf, count);
+ urb->transfer_flags |= URB_FREE_BUFFER;
- if (usb_pipetype(urb->pipe) == PIPE_BULK)
- bpa10x_recv_bulk(data, buf, count);
- } else {
- data->hdev->stat.byte_tx += count;
+ usb_anchor_urb(urb, &data->rx_anchor);
- bpa10x_wakeup(data);
+ err = usb_submit_urb(urb, GFP_KERNEL);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ kfree(buf);
}
-resubmit:
- if (usb_pipein(urb->pipe)) {
- err = usb_submit_urb(urb, GFP_ATOMIC);
- if (err < 0 && err != -ENODEV) {
- BT_ERR("%s urb %p type %d resubmit status %d",
- data->hdev->name, urb, usb_pipetype(urb->pipe), err);
- }
- }
+ usb_free_urb(urb);
-unlock:
- read_unlock(&data->lock);
+ return err;
}
-static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe,
- size_t size, gfp_t flags, void *data)
+static inline int bpa10x_submit_bulk_urb(struct hci_dev *hdev)
{
+ struct bpa10x_data *data = hdev->driver_data;
struct urb *urb;
- struct usb_ctrlrequest *cr;
unsigned char *buf;
+ unsigned int pipe;
+ int err, size = 64;
- BT_DBG("udev %p data %p", udev, data);
+ BT_DBG("%s", hdev->name);
- urb = usb_alloc_urb(0, flags);
+ urb = usb_alloc_urb(0, GFP_KERNEL);
if (!urb)
- return NULL;
+ return -ENOMEM;
- buf = kmalloc(size, flags);
+ buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
usb_free_urb(urb);
- return NULL;
+ return -ENOMEM;
}
- switch (usb_pipetype(pipe)) {
- case PIPE_CONTROL:
- cr = kmalloc(sizeof(*cr), flags);
- if (!cr) {
- kfree(buf);
- usb_free_urb(urb);
- return NULL;
- }
+ pipe = usb_rcvbulkpipe(data->udev, 0x82);
- cr->bRequestType = USB_TYPE_VENDOR;
- cr->bRequest = 0;
- cr->wIndex = 0;
- cr->wValue = 0;
- cr->wLength = __cpu_to_le16(0);
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ buf, size, bpa10x_rx_complete, hdev);
- usb_fill_control_urb(urb, udev, pipe, (void *) cr, buf, 0, bpa10x_complete, data);
- break;
+ urb->transfer_flags |= URB_FREE_BUFFER;
- case PIPE_INTERRUPT:
- usb_fill_int_urb(urb, udev, pipe, buf, size, bpa10x_complete, data, 1);
- break;
+ usb_anchor_urb(urb, &data->rx_anchor);
- case PIPE_BULK:
- usb_fill_bulk_urb(urb, udev, pipe, buf, size, bpa10x_complete, data);
- break;
-
- default:
+ err = usb_submit_urb(urb, GFP_KERNEL);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
kfree(buf);
- usb_free_urb(urb);
- return NULL;
}
- return urb;
-}
-
-static inline void bpa10x_free_urb(struct urb *urb)
-{
- BT_DBG("urb %p", urb);
-
- if (!urb)
- return;
-
- kfree(urb->setup_packet);
- kfree(urb->transfer_buffer);
-
usb_free_urb(urb);
+
+ return err;
}
static int bpa10x_open(struct hci_dev *hdev)
{
struct bpa10x_data *data = hdev->driver_data;
- struct usb_device *udev = data->udev;
- unsigned long flags;
int err;
- BT_DBG("hdev %p data %p", hdev, data);
+ BT_DBG("%s", hdev->name);
if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
return 0;
- data->cmd_urb = bpa10x_alloc_urb(udev, usb_sndctrlpipe(udev, BPA10X_CMD_EP),
- BPA10X_CMD_BUF_SIZE, GFP_KERNEL, data);
- if (!data->cmd_urb) {
- err = -ENOMEM;
- goto done;
- }
-
- data->evt_urb = bpa10x_alloc_urb(udev, usb_rcvintpipe(udev, BPA10X_EVT_EP),
- BPA10X_EVT_BUF_SIZE, GFP_KERNEL, data);
- if (!data->evt_urb) {
- bpa10x_free_urb(data->cmd_urb);
- err = -ENOMEM;
- goto done;
- }
-
- data->rx_urb = bpa10x_alloc_urb(udev, usb_rcvbulkpipe(udev, BPA10X_RX_EP),
- BPA10X_RX_BUF_SIZE, GFP_KERNEL, data);
- if (!data->rx_urb) {
- bpa10x_free_urb(data->evt_urb);
- bpa10x_free_urb(data->cmd_urb);
- err = -ENOMEM;
- goto done;
- }
-
- data->tx_urb = bpa10x_alloc_urb(udev, usb_sndbulkpipe(udev, BPA10X_TX_EP),
- BPA10X_TX_BUF_SIZE, GFP_KERNEL, data);
- if (!data->rx_urb) {
- bpa10x_free_urb(data->rx_urb);
- bpa10x_free_urb(data->evt_urb);
- bpa10x_free_urb(data->cmd_urb);
- err = -ENOMEM;
- goto done;
- }
+ err = bpa10x_submit_intr_urb(hdev);
+ if (err < 0)
+ goto error;
- write_lock_irqsave(&data->lock, flags);
+ err = bpa10x_submit_bulk_urb(hdev);
+ if (err < 0)
+ goto error;
- err = usb_submit_urb(data->evt_urb, GFP_ATOMIC);
- if (err < 0) {
- BT_ERR("%s submit failed for event urb %p with error %d",
- data->hdev->name, data->evt_urb, err);
- } else {
- err = usb_submit_urb(data->rx_urb, GFP_ATOMIC);
- if (err < 0) {
- BT_ERR("%s submit failed for rx urb %p with error %d",
- data->hdev->name, data->evt_urb, err);
- usb_kill_urb(data->evt_urb);
- }
- }
+ return 0;
- write_unlock_irqrestore(&data->lock, flags);
+error:
+ usb_kill_anchored_urbs(&data->rx_anchor);
-done:
- if (err < 0)
- clear_bit(HCI_RUNNING, &hdev->flags);
+ clear_bit(HCI_RUNNING, &hdev->flags);
return err;
}
@@ -446,27 +339,13 @@ done:
static int bpa10x_close(struct hci_dev *hdev)
{
struct bpa10x_data *data = hdev->driver_data;
- unsigned long flags;
- BT_DBG("hdev %p data %p", hdev, data);
+ BT_DBG("%s", hdev->name);
if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
return 0;
- write_lock_irqsave(&data->lock, flags);
-
- skb_queue_purge(&data->cmd_queue);
- usb_kill_urb(data->cmd_urb);
- usb_kill_urb(data->evt_urb);
- usb_kill_urb(data->rx_urb);
- usb_kill_urb(data->tx_urb);
-
- write_unlock_irqrestore(&data->lock, flags);
-
- bpa10x_free_urb(data->cmd_urb);
- bpa10x_free_urb(data->evt_urb);
- bpa10x_free_urb(data->rx_urb);
- bpa10x_free_urb(data->tx_urb);
+ usb_kill_anchored_urbs(&data->rx_anchor);
return 0;
}
@@ -475,9 +354,9 @@ static int bpa10x_flush(struct hci_dev *hdev)
{
struct bpa10x_data *data = hdev->driver_data;
- BT_DBG("hdev %p data %p", hdev, data);
+ BT_DBG("%s", hdev->name);
- skb_queue_purge(&data->cmd_queue);
+ usb_kill_anchored_urbs(&data->tx_anchor);
return 0;
}
@@ -485,45 +364,78 @@ static int bpa10x_flush(struct hci_dev *hdev)
static int bpa10x_send_frame(struct sk_buff *skb)
{
struct hci_dev *hdev = (struct hci_dev *) skb->dev;
- struct bpa10x_data *data;
-
- BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len);
+ struct bpa10x_data *data = hdev->driver_data;
+ struct usb_ctrlrequest *dr;
+ struct urb *urb;
+ unsigned int pipe;
+ int err;
- if (!hdev) {
- BT_ERR("Frame for unknown HCI device");
- return -ENODEV;
- }
+ BT_DBG("%s", hdev->name);
if (!test_bit(HCI_RUNNING, &hdev->flags))
return -EBUSY;
- data = hdev->driver_data;
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
/* Prepend skb with frame type */
- memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
+ *skb_push(skb, 1) = bt_cb(skb)->pkt_type;
switch (bt_cb(skb)->pkt_type) {
case HCI_COMMAND_PKT:
+ dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
+ if (!dr) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ dr->bRequestType = USB_TYPE_VENDOR;
+ dr->bRequest = 0;
+ dr->wIndex = 0;
+ dr->wValue = 0;
+ dr->wLength = __cpu_to_le16(skb->len);
+
+ pipe = usb_sndctrlpipe(data->udev, 0x00);
+
+ usb_fill_control_urb(urb, data->udev, pipe, (void *) dr,
+ skb->data, skb->len, bpa10x_tx_complete, skb);
+
hdev->stat.cmd_tx++;
- skb_queue_tail(&data->cmd_queue, skb);
break;
case HCI_ACLDATA_PKT:
+ pipe = usb_sndbulkpipe(data->udev, 0x02);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ skb->data, skb->len, bpa10x_tx_complete, skb);
+
hdev->stat.acl_tx++;
- skb_queue_tail(&data->tx_queue, skb);
break;
case HCI_SCODATA_PKT:
+ pipe = usb_sndbulkpipe(data->udev, 0x02);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ skb->data, skb->len, bpa10x_tx_complete, skb);
+
hdev->stat.sco_tx++;
- skb_queue_tail(&data->tx_queue, skb);
break;
- };
- read_lock(&data->lock);
+	default:
+		usb_free_urb(urb);
+		return -EILSEQ;
+	}
+
+ usb_anchor_urb(urb, &data->tx_anchor);
- bpa10x_wakeup(data);
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed", hdev->name, urb);
+ kfree(urb->setup_packet);
+ usb_unanchor_urb(urb);
+ }
- read_unlock(&data->lock);
+ usb_free_urb(urb);
-	return 0;
+	return err;
}
@@ -532,16 +444,17 @@ static void bpa10x_destruct(struct hci_dev *hdev)
{
struct bpa10x_data *data = hdev->driver_data;
- BT_DBG("hdev %p data %p", hdev, data);
+ BT_DBG("%s", hdev->name);
+	kfree_skb(data->rx_skb[0]);
+	kfree_skb(data->rx_skb[1]);
kfree(data);
}
static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
- struct usb_device *udev = interface_to_usbdev(intf);
- struct hci_dev *hdev;
struct bpa10x_data *data;
+ struct hci_dev *hdev;
int err;
BT_DBG("intf %p id %p", intf, id);
@@ -549,48 +462,43 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *
if (ignore)
return -ENODEV;
- if (intf->cur_altsetting->desc.bInterfaceNumber > 0)
+ if (intf->cur_altsetting->desc.bInterfaceNumber != 0)
return -ENODEV;
data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data) {
- BT_ERR("Can't allocate data structure");
+ if (!data)
return -ENOMEM;
- }
-
- data->udev = udev;
- rwlock_init(&data->lock);
+ data->udev = interface_to_usbdev(intf);
- skb_queue_head_init(&data->cmd_queue);
- skb_queue_head_init(&data->tx_queue);
+ init_usb_anchor(&data->tx_anchor);
+ init_usb_anchor(&data->rx_anchor);
hdev = hci_alloc_dev();
if (!hdev) {
- BT_ERR("Can't allocate HCI device");
kfree(data);
return -ENOMEM;
}
- data->hdev = hdev;
-
hdev->type = HCI_USB;
hdev->driver_data = data;
+
+ data->hdev = hdev;
+
SET_HCIDEV_DEV(hdev, &intf->dev);
- hdev->open = bpa10x_open;
- hdev->close = bpa10x_close;
- hdev->flush = bpa10x_flush;
- hdev->send = bpa10x_send_frame;
- hdev->destruct = bpa10x_destruct;
+ hdev->open = bpa10x_open;
+ hdev->close = bpa10x_close;
+ hdev->flush = bpa10x_flush;
+ hdev->send = bpa10x_send_frame;
+ hdev->destruct = bpa10x_destruct;
hdev->owner = THIS_MODULE;
err = hci_register_dev(hdev);
if (err < 0) {
- BT_ERR("Can't register HCI device");
- kfree(data);
hci_free_dev(hdev);
+ kfree(data);
return err;
}
@@ -602,19 +510,17 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *
static void bpa10x_disconnect(struct usb_interface *intf)
{
struct bpa10x_data *data = usb_get_intfdata(intf);
- struct hci_dev *hdev = data->hdev;
BT_DBG("intf %p", intf);
- if (!hdev)
+ if (!data)
return;
usb_set_intfdata(intf, NULL);
- if (hci_unregister_dev(hdev) < 0)
- BT_ERR("Can't unregister HCI device %s", hdev->name);
+ hci_unregister_dev(data->hdev);
- hci_free_dev(hdev);
+ hci_free_dev(data->hdev);
}
static struct usb_driver bpa10x_driver = {
@@ -626,15 +532,9 @@ static struct usb_driver bpa10x_driver = {
static int __init bpa10x_init(void)
{
- int err;
-
BT_INFO("Digianswer Bluetooth USB driver ver %s", VERSION);
- err = usb_register(&bpa10x_driver);
- if (err < 0)
- BT_ERR("Failed to register USB driver");
-
- return err;
+ return usb_register(&bpa10x_driver);
}
static void __exit bpa10x_exit(void)
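
Editor's note: the rewrite above drops the driver's hand-rolled URB bookkeeping
(cmd_urb/evt_urb/rx_urb/tx_urb plus a rwlock) in favour of USB anchors, so
teardown becomes a single usb_kill_anchored_urbs() call. A minimal sketch of
the pattern, assuming a driver-private struct with one anchor (the demo_ names
are illustrative, not part of this patch):

	#include <linux/usb.h>

	struct demo_data {
		struct usb_anchor rx_anchor;	/* init_usb_anchor() at probe */
	};

	/* Submit path: anchor before submitting, unanchor on failure so a
	 * dead URB never lingers on the anchor list. */
	static int demo_submit(struct demo_data *data, struct urb *urb)
	{
		int err;

		usb_anchor_urb(urb, &data->rx_anchor);

		err = usb_submit_urb(urb, GFP_KERNEL);
		if (err < 0)
			usb_unanchor_urb(urb);

		return err;
	}

	/* Teardown: one call cancels everything still in flight. */
	static void demo_stop(struct demo_data *data)
	{
		usb_kill_anchored_urbs(&data->rx_anchor);
	}
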
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 39516074636..a18f9b8c9e1 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -344,10 +344,7 @@ static irqreturn_t bt3c_interrupt(int irq, void *dev_inst)
unsigned int iobase;
int iir;
- if (!info || !info->hdev) {
- BT_ERR("Call of irq %d for unknown device", irq);
- return IRQ_NONE;
- }
+ BUG_ON(!info->hdev);
iobase = info->p_dev->io.BasePort1;
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
new file mode 100644
index 00000000000..b786f618790
--- /dev/null
+++ b/drivers/bluetooth/btsdio.c
@@ -0,0 +1,406 @@
+/*
+ *
+ * Generic Bluetooth SDIO driver
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ * Copyright (C) 2007 Marcel Holtmann <marcel@holtmann.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+
+#include <linux/mmc/sdio_ids.h>
+#include <linux/mmc/sdio_func.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCIBTSDIO_DEBUG
+#undef BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "0.1"
+
+static const struct sdio_device_id btsdio_table[] = {
+ /* Generic Bluetooth Type-A SDIO device */
+ { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_A) },
+
+ /* Generic Bluetooth Type-B SDIO device */
+ { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_B) },
+
+ { } /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(sdio, btsdio_table);
+
+struct btsdio_data {
+ struct hci_dev *hdev;
+ struct sdio_func *func;
+
+ struct work_struct work;
+
+ struct sk_buff_head txq;
+};
+
+#define REG_RDAT 0x00 /* Receiver Data */
+#define REG_TDAT 0x00 /* Transmitter Data */
+#define REG_PC_RRT 0x10 /* Read Packet Control */
+#define REG_PC_WRT 0x11 /* Write Packet Control */
+#define REG_RTC_STAT 0x12 /* Retry Control Status */
+#define REG_RTC_SET 0x12 /* Retry Control Set */
+#define REG_INTRD 0x13 /* Interrupt Indication */
+#define REG_CL_INTRD 0x13 /* Interrupt Clear */
+#define REG_EN_INTRD 0x14 /* Interrupt Enable */
+#define REG_MD_STAT 0x20 /* Bluetooth Mode Status */
+
+static int btsdio_tx_packet(struct btsdio_data *data, struct sk_buff *skb)
+{
+ int err;
+
+ BT_DBG("%s", data->hdev->name);
+
+ /* Prepend Type-A header */
+ skb_push(skb, 4);
+ skb->data[0] = (skb->len & 0x0000ff);
+ skb->data[1] = (skb->len & 0x00ff00) >> 8;
+ skb->data[2] = (skb->len & 0xff0000) >> 16;
+ skb->data[3] = bt_cb(skb)->pkt_type;
+
+ err = sdio_writesb(data->func, REG_TDAT, skb->data, skb->len);
+ if (err < 0) {
+ sdio_writeb(data->func, 0x01, REG_PC_WRT, NULL);
+ return err;
+ }
+
+ data->hdev->stat.byte_tx += skb->len;
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
+static void btsdio_work(struct work_struct *work)
+{
+ struct btsdio_data *data = container_of(work, struct btsdio_data, work);
+ struct sk_buff *skb;
+ int err;
+
+ BT_DBG("%s", data->hdev->name);
+
+ sdio_claim_host(data->func);
+
+ while ((skb = skb_dequeue(&data->txq))) {
+ err = btsdio_tx_packet(data, skb);
+ if (err < 0) {
+ data->hdev->stat.err_tx++;
+ skb_queue_head(&data->txq, skb);
+ break;
+ }
+ }
+
+ sdio_release_host(data->func);
+}
+
+static int btsdio_rx_packet(struct btsdio_data *data)
+{
+ u8 hdr[4] __attribute__ ((aligned(4)));
+ struct sk_buff *skb;
+ int err, len;
+
+ BT_DBG("%s", data->hdev->name);
+
+ err = sdio_readsb(data->func, hdr, REG_RDAT, 4);
+ if (err < 0)
+ return err;
+
+ len = hdr[0] | (hdr[1] << 8) | (hdr[2] << 16);
+ if (len < 4 || len > 65543)
+ return -EILSEQ;
+
+ skb = bt_skb_alloc(len - 4, GFP_KERNEL);
+ if (!skb) {
+ /* Out of memory. Prepare a read retry and just
+ * return with the expectation that the next time
+ * we're called we'll have more memory. */
+ return -ENOMEM;
+ }
+
+ skb_put(skb, len - 4);
+
+ err = sdio_readsb(data->func, skb->data, REG_RDAT, len - 4);
+ if (err < 0) {
+		kfree_skb(skb);
+ return err;
+ }
+
+ data->hdev->stat.byte_rx += len;
+
+ skb->dev = (void *) data->hdev;
+ bt_cb(skb)->pkt_type = hdr[3];
+
+ err = hci_recv_frame(skb);
+	/* hci_recv_frame() frees the skb itself on error */
+	if (err < 0)
+		return err;
+
+ sdio_writeb(data->func, 0x00, REG_PC_RRT, NULL);
+
+ return 0;
+}
+
+static void btsdio_interrupt(struct sdio_func *func)
+{
+ struct btsdio_data *data = sdio_get_drvdata(func);
+ int intrd;
+
+ BT_DBG("%s", data->hdev->name);
+
+ intrd = sdio_readb(func, REG_INTRD, NULL);
+ if (intrd & 0x01) {
+ sdio_writeb(func, 0x01, REG_CL_INTRD, NULL);
+
+ if (btsdio_rx_packet(data) < 0) {
+ data->hdev->stat.err_rx++;
+ sdio_writeb(data->func, 0x01, REG_PC_RRT, NULL);
+ }
+ }
+}
+
+static int btsdio_open(struct hci_dev *hdev)
+{
+ struct btsdio_data *data = hdev->driver_data;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
+ return 0;
+
+ sdio_claim_host(data->func);
+
+ err = sdio_enable_func(data->func);
+ if (err < 0) {
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ goto release;
+ }
+
+ err = sdio_claim_irq(data->func, btsdio_interrupt);
+ if (err < 0) {
+ sdio_disable_func(data->func);
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ goto release;
+ }
+
+ if (data->func->class == SDIO_CLASS_BT_B)
+ sdio_writeb(data->func, 0x00, REG_MD_STAT, NULL);
+
+ sdio_writeb(data->func, 0x01, REG_EN_INTRD, NULL);
+
+release:
+ sdio_release_host(data->func);
+
+ return err;
+}
+
+static int btsdio_close(struct hci_dev *hdev)
+{
+ struct btsdio_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
+ return 0;
+
+ sdio_claim_host(data->func);
+
+ sdio_writeb(data->func, 0x00, REG_EN_INTRD, NULL);
+
+ sdio_release_irq(data->func);
+ sdio_disable_func(data->func);
+
+ sdio_release_host(data->func);
+
+ return 0;
+}
+
+static int btsdio_flush(struct hci_dev *hdev)
+{
+ struct btsdio_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ skb_queue_purge(&data->txq);
+
+ return 0;
+}
+
+static int btsdio_send_frame(struct sk_buff *skb)
+{
+ struct hci_dev *hdev = (struct hci_dev *) skb->dev;
+ struct btsdio_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return -EBUSY;
+
+ switch (bt_cb(skb)->pkt_type) {
+ case HCI_COMMAND_PKT:
+ hdev->stat.cmd_tx++;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ hdev->stat.acl_tx++;
+ break;
+
+ case HCI_SCODATA_PKT:
+ hdev->stat.sco_tx++;
+ break;
+
+ default:
+ return -EILSEQ;
+ }
+
+ skb_queue_tail(&data->txq, skb);
+
+ schedule_work(&data->work);
+
+ return 0;
+}
+
+static void btsdio_destruct(struct hci_dev *hdev)
+{
+ struct btsdio_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ kfree(data);
+}
+
+static int btsdio_probe(struct sdio_func *func,
+ const struct sdio_device_id *id)
+{
+ struct btsdio_data *data;
+ struct hci_dev *hdev;
+ struct sdio_func_tuple *tuple = func->tuples;
+ int err;
+
+ BT_DBG("func %p id %p class 0x%04x", func, id, func->class);
+
+ while (tuple) {
+ BT_DBG("code 0x%x size %d", tuple->code, tuple->size);
+ tuple = tuple->next;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->func = func;
+
+ INIT_WORK(&data->work, btsdio_work);
+
+ skb_queue_head_init(&data->txq);
+
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ hdev->type = HCI_SDIO;
+ hdev->driver_data = data;
+
+ data->hdev = hdev;
+
+ SET_HCIDEV_DEV(hdev, &func->dev);
+
+ hdev->open = btsdio_open;
+ hdev->close = btsdio_close;
+ hdev->flush = btsdio_flush;
+ hdev->send = btsdio_send_frame;
+ hdev->destruct = btsdio_destruct;
+
+ hdev->owner = THIS_MODULE;
+
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ hci_free_dev(hdev);
+ kfree(data);
+ return err;
+ }
+
+ sdio_set_drvdata(func, data);
+
+ return 0;
+}
+
+static void btsdio_remove(struct sdio_func *func)
+{
+ struct btsdio_data *data = sdio_get_drvdata(func);
+ struct hci_dev *hdev;
+
+ BT_DBG("func %p", func);
+
+ if (!data)
+ return;
+
+ hdev = data->hdev;
+
+ sdio_set_drvdata(func, NULL);
+
+ hci_unregister_dev(hdev);
+
+ hci_free_dev(hdev);
+}
+
+static struct sdio_driver btsdio_driver = {
+ .name = "btsdio",
+ .probe = btsdio_probe,
+ .remove = btsdio_remove,
+ .id_table = btsdio_table,
+};
+
+static int __init btsdio_init(void)
+{
+ BT_INFO("Generic Bluetooth SDIO driver ver %s", VERSION);
+
+ return sdio_register_driver(&btsdio_driver);
+}
+
+static void __exit btsdio_exit(void)
+{
+ sdio_unregister_driver(&btsdio_driver);
+}
+
+module_init(btsdio_init);
+module_exit(btsdio_exit);
+
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Generic Bluetooth SDIO driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index d7d2ea0d86a..08f48d577ab 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -294,10 +294,7 @@ static irqreturn_t btuart_interrupt(int irq, void *dev_inst)
int boguscount = 0;
int iir, lsr;
- if (!info || !info->hdev) {
- BT_ERR("Call of irq %d for unknown device", irq);
- return IRQ_NONE;
- }
+ BUG_ON(!info->hdev);
iobase = info->p_dev->io.BasePort1;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
new file mode 100644
index 00000000000..12e108914f1
--- /dev/null
+++ b/drivers/bluetooth/btusb.c
@@ -0,0 +1,564 @@
+/*
+ *
+ * Generic Bluetooth USB driver
+ *
+ * Copyright (C) 2005-2007 Marcel Holtmann <marcel@holtmann.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+
+#include <linux/usb.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCIBTUSB_DEBUG
+#undef BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "0.1"
+
+static struct usb_device_id btusb_table[] = {
+ /* Generic Bluetooth USB device */
+ { USB_DEVICE_INFO(0xe0, 0x01, 0x01) },
+
+ { } /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, btusb_table);
+
+static struct usb_device_id blacklist_table[] = {
+ { } /* Terminating entry */
+};
+
+#define BTUSB_INTR_RUNNING 0
+#define BTUSB_BULK_RUNNING 1
+
+struct btusb_data {
+ struct hci_dev *hdev;
+ struct usb_device *udev;
+
+ spinlock_t lock;
+
+ unsigned long flags;
+
+ struct work_struct work;
+
+ struct usb_anchor tx_anchor;
+ struct usb_anchor intr_anchor;
+ struct usb_anchor bulk_anchor;
+
+ struct usb_endpoint_descriptor *intr_ep;
+ struct usb_endpoint_descriptor *bulk_tx_ep;
+ struct usb_endpoint_descriptor *bulk_rx_ep;
+};
+
+static void btusb_intr_complete(struct urb *urb)
+{
+ struct hci_dev *hdev = urb->context;
+ struct btusb_data *data = hdev->driver_data;
+ int err;
+
+ BT_DBG("%s urb %p status %d count %d", hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return;
+
+ if (urb->status == 0) {
+ if (hci_recv_fragment(hdev, HCI_EVENT_PKT,
+ urb->transfer_buffer,
+ urb->actual_length) < 0) {
+ BT_ERR("%s corrupted event packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
+ }
+
+ if (!test_bit(BTUSB_INTR_RUNNING, &data->flags))
+ return;
+
+ usb_anchor_urb(urb, &data->intr_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+}
+
+static inline int btusb_submit_intr_urb(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size;
+
+ BT_DBG("%s", hdev->name);
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ size = le16_to_cpu(data->intr_ep->wMaxPacketSize);
+
+ buf = kmalloc(size, GFP_ATOMIC);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ pipe = usb_rcvintpipe(data->udev, data->intr_ep->bEndpointAddress);
+
+ usb_fill_int_urb(urb, data->udev, pipe, buf, size,
+ btusb_intr_complete, hdev,
+ data->intr_ep->bInterval);
+
+ urb->transfer_flags |= URB_FREE_BUFFER;
+
+ usb_anchor_urb(urb, &data->intr_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ kfree(buf);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+}
+
+static void btusb_bulk_complete(struct urb *urb)
+{
+ struct hci_dev *hdev = urb->context;
+ struct btusb_data *data = hdev->driver_data;
+ int err;
+
+ BT_DBG("%s urb %p status %d count %d", hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return;
+
+ if (urb->status == 0) {
+ if (hci_recv_fragment(hdev, HCI_ACLDATA_PKT,
+ urb->transfer_buffer,
+ urb->actual_length) < 0) {
+ BT_ERR("%s corrupted ACL packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
+ }
+
+ if (!test_bit(BTUSB_BULK_RUNNING, &data->flags))
+ return;
+
+ usb_anchor_urb(urb, &data->bulk_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+}
+
+static inline int btusb_submit_bulk_urb(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size;
+
+ BT_DBG("%s", hdev->name);
+
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb)
+ return -ENOMEM;
+
+ size = le16_to_cpu(data->bulk_rx_ep->wMaxPacketSize);
+
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ pipe = usb_rcvbulkpipe(data->udev, data->bulk_rx_ep->bEndpointAddress);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ buf, size, btusb_bulk_complete, hdev);
+
+ urb->transfer_flags |= URB_FREE_BUFFER;
+
+ usb_anchor_urb(urb, &data->bulk_anchor);
+
+ err = usb_submit_urb(urb, GFP_KERNEL);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ kfree(buf);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+}
+
+static void btusb_tx_complete(struct urb *urb)
+{
+ struct sk_buff *skb = urb->context;
+ struct hci_dev *hdev = (struct hci_dev *) skb->dev;
+
+ BT_DBG("%s urb %p status %d count %d", hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (!urb->status)
+ hdev->stat.byte_tx += urb->transfer_buffer_length;
+ else
+ hdev->stat.err_tx++;
+
+done:
+ kfree(urb->setup_packet);
+
+ kfree_skb(skb);
+}
+
+static int btusb_open(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
+ return 0;
+
+ if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
+ return 0;
+
+ err = btusb_submit_intr_urb(hdev);
+ if (err < 0) {
+		clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ }
+
+ return err;
+}
+
+static int btusb_close(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
+ return 0;
+
+ clear_bit(BTUSB_BULK_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->bulk_anchor);
+
+ clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->intr_anchor);
+
+ return 0;
+}
+
+static int btusb_flush(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ usb_kill_anchored_urbs(&data->tx_anchor);
+
+ return 0;
+}
+
+static int btusb_send_frame(struct sk_buff *skb)
+{
+ struct hci_dev *hdev = (struct hci_dev *) skb->dev;
+ struct btusb_data *data = hdev->driver_data;
+ struct usb_ctrlrequest *dr;
+ struct urb *urb;
+ unsigned int pipe;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return -EBUSY;
+
+ switch (bt_cb(skb)->pkt_type) {
+ case HCI_COMMAND_PKT:
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
+ if (!dr) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ dr->bRequestType = USB_TYPE_CLASS;
+ dr->bRequest = 0;
+ dr->wIndex = 0;
+ dr->wValue = 0;
+ dr->wLength = __cpu_to_le16(skb->len);
+
+ pipe = usb_sndctrlpipe(data->udev, 0x00);
+
+ usb_fill_control_urb(urb, data->udev, pipe, (void *) dr,
+ skb->data, skb->len, btusb_tx_complete, skb);
+
+ hdev->stat.cmd_tx++;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ pipe = usb_sndbulkpipe(data->udev,
+ data->bulk_tx_ep->bEndpointAddress);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ skb->data, skb->len, btusb_tx_complete, skb);
+
+ hdev->stat.acl_tx++;
+ break;
+
+ case HCI_SCODATA_PKT:
+ hdev->stat.sco_tx++;
+ kfree_skb(skb);
+ return 0;
+
+ default:
+ return -EILSEQ;
+ }
+
+ usb_anchor_urb(urb, &data->tx_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ BT_ERR("%s urb %p submission failed", hdev->name, urb);
+ kfree(urb->setup_packet);
+ usb_unanchor_urb(urb);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+}
+
+static void btusb_destruct(struct hci_dev *hdev)
+{
+ struct btusb_data *data = hdev->driver_data;
+
+ BT_DBG("%s", hdev->name);
+
+ kfree(data);
+}
+
+static void btusb_notify(struct hci_dev *hdev, unsigned int evt)
+{
+ struct btusb_data *data = hdev->driver_data;
+
+ BT_DBG("%s evt %d", hdev->name, evt);
+
+ if (evt == HCI_NOTIFY_CONN_ADD || evt == HCI_NOTIFY_CONN_DEL)
+ schedule_work(&data->work);
+}
+
+static void btusb_work(struct work_struct *work)
+{
+ struct btusb_data *data = container_of(work, struct btusb_data, work);
+ struct hci_dev *hdev = data->hdev;
+
+ if (hdev->conn_hash.acl_num == 0) {
+ clear_bit(BTUSB_BULK_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->bulk_anchor);
+ return;
+ }
+
+ if (!test_and_set_bit(BTUSB_BULK_RUNNING, &data->flags)) {
+ if (btusb_submit_bulk_urb(hdev) < 0)
+ clear_bit(BTUSB_BULK_RUNNING, &data->flags);
+ else
+ btusb_submit_bulk_urb(hdev);
+ }
+}
+
+static int btusb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+{
+ struct usb_endpoint_descriptor *ep_desc;
+ struct btusb_data *data;
+ struct hci_dev *hdev;
+ int i, err;
+
+ BT_DBG("intf %p id %p", intf, id);
+
+ if (intf->cur_altsetting->desc.bInterfaceNumber != 0)
+ return -ENODEV;
+
+ if (!id->driver_info) {
+ const struct usb_device_id *match;
+ match = usb_match_id(intf, blacklist_table);
+ if (match)
+ id = match;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) {
+ ep_desc = &intf->cur_altsetting->endpoint[i].desc;
+
+ if (!data->intr_ep && usb_endpoint_is_int_in(ep_desc)) {
+ data->intr_ep = ep_desc;
+ continue;
+ }
+
+ if (!data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) {
+ data->bulk_tx_ep = ep_desc;
+ continue;
+ }
+
+ if (!data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) {
+ data->bulk_rx_ep = ep_desc;
+ continue;
+ }
+ }
+
+ if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) {
+ kfree(data);
+ return -ENODEV;
+ }
+
+ data->udev = interface_to_usbdev(intf);
+
+ spin_lock_init(&data->lock);
+
+ INIT_WORK(&data->work, btusb_work);
+
+ init_usb_anchor(&data->tx_anchor);
+ init_usb_anchor(&data->intr_anchor);
+ init_usb_anchor(&data->bulk_anchor);
+
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ hdev->type = HCI_USB;
+ hdev->driver_data = data;
+
+ data->hdev = hdev;
+
+ SET_HCIDEV_DEV(hdev, &intf->dev);
+
+ hdev->open = btusb_open;
+ hdev->close = btusb_close;
+ hdev->flush = btusb_flush;
+ hdev->send = btusb_send_frame;
+ hdev->destruct = btusb_destruct;
+ hdev->notify = btusb_notify;
+
+ hdev->owner = THIS_MODULE;
+
+ set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks);
+
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ hci_free_dev(hdev);
+ kfree(data);
+ return err;
+ }
+
+ usb_set_intfdata(intf, data);
+
+ return 0;
+}
+
+static void btusb_disconnect(struct usb_interface *intf)
+{
+ struct btusb_data *data = usb_get_intfdata(intf);
+ struct hci_dev *hdev;
+
+ BT_DBG("intf %p", intf);
+
+ if (!data)
+ return;
+
+ hdev = data->hdev;
+
+ usb_set_intfdata(intf, NULL);
+
+ hci_unregister_dev(hdev);
+
+ hci_free_dev(hdev);
+}
+
+static struct usb_driver btusb_driver = {
+ .name = "btusb",
+ .probe = btusb_probe,
+ .disconnect = btusb_disconnect,
+ .id_table = btusb_table,
+};
+
+static int __init btusb_init(void)
+{
+ BT_INFO("Generic Bluetooth USB driver ver %s", VERSION);
+
+ return usb_register(&btusb_driver);
+}
+
+static void __exit btusb_exit(void)
+{
+ usb_deregister(&btusb_driver);
+}
+
+module_init(btusb_init);
+module_exit(btusb_exit);
+
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Generic Bluetooth USB driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 7f9c54b9964..dae45cdf02b 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -298,10 +298,7 @@ static irqreturn_t dtl1_interrupt(int irq, void *dev_inst)
int boguscount = 0;
int iir, lsr;
- if (!info || !info->hdev) {
- BT_ERR("Call of irq %d for unknown device", irq);
- return IRQ_NONE;
- }
+ BUG_ON(!info->hdev);
iobase = info->p_dev->io.BasePort1;
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index d66064ccb31..696f7528f02 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -237,7 +237,8 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data,
if (hciextn && chan == 5) {
struct hci_command_hdr *hdr = (struct hci_command_hdr *) data;
- if (hci_opcode_ogf(__le16_to_cpu(hdr->opcode)) == OGF_VENDOR_CMD) {
+ /* Vendor specific commands */
+ if (hci_opcode_ogf(__le16_to_cpu(hdr->opcode)) == 0x3f) {
u8 desc = *(data + HCI_COMMAND_HDR_SIZE);
if ((desc & 0xf0) == 0xc0) {
data += HCI_COMMAND_HDR_SIZE + 1;
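
Editor's note: the magic 0x3f above is the vendor-specific OGF. An HCI opcode
packs the OGF into its top 6 bits and the OCF into the low 10 bits, so the
check reads "is this a vendor command". Illustrative equivalents of the
kernel's hci_opcode_ogf()/hci_opcode_ocf() helpers (the demo_ names are not
from this patch):

	/* 16-bit HCI opcode: | OGF (6 bits) | OCF (10 bits) | */
	#define demo_opcode_ogf(op)	((op) >> 10)	/* 0x3f = vendor */
	#define demo_opcode_ocf(op)	((op) & 0x03ff)
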
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 6055b9c0ac0..e68821d074b 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -549,7 +549,10 @@ static int __init hci_uart_init(void)
#ifdef CONFIG_BT_HCIUART_BCSP
bcsp_init();
#endif
-
+#ifdef CONFIG_BT_HCIUART_LL
+ ll_init();
+#endif
+
return 0;
}
@@ -563,6 +566,9 @@ static void __exit hci_uart_exit(void)
#ifdef CONFIG_BT_HCIUART_BCSP
bcsp_deinit();
#endif
+#ifdef CONFIG_BT_HCIUART_LL
+ ll_deinit();
+#endif
/* Release tty registration of line discipline */
if ((err = tty_unregister_ldisc(N_HCI)))
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
new file mode 100644
index 00000000000..8c3e62a17b4
--- /dev/null
+++ b/drivers/bluetooth/hci_ll.c
@@ -0,0 +1,531 @@
+/*
+ * Texas Instruments' Bluetooth HCILL UART protocol
+ *
+ * HCILL (HCI Low Level) is a Texas Instruments' power management
+ * protocol extension to H4.
+ *
+ * Copyright (C) 2007 Texas Instruments, Inc.
+ *
+ * Written by Ohad Ben-Cohen <ohad@bencohen.org>
+ *
+ * Acknowledgements:
+ * This file is based on hci_h4.c, which was written
+ * by Maxim Krasnyansky and Marcel Holtmann.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ptrace.h>
+#include <linux/poll.h>
+
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/signal.h>
+#include <linux/ioctl.h>
+#include <linux/skbuff.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "hci_uart.h"
+
+/* HCILL commands */
+#define HCILL_GO_TO_SLEEP_IND 0x30
+#define HCILL_GO_TO_SLEEP_ACK 0x31
+#define HCILL_WAKE_UP_IND 0x32
+#define HCILL_WAKE_UP_ACK 0x33
+
+/* HCILL receiver States */
+#define HCILL_W4_PACKET_TYPE 0
+#define HCILL_W4_EVENT_HDR 1
+#define HCILL_W4_ACL_HDR 2
+#define HCILL_W4_SCO_HDR 3
+#define HCILL_W4_DATA 4
+
+/* HCILL states */
+enum hcill_states_e {
+ HCILL_ASLEEP,
+ HCILL_ASLEEP_TO_AWAKE,
+ HCILL_AWAKE,
+ HCILL_AWAKE_TO_ASLEEP
+};
+
+struct hcill_cmd {
+ u8 cmd;
+} __attribute__((packed));
+
+struct ll_struct {
+ unsigned long rx_state;
+ unsigned long rx_count;
+ struct sk_buff *rx_skb;
+ struct sk_buff_head txq;
+ spinlock_t hcill_lock; /* HCILL state lock */
+ unsigned long hcill_state; /* HCILL power state */
+ struct sk_buff_head tx_wait_q; /* HCILL wait queue */
+};
+
+/*
+ * Builds and sends an HCILL command packet.
+ * These are very simple packets with only 1 cmd byte
+ */
+static int send_hcill_cmd(u8 cmd, struct hci_uart *hu)
+{
+ int err = 0;
+ struct sk_buff *skb = NULL;
+ struct ll_struct *ll = hu->priv;
+ struct hcill_cmd *hcill_packet;
+
+ BT_DBG("hu %p cmd 0x%x", hu, cmd);
+
+ /* allocate packet */
+ skb = bt_skb_alloc(1, GFP_ATOMIC);
+ if (!skb) {
+ BT_ERR("cannot allocate memory for HCILL packet");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* prepare packet */
+ hcill_packet = (struct hcill_cmd *) skb_put(skb, 1);
+ hcill_packet->cmd = cmd;
+ skb->dev = (void *) hu->hdev;
+
+ /* send packet */
+ skb_queue_tail(&ll->txq, skb);
+out:
+ return err;
+}
+
+/* Initialize protocol */
+static int ll_open(struct hci_uart *hu)
+{
+ struct ll_struct *ll;
+
+ BT_DBG("hu %p", hu);
+
+ ll = kzalloc(sizeof(*ll), GFP_ATOMIC);
+ if (!ll)
+ return -ENOMEM;
+
+ skb_queue_head_init(&ll->txq);
+ skb_queue_head_init(&ll->tx_wait_q);
+ spin_lock_init(&ll->hcill_lock);
+
+ ll->hcill_state = HCILL_AWAKE;
+
+ hu->priv = ll;
+
+ return 0;
+}
+
+/* Flush protocol data */
+static int ll_flush(struct hci_uart *hu)
+{
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p", hu);
+
+ skb_queue_purge(&ll->tx_wait_q);
+ skb_queue_purge(&ll->txq);
+
+ return 0;
+}
+
+/* Close protocol */
+static int ll_close(struct hci_uart *hu)
+{
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p", hu);
+
+ skb_queue_purge(&ll->tx_wait_q);
+ skb_queue_purge(&ll->txq);
+
+ if (ll->rx_skb)
+ kfree_skb(ll->rx_skb);
+
+ hu->priv = NULL;
+
+ kfree(ll);
+
+ return 0;
+}
+
+/*
+ * internal function, which does common work of the device wake up process:
+ * 1. places all pending packets (waiting in tx_wait_q list) in txq list.
+ * 2. changes internal state to HCILL_AWAKE.
+ * Note: assumes that hcill_lock spinlock is taken,
+ * shouldn't be called otherwise!
+ */
+static void __ll_do_awake(struct ll_struct *ll)
+{
+ struct sk_buff *skb = NULL;
+
+ while ((skb = skb_dequeue(&ll->tx_wait_q)))
+ skb_queue_tail(&ll->txq, skb);
+
+ ll->hcill_state = HCILL_AWAKE;
+}
+
+/*
+ * Called upon a wake-up-indication from the device
+ */
+static void ll_device_want_to_wakeup(struct hci_uart *hu)
+{
+ unsigned long flags;
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p", hu);
+
+ /* lock hcill state */
+ spin_lock_irqsave(&ll->hcill_lock, flags);
+
+ switch (ll->hcill_state) {
+ case HCILL_ASLEEP:
+ /* acknowledge device wake up */
+ if (send_hcill_cmd(HCILL_WAKE_UP_ACK, hu) < 0) {
+ BT_ERR("cannot acknowledge device wake up");
+ goto out;
+ }
+ break;
+ case HCILL_ASLEEP_TO_AWAKE:
+ /*
+ * this state means that a wake-up-indication
+ * is already on its way to the device,
+ * and will serve as the required wake-up-ack
+ */
+ BT_DBG("dual wake-up-indication");
+ break;
+ default:
+		/* any other state is illegal */
+ BT_ERR("received HCILL_WAKE_UP_IND in state %ld", ll->hcill_state);
+ break;
+ }
+
+ /* send pending packets and change state to HCILL_AWAKE */
+ __ll_do_awake(ll);
+
+out:
+ spin_unlock_irqrestore(&ll->hcill_lock, flags);
+
+ /* actually send the packets */
+ hci_uart_tx_wakeup(hu);
+}
+
+/*
+ * Called upon a sleep-indication from the device
+ */
+static void ll_device_want_to_sleep(struct hci_uart *hu)
+{
+ unsigned long flags;
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p", hu);
+
+ /* lock hcill state */
+ spin_lock_irqsave(&ll->hcill_lock, flags);
+
+ /* sanity check */
+ if (ll->hcill_state != HCILL_AWAKE)
+ BT_ERR("ERR: HCILL_GO_TO_SLEEP_IND in state %ld", ll->hcill_state);
+
+ /* acknowledge device sleep */
+ if (send_hcill_cmd(HCILL_GO_TO_SLEEP_ACK, hu) < 0) {
+ BT_ERR("cannot acknowledge device sleep");
+ goto out;
+ }
+
+ /* update state */
+ ll->hcill_state = HCILL_ASLEEP;
+
+out:
+ spin_unlock_irqrestore(&ll->hcill_lock, flags);
+
+ /* actually send the sleep ack packet */
+ hci_uart_tx_wakeup(hu);
+}
+
+/*
+ * Called upon wake-up-acknowledgement from the device
+ */
+static void ll_device_woke_up(struct hci_uart *hu)
+{
+ unsigned long flags;
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p", hu);
+
+ /* lock hcill state */
+ spin_lock_irqsave(&ll->hcill_lock, flags);
+
+ /* sanity check */
+ if (ll->hcill_state != HCILL_ASLEEP_TO_AWAKE)
+ BT_ERR("received HCILL_WAKE_UP_ACK in state %ld", ll->hcill_state);
+
+ /* send pending packets and change state to HCILL_AWAKE */
+ __ll_do_awake(ll);
+
+ spin_unlock_irqrestore(&ll->hcill_lock, flags);
+
+ /* actually send the packets */
+ hci_uart_tx_wakeup(hu);
+}
+
+/* Enqueue frame for transmission (padding, crc, etc) */
+/* may be called from two simultaneous tasklets */
+static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+{
+ unsigned long flags = 0;
+ struct ll_struct *ll = hu->priv;
+
+ BT_DBG("hu %p skb %p", hu, skb);
+
+ /* Prepend skb with frame type */
+ memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
+
+ /* lock hcill state */
+ spin_lock_irqsave(&ll->hcill_lock, flags);
+
+ /* act according to current state */
+ switch (ll->hcill_state) {
+ case HCILL_AWAKE:
+ BT_DBG("device awake, sending normally");
+ skb_queue_tail(&ll->txq, skb);
+ break;
+ case HCILL_ASLEEP:
+ BT_DBG("device asleep, waking up and queueing packet");
+ /* save packet for later */
+ skb_queue_tail(&ll->tx_wait_q, skb);
+ /* awake device */
+ if (send_hcill_cmd(HCILL_WAKE_UP_IND, hu) < 0) {
+ BT_ERR("cannot wake up device");
+ break;
+ }
+ ll->hcill_state = HCILL_ASLEEP_TO_AWAKE;
+ break;
+ case HCILL_ASLEEP_TO_AWAKE:
+ BT_DBG("device waking up, queueing packet");
+ /* transient state; just keep packet for later */
+ skb_queue_tail(&ll->tx_wait_q, skb);
+ break;
+ default:
+ BT_ERR("illegal hcill state: %ld (losing packet)", ll->hcill_state);
+ kfree_skb(skb);
+ break;
+ }
+
+ spin_unlock_irqrestore(&ll->hcill_lock, flags);
+
+ return 0;
+}
+
+static inline int ll_check_data_len(struct ll_struct *ll, int len)
+{
+ register int room = skb_tailroom(ll->rx_skb);
+
+ BT_DBG("len %d room %d", len, room);
+
+ if (!len) {
+ hci_recv_frame(ll->rx_skb);
+ } else if (len > room) {
+ BT_ERR("Data length is too large");
+ kfree_skb(ll->rx_skb);
+ } else {
+ ll->rx_state = HCILL_W4_DATA;
+ ll->rx_count = len;
+ return len;
+ }
+
+ ll->rx_state = HCILL_W4_PACKET_TYPE;
+ ll->rx_skb = NULL;
+ ll->rx_count = 0;
+
+ return 0;
+}
+
+/* Recv data */
+static int ll_recv(struct hci_uart *hu, void *data, int count)
+{
+ struct ll_struct *ll = hu->priv;
+ register char *ptr;
+ struct hci_event_hdr *eh;
+ struct hci_acl_hdr *ah;
+ struct hci_sco_hdr *sh;
+ register int len, type, dlen;
+
+ BT_DBG("hu %p count %d rx_state %ld rx_count %ld", hu, count, ll->rx_state, ll->rx_count);
+
+ ptr = data;
+ while (count) {
+ if (ll->rx_count) {
+ len = min_t(unsigned int, ll->rx_count, count);
+ memcpy(skb_put(ll->rx_skb, len), ptr, len);
+ ll->rx_count -= len; count -= len; ptr += len;
+
+ if (ll->rx_count)
+ continue;
+
+ switch (ll->rx_state) {
+ case HCILL_W4_DATA:
+ BT_DBG("Complete data");
+ hci_recv_frame(ll->rx_skb);
+
+ ll->rx_state = HCILL_W4_PACKET_TYPE;
+ ll->rx_skb = NULL;
+ continue;
+
+ case HCILL_W4_EVENT_HDR:
+ eh = (struct hci_event_hdr *) ll->rx_skb->data;
+
+ BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
+
+ ll_check_data_len(ll, eh->plen);
+ continue;
+
+ case HCILL_W4_ACL_HDR:
+ ah = (struct hci_acl_hdr *) ll->rx_skb->data;
+ dlen = __le16_to_cpu(ah->dlen);
+
+ BT_DBG("ACL header: dlen %d", dlen);
+
+ ll_check_data_len(ll, dlen);
+ continue;
+
+ case HCILL_W4_SCO_HDR:
+ sh = (struct hci_sco_hdr *) ll->rx_skb->data;
+
+ BT_DBG("SCO header: dlen %d", sh->dlen);
+
+ ll_check_data_len(ll, sh->dlen);
+ continue;
+ }
+ }
+
+ /* HCILL_W4_PACKET_TYPE */
+ switch (*ptr) {
+ case HCI_EVENT_PKT:
+ BT_DBG("Event packet");
+ ll->rx_state = HCILL_W4_EVENT_HDR;
+ ll->rx_count = HCI_EVENT_HDR_SIZE;
+ type = HCI_EVENT_PKT;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ BT_DBG("ACL packet");
+ ll->rx_state = HCILL_W4_ACL_HDR;
+ ll->rx_count = HCI_ACL_HDR_SIZE;
+ type = HCI_ACLDATA_PKT;
+ break;
+
+ case HCI_SCODATA_PKT:
+ BT_DBG("SCO packet");
+ ll->rx_state = HCILL_W4_SCO_HDR;
+ ll->rx_count = HCI_SCO_HDR_SIZE;
+ type = HCI_SCODATA_PKT;
+ break;
+
+ /* HCILL signals */
+ case HCILL_GO_TO_SLEEP_IND:
+ BT_DBG("HCILL_GO_TO_SLEEP_IND packet");
+ ll_device_want_to_sleep(hu);
+ ptr++; count--;
+ continue;
+
+ case HCILL_GO_TO_SLEEP_ACK:
+ /* shouldn't happen */
+ BT_ERR("received HCILL_GO_TO_SLEEP_ACK (in state %ld)", ll->hcill_state);
+ ptr++; count--;
+ continue;
+
+ case HCILL_WAKE_UP_IND:
+ BT_DBG("HCILL_WAKE_UP_IND packet");
+ ll_device_want_to_wakeup(hu);
+ ptr++; count--;
+ continue;
+
+ case HCILL_WAKE_UP_ACK:
+ BT_DBG("HCILL_WAKE_UP_ACK packet");
+ ll_device_woke_up(hu);
+ ptr++; count--;
+ continue;
+
+ default:
+ BT_ERR("Unknown HCI packet type %2.2x", (__u8)*ptr);
+ hu->hdev->stat.err_rx++;
+ ptr++; count--;
+ continue;
+		}
+
+ ptr++; count--;
+
+ /* Allocate packet */
+ ll->rx_skb = bt_skb_alloc(HCI_MAX_FRAME_SIZE, GFP_ATOMIC);
+ if (!ll->rx_skb) {
+ BT_ERR("Can't allocate mem for new packet");
+ ll->rx_state = HCILL_W4_PACKET_TYPE;
+ ll->rx_count = 0;
+ return 0;
+ }
+
+ ll->rx_skb->dev = (void *) hu->hdev;
+ bt_cb(ll->rx_skb)->pkt_type = type;
+ }
+
+ return count;
+}
+
+static struct sk_buff *ll_dequeue(struct hci_uart *hu)
+{
+ struct ll_struct *ll = hu->priv;
+ return skb_dequeue(&ll->txq);
+}
+
+static struct hci_uart_proto llp = {
+ .id = HCI_UART_LL,
+ .open = ll_open,
+ .close = ll_close,
+ .recv = ll_recv,
+ .enqueue = ll_enqueue,
+ .dequeue = ll_dequeue,
+ .flush = ll_flush,
+};
+
+int ll_init(void)
+{
+ int err = hci_uart_register_proto(&llp);
+
+ if (!err)
+ BT_INFO("HCILL protocol initialized");
+ else
+ BT_ERR("HCILL protocol registration failed");
+
+ return err;
+}
+
+int ll_deinit(void)
+{
+ return hci_uart_unregister_proto(&llp);
+}
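
Editor's note: HCILL layers a four-state power handshake on top of H4, driven
by the single-byte opcodes defined at the top of the file. Worked example: a
TX while HCILL_ASLEEP queues the skb on tx_wait_q, sends HCILL_WAKE_UP_IND and
enters HCILL_ASLEEP_TO_AWAKE; the device's HCILL_WAKE_UP_ACK then moves
tx_wait_q onto txq via __ll_do_awake(). A condensed view of the transitions
handled above (illustrative only, mirrors the ll_device_* handlers):

	static const char *hcill_transition(unsigned long state, u8 opcode)
	{
		switch (opcode) {
		case HCILL_GO_TO_SLEEP_IND:	/* device going to sleep */
			return "AWAKE -> ASLEEP, send GO_TO_SLEEP_ACK";
		case HCILL_WAKE_UP_IND:		/* device wants to wake */
			return state == HCILL_ASLEEP_TO_AWAKE ?
				"ASLEEP_TO_AWAKE -> AWAKE, pending IND acts as ACK" :
				"ASLEEP -> AWAKE, send WAKE_UP_ACK, flush tx_wait_q";
		case HCILL_WAKE_UP_ACK:		/* device woke up for us */
			return "ASLEEP_TO_AWAKE -> AWAKE, flush tx_wait_q";
		default:
			return "not a power-management opcode";
		}
	}
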
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 1097ce72393..50113db06b9 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -33,12 +33,13 @@
#define HCIUARTGETDEVICE _IOR('U', 202, int)
/* UART protocols */
-#define HCI_UART_MAX_PROTO 4
+#define HCI_UART_MAX_PROTO 5
#define HCI_UART_H4 0
#define HCI_UART_BCSP 1
#define HCI_UART_3WIRE 2
#define HCI_UART_H4DS 3
+#define HCI_UART_LL 4
struct hci_uart;
@@ -85,3 +86,8 @@ int h4_deinit(void);
int bcsp_init(void);
int bcsp_deinit(void);
#endif
+
+#ifdef CONFIG_BT_HCIUART_LL
+int ll_init(void);
+int ll_deinit(void);
+#endif
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 65491103e0f..bf18d757b87 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -613,6 +613,10 @@ config HVC_XEN
help
Xen virtual console device driver
+config VIRTIO_CONSOLE
+ bool
+ select HVC_DRIVER
+
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
depends on PPC_PSERIES
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index c78ff26647e..07304d50e0c 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o
obj-$(CONFIG_N_HDLC) += n_hdlc.o
obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
obj-$(CONFIG_SX) += sx.o generic_serial.o
-obj-$(CONFIG_LGUEST_GUEST) += hvc_lguest.o
obj-$(CONFIG_RIO) += rio/ generic_serial.o
obj-$(CONFIG_HVC_CONSOLE) += hvc_vio.o hvsi.o
obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
@@ -50,6 +49,7 @@ obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
obj-$(CONFIG_HVC_XEN) += hvc_xen.o
+obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MSPEC) += mspec.o
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index d1bd0f08a33..e4f579c3e24 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -1602,8 +1602,8 @@ static void cyz_handle_tx(struct cyclades_port *info,
info->icount.tx++;
}
#endif
-ztxdone:
tty_wakeup(tty);
+ztxdone:
/* Update tx_put */
cy_writel(&buf_ctrl->tx_put, tx_put);
}
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
deleted file mode 100644
index efccb215583..00000000000
--- a/drivers/char/hvc_lguest.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*D:300
- * The Guest console driver
- *
- * This is a trivial console driver: we use lguest's DMA mechanism to send
- * bytes out, and register a DMA buffer to receive bytes in. It is assumed to
- * be present and available from the very beginning of boot.
- *
- * Writing console drivers is one of the few remaining Dark Arts in Linux.
- * Fortunately for us, the path of virtual consoles has been well-trodden by
- * the PowerPC folks, who wrote "hvc_console.c" to generically support any
- * virtual console. We use that infrastructure which only requires us to write
- * the basic put_chars and get_chars functions and call the right register
- * functions.
- :*/
-
-/*M:002 The console can be flooded: while the Guest is processing input the
- * Host can send more. Buffering in the Host could alleviate this, but it is a
- * difficult problem in general. :*/
-/* Copyright (C) 2006 Rusty Russell, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/lguest_bus.h>
-#include <asm/paravirt.h>
-#include "hvc_console.h"
-
-/*D:340 This is our single console input buffer, with associated "struct
- * lguest_dma" referring to it. Note the 0-terminated length array, and the
- * use of physical address for the buffer itself. */
-static char inbuf[256];
-static struct lguest_dma cons_input = { .used_len = 0,
- .addr[0] = __pa(inbuf),
- .len[0] = sizeof(inbuf),
- .len[1] = 0 };
-
-/*D:310 The put_chars() callback is pretty straightforward.
- *
- * First we put the pointer and length in a "struct lguest_dma": we only have
- * one pointer, so we set the second length to 0. Then we use SEND_DMA to send
- * the data to (Host) buffers attached to the console key. Usually a device's
- * key is a physical address within the device's memory, but because the
- * console device doesn't have any associated physical memory, we use the
- * LGUEST_CONSOLE_DMA_KEY constant (aka 0). */
-static int put_chars(u32 vtermno, const char *buf, int count)
-{
- struct lguest_dma dma;
-
- /* FIXME: DMA buffers in a "struct lguest_dma" are not allowed
- * to go over page boundaries. This never seems to happen,
- * but if it did we'd need to fix this code. */
- dma.len[0] = count;
- dma.len[1] = 0;
- dma.addr[0] = __pa(buf);
-
- lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
- /* We're expected to return the amount of data we wrote: all of it. */
- return count;
-}
-
-/*D:350 get_chars() is the callback from the hvc_console infrastructure when
- * an interrupt is received.
- *
- * Firstly we see if our buffer has been filled: if not, we return. The rest
- * of the code deals with the fact that the hvc_console() infrastructure only
- * asks us for 16 bytes at a time. We keep a "cons_offset" variable for
- * partially-read buffers. */
-static int get_chars(u32 vtermno, char *buf, int count)
-{
- static int cons_offset;
-
- /* Nothing left to see here... */
- if (!cons_input.used_len)
- return 0;
-
- /* You want more than we have to give? Well, try wanting less! */
- if (cons_input.used_len - cons_offset < count)
- count = cons_input.used_len - cons_offset;
-
- /* Copy across to their buffer and increment offset. */
- memcpy(buf, inbuf + cons_offset, count);
- cons_offset += count;
-
- /* Finished? Zero offset, and reset cons_input so Host will use it
- * again. */
- if (cons_offset == cons_input.used_len) {
- cons_offset = 0;
- cons_input.used_len = 0;
- }
- return count;
-}
-/*:*/
-
-static struct hv_ops lguest_cons = {
- .get_chars = get_chars,
- .put_chars = put_chars,
-};
-
-/*D:320 Console drivers are initialized very early so boot messages can go
- * out. At this stage, the console is output-only. Our driver checks we're a
- * Guest, and if so hands hvc_instantiate() the console number (0), priority
- * (0), and the struct hv_ops containing the put_chars() function. */
-static int __init cons_init(void)
-{
- if (strcmp(pv_info.name, "lguest") != 0)
- return 0;
-
- return hvc_instantiate(0, 0, &lguest_cons);
-}
-console_initcall(cons_init);
-
-/*D:370 To set up and manage our virtual console, we call hvc_alloc() and
- * stash the result in the private pointer of the "struct lguest_device".
- * Since we never remove the console device we never need this pointer again,
- * but using ->private is considered good form, and you never know who's going
- * to copy your driver.
- *
- * Once the console is set up, we bind our input buffer ready for input. */
-static int lguestcons_probe(struct lguest_device *lgdev)
-{
- int err;
-
- /* The first argument of hvc_alloc() is the virtual console number, so
- * we use zero. The second argument is the interrupt number.
- *
- * The third argument is a "struct hv_ops" containing the put_chars()
- * and get_chars() pointers. The final argument is the output buffer
- * size: we use 256 and expect the Host to have room for us to send
- * that much. */
- lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
- if (IS_ERR(lgdev->private))
- return PTR_ERR(lgdev->private);
-
- /* We bind a single DMA buffer at key LGUEST_CONSOLE_DMA_KEY.
- * "cons_input" is that statically-initialized global DMA buffer we saw
- * above, and we also give the interrupt we want. */
- err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
- lgdev_irq(lgdev));
- if (err)
- printk("lguest console: failed to bind buffer.\n");
- return err;
-}
-/* Note the use of lgdev_irq() for the interrupt number. We tell hvc_alloc()
- * to expect input when this interrupt is triggered, and then tell
- * lguest_bind_dma() that is the interrupt to send us when input comes in. */
-
-/*D:360 From now on the console driver follows standard Guest driver form:
- * register_lguest_driver() registers the device type and probe function, and
- * the probe function sets up the device.
- *
- * The standard "struct lguest_driver": */
-static struct lguest_driver lguestcons_drv = {
- .name = "lguestcons",
- .owner = THIS_MODULE,
- .device_type = LGUEST_DEVICE_T_CONSOLE,
- .probe = lguestcons_probe,
-};
-
-/* The standard init function */
-static int __init hvc_lguest_init(void)
-{
- return register_lguest_driver(&lguestcons_drv);
-}
-module_init(hvc_lguest_init);
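For context on what the removed driver's put_chars() was doing: the old lguest transport described a buffer as a fixed array of (addr, len) segments, terminated by a zero length. A minimal userspace sketch of that descriptor fill, with an assumed layout (the real struct lguest_dma differs in field types and section count):

    #include <stdint.h>
    #include <string.h>

    #define MAX_SECTIONS 8               /* assumed LGUEST_MAX_DMA_SECTIONS */

    struct lguest_dma_model {
        uint16_t len[MAX_SECTIONS];      /* zero-terminated length array */
        uint64_t addr[MAX_SECTIONS];     /* physical addresses */
    };

    /* Mirrors the removed put_chars(): one segment, next length zeroed
     * to terminate the list; a cast stands in for __pa(). */
    static void fill_single(struct lguest_dma_model *dma,
                            const void *buf, uint16_t count)
    {
        memset(dma, 0, sizeof(*dma));
        dma->len[0] = count;
        dma->len[1] = 0;                 /* explicit terminator, as above */
        dma->addr[0] = (uintptr_t)buf;
    }

    int main(void)
    {
        struct lguest_dma_model dma;
        static const char msg[] = "hello";

        fill_single(&dma, msg, sizeof(msg) - 1);
        return dma.len[0] == 5 ? 0 : 1;
    }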
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
new file mode 100644
index 00000000000..100e8a201e3
--- /dev/null
+++ b/drivers/char/virtio_console.c
@@ -0,0 +1,225 @@
+/*D:300
+ * The Guest console driver
+ *
+ * Writing console drivers is one of the few remaining Dark Arts in Linux.
+ * Fortunately for us, the path of virtual consoles has been well-trodden by
+ * the PowerPC folks, who wrote "hvc_console.c" to generically support any
+ * virtual console. We use that infrastructure, which requires us only to
+ * write the basic put_chars and get_chars functions and to call the right
+ * registration functions.
+ :*/
+
+/*M:002 The console can be flooded: while the Guest is processing input the
+ * Host can send more. Buffering in the Host could alleviate this, but it is a
+ * difficult problem in general. :*/
+/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/virtio.h>
+#include <linux/virtio_console.h>
+#include "hvc_console.h"
+
+/*D:340 These represent our input and output console queues, and the virtio
+ * operations for them. */
+static struct virtqueue *in_vq, *out_vq;
+static struct virtio_device *vdev;
+
+/* This is our input buffer (inbuf), our read cursor into it (in), and how
+ * much data is left to hand to hvc (in_len). */
+static unsigned int in_len;
+static char *in, *inbuf;
+
+/* The operations for our console. */
+static struct hv_ops virtio_cons;
+
+/*D:310 The put_chars() callback is pretty straightforward.
+ *
+ * We turn the characters into a scatter-gather list, add it to the output
+ * queue and then kick the Host. Then we sit here waiting for it to finish:
+ * inefficient in theory, but in practice implementations will do it
+ * immediately (lguest's Launcher does). */
+static int put_chars(u32 vtermno, const char *buf, int count)
+{
+ struct scatterlist sg[1];
+ unsigned int len;
+
+ /* This is a convenient routine to initialize a single-element sg list */
+ sg_init_one(sg, buf, count);
+
+ /* add_buf wants a token to identify this buffer: we hand it any
+ * non-NULL pointer, since there's only ever one buffer. */
+ if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
+ /* Tell Host to go! */
+ out_vq->vq_ops->kick(out_vq);
+ /* Chill out until it's done with the buffer. */
+ while (!out_vq->vq_ops->get_buf(out_vq, &len))
+ cpu_relax();
+ }
+
+ /* We're expected to return the amount of data we wrote: all of it. */
+ return count;
+}
+
+/* Create a scatter-gather list representing our input buffer and put it in the
+ * queue. */
+static void add_inbuf(void)
+{
+ struct scatterlist sg[1];
+ sg_init_one(sg, inbuf, PAGE_SIZE);
+
+ /* We should always be able to add one buffer to an empty queue. */
+ if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
+ BUG();
+ in_vq->vq_ops->kick(in_vq);
+}
+
+/*D:350 get_chars() is the callback from the hvc_console infrastructure when
+ * an interrupt is received.
+ *
+ * Most of the code deals with the fact that the hvc_console() infrastructure
+ * only asks us for 16 bytes at a time. We keep the "in" pointer and the
+ * "in_len" count for partially-consumed buffers. */
+static int get_chars(u32 vtermno, char *buf, int count)
+{
+ /* If we don't have an input queue yet, we can't get input. */
+ BUG_ON(!in_vq);
+
+ /* No buffer? Try to get one. */
+ if (!in_len) {
+ in = in_vq->vq_ops->get_buf(in_vq, &in_len);
+ if (!in)
+ return 0;
+ }
+
+ /* You want more than we have to give? Well, try wanting less! */
+ if (in_len < count)
+ count = in_len;
+
+ /* Copy across to their buffer and advance our read pointer. */
+ memcpy(buf, in, count);
+ in += count;
+ in_len -= count;
+
+ /* Finished? Re-register buffer so Host will use it again. */
+ if (in_len == 0)
+ add_inbuf();
+
+ return count;
+}
+/*:*/
+
+/*D:320 Console drivers are initialized very early so boot messages can go out,
+ * so we do things slightly differently from the generic virtio initialization
+ * of the net and block drivers.
+ *
+ * At this stage, the console is output-only. It's too early to set up a
+ * virtqueue, so we let the drivers do some boutique early-output thing. */
+int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
+{
+ virtio_cons.put_chars = put_chars;
+ return hvc_instantiate(0, 0, &virtio_cons);
+}
+
+/*D:370 Once we're further in boot, we get probed like any other virtio device.
+ * At this stage we set up the output virtqueue.
+ *
+ * To set up and manage our virtual console, we call hvc_alloc(). Since we
+ * never remove the console device we never need this pointer again.
+ *
+ * Finally we put our input buffer in the input queue, ready to receive. */
+static int virtcons_probe(struct virtio_device *dev)
+{
+ int err;
+ struct hvc_struct *hvc;
+
+ vdev = dev;
+
+ /* This is the scratch page we use to receive console input */
+ inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!inbuf) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /* Find the input queue. */
+ /* FIXME: This is why we want to wean off hvc: we do nothing
+ * when input comes in. */
+ in_vq = vdev->config->find_vq(vdev, NULL);
+ if (IS_ERR(in_vq)) {
+ err = PTR_ERR(in_vq);
+ goto free;
+ }
+
+ out_vq = vdev->config->find_vq(vdev, NULL);
+ if (IS_ERR(out_vq)) {
+ err = PTR_ERR(out_vq);
+ goto free_in_vq;
+ }
+
+ /* Start using the new console output. */
+ virtio_cons.get_chars = get_chars;
+ virtio_cons.put_chars = put_chars;
+
+ /* The first argument of hvc_alloc() is the virtual console number, so
+ * we use zero. The second argument is the interrupt number; we
+ * currently leave this as zero: it would be better not to use the
+ * hvc mechanism and fix this (FIXME!).
+ *
+ * The third argument is a "struct hv_ops" containing the put_chars()
+ * and get_chars() pointers. The final argument is the output buffer
+ * size: we can do any size, so we put PAGE_SIZE here. */
+ hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
+ if (IS_ERR(hvc)) {
+ err = PTR_ERR(hvc);
+ goto free_out_vq;
+ }
+
+ /* Register the input buffer the first time. */
+ add_inbuf();
+ return 0;
+
+free_out_vq:
+ vdev->config->del_vq(out_vq);
+free_in_vq:
+ vdev->config->del_vq(in_vq);
+free:
+ kfree(inbuf);
+fail:
+ return err;
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_CONSOLE, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct virtio_driver virtio_console = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtcons_probe,
+};
+
+static int __init init(void)
+{
+ return register_virtio_driver(&virtio_console);
+}
+module_init(init);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio console driver");
+MODULE_LICENSE("GPL");
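Since get_chars() above is the subtlest part of the new driver -- hvc_console only asks for 16 bytes at a time, so one received buffer is handed back over several calls -- here is a minimal userspace sketch of that bookkeeping. Names are hypothetical: chunk_read stands in for get_chars(), and the statics model the driver's in/in_len.

    #include <stdio.h>
    #include <string.h>

    static const char *in;       /* models the driver's "in" pointer    */
    static size_t in_len;        /* models the driver's "in_len" count  */

    static size_t chunk_read(char *buf, size_t count)
    {
        if (in_len < count)      /* want more than we have? want less!  */
            count = in_len;
        memcpy(buf, in, count);
        in += count;             /* advance the read cursor             */
        in_len -= count;
        return count;
    }

    int main(void)
    {
        static const char msg[] = "a longer message is drained in 16-byte calls";
        char buf[16];
        size_t n;

        in = msg;
        in_len = sizeof(msg) - 1;
        while ((n = chunk_read(buf, sizeof(buf))) > 0)
            printf("got %zu bytes: %.*s\n", n, (int)n, buf);
        return 0;
    }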
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 2f307c4df33..67588326ae5 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -606,7 +606,7 @@ static int
at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
{
struct fw_ohci *ohci = ctx->ohci;
- dma_addr_t d_bus, payload_bus;
+ dma_addr_t d_bus, uninitialized_var(payload_bus);
struct driver_data *driver_data;
struct descriptor *d, *last;
__le32 *header;
@@ -1459,7 +1459,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size)
/* FIXME: We need a fallback for pre 1.1 OHCI. */
if (callback == handle_ir_dualbuffer_packet &&
ohci->version < OHCI_VERSION_1_1)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOSYS);
spin_lock_irqsave(&ohci->lock, flags);
index = ffs(*mask) - 1;
@@ -1778,7 +1778,7 @@ ohci_queue_iso(struct fw_iso_context *base,
buffer, payload);
else
/* FIXME: Implement fallback for OHCI 1.0 controllers. */
- return -EINVAL;
+ return -ENOSYS;
}
static const struct fw_card_driver ohci_driver = {
@@ -1898,7 +1898,12 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n",
dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff);
-
+ if (ohci->version < OHCI_VERSION_1_1) {
+ fw_notify(" Isochronous I/O is not yet implemented for "
+ "OHCI 1.0 chips.\n");
+ fw_notify(" Cameras, audio devices etc. won't work on "
+ "this controller with this driver version.\n");
+ }
return 0;
fail_self_id:
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index ff20377b4c8..e196aefa207 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -935,11 +935,11 @@ static int cris_ide_build_dmatable (ide_drive_t *drive)
* than two possibly non-adjacent physical 4kB pages.
*/
/* group sequential buffers into one large buffer */
- addr = page_to_phys(sg->page) + sg->offset;
+ addr = sg_phys(sg);
size = sg_dma_len(sg);
while (--i) {
sg = sg_next(sg);
- if ((addr + size) != page_to_phys(sg->page) + sg->offset)
+ if ((addr + size) != sg_phys(sg))
break;
size += sg_dma_len(sg);
}
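The scatterlist conversions in this and the following hunks are mechanical: in this era's linux/scatterlist.h, sg_phys(sg) and sg_virt(sg) are (roughly) page_to_phys(sg_page(sg)) + sg->offset and page_address(sg_page(sg)) + sg->offset, i.e. exactly the open-coded expressions being replaced. A minimal userspace model with simplified stand-in types:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's types; illustrative only. */
    struct page { uintptr_t pfn; };
    struct scatterlist { struct page *page; unsigned int offset; };

    #define PAGE_SHIFT 12

    static uintptr_t page_to_phys(struct page *p) { return p->pfn << PAGE_SHIFT; }
    static struct page *sg_page(struct scatterlist *sg) { return sg->page; }

    /* sg_phys() is just the expression these hunks replace;
     * sg_virt() is the analogous page_address()-based form. */
    static uintptr_t sg_phys(struct scatterlist *sg)
    {
        return page_to_phys(sg_page(sg)) + sg->offset;
    }

    int main(void)
    {
        struct page pg = { .pfn = 0x1234 };
        struct scatterlist sg = { .page = &pg, .offset = 0x80 };

        printf("phys=%#lx\n", (unsigned long)sg_phys(&sg));
        return 0;
    }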
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index d5146c57e5b..6a6f2e066b4 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -47,6 +47,7 @@
#include <linux/spinlock.h>
#include <linux/kmod.h>
#include <linux/pci.h>
+#include <linux/scatterlist.h>
#include <asm/byteorder.h>
#include <asm/irq.h>
@@ -1317,12 +1318,14 @@ static int hwif_init(ide_hwif_t *hwif)
if (!hwif->sg_max_nents)
hwif->sg_max_nents = PRD_ENTRIES;
- hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
+ hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
GFP_KERNEL);
if (!hwif->sg_table) {
printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name);
goto out;
}
+
+ sg_init_table(hwif->sg_table, hwif->sg_max_nents);
if (init_irq(hwif) == 0)
goto done;
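The kzalloc-to-kmalloc change here only makes sense together with the new sg_init_table() call: with chained scatterlists, a table needs an explicit end marker in its last entry, so a plain zeroed allocation is no longer a valid table. A rough userspace model, with an assumed flag layout:

    #include <stdlib.h>
    #include <string.h>

    #define SG_END 0x2UL   /* illustrative flag; the real marker lives in page_link */

    /* Simplified stand-in for the kernel's struct scatterlist. */
    struct scatterlist { unsigned long page_link; unsigned int offset, length; };

    static void sg_init_table(struct scatterlist *sgl, unsigned int nents)
    {
        memset(sgl, 0, sizeof(*sgl) * nents);
        sgl[nents - 1].page_link |= SG_END;   /* terminate the list */
    }

    int main(void)
    {
        unsigned int nents = 8;
        struct scatterlist *tbl = malloc(sizeof(*tbl) * nents);

        if (!tbl)
            return 1;
        sg_init_table(tbl, nents);   /* zeroing alone would miss the marker */
        free(tbl);
        return 0;
    }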
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 73ef6bf5fbc..d066546f283 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -261,7 +261,7 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
hwif->cursg = sg;
}
- page = cursg->page;
+ page = sg_page(cursg);
offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE;
/* get the current page and offset */
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 1de58566e5b..a4ce3ba15d6 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -276,8 +276,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
if (iswrite) {
if(!put_source_flags(ahwif->tx_chan,
- (void*)(page_address(sg->page)
- + sg->offset),
+ (void*) sg_virt(sg),
tc, flags)) {
printk(KERN_ERR "%s failed %d\n",
__FUNCTION__, __LINE__);
@@ -285,8 +284,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
} else
{
if(!put_dest_flags(ahwif->rx_chan,
- (void*)(page_address(sg->page)
- + sg->offset),
+ (void*) sg_virt(sg),
tc, flags)) {
printk(KERN_ERR "%s failed %d\n",
__FUNCTION__, __LINE__);
diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c
index 45d60558192..3051e312fdc 100644
--- a/drivers/ieee1394/dma.c
+++ b/drivers/ieee1394/dma.c
@@ -12,7 +12,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include "dma.h"
@@ -111,7 +111,7 @@ int dma_region_alloc(struct dma_region *dma, unsigned long n_bytes,
unsigned long va =
(unsigned long)dma->kvirt + (i << PAGE_SHIFT);
- dma->sglist[i].page = vmalloc_to_page((void *)va);
+ sg_set_page(&dma->sglist[i], vmalloc_to_page((void *)va));
dma->sglist[i].length = PAGE_SIZE;
}
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 1b353b964b3..d5dfe11aa5c 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1466,7 +1466,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb,
cmd->dma_size = sgpnt[0].length;
cmd->dma_type = CMD_DMA_PAGE;
cmd->cmd_dma = dma_map_page(hi->host->device.parent,
- sgpnt[0].page, sgpnt[0].offset,
+ sg_page(&sgpnt[0]), sgpnt[0].offset,
cmd->dma_size, cmd->dma_dir);
orb->data_descriptor_lo = cmd->cmd_dma;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d08fb30768b..0751697ef98 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -114,13 +114,16 @@ struct rdma_id_private {
struct rdma_bind_list *bind_list;
struct hlist_node node;
- struct list_head list;
- struct list_head listen_list;
+ struct list_head list; /* listen_any_list or cma_device.list */
+ struct list_head listen_list; /* per device listens */
struct cma_device *cma_dev;
struct list_head mc_list;
+ int internal_id;
enum cma_state state;
spinlock_t lock;
+ struct mutex qp_mutex;
+
struct completion comp;
atomic_t refcount;
wait_queue_head_t wait_remove;
@@ -389,6 +392,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
spin_lock_init(&id_priv->lock);
+ mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +478,86 @@ EXPORT_SYMBOL(rdma_create_qp);
void rdma_destroy_qp(struct rdma_cm_id *id)
{
- ib_destroy_qp(id->qp);
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ ib_destroy_qp(id_priv->id.qp);
+ id_priv->id.qp = NULL;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
/* Need to update QP attributes from default values. */
qp_attr.qp_state = IB_QPS_INIT;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
if (ret)
- return ret;
+ goto out;
qp_attr.qp_state = IB_QPS_RTR;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
qp_attr.qp_state = IB_QPS_RTS;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
+ int ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
qp_attr.qp_state = IB_QPS_ERR;
- return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -717,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
- return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
- cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
- cma_exch(id_priv, CMA_DESTROYING);
-
- if (id_priv->cma_dev) {
- switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
- case RDMA_TRANSPORT_IB:
- if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
- ib_destroy_cm_id(id_priv->cm_id.ib);
- break;
- case RDMA_TRANSPORT_IWARP:
- if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
- iw_destroy_cm_id(id_priv->cm_id.iw);
- break;
- default:
- break;
- }
- cma_detach_from_dev(id_priv);
- }
- list_del(&id_priv->listen_list);
-
- cma_deref_id(id_priv);
- wait_for_completion(&id_priv->comp);
-
- kfree(id_priv);
-}
-
static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ /*
+ * Remove from listen_any_list to prevent added devices from spawning
+ * additional listen requests.
+ */
mutex_lock(&lock);
list_del(&id_priv->list);
while (!list_empty(&id_priv->listen_list)) {
dev_id_priv = list_entry(id_priv->listen_list.next,
struct rdma_id_private, listen_list);
- cma_destroy_listen(dev_id_priv);
+ /* sync with device removal to avoid duplicate destruction */
+ list_del_init(&dev_id_priv->list);
+ list_del(&dev_id_priv->listen_list);
+ mutex_unlock(&lock);
+
+ rdma_destroy_id(&dev_id_priv->id);
+ mutex_lock(&lock);
}
mutex_unlock(&lock);
}
@@ -848,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv);
wait_for_completion(&id_priv->comp);
+ if (id_priv->internal_id)
+ cma_deref_id(id_priv->id.context);
+
kfree(id_priv->id.route.path_rec);
kfree(id_priv);
}
@@ -857,11 +866,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
{
int ret;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto reject;
- ret = cma_modify_qp_rts(&id_priv->id);
+ ret = cma_modify_qp_rts(id_priv);
if (ret)
goto reject;
@@ -871,7 +880,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
- cma_modify_qp_err(&id_priv->id);
+ cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -947,7 +956,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
- cma_modify_qp_err(&id_priv->id);
+ cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
event.param.conn.private_data = ib_event->private_data;
@@ -1404,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+ atomic_inc(&id_priv->refcount);
+ dev_id_priv->internal_id = 1;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
- goto err;
-
- return;
-err:
- cma_destroy_listen(dev_id_priv);
+ printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+ "listening on device %s", ret, cma_dev->device->name);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2264,7 +2272,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
cm_id->remote_addr = *sin;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto out;
@@ -2331,7 +2339,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
int qp_attr_mask, ret;
if (id_priv->id.qp) {
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto out;
@@ -2370,7 +2378,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
return ret;
@@ -2442,7 +2450,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
return 0;
reject:
- cma_modify_qp_err(id);
+ cma_modify_qp_err(id_priv);
rdma_reject(id, NULL, 0);
return ret;
}
@@ -2512,7 +2520,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = cma_modify_qp_err(id);
+ ret = cma_modify_qp_err(id_priv);
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
@@ -2543,9 +2551,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
return 0;
+ mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
multicast->rec.mlid);
+ mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
event.status = status;
@@ -2757,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
id_priv = list_entry(cma_dev->id_list.next,
struct rdma_id_private, list);
- if (cma_internal_listen(id_priv)) {
- cma_destroy_listen(id_priv);
- continue;
- }
-
+ list_del(&id_priv->listen_list);
list_del_init(&id_priv->list);
atomic_inc(&id_priv->refcount);
mutex_unlock(&lock);
- ret = cma_remove_id_dev(id_priv);
+ ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
cma_deref_id(id_priv);
if (ret)
rdma_destroy_id(&id_priv->id);
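The recurring theme of this cma.c hunk set is the new qp_mutex: every cma_modify_qp_*() helper and rdma_destroy_qp() now test and change id.qp under the same lock, so a state transition can no longer race the QP being destroyed and freed underneath it. A pthreads sketch of the pattern (a model, not the RDMA API itself):

    #include <pthread.h>
    #include <stddef.h>

    /* Simplified stand-in for struct rdma_id_private. */
    struct id_priv {
        pthread_mutex_t qp_mutex;
        void *qp;                /* NULL once destroyed */
    };

    static int modify_qp(struct id_priv *p)
    {
        int ret = 0;

        pthread_mutex_lock(&p->qp_mutex);
        if (!p->qp)              /* destroyed under us? then a no-op */
            goto out;
        /* ...the ib_modify_qp() equivalent would run here... */
    out:
        pthread_mutex_unlock(&p->qp_mutex);
        return ret;
    }

    static void destroy_qp(struct id_priv *p)
    {
        pthread_mutex_lock(&p->qp_mutex);
        /* ...the ib_destroy_qp() equivalent... */
        p->qp = NULL;            /* later modifies see this and bail */
        pthread_mutex_unlock(&p->qp_mutex);
    }

    int main(void)
    {
        struct id_priv p = { PTHREAD_MUTEX_INITIALIZER, &p };

        modify_qp(&p);           /* sees a live QP */
        destroy_qp(&p);          /* clears p.qp under the lock */
        modify_qp(&p);           /* now a guarded no-op, not a crash */
        return 0;
    }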
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2f54e29dc7a..14159ff2940 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,9 +55,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
ib_dma_unmap_sg(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->nents; ++i) {
+ struct page *page = sg_page(&chunk->page_list[i]);
+
if (umem->writable && dirty)
- set_page_dirty_lock(chunk->page_list[i].page);
- put_page(chunk->page_list[i].page);
+ set_page_dirty_lock(page);
+ put_page(page);
}
kfree(chunk);
@@ -164,11 +166,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
}
chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
- chunk->page_list[i].page = page_list[i + off];
+ sg_set_page(&chunk->page_list[i], page_list[i + off]);
chunk->page_list[i].offset = 0;
chunk->page_list[i].length = PAGE_SIZE;
}
@@ -179,7 +182,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
DMA_BIDIRECTIONAL);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
- put_page(chunk->page_list[i].page);
+ put_page(sg_page(&chunk->page_list[i]));
kfree(chunk);
ret = -ENOMEM;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 01d70084aeb..495c803fb11 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -147,8 +147,12 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
spin_lock(&ib_uverbs_idr_lock);
uobj = idr_find(idr, id);
- if (uobj)
- kref_get(&uobj->ref);
+ if (uobj) {
+ if (uobj->context == context)
+ kref_get(&uobj->ref);
+ else
+ uobj = NULL;
+ }
spin_unlock(&ib_uverbs_idr_lock);
return uobj;
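The point of this uverbs hunk is isolation rather than refcounting: an idr id can resolve to an object created under another process's ucontext, and the fix treats that case as a failed lookup. A hedged userspace model of the check:

    #include <assert.h>
    #include <stddef.h>

    /* Simplified stand-in for struct ib_uobject. */
    struct uobj { void *context; int refs; };

    static struct uobj *lookup_checked(struct uobj *found, void *caller_ctx)
    {
        if (!found || found->context != caller_ctx)
            return NULL;   /* someone else's object: act as if absent */
        found->refs++;     /* kref_get() in the real code */
        return found;
    }

    int main(void)
    {
        int ctx_a, ctx_b;
        struct uobj obj = { .context = &ctx_a, .refs = 1 };

        assert(lookup_checked(&obj, &ctx_a) == &obj);
        assert(lookup_checked(&obj, &ctx_b) == NULL);
        return 0;
    }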
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3f2d68cff76..2d660ae189e 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -323,7 +323,6 @@ extern int ehca_static_rate;
extern int ehca_port_act_time;
extern int ehca_use_hp_mr;
extern int ehca_scaling_code;
-extern int ehca_mr_largepage;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 4aa3ffa6a19..15806d14046 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -77,6 +77,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
memset(props, 0, sizeof(struct ib_device_attr));
+ props->page_size_cap = shca->hca_cap_mr_pgsize;
props->fw_ver = rblock->hw_ver;
props->max_mr_size = rblock->max_mr_size;
props->vendor_id = rblock->vendor_id >> 8;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 7a7dab890f6..c6cd38c5321 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -65,7 +65,7 @@ int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
-int ehca_mr_largepage = 0;
+int ehca_mr_largepage = 1;
module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -260,13 +260,20 @@ static struct cap_descr {
{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
};
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
{
int i, ret = 0;
u64 h_ret;
struct hipz_query_hca *rblock;
struct hipz_query_port *port;
+ static const u32 pgsize_map[] = {
+ HCA_CAP_MR_PGSIZE_4K, 0x1000,
+ HCA_CAP_MR_PGSIZE_64K, 0x10000,
+ HCA_CAP_MR_PGSIZE_1M, 0x100000,
+ HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+ };
+
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_gen_err("Cannot allocate rblock memory.");
@@ -329,8 +336,15 @@ int ehca_sense_attributes(struct ehca_shca *shca)
if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
- shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+ /* translate supported MR page sizes; always support 4K */
+ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+ if (ehca_mr_largepage) { /* support extra sizes only if enabled */
+ for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+ if (rblock->memory_page_size_supported & pgsize_map[i])
+ shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+ }
+ /* query max MTU from first port -- it's the same for all ports */
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738265e..e239bbf54da 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -72,24 +72,14 @@ enum ehca_mr_pgsize {
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
- u32 idx = 0;
- pgsize >>= 12;
- /*
- * map mr page size into hw code:
- * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
- */
- while (!(pgsize & 1)) {
- idx++;
- pgsize >>= 4;
- }
- return idx;
+ int log = ilog2(pgsize);
+ WARN_ON(log < 12 || log > 24 || log & 3);
+ return (log - 12) / 4;
}
static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
{
- if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
- return EHCA_MR_PGSIZE16M;
- return EHCA_MR_PGSIZE4K;
+ return 1UL << ilog2(shca->hca_cap_mr_pgsize);
}
static struct ehca_mr *ehca_mr_new(void)
@@ -259,7 +249,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
pginfo.u.phy.num_phys_buf = num_phys_buf;
pginfo.u.phy.phys_buf_array = phys_buf_array;
pginfo.next_hwpage =
- ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -296,7 +286,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_mr_pginfo pginfo;
- int ret;
+ int ret, page_shift;
u32 num_kpages;
u32 num_hwpages;
u64 hwpage_size;
@@ -351,19 +341,20 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
/* determine number of MR pages */
num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
/* select proper hw_pgsize */
- if (ehca_mr_largepage &&
- (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
- int page_shift = PAGE_SHIFT;
- if (e_mr->umem->hugetlb) {
- /* determine page_shift, clamp between 4K and 16M */
- page_shift = (fls64(length - 1) + 3) & ~3;
- page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
- EHCA_MR_PGSHIFT16M);
- }
- hwpage_size = 1UL << page_shift;
- } else
- hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
- ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
+ page_shift = PAGE_SHIFT;
+ if (e_mr->umem->hugetlb) {
+ /* determine page_shift, clamp between 4K and 16M */
+ page_shift = (fls64(length - 1) + 3) & ~3;
+ page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+ EHCA_MR_PGSHIFT16M);
+ }
+ hwpage_size = 1UL << page_shift;
+
+ /* now that we have the desired page size, shift until it's
+ * supported, too. 4K is always supported, so this terminates.
+ */
+ while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+ hwpage_size >>= 4;
reg_user_mr_fallback:
num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
@@ -547,7 +538,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
pginfo.u.phy.num_phys_buf = num_phys_buf;
pginfo.u.phy.phys_buf_array = phys_buf_array;
pginfo.next_hwpage =
- ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
}
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
new_acl = mr_access_flags;
@@ -809,8 +800,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
ib_fmr = ERR_PTR(-EINVAL);
goto alloc_fmr_exit0;
}
- hw_pgsize = ehca_get_max_hwpage_size(shca);
- if ((1 << fmr_attr->page_shift) != hw_pgsize) {
+
+ hw_pgsize = 1 << fmr_attr->page_shift;
+ if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
fmr_attr->page_shift);
ib_fmr = ERR_PTR(-EINVAL);
@@ -826,6 +818,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
/* register MR on HCA */
memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.hwpage_size = hw_pgsize;
/*
* pginfo.num_hwpages==0, ie register_rpages() will not be called
* but deferred to map_phys_fmr()
@@ -1776,7 +1769,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1825,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1860,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+ pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT );
*kpage = phys_to_abs(pgaddr);
if ( !(*kpage) ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index e2bd62be11e..de182648b28 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -451,7 +451,6 @@ static struct ehca_qp *internal_create_qp(
has_srq = 1;
parms.ext_type = EQPT_SRQBASE;
parms.srq_qpn = my_srq->real_qp_num;
- parms.srq_token = my_srq->token;
}
if (is_llqp && has_srq) {
@@ -583,6 +582,9 @@ static struct ehca_qp *internal_create_qp(
goto create_qp_exit1;
}
+ if (has_srq)
+ parms.srq_token = my_qp->token;
+
parms.servicetype = ibqptype2servicetype(qp_type);
if (parms.servicetype < 0) {
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 22709a4f8fc..e90a0ea538a 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -108,7 +108,7 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
BUG_ON(!valid_dma_direction(direction));
for_each_sg(sgl, sg, nents, i) {
- addr = (u64) page_address(sg->page);
+ addr = (u64) page_address(sg_page(sg));
/* TODO: handle highmem pages */
if (!addr) {
ret = 0;
@@ -127,7 +127,7 @@ static void ipath_unmap_sg(struct ib_device *dev,
static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
{
- u64 addr = (u64) page_address(sg->page);
+ u64 addr = (u64) page_address(sg_page(sg));
if (addr)
addr += sg->offset;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index e442470a237..db4ba92f79f 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -225,7 +225,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for (i = 0; i < chunk->nents; i++) {
void *vaddr;
- vaddr = page_address(chunk->page_list[i].page);
+ vaddr = page_address(sg_page(&chunk->page_list[i]));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 31a480e5b0d..6b3322486b5 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -63,6 +63,10 @@ struct mlx4_ib_sqp {
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};
+enum {
+ MLX4_IB_MIN_SQ_STRIDE = 6
+};
+
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
@@ -285,9 +289,17 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
return 0;
}
-static int set_user_sq_size(struct mlx4_ib_qp *qp,
+static int set_user_sq_size(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp,
struct mlx4_ib_create_qp *ucmd)
{
+ /* Sanity check SQ size before proceeding */
+ if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
+ ucmd->log_sq_stride >
+ ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+ return -EINVAL;
+
qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -330,7 +342,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->sq_no_prefetch = ucmd.sq_no_prefetch;
- err = set_user_sq_size(qp, &ucmd);
+ err = set_user_sq_size(dev, qp, &ucmd);
if (err)
goto err;
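set_user_sq_size() now range-checks the user-supplied, log-encoded values before shifting by them, since a hostile log_sq_bb_count or log_sq_stride could otherwise oversize the queue or shift out of range. A userspace sketch with illustrative caps (not the mlx4 values):

    #include <stdbool.h>

    /* Illustrative caps, not the mlx4 values. */
    #define MAX_WQES        16384   /* assumed dev->caps.max_wqes       */
    #define MAX_LOG_STRIDE  10      /* assumed log2(max_sq_desc_sz)     */
    #define MIN_LOG_STRIDE  6       /* as in MLX4_IB_MIN_SQ_STRIDE      */

    /* Validate log-encoded sizes from userspace before shifting by them. */
    static bool sq_size_valid(unsigned int log_bb_count, unsigned int log_stride)
    {
        return log_bb_count < 31 &&                /* keep the shift defined */
               (1U << log_bb_count) <= MAX_WQES &&
               log_stride <= MAX_LOG_STRIDE &&
               log_stride >= MIN_LOG_STRIDE;
    }

    int main(void)
    {
        return sq_size_valid(10, 6) && !sq_size_valid(30, 5) ? 0 : 1;
    }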
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index be6e1e03bda..6bd9f139334 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -204,16 +204,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
int incr)
{
- __be32 doorbell[2];
-
if (mthca_is_memfree(dev)) {
*cq->set_ci_db = cpu_to_be32(cq->cons_index);
wmb();
} else {
- doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
- doorbell[1] = cpu_to_be32(incr - 1);
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
dev->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
/*
@@ -731,17 +726,12 @@ repoll:
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
{
- __be32 doorbell[2];
+ u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+ MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+ to_mcq(cq)->cqn;
- doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) ==
- IB_CQ_SOLICITED ?
- MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
- MTHCA_TAVOR_CQ_DB_REQ_NOT) |
- to_mcq(cq)->cqn);
- doorbell[1] = (__force __be32) 0xffffffff;
-
- mthca_write64(doorbell,
- to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+ mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
return 0;
@@ -750,19 +740,16 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mthca_cq *cq = to_mcq(ibcq);
- __be32 doorbell[2];
- u32 sn;
- __be32 ci;
-
- sn = cq->arm_sn & 3;
- ci = cpu_to_be32(cq->cons_index);
+ __be32 db_rec[2];
+ u32 dbhi;
+ u32 sn = cq->arm_sn & 3;
- doorbell[0] = ci;
- doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
- ((flags & IB_CQ_SOLICITED_MASK) ==
- IB_CQ_SOLICITED ? 1 : 2));
+ db_rec[0] = cpu_to_be32(cq->cons_index);
+ db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+ ((flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ? 1 : 2));
- mthca_write_db_rec(doorbell, cq->arm_db);
+ mthca_write_db_rec(db_rec, cq->arm_db);
/*
* Make sure that the doorbell record in host memory is
@@ -770,14 +757,12 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
*/
wmb();
- doorbell[0] = cpu_to_be32((sn << 28) |
- ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
- MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
- MTHCA_ARBEL_CQ_DB_REQ_NOT) |
- cq->cqn);
- doorbell[1] = ci;
+ dbhi = (sn << 28) |
+ ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+ MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
- mthca_write64(doorbell,
+ mthca_write64(dbhi, cq->cons_index,
to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index dd9a44d170c..b374dc395be 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -58,10 +58,10 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
__raw_writeq((__force u64) val, dest);
}
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
- __raw_writeq(*(u64 *) val, dest);
+ __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
}
static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
@@ -87,14 +87,17 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
__raw_writel(((__force u32 *) &val)[1], dest + 4);
}
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
unsigned long flags;
+ hi = (__force u32) cpu_to_be32(hi);
+ lo = (__force u32) cpu_to_be32(lo);
+
spin_lock_irqsave(doorbell_lock, flags);
- __raw_writel((__force u32) val[0], dest);
- __raw_writel((__force u32) val[1], dest + 4);
+ __raw_writel(hi, dest);
+ __raw_writel(lo, dest + 4);
spin_unlock_irqrestore(doorbell_lock, flags);
}
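The new mthca_write64(hi, lo, ...) signature is behavior-preserving: composing the halves as cpu_to_be64((u64)hi << 32 | lo) emits the same byte stream as the two cpu_to_be32() word writes of the 32-bit fallback. A userspace check, with htonl() and manual serialization standing in for the kernel helpers:

    #include <arpa/inet.h>   /* htonl() stands in for cpu_to_be32() */
    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint32_t hi = 0x12345678, lo = 0x9abcdef0;
        uint64_t v = (uint64_t)hi << 32 | lo;
        uint32_t be_hi = htonl(hi), be_lo = htonl(lo);
        unsigned char a[8], b[8];
        int i;

        /* 64-bit path: one big-endian quadword, MSB first. */
        for (i = 0; i < 8; i++)
            a[i] = v >> ((7 - i) * 8);

        /* 32-bit path: two big-endian words written back to back. */
        memcpy(b, &be_hi, 4);
        memcpy(b + 4, &be_lo, 4);

        assert(memcmp(a, b, 8) == 0);
        return 0;
    }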
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8592b26dc4e..b29de51b7f3 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -173,11 +173,6 @@ static inline u64 async_mask(struct mthca_dev *dev)
static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
- __be32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
- doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
-
/*
* This barrier makes sure that all updates to ownership bits
* done by set_eqe_hw() hit memory before the consumer index
@@ -187,7 +182,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
* having set_eqe_hw() overwrite the owner field.
*/
wmb();
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -212,12 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
{
- __be32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
- doorbell[1] = 0;
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -230,12 +220,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
if (!mthca_is_memfree(dev)) {
- __be32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
- doorbell[1] = cpu_to_be32(cqn);
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index e61f3e62698..007b38157fc 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -71,7 +71,7 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
+ __free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}
@@ -81,7 +81,7 @@ static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chun
for (i = 0; i < chunk->npages; ++i) {
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
- lowmem_page_address(chunk->mem[i].page),
+ lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
}
@@ -107,10 +107,13 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
- mem->page = alloc_pages(gfp_mask, order);
- if (!mem->page)
+ struct page *page;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
return -ENOMEM;
+ sg_set_page(mem, page);
mem->length = PAGE_SIZE << order;
mem->offset = 0;
return 0;
@@ -157,6 +160,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
if (!chunk)
goto fail;
+ sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
@@ -304,7 +308,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_h
* so if we found the page, dma_handle has already
* been assigned to. */
if (chunk->mem[i].length > offset) {
- page = chunk->mem[i].page;
+ page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
@@ -445,6 +449,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
+ struct page *pages[1];
int ret = 0;
u8 status;
int i;
@@ -472,16 +477,17 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
}
ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
- &db_tab->page[i].mem.page, NULL);
+ pages, NULL);
if (ret < 0)
goto out;
+ sg_set_page(&db_tab->page[i].mem, pages[0]);
db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
- put_page(db_tab->page[i].mem.page);
+ put_page(pages[0]);
goto out;
}
@@ -491,7 +497,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
ret = -EINVAL;
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(db_tab->page[i].mem.page);
+ put_page(sg_page(&db_tab->page[i].mem));
goto out;
}
@@ -557,7 +563,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(db_tab->page[i].mem.page);
+ put_page(sg_page(&db_tab->page[i].mem));
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index df01b2026a6..0e5461c6573 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1799,15 +1799,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (likely(nreq)) {
- __be32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
- qp->send_wqe_offset) | f0 | op0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
-
wmb();
- mthca_write64(doorbell,
+ mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) | f0 | op0,
+ (qp->qpn << 8) | size0,
dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
/*
@@ -1829,7 +1825,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
- __be32 doorbell[2];
unsigned long flags;
int err = 0;
int nreq;
@@ -1907,13 +1902,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
nreq = 0;
- doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
- doorbell[1] = cpu_to_be32(qp->qpn << 8);
-
wmb();
- mthca_write64(doorbell,
- dev->kar + MTHCA_RECEIVE_DOORBELL,
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
qp->rq.next_ind = ind;
@@ -1923,13 +1915,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
out:
if (likely(nreq)) {
- doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
-
wmb();
- mthca_write64(doorbell,
- dev->kar + MTHCA_RECEIVE_DOORBELL,
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -1951,7 +1940,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
- __be32 doorbell[2];
+ u32 dbhi;
void *wqe;
void *prev_wqe;
unsigned long flags;
@@ -1981,10 +1970,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
nreq = 0;
- doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
- ((qp->sq.head & 0xffff) << 8) |
- f0 | op0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+ dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+ ((qp->sq.head & 0xffff) << 8) | f0 | op0;
qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
@@ -2000,7 +1987,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* write MMIO send doorbell.
*/
wmb();
- mthca_write64(doorbell,
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0,
dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -2154,10 +2142,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (likely(nreq)) {
- doorbell[0] = cpu_to_be32((nreq << 24) |
- ((qp->sq.head & 0xffff) << 8) |
- f0 | op0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+ dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
qp->sq.head += nreq;
@@ -2173,8 +2158,8 @@ out:
* write MMIO send doorbell.
*/
wmb();
- mthca_write64(doorbell,
- dev->kar + MTHCA_SEND_DOORBELL,
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 3f58c11a62b..553d681f681 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -491,7 +491,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
- __be32 doorbell[2];
unsigned long flags;
int err = 0;
int first_ind;
@@ -563,16 +562,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
nreq = 0;
- doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
- doorbell[1] = cpu_to_be32(srq->srqn << 8);
-
/*
* Make sure that descriptors are written
* before doorbell is rung.
*/
wmb();
- mthca_write64(doorbell,
+ mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
@@ -581,16 +577,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
if (likely(nreq)) {
- doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
- doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
-
/*
* Make sure that descriptors are written before
* doorbell is rung.
*/
wmb();
- mthca_write64(doorbell,
+ mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 1b3327ad6bc..eb7edab0e83 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -84,9 +84,8 @@ enum {
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_MCAST_STARTED = 8,
- IPOIB_FLAG_NETIF_STOPPED = 9,
- IPOIB_FLAG_ADMIN_CM = 10,
- IPOIB_FLAG_UMCAST = 11,
+ IPOIB_FLAG_ADMIN_CM = 9,
+ IPOIB_FLAG_UMCAST = 10,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -98,9 +97,9 @@ enum {
#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
-#define IPOIB_CM_OP_SRQ (1ul << 30)
+#define IPOIB_OP_CM (1ul << 30)
#else
-#define IPOIB_CM_OP_SRQ (0)
+#define IPOIB_OP_CM (0)
#endif
/* structs */
@@ -197,7 +196,6 @@ struct ipoib_cm_rx {
struct ipoib_cm_tx {
struct ib_cm_id *id;
- struct ib_cq *cq;
struct ib_qp *qp;
struct list_head list;
struct net_device *dev;
@@ -294,6 +292,7 @@ struct ipoib_dev_priv {
unsigned tx_tail;
struct ib_sge tx_sge;
struct ib_send_wr tx_wr;
+ unsigned tx_outstanding;
struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -504,6 +503,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
unsigned int mtu);
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
#else
struct ipoib_cm_tx;
@@ -592,6 +592,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *w
{
}
+static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
#endif
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0a0dcb8fdfd..87610772a97 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -87,7 +87,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
struct ib_recv_wr *bad_wr;
int i, ret;
- priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+ priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -401,7 +401,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+ unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
struct ipoib_cm_rx *p;
unsigned long flags;
@@ -412,7 +412,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
- if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
+ if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
@@ -434,7 +434,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
goto repost;
}
- if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
+ if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
p = wc->qp->qp_context;
if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -498,7 +498,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
priv->tx_sge.addr = addr;
priv->tx_sge.length = len;
- priv->tx_wr.wr_id = wr_id;
+ priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
}
@@ -549,20 +549,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
dev->trans_start = jiffies;
++tx->tx_head;
- if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+ if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
netif_stop_queue(dev);
- set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
}
}
}
-static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
- struct ib_wc *wc)
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned int wr_id = wc->wr_id;
+ struct ipoib_cm_tx *tx = wc->qp->qp_context;
+ unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
unsigned long flags;
@@ -587,11 +586,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
spin_lock_irqsave(&priv->tx_lock, flags);
++tx->tx_tail;
- if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
- tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
- clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+ if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+ netif_queue_stopped(dev) &&
+ test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
- }
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -614,11 +612,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
tx->neigh = NULL;
}
- /* queue would be re-started anyway when TX is destroyed,
- * but it makes sense to do it ASAP here. */
- if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
- netif_wake_queue(dev);
-
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -632,19 +625,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
spin_unlock_irqrestore(&priv->tx_lock, flags);
}
-static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
-{
- struct ipoib_cm_tx *tx = tx_ptr;
- int n, i;
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- do {
- n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
- for (i = 0; i < n; ++i)
- ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
- } while (n == IPOIB_NUM_WC);
-}
-
int ipoib_cm_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -807,17 +787,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
return 0;
}
-static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
- .send_cq = cq,
+ .send_cq = priv->cq,
.recv_cq = priv->cq,
.srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size,
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
+ .qp_context = tx
};
return ib_create_qp(priv->pd, &attr);
@@ -899,21 +880,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
goto err_tx;
}
- p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
- ipoib_sendq_size + 1, 0);
- if (IS_ERR(p->cq)) {
- ret = PTR_ERR(p->cq);
- ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
- goto err_cq;
- }
-
- ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
- if (ret) {
- ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
- goto err_req_notify;
- }
-
- p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+ p->qp = ipoib_cm_create_tx_qp(p->dev, p);
if (IS_ERR(p->qp)) {
ret = PTR_ERR(p->qp);
ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -950,12 +917,8 @@ err_modify:
err_id:
p->id = NULL;
ib_destroy_qp(p->qp);
-err_req_notify:
err_qp:
p->qp = NULL;
- ib_destroy_cq(p->cq);
-err_cq:
- p->cq = NULL;
err_tx:
return ret;
}
@@ -964,6 +927,8 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
struct ipoib_tx_buf *tx_req;
+ unsigned long flags;
+ unsigned long begin;
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -971,27 +936,40 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
if (p->id)
ib_destroy_cm_id(p->id);
- if (p->qp)
- ib_destroy_qp(p->qp);
-
- if (p->cq)
- ib_destroy_cq(p->cq);
-
- if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
- netif_wake_queue(p->dev);
-
if (p->tx_ring) {
+ /* Wait for all sends to complete */
+ begin = jiffies;
while ((int) p->tx_tail - (int) p->tx_head < 0) {
- tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
- DMA_TO_DEVICE);
- dev_kfree_skb_any(tx_req->skb);
- ++p->tx_tail;
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv, "timing out; %d sends not completed\n",
+ p->tx_head - p->tx_tail);
+ goto timeout;
+ }
+
+ msleep(1);
}
+ }
- kfree(p->tx_ring);
+timeout:
+
+ while ((int) p->tx_tail - (int) p->tx_head < 0) {
+ tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+ ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+ DMA_TO_DEVICE);
+ dev_kfree_skb_any(tx_req->skb);
+ ++p->tx_tail;
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+ netif_queue_stopped(p->dev) &&
+ test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ netif_wake_queue(p->dev);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
}
+ if (p->qp)
+ ib_destroy_qp(p->qp);
+
+ kfree(p->tx_ring);
kfree(p);
}
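
The hunks above replace the old per-connection IPOIB_FLAG_NETIF_STOPPED flag
with a single priv->tx_outstanding counter shared by the datagram and
connected-mode send paths, so one counter now governs the netif queue. A
minimal sketch of the watermark scheme (the helper names are illustrative,
not from the patch; callers hold priv->tx_lock, as in the hunks):

static void sketch_tx_posted(struct net_device *dev,
			     struct ipoib_dev_priv *priv)
{
	if (++priv->tx_outstanding == ipoib_sendq_size)
		netif_stop_queue(dev);		/* ring is now full */
}

static void sketch_tx_completed(struct net_device *dev,
				struct ipoib_dev_priv *priv)
{
	/* Wake exactly once, at the half-empty watermark, and only if
	 * the interface is administratively up. */
	if (--priv->tx_outstanding == ipoib_sendq_size >> 1 &&
	    netif_queue_stopped(dev) &&
	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		netif_wake_queue(dev);
}
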
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 1a77e79f6b4..5063dd509ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -267,11 +267,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail;
- if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
- priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
- clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+ if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+ netif_queue_stopped(dev) &&
+ test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
- }
spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
@@ -301,14 +300,18 @@ poll_more:
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
- if (wc->wr_id & IPOIB_CM_OP_SRQ) {
- ++done;
- ipoib_cm_handle_rx_wc(dev, wc);
- } else if (wc->wr_id & IPOIB_OP_RECV) {
+ if (wc->wr_id & IPOIB_OP_RECV) {
++done;
- ipoib_ib_handle_rx_wc(dev, wc);
- } else
- ipoib_ib_handle_tx_wc(dev, wc);
+ if (wc->wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_rx_wc(dev, wc);
+ else
+ ipoib_ib_handle_rx_wc(dev, wc);
+ } else {
+ if (wc->wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_tx_wc(dev, wc);
+ else
+ ipoib_ib_handle_tx_wc(dev, wc);
+ }
}
if (n != t)
@@ -401,10 +404,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
address->last_send = priv->tx_head;
++priv->tx_head;
- if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+ if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
- set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
}
}
}
@@ -436,7 +438,8 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+ queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ round_jiffies_relative(HZ));
}
int ipoib_ib_dev_open(struct net_device *dev)
@@ -472,7 +475,8 @@ int ipoib_ib_dev_open(struct net_device *dev)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+ queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ round_jiffies_relative(HZ));
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -561,12 +565,17 @@ void ipoib_drain_cq(struct net_device *dev)
if (priv->ibwc[i].status == IB_WC_SUCCESS)
priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
- if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
- ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
- else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
- ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
- else
- ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+ if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
+ if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
+ else
+ ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
+ } else {
+ if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+ ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
+ else
+ ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+ }
}
} while (n == IPOIB_NUM_WC);
}
@@ -612,6 +621,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
+ --priv->tx_outstanding;
}
for (i = 0; i < ipoib_recvq_size; ++i) {
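
With connected-mode completions now arriving on the shared CQ, ownership of
each work completion is encoded entirely in wr_id bits: IPOIB_OP_RECV picks
receive vs. send, IPOIB_OP_CM picks connected vs. datagram mode, and the CM
TX context travels in wc->qp->qp_context. A condensed sketch of the
convention (assuming the flag bits sit above any real ring index):

/* post (CM TX):     tx_wr.wr_id = ring_index | IPOIB_OP_CM;
 * complete (CM TX): ring_index  = wc->wr_id & ~IPOIB_OP_CM; */
static void sketch_dispatch(struct net_device *dev, struct ib_wc *wc)
{
	if (wc->wr_id & IPOIB_OP_RECV) {
		if (wc->wr_id & IPOIB_OP_CM)
			ipoib_cm_handle_rx_wc(dev, wc);
		else
			ipoib_ib_handle_rx_wc(dev, wc);
	} else {
		if (wc->wr_id & IPOIB_OP_CM)
			ipoib_cm_handle_tx_wc(dev, wc); /* tx from qp_context */
		else
			ipoib_ib_handle_tx_wc(dev, wc);
	}
}
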
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 362610d870e..a03a65ebcf0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -148,8 +148,6 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
-
/*
* Now flush workqueue to make sure a scheduled task doesn't
* bring our internal state back up.
@@ -902,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out_rx_ring_cleanup;
}
- /* priv->tx_head & tx_tail are already 0 */
+ /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index f3529b6f0a3..d6879806179 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -131,7 +131,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
p = mem;
for_each_sg(sgl, sg, data->size, i) {
- from = kmap_atomic(sg->page, KM_USER0);
+ from = kmap_atomic(sg_page(sg), KM_USER0);
memcpy(p,
from + sg->offset,
sg->length);
@@ -191,7 +191,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
p = mem;
for_each_sg(sgl, sg, sg_size, i) {
- to = kmap_atomic(sg->page, KM_SOFTIRQ0);
+ to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
memcpy(to + sg->offset,
p,
sg->length);
@@ -300,7 +300,7 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
for_each_sg(sgl, sg, data->dma_nents, i) {
/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
"offset: %ld sz: %ld\n", i,
- (unsigned long)page_to_phys(sg->page),
+ (unsigned long)sg_phys(sg),
(unsigned long)sg->offset,
(unsigned long)sg->length); */
end_addr = ib_sg_dma_address(ibdev, sg) +
@@ -336,7 +336,7 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
- sg->page, sg->offset,
+ sg_page(sg), sg->offset,
sg->length, ib_sg_dma_len(ibdev, sg));
}
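
These hunks are the iSER side of the tree-wide switch to the chained
scatterlist API: with chaining, the low bits of sg->page can carry chain
markers, so entries must always be read through sg_page(); sg_phys()
likewise replaces page_to_phys(sg->page) and, note, additionally folds in
sg->offset. A short sketch of the accessor idiom, for a populated list sgl
with nents entries:

#include <linux/scatterlist.h>

static void sketch_walk_sg(struct scatterlist *sgl, int nents)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)			/* chain-aware walk */
		pr_debug("sg[%d]: page %p phys %#lx len %u\n",
			 i, sg_page(sg),		/* never sg->page   */
			 (unsigned long)sg_phys(sg),	/* incl. sg->offset */
			 sg->length);
}
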
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
index a67b29b089e..e5f4da92834 100644
--- a/drivers/input/keyboard/bf54x-keys.c
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -256,7 +256,6 @@ static int __devinit bfin_kpad_probe(struct platform_device *pdev)
printk(KERN_ERR DRV_NAME
": unable to claim irq %d; error %d\n",
bf54x_kpad->irq, error);
- error = -EBUSY;
goto out2;
}
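
The one-line deletion above is a classic error-path fix: request_irq()
already returned a meaningful errno in 'error', and overwriting it with
-EBUSY before the goto masked the real cause. The corrected shape (the
request_irq() arguments here are illustrative, not lifted from the driver):

error = request_irq(bf54x_kpad->irq, bfin_kpad_isr, 0, DRV_NAME, pdev);
if (error) {
	printk(KERN_ERR DRV_NAME ": unable to claim irq %d; error %d\n",
	       bf54x_kpad->irq, error);
	goto out2;	/* 'error' still holds request_irq()'s result */
}
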
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index 0117817bf53..f132702d137 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -504,25 +504,22 @@ static void atp_complete(struct urb* urb)
memset(dev->xy_acc, 0, sizeof(dev->xy_acc));
}
- /* Geyser 3 will continue to send packets continually after
+ input_report_key(dev->input, BTN_LEFT, key);
+ input_sync(dev->input);
+
+ /* Many Geysers will continue to send packets continually after
the first touch unless reinitialised. Do so if it's been
idle for a while in order to avoid waking the kernel up
several hundred times a second */
- if (atp_is_geyser_3(dev)) {
- if (!x && !y && !key) {
- dev->idlecount++;
- if (dev->idlecount == 10) {
- dev->valid = 0;
- schedule_work(&dev->work);
- }
+ if (!x && !y && !key) {
+ dev->idlecount++;
+ if (dev->idlecount == 10) {
+ dev->valid = 0;
+ schedule_work(&dev->work);
}
- else
- dev->idlecount = 0;
- }
-
- input_report_key(dev->input, BTN_LEFT, key);
- input_sync(dev->input);
+ } else
+ dev->idlecount = 0;
exit:
retval = usb_submit_urb(dev->urb, GFP_ATOMIC);
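
The appletouch reshuffle does two things: the input report is hoisted ahead
of the idle bookkeeping, and the idle-reinit heuristic (formerly Geyser 3
only) now covers all Geysers. Note the one-shot behaviour of the counter in
the patch's logic, annotated:

if (!x && !y && !key) {
	dev->idlecount++;
	if (dev->idlecount == 10) {	/* fires once per idle stretch  */
		dev->valid = 0;		/* reinit from process context  */
		schedule_work(&dev->work);
	}				/* counts past 10 don't re-fire */
} else
	dev->idlecount = 0;		/* any activity rearms the check */
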
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 11dafc0ee99..1a0cea3c529 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -20,6 +20,7 @@
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/platform_device.h>
+#include <linux/i8042.h>
#include <asm/io.h>
@@ -208,7 +209,7 @@ static int __i8042_command(unsigned char *param, int command)
return 0;
}
-static int i8042_command(unsigned char *param, int command)
+int i8042_command(unsigned char *param, int command)
{
unsigned long flags;
int retval;
@@ -219,6 +220,7 @@ static int i8042_command(unsigned char *param, int command)
return retval;
}
+EXPORT_SYMBOL(i8042_command);
/*
* i8042_kbd_write() sends a byte out through the keyboard interface.
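
Exporting i8042_command() gives other drivers a properly locked entry point
to the keyboard controller; the command constants leave the private header
in the next diff, presumably for <linux/i8042.h>, which i8042.c now
includes. A hypothetical user reading the controller's command byte:

#include <linux/i8042.h>

static int sketch_read_ctr(unsigned char *ctr)
{
	/* I8042_CMD_CTL_RCTR (0x0120): no parameter bytes, one result
	 * byte; i8042_command() takes the controller lock itself. */
	return i8042_command(ctr, I8042_CMD_CTL_RCTR);	/* 0 on success */
}
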
diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h
index b3eb7a72d96..dd22d91f8b3 100644
--- a/drivers/input/serio/i8042.h
+++ b/drivers/input/serio/i8042.h
@@ -61,28 +61,6 @@
#define I8042_CTR_XLATE 0x40
/*
- * Commands.
- */
-
-#define I8042_CMD_CTL_RCTR 0x0120
-#define I8042_CMD_CTL_WCTR 0x1060
-#define I8042_CMD_CTL_TEST 0x01aa
-
-#define I8042_CMD_KBD_DISABLE 0x00ad
-#define I8042_CMD_KBD_ENABLE 0x00ae
-#define I8042_CMD_KBD_TEST 0x01ab
-#define I8042_CMD_KBD_LOOP 0x11d2
-
-#define I8042_CMD_AUX_DISABLE 0x00a7
-#define I8042_CMD_AUX_ENABLE 0x00a8
-#define I8042_CMD_AUX_TEST 0x01a9
-#define I8042_CMD_AUX_SEND 0x10d4
-#define I8042_CMD_AUX_LOOP 0x11d3
-
-#define I8042_CMD_MUX_PFX 0x0090
-#define I8042_CMD_MUX_SEND 0x1090
-
-/*
* Return codes.
*/
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index e3e0baa1a15..fa8442b6241 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -202,6 +202,7 @@ config TOUCHSCREEN_USB_COMPOSITE
- DMC TSC-10/25
- IRTOUCHSYSTEMS/UNITOP
- IdealTEK URTC1000
+ - GoTop Super_Q2/GogoPen/PenPower tablets
Have a look at <http://linux.chapter7.ch/touchkit/> for
a usage description and the required user-space stuff.
@@ -259,4 +260,9 @@ config TOUCHSCREEN_USB_GENERAL_TOUCH
bool "GeneralTouch Touchscreen device support" if EMBEDDED
depends on TOUCHSCREEN_USB_COMPOSITE
+config TOUCHSCREEN_USB_GOTOP
+ default y
+ bool "GoTop Super_Q2/GogoPen/PenPower tablet device support" if EMBEDDED
+ depends on TOUCHSCREEN_USB_COMPOSITE
+
endif
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 5f34b78d5dd..19055e7381f 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -11,8 +11,9 @@
* - DMC TSC-10/25
* - IRTOUCHSYSTEMS/UNITOP
* - IdealTEK URTC1000
+ * - GoTop Super_Q2/GogoPen/PenPower tablets
*
- * Copyright (C) 2004-2006 by Daniel Ritz <daniel.ritz@gmx.ch>
+ * Copyright (C) 2004-2007 by Daniel Ritz <daniel.ritz@gmx.ch>
* Copyright (C) by Todd E. Johnson (mtouchusb.c)
*
* This program is free software; you can redistribute it and/or
@@ -115,6 +116,7 @@ enum {
DEVTYPE_IRTOUCH,
DEVTYPE_IDEALTEK,
DEVTYPE_GENERAL_TOUCH,
+ DEVTYPE_GOTOP,
};
static struct usb_device_id usbtouch_devices[] = {
@@ -168,6 +170,12 @@ static struct usb_device_id usbtouch_devices[] = {
{USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH},
#endif
+#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
+ {USB_DEVICE(0x08f2, 0x007f), .driver_info = DEVTYPE_GOTOP},
+ {USB_DEVICE(0x08f2, 0x00ce), .driver_info = DEVTYPE_GOTOP},
+ {USB_DEVICE(0x08f2, 0x00f4), .driver_info = DEVTYPE_GOTOP},
+#endif
+
{}
};
@@ -501,6 +509,20 @@ static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
#endif
/*****************************************************************************
+ * GoTop Part
+ */
+#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
+static int gotop_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
+{
+ dev->x = ((pkt[1] & 0x38) << 4) | pkt[2];
+ dev->y = ((pkt[1] & 0x07) << 7) | pkt[3];
+ dev->touch = pkt[0] & 0x01;
+ return 1;
+}
+#endif
+
+
+/*****************************************************************************
* the different device descriptors
*/
static struct usbtouch_device_info usbtouch_dev_info[] = {
@@ -623,9 +645,19 @@ static struct usbtouch_device_info usbtouch_dev_info[] = {
.max_yc = 0x0500,
.rept_size = 7,
.read_data = general_touch_read_data,
- }
+ },
#endif
+#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
+ [DEVTYPE_GOTOP] = {
+ .min_xc = 0x0,
+ .max_xc = 0x03ff,
+ .min_yc = 0x0,
+ .max_yc = 0x03ff,
+ .rept_size = 4,
+ .read_data = gotop_read_data,
+ },
+#endif
};
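
gotop_read_data() above unpacks 10-bit coordinates from the 4-byte report:
bit 0 of pkt[0] is the touch flag, pkt[1] holds the high coordinate bits
(X in bits 5:3, Y in bits 2:0), and pkt[2]/pkt[3] hold the low bits,
matching the 0x03ff axis ranges in the descriptor. Worked through on a
made-up report:

static void sketch_gotop_decode(void)
{
	/* Hypothetical report: finger down at (X = 0x2c5, Y = 0x155). */
	unsigned char pkt[4] = { 0x01, 0x2a, 0x45, 0x55 };

	int x = ((pkt[1] & 0x38) << 4) | pkt[2]; /* (0x28 << 4) | 0x45 = 0x2c5 */
	int y = ((pkt[1] & 0x07) << 7) | pkt[3]; /* (0x02 << 7) | 0x55 = 0x155 */
	int touch = pkt[0] & 0x01;               /* 1: touching */
}
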
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 8749fa4ffce..656920636cb 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -47,4 +47,8 @@ config KVM_AMD
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/lguest/Kconfig
+
endif # VIRTUALIZATION
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af2d288c881..07ae280e8fe 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -198,21 +198,15 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
static void ack_flush(void *_completed)
{
- atomic_t *completed = _completed;
-
- atomic_inc(completed);
}
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- int i, cpu, needed;
+ int i, cpu;
cpumask_t cpus;
struct kvm_vcpu *vcpu;
- atomic_t completed;
- atomic_set(&completed, 0);
cpus_clear(cpus);
- needed = 0;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
@@ -221,23 +215,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
continue;
cpu = vcpu->cpu;
if (cpu != -1 && cpu != raw_smp_processor_id())
- if (!cpu_isset(cpu, cpus)) {
- cpu_set(cpu, cpus);
- ++needed;
- }
- }
-
- /*
- * We really want smp_call_function_mask() here. But that's not
- * available, so ipi all cpus in parallel and wait for them
- * to complete.
- */
- for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
- smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
- while (atomic_read(&completed) != needed) {
- cpu_relax();
- barrier();
+ cpu_set(cpu, cpus);
}
+ smp_call_function_mask(cpus, ack_flush, NULL, 1);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -2054,12 +2034,21 @@ again:
kvm_x86_ops->run(vcpu, kvm_run);
- kvm_guest_exit();
vcpu->guest_mode = 0;
local_irq_enable();
++vcpu->stat.exits;
+ /*
+ * We must have an instruction between local_irq_enable() and
+ * kvm_guest_exit(), so the timer interrupt isn't delayed by
+ * the interrupt shadow. The stat.exits increment will do nicely.
+ * But we need to prevent reordering, hence this barrier():
+ */
+ barrier();
+
+ kvm_guest_exit();
+
preempt_enable();
/*
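
Two independent fixes here: kvm_flush_remote_tlbs() drops its hand-rolled
IPI-and-spin in favour of smp_call_function_mask(..., wait=1), which only
returns once ack_flush() has run on every CPU in the mask (hence the now
empty handler); and the run loop moves kvm_guest_exit() after
local_irq_enable(). The ordering matters because sti has a one-instruction
interrupt shadow, so at least one instruction must retire between enabling
interrupts and the exit accounting, and barrier() keeps the compiler from
undoing that. The required shape, annotated:

local_irq_enable();	/* sti: IRQs deliverable after ONE more insn     */
++vcpu->stat.exits;	/* that instruction: the interrupt-shadow filler */
barrier();		/* compiler-only fence; keeps the increment put  */
kvm_guest_exit();	/* a pending timer tick is no longer delayed     */
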
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index a190587cf6a..238fcad3cec 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -494,12 +494,19 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- u32 counter_passed;
- ktime_t passed, now = apic->timer.dev.base->get_time();
- u32 tmcct = apic_get_reg(apic, APIC_TMICT);
+ u64 counter_passed;
+ ktime_t passed, now;
+ u32 tmcct;
ASSERT(apic != NULL);
+ now = apic->timer.dev.base->get_time();
+ tmcct = apic_get_reg(apic, APIC_TMICT);
+
+ /* if initial count is 0, current count should also be 0 */
+ if (tmcct == 0)
+ return 0;
+
if (unlikely(ktime_to_ns(now) <=
ktime_to_ns(apic->timer.last_update))) {
/* Wrap around */
@@ -514,15 +521,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
counter_passed = div64_64(ktime_to_ns(passed),
(APIC_BUS_CYCLE_NS * apic->timer.divide_count));
- tmcct -= counter_passed;
- if (tmcct <= 0) {
- if (unlikely(!apic_lvtt_period(apic)))
+ if (counter_passed > tmcct) {
+ if (unlikely(!apic_lvtt_period(apic))) {
+ /* one-shot timers stick at 0 until reset */
tmcct = 0;
- else
- do {
- tmcct += apic_get_reg(apic, APIC_TMICT);
- } while (tmcct <= 0);
+ } else {
+ /*
+ * periodic timers reset to APIC_TMICT when they
+ * hit 0. The while loop simulates this happening N
+ * times. (counter_passed %= tmcct) would also work,
+ * but a 64-bit modulo may be slower, or simply unavailable, on 32-bit hosts.
+ */
+ while (counter_passed > tmcct)
+ counter_passed -= tmcct;
+ tmcct -= counter_passed;
+ }
+ } else {
+ tmcct -= counter_passed;
}
return tmcct;
@@ -853,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
- apic->timer.divide_count = 0;
+ update_divide_count(apic);
atomic_set(&apic->timer.pending, 0);
if (vcpu->vcpu_id == 0)
vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
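
For a periodic LAPIC timer the new apic_get_tmcct() reduces counter_passed
against the reload value by repeated subtraction. It is almost, but not
quite, counter_passed %= tmcct: at an exact multiple of the period the loop
stops at tmcct and yields a current count of 0 (the instant of expiry),
where a plain modulo would yield a freshly reloaded tmcct; the loop also
avoids needing a 64-bit division helper on 32-bit hosts. Sketched as a
hypothetical helper (tmcct != 0 is guaranteed by the early return added
above):

static u32 sketch_periodic_tmcct(u64 counter_passed, u32 tmcct)
{
	while (counter_passed > tmcct)	/* leaves (0, tmcct]          */
		counter_passed -= tmcct;
	return tmcct - counter_passed;	/* 0 exactly at expiry points */
}
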
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 6d84d30f5ed..feb5ac986c5 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1049,6 +1049,7 @@ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
destroy_kvm_mmu(vcpu);
return init_kvm_mmu(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
@@ -1088,7 +1089,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
mmu_page_remove_parent_pte(child, spte);
}
}
- *spte = 0;
+ set_shadow_pte(spte, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 4f115a8e45e..bb56ae3f89b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -523,6 +523,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ if (vcpu->rmode.active)
+ rflags |= IOPL_MASK | X86_EFLAGS_VM;
vmcs_writel(GUEST_RFLAGS, rflags);
}
@@ -1128,6 +1130,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
+ kvm_mmu_reset_context(vcpu);
init_rmode_tss(vcpu->kvm);
}
@@ -1760,10 +1763,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
}
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
- asm ("int $2");
- return 1;
- }
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+ return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
vmx_fpu_activate(vcpu);
@@ -2196,6 +2197,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 intr_info;
/*
* Loading guest fpu may have cleared host cr0.ts
@@ -2322,6 +2324,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
+
+ intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ /* We need to handle NMIs before interrupts are enabled */
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+ asm("int $2");
}
static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
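
The handle_exception() and vmx_vcpu_run() hunks are one logical change: the
"int $2" reflection of an NMI that arrived in guest mode moves to right
after the VM exit, while interrupts are still disabled, so the host NMI
handler runs before any ordinary interrupt can be taken. The tail of
vmx_vcpu_run(), annotated:

/* IRQs are still off here, straight after the VM exit. */
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200)	/* NMI exit */
	asm("int $2");		/* invoke the host NMI handler now */
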
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9737c3b2f48..a6ace302e0c 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -212,7 +212,8 @@ static u16 twobyte_table[256] = {
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xC0 - 0xCF */
- 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+ 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xEF */
@@ -596,11 +597,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xf0: /* LOCK */
lock_prefix = 1;
break;
+ case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP/REPE/REPZ */
rep_prefix = 1;
break;
- case 0xf2: /* REPNE/REPNZ */
- break;
default:
goto done_prefixes;
}
@@ -825,6 +825,14 @@ done_prefixes:
if (twobyte && b == 0x01 && modrm_reg == 7)
break;
srcmem_common:
+ /*
+ * For instructions with a ModR/M byte, switch to register
+ * access if Mod = 3.
+ */
+ if ((d & ModRM) && modrm_mod == 3) {
+ src.type = OP_REG;
+ break;
+ }
src.type = OP_MEM;
src.ptr = (unsigned long *)cr2;
src.val = 0;
@@ -893,6 +901,14 @@ done_prefixes:
dst.ptr = (unsigned long *)cr2;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.val = 0;
+ /*
+ * For instructions with a ModR/M byte, switch to register
+ * access if Mod = 3.
+ */
+ if ((d & ModRM) && modrm_mod == 3) {
+ dst.type = OP_REG;
+ break;
+ }
if (d & BitOp) {
unsigned long mask = ~(dst.bytes * 8 - 1);
@@ -1083,31 +1099,6 @@ push:
case 0xd2 ... 0xd3: /* Grp2 */
src.val = _regs[VCPU_REGS_RCX];
goto grp2;
- case 0xe8: /* call (near) */ {
- long int rel;
- switch (op_bytes) {
- case 2:
- rel = insn_fetch(s16, 2, _eip);
- break;
- case 4:
- rel = insn_fetch(s32, 4, _eip);
- break;
- case 8:
- rel = insn_fetch(s64, 8, _eip);
- break;
- default:
- DPRINTF("Call: Invalid op_bytes\n");
- goto cannot_emulate;
- }
- src.val = (unsigned long) _eip;
- JMP_REL(rel);
- goto push;
- }
- case 0xe9: /* jmp rel */
- case 0xeb: /* jmp rel short */
- JMP_REL(src.val);
- no_wb = 1; /* Disable writeback. */
- break;
case 0xf6 ... 0xf7: /* Grp3 */
switch (modrm_reg) {
case 0 ... 1: /* test */
@@ -1350,6 +1341,32 @@ special_insn:
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
+ case 0xe8: /* call (near) */ {
+ long int rel;
+ switch (op_bytes) {
+ case 2:
+ rel = insn_fetch(s16, 2, _eip);
+ break;
+ case 4:
+ rel = insn_fetch(s32, 4, _eip);
+ break;
+ case 8:
+ rel = insn_fetch(s64, 8, _eip);
+ break;
+ default:
+ DPRINTF("Call: Invalid op_bytes\n");
+ goto cannot_emulate;
+ }
+ src.val = (unsigned long) _eip;
+ JMP_REL(rel);
+ goto push;
+ }
+ case 0xe9: /* jmp rel */
+ case 0xeb: /* jmp rel short */
+ JMP_REL(src.val);
+ no_wb = 1; /* Disable writeback. */
+ break;
+
}
goto writeback;
@@ -1501,6 +1518,10 @@ twobyte_insn:
dst.bytes = op_bytes;
dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
break;
+ case 0xc3: /* movnti */
+ dst.bytes = op_bytes;
+ dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
+ break;
}
goto writeback;
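
The emulator changes above decode the 0xf2 prefix as a rep prefix, move the
near call/jmp cases into the special_insn block, add 0f c3 (movnti) to the
two-byte table, and teach operand setup that a ModR/M byte with Mod = 3
names a register rather than memory (previously every ModRM operand was
treated as memory at the faulting address). For reference, the standard
ModR/M field layout (x86 SDM; context, not code from the patch):

struct modrm_fields { unsigned mod, reg, rm; };

static struct modrm_fields decode_modrm(unsigned char modrm)
{
	struct modrm_fields f = {
		.mod = (modrm >> 6) & 3, /* 3 => r/m names a register     */
		.reg = (modrm >> 3) & 7, /* register, or opcode extension */
		.rm  = modrm & 7,        /* register or addressing mode   */
	};
	return f;
}

/* e.g. modrm 0xd8 = 11 011 000b: mod 3, so register-to-register forms
 * now take the new OP_REG path instead of faulting as memory. */
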
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 41e2250613a..7eb9ecff8f4 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,7 +1,6 @@
config LGUEST
tristate "Linux hypervisor example code"
- depends on X86 && PARAVIRT && EXPERIMENTAL && !X86_PAE && FUTEX
- select LGUEST_GUEST
+ depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !(X86_VISWS || X86_VOYAGER)
select HVC_DRIVER
---help---
This is a very simple module which allows you to run
@@ -18,13 +17,3 @@ config LGUEST_GUEST
The guest needs code built-in, even if the host has lguest
support as a module. The drivers are tiny, so we build them
in too.
-
-config LGUEST_NET
- tristate
- default y
- depends on LGUEST_GUEST && NET
-
-config LGUEST_BLOCK
- tristate
- default y
- depends on LGUEST_GUEST && BLOCK
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index e5047471c33..5e8272d296d 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -1,10 +1,12 @@
-# Guest requires the paravirt_ops replacement and the bus driver.
-obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o
+# Guest requires the device configuration and probing code.
+obj-$(CONFIG_LGUEST_GUEST) += lguest_device.o
# Host requires the other files, which can be a module.
obj-$(CONFIG_LGUEST) += lg.o
-lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \
- segments.o io.o lguest_user.o switcher.o
+lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \
+ segments.o lguest_user.o
+
+lg-$(CONFIG_X86_32) += x86/switcher_32.o x86/core.o
Preparation Preparation!: PREFIX=P
Guest: PREFIX=G
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index a0788c12b39..35d19ae58de 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -11,58 +11,20 @@
#include <linux/vmalloc.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/highmem.h>
#include <asm/paravirt.h>
-#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/poll.h>
-#include <asm/highmem.h>
#include <asm/asm-offsets.h>
-#include <asm/i387.h>
#include "lg.h"
-/* Found in switcher.S */
-extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
-extern unsigned long default_idt_entries[];
-
-/* Every guest maps the core switcher code. */
-#define SHARED_SWITCHER_PAGES \
- DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
-/* Pages for switcher itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
-
-/* We map at -4M for ease of mapping into the guest (one PTE page). */
-#define SWITCHER_ADDR 0xFFC00000
static struct vm_struct *switcher_vma;
static struct page **switcher_page;
-static int cpu_had_pge;
-static struct {
- unsigned long offset;
- unsigned short segment;
-} lguest_entry;
-
/* This One Big lock protects all inter-guest data structures. */
DEFINE_MUTEX(lguest_lock);
-static DEFINE_PER_CPU(struct lguest *, last_guest);
-
-/* FIXME: Make dynamic. */
-#define MAX_LGUEST_GUESTS 16
-struct lguest lguests[MAX_LGUEST_GUESTS];
-
-/* Offset from where switcher.S was compiled to where we've copied it */
-static unsigned long switcher_offset(void)
-{
- return SWITCHER_ADDR - (unsigned long)start_switcher_text;
-}
-
-/* This cpu's struct lguest_pages. */
-static struct lguest_pages *lguest_pages(unsigned int cpu)
-{
- return &(((struct lguest_pages *)
- (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
-}
/*H:010 We need to set up the Switcher at a high virtual address. Remember the
* Switcher is a few hundred bytes of assembler code which actually changes the
@@ -73,9 +35,7 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
* Host since it will be running as the switchover occurs.
*
* Trying to map memory at a particular address is an unusual thing to do, so
- * it's not a simple one-liner. We also set up the per-cpu parts of the
- * Switcher here.
- */
+ * it's not a simple one-liner. */
static __init int map_switcher(void)
{
int i, err;
@@ -132,90 +92,11 @@ static __init int map_switcher(void)
goto free_vma;
}
- /* Now the switcher is mapped at the right address, we can't fail!
- * Copy in the compiled-in Switcher code (from switcher.S). */
+ /* Now the Switcher is mapped at the right address, we can't fail!
+ * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */
memcpy(switcher_vma->addr, start_switcher_text,
end_switcher_text - start_switcher_text);
- /* Most of the switcher.S doesn't care that it's been moved; on Intel,
- * jumps are relative, and it doesn't access any references to external
- * code or data.
- *
- * The only exception is the interrupt handlers in switcher.S: their
- * addresses are placed in a table (default_idt_entries), so we need to
- * update the table with the new addresses. switcher_offset() is a
- * convenience function which returns the distance between the builtin
- * switcher code and the high-mapped copy we just made. */
- for (i = 0; i < IDT_ENTRIES; i++)
- default_idt_entries[i] += switcher_offset();
-
- /*
- * Set up the Switcher's per-cpu areas.
- *
- * Each CPU gets two pages of its own within the high-mapped region
- * (aka. "struct lguest_pages"). Much of this can be initialized now,
- * but some depends on what Guest we are running (which is set up in
- * copy_in_guest_info()).
- */
- for_each_possible_cpu(i) {
- /* lguest_pages() returns this CPU's two pages. */
- struct lguest_pages *pages = lguest_pages(i);
- /* This is a convenience pointer to make the code fit one
- * statement to a line. */
- struct lguest_ro_state *state = &pages->state;
-
- /* The Global Descriptor Table: the Host has a different one
- * for each CPU. We keep a descriptor for the GDT which says
- * where it is and how big it is (the size is actually the last
- * byte, not the size, hence the "-1"). */
- state->host_gdt_desc.size = GDT_SIZE-1;
- state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
-
- /* All CPUs on the Host use the same Interrupt Descriptor
- * Table, so we just use store_idt(), which gets this CPU's IDT
- * descriptor. */
- store_idt(&state->host_idt_desc);
-
- /* The descriptors for the Guest's GDT and IDT can be filled
- * out now, too. We copy the GDT & IDT into ->guest_gdt and
- * ->guest_idt before actually running the Guest. */
- state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
- state->guest_idt_desc.address = (long)&state->guest_idt;
- state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
- state->guest_gdt_desc.address = (long)&state->guest_gdt;
-
- /* We know where we want the stack to be when the Guest enters
- * the switcher: in pages->regs. The stack grows upwards, so
- * we start it at the end of that structure. */
- state->guest_tss.esp0 = (long)(&pages->regs + 1);
- /* And this is the GDT entry to use for the stack: we keep a
- * couple of special LGUEST entries. */
- state->guest_tss.ss0 = LGUEST_DS;
-
- /* x86 can have a finegrained bitmap which indicates what I/O
- * ports the process can use. We set it to the end of our
- * structure, meaning "none". */
- state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
-
- /* Some GDT entries are the same across all Guests, so we can
- * set them up now. */
- setup_default_gdt_entries(state);
- /* Most IDT entries are the same for all Guests, too.*/
- setup_default_idt_entries(state, default_idt_entries);
-
- /* The Host needs to be able to use the LGUEST segments on this
- * CPU, too, so put them in the Host GDT. */
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
- }
-
- /* In the Switcher, we want the %cs segment register to use the
- * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
- * it will be undisturbed when we switch. To change %cs and jump we
- * need this structure to feed to Intel's "lcall" instruction. */
- lguest_entry.offset = (long)switch_to_guest + switcher_offset();
- lguest_entry.segment = LGUEST_CS;
-
printk(KERN_INFO "lguest: mapped switcher at %p\n",
switcher_vma->addr);
/* And we succeeded... */
@@ -247,86 +128,12 @@ static void unmap_switcher(void)
__free_pages(switcher_page[i], 0);
}
-/*H:130 Our Guest is usually so well behaved; it never tries to do things it
- * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
- * quite complete, because it doesn't contain replacements for the Intel I/O
- * instructions. As a result, the Guest sometimes fumbles across one during
- * the boot process as it probes for various things which are usually attached
- * to a PC.
- *
- * When the Guest uses one of these instructions, we get trap #13 (General
- * Protection Fault) and come here. We see if it's one of those troublesome
- * instructions and skip over it. We return true if we did. */
-static int emulate_insn(struct lguest *lg)
-{
- u8 insn;
- unsigned int insnlen = 0, in = 0, shift = 0;
- /* The eip contains the *virtual* address of the Guest's instruction:
- * guest_pa just subtracts the Guest's page_offset. */
- unsigned long physaddr = guest_pa(lg, lg->regs->eip);
-
- /* The guest_pa() function only works for Guest kernel addresses, but
- * that's all we're trying to do anyway. */
- if (lg->regs->eip < lg->page_offset)
- return 0;
-
- /* Decoding x86 instructions is icky. */
- lgread(lg, &insn, physaddr, 1);
-
- /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
- of the eax register. */
- if (insn == 0x66) {
- shift = 16;
- /* The instruction is 1 byte so far, read the next byte. */
- insnlen = 1;
- lgread(lg, &insn, physaddr + insnlen, 1);
- }
-
- /* We can ignore the lower bit for the moment and decode the 4 opcodes
- * we need to emulate. */
- switch (insn & 0xFE) {
- case 0xE4: /* in <next byte>,%al */
- insnlen += 2;
- in = 1;
- break;
- case 0xEC: /* in (%dx),%al */
- insnlen += 1;
- in = 1;
- break;
- case 0xE6: /* out %al,<next byte> */
- insnlen += 2;
- break;
- case 0xEE: /* out %al,(%dx) */
- insnlen += 1;
- break;
- default:
- /* OK, we don't know what this is, can't emulate. */
- return 0;
- }
-
- /* If it was an "IN" instruction, they expect the result to be read
- * into %eax, so we change %eax. We always return all-ones, which
- * traditionally means "there's nothing there". */
- if (in) {
- /* Lower bit tells us whether it's a 16 or 32 bit access */
- if (insn & 0x1)
- lg->regs->eax = 0xFFFFFFFF;
- else
- lg->regs->eax |= (0xFFFF << shift);
- }
- /* Finally, we've "done" the instruction, so move past it. */
- lg->regs->eip += insnlen;
- /* Success! */
- return 1;
-}
-/*:*/
-
/*L:305
* Dealing With Guest Memory.
*
* When the Guest gives us (what it thinks is) a physical address, we can use
- * the normal copy_from_user() & copy_to_user() on that address: remember,
- * Guest physical == Launcher virtual.
+ * the normal copy_from_user() & copy_to_user() on the corresponding place in
+ * the memory region allocated by the Launcher.
*
* But we can't trust the Guest: it might be trying to access the Launcher
* code. We have to check that the range is below the pfn_limit the Launcher
@@ -338,148 +145,27 @@ int lguest_address_ok(const struct lguest *lg,
return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
}
-/* This is a convenient routine to get a 32-bit value from the Guest (a very
- * common operation). Here we can see how useful the kill_lguest() routine we
- * met in the Launcher can be: we return a random value (0) instead of needing
- * to return an error. */
-u32 lgread_u32(struct lguest *lg, unsigned long addr)
-{
- u32 val = 0;
-
- /* Don't let them access lguest binary. */
- if (!lguest_address_ok(lg, addr, sizeof(val))
- || get_user(val, (u32 __user *)addr) != 0)
- kill_guest(lg, "bad read address %#lx", addr);
- return val;
-}
-
-/* Same thing for writing a value. */
-void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
-{
- if (!lguest_address_ok(lg, addr, sizeof(val))
- || put_user(val, (u32 __user *)addr) != 0)
- kill_guest(lg, "bad write address %#lx", addr);
-}
-
-/* This routine is more generic, and copies a range of Guest bytes into a
- * buffer. If the copy_from_user() fails, we fill the buffer with zeroes, so
- * the caller doesn't end up using uninitialized kernel memory. */
-void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+/* This routine copies memory from the Guest. Here we can see how useful the
+ * kill_lguest() routine we met in the Launcher can be: we return a random
+ * value (all zeroes) instead of needing to return an error. */
+void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
- || copy_from_user(b, (void __user *)addr, bytes) != 0) {
+ || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
/* copy_from_user should do this, but as we rely on it... */
memset(b, 0, bytes);
kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
}
}
-/* Similarly, our generic routine to copy into a range of Guest bytes. */
-void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
- unsigned bytes)
+/* This is the write (copy into guest) version. */
+void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+ unsigned bytes)
{
if (!lguest_address_ok(lg, addr, bytes)
- || copy_to_user((void __user *)addr, b, bytes) != 0)
+ || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
}
-/* (end of memory access helper routines) :*/
-
-static void set_ts(void)
-{
- u32 cr0;
-
- cr0 = read_cr0();
- if (!(cr0 & 8))
- write_cr0(cr0|8);
-}
-
-/*S:010
- * We are getting close to the Switcher.
- *
- * Remember that each CPU has two pages which are visible to the Guest when it
- * runs on that CPU. This has to contain the state for that Guest: we copy the
- * state in just before we run the Guest.
- *
- * Each Guest has "changed" flags which indicate what has changed in the Guest
- * since it last ran. We saw this set in interrupts_and_traps.c and
- * segments.c.
- */
-static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
-{
- /* Copying all this data can be quite expensive. We usually run the
- * same Guest we ran last time (and that Guest hasn't run anywhere else
- * meanwhile). If that's not the case, we pretend everything in the
- * Guest has changed. */
- if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
- __get_cpu_var(last_guest) = lg;
- lg->last_pages = pages;
- lg->changed = CHANGED_ALL;
- }
-
- /* These copies are pretty cheap, so we do them unconditionally: */
- /* Save the current Host top-level page directory. */
- pages->state.host_cr3 = __pa(current->mm->pgd);
- /* Set up the Guest's page tables to see this CPU's pages (and no
- * other CPU's pages). */
- map_switcher_in_guest(lg, pages);
- /* Set up the two "TSS" members which tell the CPU what stack to use
- * for traps which go directly into the Guest (ie. traps at privilege
- * level 1). */
- pages->state.guest_tss.esp1 = lg->esp1;
- pages->state.guest_tss.ss1 = lg->ss1;
-
- /* Copy direct-to-Guest trap entries. */
- if (lg->changed & CHANGED_IDT)
- copy_traps(lg, pages->state.guest_idt, default_idt_entries);
-
- /* Copy all GDT entries which the Guest can change. */
- if (lg->changed & CHANGED_GDT)
- copy_gdt(lg, pages->state.guest_gdt);
- /* If only the TLS entries have changed, copy them. */
- else if (lg->changed & CHANGED_GDT_TLS)
- copy_gdt_tls(lg, pages->state.guest_gdt);
-
- /* Mark the Guest as unchanged for next time. */
- lg->changed = 0;
-}
-
-/* Finally: the code to actually call into the Switcher to run the Guest. */
-static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
-{
- /* This is a dummy value we need for GCC's sake. */
- unsigned int clobber;
-
- /* Copy the guest-specific information into this CPU's "struct
- * lguest_pages". */
- copy_in_guest_info(lg, pages);
-
- /* Set the trap number to 256 (impossible value). If we fault while
- * switching to the Guest (bad segment registers or bug), this will
- * cause us to abort the Guest. */
- lg->regs->trapnum = 256;
-
- /* Now: we push the "eflags" register on the stack, then do an "lcall".
- * This is how we change from using the kernel code segment to using
- * the dedicated lguest code segment, as well as jumping into the
- * Switcher.
- *
- * The lcall also pushes the old code segment (KERNEL_CS) onto the
- * stack, then the address of this call. This stack layout happens to
- * exactly match the stack of an interrupt... */
- asm volatile("pushf; lcall *lguest_entry"
- /* This is how we tell GCC that %eax ("a") and %ebx ("b")
- * are changed by this routine. The "=" means output. */
- : "=a"(clobber), "=b"(clobber)
- /* %eax contains the pages pointer. ("0" refers to the
- * 0-th argument above, ie "a"). %ebx contains the
- * physical address of the Guest's top-level page
- * directory. */
- : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
- /* We tell gcc that all these registers could change,
- * which means we don't have to save and restore them in
- * the Switcher. */
- : "memory", "%edx", "%ecx", "%edi", "%esi");
-}
/*:*/
/*H:030 Let's jump straight to the main loop which runs the Guest.
@@ -489,22 +175,16 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
{
/* We stop running once the Guest is dead. */
while (!lg->dead) {
- /* We need to initialize this, otherwise gcc complains. It's
- * not (yet) clever enough to see that it's initialized when we
- * need it. */
- unsigned int cr2 = 0; /* Damn gcc */
-
- /* First we run any hypercalls the Guest wants done: either in
- * the hypercall ring in "struct lguest_data", or directly by
- * using int 31 (LGUEST_TRAP_ENTRY). */
- do_hypercalls(lg);
- /* It's possible the Guest did a SEND_DMA hypercall to the
+ /* First we run any hypercalls the Guest wants done. */
+ if (lg->hcall)
+ do_hypercalls(lg);
+
+ /* It's possible the Guest did a NOTIFY hypercall to the
* Launcher, in which case we return from the read() now. */
- if (lg->dma_is_pending) {
- if (put_user(lg->pending_dma, user) ||
- put_user(lg->pending_key, user+1))
+ if (lg->pending_notify) {
+ if (put_user(lg->pending_notify, user))
return -EFAULT;
- return sizeof(unsigned long)*2;
+ return sizeof(lg->pending_notify);
}
/* Check for signals */
@@ -542,144 +222,20 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
* the "Do Not Disturb" sign: */
local_irq_disable();
- /* Remember the awfully-named TS bit? If the Guest has asked
- * to set it we set it now, so we can trap and pass that trap
- * to the Guest if it uses the FPU. */
- if (lg->ts)
- set_ts();
-
- /* SYSENTER is an optimized way of doing system calls. We
- * can't allow it because it always jumps to privilege level 0.
- * A normal Guest won't try it because we don't advertise it in
- * CPUID, but a malicious Guest (or malicious Guest userspace
- * program) could, so we tell the CPU to disable it before
- * running the Guest. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
-
- /* Now we actually run the Guest. It will pop back out when
- * something interesting happens, and we can examine its
- * registers to see what it was doing. */
- run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
-
- /* The "regs" pointer contains two extra entries which are not
- * really registers: a trap number which says what interrupt or
- * trap made the switcher code come back, and an error code
- * which some traps set. */
-
- /* If the Guest page faulted, then the cr2 register will tell
- * us the bad virtual address. We have to grab this now,
- * because once we re-enable interrupts an interrupt could
- * fault and thus overwrite cr2, or we could even move off to a
- * different CPU. */
- if (lg->regs->trapnum == 14)
- cr2 = read_cr2();
- /* Similarly, if we took a trap because the Guest used the FPU,
- * we have to restore the FPU it expects to see. */
- else if (lg->regs->trapnum == 7)
- math_state_restore();
-
- /* Restore SYSENTER if it's supposed to be on. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+ /* Actually run the Guest until something happens. */
+ lguest_arch_run_guest(lg);
/* Now we're ready to be interrupted or moved to other CPUs */
local_irq_enable();
- /* OK, so what happened? */
- switch (lg->regs->trapnum) {
- case 13: /* We've intercepted a GPF. */
- /* Check if this was one of those annoying IN or OUT
- * instructions which we need to emulate. If so, we
- * just go back into the Guest after we've done it. */
- if (lg->regs->errcode == 0) {
- if (emulate_insn(lg))
- continue;
- }
- break;
- case 14: /* We've intercepted a page fault. */
- /* The Guest accessed a virtual address that wasn't
- * mapped. This happens a lot: we don't actually set
- * up most of the page tables for the Guest at all when
- * we start: as it runs it asks for more and more, and
- * we set them up as required. In this case, we don't
- * even tell the Guest that the fault happened.
- *
- * The errcode tells whether this was a read or a
- * write, and whether kernel or userspace code. */
- if (demand_page(lg, cr2, lg->regs->errcode))
- continue;
-
- /* OK, it's really not there (or not OK): the Guest
- * needs to know. We write out the cr2 value so it
- * knows where the fault occurred.
- *
- * Note that if the Guest were really messed up, this
- * could happen before it's done the INITIALIZE
- * hypercall, so lg->lguest_data will be NULL, so
- * &lg->lguest_data->cr2 will be address 8. Writing
- * into that address won't hurt the Host at all,
- * though. */
- if (put_user(cr2, &lg->lguest_data->cr2))
- kill_guest(lg, "Writing cr2");
- break;
- case 7: /* We've intercepted a Device Not Available fault. */
- /* If the Guest doesn't want to know, we already
- * restored the Floating Point Unit, so we just
- * continue without telling it. */
- if (!lg->ts)
- continue;
- break;
- case 32 ... 255:
- /* These values mean a real interrupt occurred, in
- * which case the Host handler has already been run.
- * We just do a friendly check if another process
- * should now be run, then fall through to loop
- * around: */
- cond_resched();
- case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
- continue;
- }
-
- /* If we get here, it's a trap the Guest wants to know
- * about. */
- if (deliver_trap(lg, lg->regs->trapnum))
- continue;
-
- /* If the Guest doesn't have a handler (either it hasn't
- * registered any yet, or it's one of the faults we don't let
- * it handle), it dies with a cryptic error message. */
- kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
- lg->regs->trapnum, lg->regs->eip,
- lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
+ /* Now we deal with whatever happened to the Guest. */
+ lguest_arch_handle_trap(lg);
}
+
/* The Guest is dead => "No such file or directory" */
return -ENOENT;
}
-/* Now we can look at each of the routines this calls, in increasing order of
- * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
- * deliver_trap() and demand_page(). After all those, we'll be ready to
- * examine the Switcher, and our philosophical understanding of the Host/Guest
- * duality will be complete. :*/
-
-int find_free_guest(void)
-{
- unsigned int i;
- for (i = 0; i < MAX_LGUEST_GUESTS; i++)
- if (!lguests[i].tsk)
- return i;
- return -1;
-}
-
-static void adjust_pge(void *on)
-{
- if (on)
- write_cr4(read_cr4() | X86_CR4_PGE);
- else
- write_cr4(read_cr4() & ~X86_CR4_PGE);
-}
-
/*H:000
* Welcome to the Host!
*
@@ -701,72 +257,50 @@ static int __init init(void)
/* First we put the Switcher up in very high virtual memory. */
err = map_switcher();
if (err)
- return err;
+ goto out;
/* Now we set up the pagetable implementation for the Guests. */
err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
- if (err) {
- unmap_switcher();
- return err;
- }
+ if (err)
+ goto unmap;
- /* The I/O subsystem needs some things initialized. */
- lguest_io_init();
+ /* We might need to reserve an interrupt vector. */
+ err = init_interrupts();
+ if (err)
+ goto free_pgtables;
/* /dev/lguest needs to be registered. */
err = lguest_device_init();
- if (err) {
- free_pagetables();
- unmap_switcher();
- return err;
- }
+ if (err)
+ goto free_interrupts;
- /* Finally, we need to turn off "Page Global Enable". PGE is an
- * optimization where page table entries are specially marked to show
- * they never change. The Host kernel marks all the kernel pages this
- * way because it's always present, even when userspace is running.
- *
- * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
- * switch to the Guest kernel. If you don't disable this on all CPUs,
- * you'll get really weird bugs that you'll chase for two days.
- *
- * I used to turn PGE off every time we switched to the Guest and back
- * on when we return, but that slowed the Switcher down noticeably. */
-
- /* We don't need the complexity of CPUs coming and going while we're
- * doing this. */
- lock_cpu_hotplug();
- if (cpu_has_pge) { /* We have a broader idea of "global". */
- /* Remember that this was originally set (for cleanup). */
- cpu_had_pge = 1;
- /* adjust_pge is a helper function which sets or unsets the PGE
- * bit on its CPU, depending on the argument (0 == unset). */
- on_each_cpu(adjust_pge, (void *)0, 0, 1);
- /* Turn off the feature in the global feature set. */
- clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
- }
- unlock_cpu_hotplug();
+ /* Finally we do some architecture-specific setup. */
+ lguest_arch_host_init();
/* All good! */
return 0;
+
+free_interrupts:
+ free_interrupts();
+free_pgtables:
+ free_pagetables();
+unmap:
+ unmap_switcher();
+out:
+ return err;
}
/* Cleaning up is just the same code, backwards. With a little French. */
static void __exit fini(void)
{
lguest_device_remove();
+ free_interrupts();
free_pagetables();
unmap_switcher();
- /* If we had PGE before we started, turn it back on now. */
- lock_cpu_hotplug();
- if (cpu_had_pge) {
- set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
- /* adjust_pge's argument "1" means set PGE. */
- on_each_cpu(adjust_pge, (void *)1, 0, 1);
- }
- unlock_cpu_hotplug();
+ lguest_arch_host_fini();
}
+/*:*/
/* The Host side of lguest can be a module. This is a nice way for people to
* play with it. */
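
The big deletion in core.c is i386-specific machinery (Switcher per-cpu
setup, IN/OUT emulation, run_guest_once(), the PGE juggling) moving behind
the new lguest_arch_* hooks, into x86/core.c per the Makefile change above.
Meanwhile the memory helpers change meaning: a Guest "physical" address is
now an offset into the Launcher-supplied region at lg->mem_base, not a raw
Launcher virtual address. The bounds check is worth a close look; sketched:

/* As in lguest_address_ok() above: the second clause rejects an
 * addr + len that wraps past zero; the first keeps the last byte
 * below the Launcher-declared pfn_limit. */
static int sketch_address_ok(unsigned long addr, unsigned long len,
			     unsigned long pfn_limit)
{
	return (addr + len) / PAGE_SIZE < pfn_limit && (addr + len >= addr);
}

/* __lgread() then copies via copy_from_user(b, lg->mem_base + addr,
 * bytes) and, on failure, zeroes the buffer and kills the Guest
 * rather than returning an error. */
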
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index db6caace3b9..9d5184c7c14 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -25,17 +25,13 @@
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <irq_vectors.h>
#include "lg.h"
-/*H:120 This is the core hypercall routine: where the Guest gets what it
- * wants. Or gets killed. Or, in the case of LHCALL_CRASH, both.
- *
- * Remember from the Guest: %eax == which call to make, and the arguments are
- * packed into %edx, %ebx and %ecx if needed. */
-static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
+/*H:120 This is the core hypercall routine: where the Guest gets what it wants.
+ * Or gets killed. Or, in the case of LHCALL_CRASH, both. */
+static void do_hcall(struct lguest *lg, struct hcall_args *args)
{
- switch (regs->eax) {
+ switch (args->arg0) {
case LHCALL_FLUSH_ASYNC:
/* This call does nothing, except by breaking out of the Guest
* it makes us process all the asynchronous hypercalls. */
@@ -51,7 +47,7 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
char msg[128];
/* If the lgread fails, it will call kill_guest() itself; the
* kill_guest() with the message will be ignored. */
- lgread(lg, msg, regs->edx, sizeof(msg));
+ __lgread(lg, msg, args->arg1, sizeof(msg));
msg[sizeof(msg)-1] = '\0';
kill_guest(lg, "CRASH: %s", msg);
break;
@@ -59,67 +55,49 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
case LHCALL_FLUSH_TLB:
/* FLUSH_TLB comes in two flavors, depending on the
* argument: */
- if (regs->edx)
+ if (args->arg1)
guest_pagetable_clear_all(lg);
else
guest_pagetable_flush_user(lg);
break;
- case LHCALL_BIND_DMA:
- /* BIND_DMA really wants four arguments, but it's the only call
- * which does. So the Guest packs the number of buffers and
- * the interrupt number into the final argument, and we decode
- * it here. This can legitimately fail, since we currently
- * place a limit on the number of DMA pools a Guest can have.
- * So we return true or false from this call. */
- regs->eax = bind_dma(lg, regs->edx, regs->ebx,
- regs->ecx >> 8, regs->ecx & 0xFF);
- break;
/* All these calls simply pass the arguments through to the right
* routines. */
- case LHCALL_SEND_DMA:
- send_dma(lg, regs->edx, regs->ebx);
- break;
- case LHCALL_LOAD_GDT:
- load_guest_gdt(lg, regs->edx, regs->ebx);
- break;
- case LHCALL_LOAD_IDT_ENTRY:
- load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx);
- break;
case LHCALL_NEW_PGTABLE:
- guest_new_pagetable(lg, regs->edx);
+ guest_new_pagetable(lg, args->arg1);
break;
case LHCALL_SET_STACK:
- guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx);
+ guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
break;
case LHCALL_SET_PTE:
- guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx));
+ guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
break;
case LHCALL_SET_PMD:
- guest_set_pmd(lg, regs->edx, regs->ebx);
- break;
- case LHCALL_LOAD_TLS:
- guest_load_tls(lg, regs->edx);
+ guest_set_pmd(lg, args->arg1, args->arg2);
break;
case LHCALL_SET_CLOCKEVENT:
- guest_set_clockevent(lg, regs->edx);
+ guest_set_clockevent(lg, args->arg1);
break;
-
case LHCALL_TS:
/* This sets the TS flag, as we saw used in run_guest(). */
- lg->ts = regs->edx;
+ lg->ts = args->arg1;
break;
case LHCALL_HALT:
/* Similarly, this sets the halted flag for run_guest(). */
lg->halted = 1;
break;
+ case LHCALL_NOTIFY:
+ lg->pending_notify = args->arg1;
+ break;
default:
- kill_guest(lg, "Bad hypercall %li\n", regs->eax);
+ if (lguest_arch_do_hcall(lg, args))
+ kill_guest(lg, "Bad hypercall %li\n", args->arg0);
}
}
+/*:*/
-/* Asynchronous hypercalls are easy: we just look in the array in the Guest's
- * "struct lguest_data" and see if there are any new ones marked "ready".
+/*H:124 Asynchronous hypercalls are easy: we just look in the array in the
+ * Guest's "struct lguest_data" to see if any new ones are marked "ready".
*
* We are careful to do these in order: obviously we respect the order the
* Guest put them in the ring, but we also promise the Guest that they will
@@ -134,10 +112,9 @@ static void do_async_hcalls(struct lguest *lg)
if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
return;
-
/* We process "struct lguest_data"s hcalls[] ring once. */
for (i = 0; i < ARRAY_SIZE(st); i++) {
- struct lguest_regs regs;
+ struct hcall_args args;
/* We remember where we were up to from last time. This makes
* sure that the hypercalls are done in the order the Guest
* places them in the ring. */
@@ -152,18 +129,16 @@ static void do_async_hcalls(struct lguest *lg)
if (++lg->next_hcall == LHCALL_RING_SIZE)
lg->next_hcall = 0;
- /* We copy the hypercall arguments into a fake register
- * structure. This makes life simple for do_hcall(). */
- if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax)
- || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx)
- || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
- || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) {
+ /* Copy the hypercall arguments into a local copy of
+ * the hcall_args struct. */
+ if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
+ sizeof(struct hcall_args))) {
kill_guest(lg, "Fetching async hypercalls");
break;
}
/* Do the hypercall, same as a normal one. */
- do_hcall(lg, &regs);
+ do_hcall(lg, &args);
/* Mark the hypercall done. */
if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
@@ -171,9 +146,9 @@ static void do_async_hcalls(struct lguest *lg)
break;
}
- /* Stop doing hypercalls if we've just done a DMA to the
- * Launcher: it needs to service this first. */
- if (lg->dma_is_pending)
+ /* Stop doing hypercalls if they want to notify the Launcher:
+ * it needs to service this first. */
+ if (lg->pending_notify)
break;
}
}
@@ -182,76 +157,35 @@ static void do_async_hcalls(struct lguest *lg)
* Guest makes a hypercall, we end up here to set things up: */
static void initialize(struct lguest *lg)
{
- u32 tsc_speed;
/* You can't do anything until you're initialized. The Guest knows the
* rules, so we're unforgiving here. */
- if (lg->regs->eax != LHCALL_LGUEST_INIT) {
- kill_guest(lg, "hypercall %li before LGUEST_INIT",
- lg->regs->eax);
+ if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
+ kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
return;
}
- /* We insist that the Time Stamp Counter exist and doesn't change with
- * cpu frequency. Some devious chip manufacturers decided that TSC
- * changes could be handled in software. I decided that time going
- * backwards might be good for benchmarks, but it's bad for users.
- *
- * We also insist that the TSC be stable: the kernel detects unreliable
- * TSCs for its own purposes, and we use that here. */
- if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
- tsc_speed = tsc_khz;
- else
- tsc_speed = 0;
-
- /* The pointer to the Guest's "struct lguest_data" is the only
- * argument. */
- lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
- /* If we check the address they gave is OK now, we can simply
- * copy_to_user/from_user from now on rather than using lgread/lgwrite.
- * I put this in to show that I'm not immune to writing stupid
- * optimizations. */
- if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
+ if (lguest_arch_init_hypercalls(lg))
kill_guest(lg, "bad guest page %p", lg->lguest_data);
- return;
- }
+
/* The Guest tells us where we're not to deliver interrupts by putting
* the range of addresses into "struct lguest_data". */
if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
- || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
- /* We tell the Guest that it can't use the top 4MB of virtual
- * addresses used by the Switcher. */
- || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
- || put_user(tsc_speed, &lg->lguest_data->tsc_khz)
- /* We also give the Guest a unique id, as used in lguest_net.c. */
- || put_user(lg->guestid, &lg->lguest_data->guestid))
+ || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
kill_guest(lg, "bad guest page %p", lg->lguest_data);
/* We write the current time into the Guest's data page once now. */
write_timestamp(lg);
+ /* page_tables.c will also do some setup. */
+ page_table_guest_data_init(lg);
+
/* This is the one case where the above accesses might have been the
* first write to a Guest page. This may have caused a copy-on-write
* fault, but the Guest might be referring to the old (read-only)
* page. */
guest_pagetable_clear_all(lg);
}
-/* Now we've examined the hypercall code; our Guest can make requests. There
- * is one other way we can do things for the Guest, as we see in
- * emulate_insn(). */
-
-/*H:110 Tricky point: we mark the hypercall as "done" once we've done it.
- * Normally we don't need to do this: the Guest will run again and update the
- * trap number before we come back around the run_guest() loop to
- * do_hypercalls().
- *
- * However, if we are signalled or the Guest sends DMA to the Launcher, that
- * loop will exit without running the Guest. When it comes back it would try
- * to re-run the hypercall. */
-static void clear_hcall(struct lguest *lg)
-{
- lg->regs->trapnum = 255;
-}
/*H:100
* Hypercalls
@@ -261,16 +195,12 @@ static void clear_hcall(struct lguest *lg)
*/
void do_hypercalls(struct lguest *lg)
{
- /* Not initialized yet? */
+ /* Not initialized yet? This hypercall must do it. */
if (unlikely(!lg->lguest_data)) {
- /* Did the Guest make a hypercall? We might have come back for
- * some other reason (an interrupt, a different trap). */
- if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
- /* Set up the "struct lguest_data" */
- initialize(lg);
- /* The hypercall is done. */
- clear_hcall(lg);
- }
+ /* Set up the "struct lguest_data" */
+ initialize(lg);
+ /* Hcall is done. */
+ lg->hcall = NULL;
return;
}
@@ -280,12 +210,21 @@ void do_hypercalls(struct lguest *lg)
do_async_hcalls(lg);
/* If we stopped reading the hypercall ring because the Guest did a
- * SEND_DMA to the Launcher, we want to return now. Otherwise if the
- * Guest asked us to do a hypercall, we do it. */
- if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
- do_hcall(lg, lg->regs);
- /* The hypercall is done. */
- clear_hcall(lg);
+ * NOTIFY to the Launcher, we want to return now. Otherwise we do
+ * the hypercall. */
+ if (!lg->pending_notify) {
+ do_hcall(lg, lg->hcall);
+ /* Tricky point: we reset the hcall pointer to mark the
+ * hypercall as "done". We use the hcall pointer rather than
+ * the trap number to indicate a hypercall is pending.
+ * Normally it doesn't matter: the Guest will run again and
+ * update the trap number before we come back here.
+ *
+ * However, if we are signalled or the Guest does a NOTIFY to the
+ * Launcher, the run_guest() loop will exit without running the
+ * Guest. When it comes back it would try to re-run the
+ * hypercall. */
+ lg->hcall = NULL;
}
}
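(Editor's aside: using the hcall pointer itself as the "pending" flag is a common trick: NULL means there is nothing to replay. A stripped-down illustration — the names here are ours, not the kernel's:)

#include <stdio.h>
#include <stddef.h>

struct hcall_args { unsigned long arg0; };

static struct hcall_args *pending;	/* NULL = no hypercall to (re)run */

static void run_pending(void)
{
	if (!pending)
		return;			/* nothing was interrupted */
	printf("running hypercall %lu\n", pending->arg0);
	pending = NULL;			/* done: a re-entry won't replay it */
}

int main(void)
{
	struct hcall_args a = { 42 };

	pending = &a;
	run_pending();	/* runs it once */
	run_pending();	/* safe no-op: the pointer was cleared */
	return 0;
}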
@@ -295,6 +234,6 @@ void write_timestamp(struct lguest *lg)
{
struct timespec now;
ktime_get_real_ts(&now);
- if (put_user(now, &lg->lguest_data->time))
+ if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
kill_guest(lg, "Writing timestamp");
}
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 39731232d82..82966982cb3 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -12,8 +12,14 @@
* them first, so we also have a way of "reflecting" them into the Guest as if
* they had been delivered to it directly. :*/
#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
#include "lg.h"
+/* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */
+static unsigned int syscall_vector = SYSCALL_VECTOR;
+module_param(syscall_vector, uint, 0444);
+
/* The address of the interrupt handler is split into two bits: */
static unsigned long idt_address(u32 lo, u32 hi)
{
@@ -39,7 +45,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
{
/* Stack grows downwards: move stack then write value. */
*gstack -= 4;
- lgwrite_u32(lg, *gstack, val);
+ lgwrite(lg, *gstack, u32, val);
}
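(Editor's aside: a user-space model of push_guest_stack() — decrement the cursor by the word size first, then store at the new, lower address. A sketch only; the real routine writes into Guest memory via lgwrite():)

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static void push(uint8_t *base, unsigned long *sp, uint32_t val)
{
	*sp -= 4;			/* move stack down first... */
	memcpy(base + *sp, &val, 4);	/* ...then write the value */
}

int main(void)
{
	uint8_t stack[64];
	unsigned long sp = sizeof(stack);	/* stacks start at the top */
	uint32_t top;

	push(stack, &sp, 0xdeadbeef);
	push(stack, &sp, 0xcafef00d);
	memcpy(&top, stack + sp, 4);
	printf("sp=%lu top=%#x\n", sp, top);	/* sp=56 top=0xcafef00d */
	return 0;
}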
/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -56,8 +62,9 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
* it). */
static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
{
- unsigned long gstack;
+ unsigned long gstack, origstack;
u32 eflags, ss, irq_enable;
+ unsigned long virtstack;
/* There are two cases for interrupts: one where the Guest is already
* in the kernel, and a more complex one where the Guest is in
@@ -65,8 +72,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
if ((lg->regs->ss&0x3) != GUEST_PL) {
/* The Guest told us their kernel stack with the SET_STACK
* hypercall: both the virtual address and the segment */
- gstack = guest_pa(lg, lg->esp1);
+ virtstack = lg->esp1;
ss = lg->ss1;
+
+ origstack = gstack = guest_pa(lg, virtstack);
/* We push the old stack segment and pointer onto the new
* stack: when the Guest does an "iret" back from the interrupt
* handler the CPU will notice they're dropping privilege
@@ -75,8 +84,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
push_guest_stack(lg, &gstack, lg->regs->esp);
} else {
/* We're staying on the same Guest (kernel) stack. */
- gstack = guest_pa(lg, lg->regs->esp);
+ virtstack = lg->regs->esp;
ss = lg->regs->ss;
+
+ origstack = gstack = guest_pa(lg, virtstack);
}
/* Remember that we never let the Guest actually disable interrupts, so
@@ -102,7 +113,7 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
/* Now we've pushed all the old state, we change the stack, the code
* segment and the address to execute. */
lg->regs->ss = ss;
- lg->regs->esp = gstack + lg->page_offset;
+ lg->regs->esp = virtstack + (gstack - origstack);
lg->regs->cs = (__KERNEL_CS|GUEST_PL);
lg->regs->eip = idt_address(lo, hi);
@@ -165,7 +176,7 @@ void maybe_do_interrupt(struct lguest *lg)
/* Look at the IDT entry the Guest gave us for this interrupt. The
* first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
* over them. */
- idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq];
+ idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
/* If they don't have a handler (yet?), we just ignore it */
if (idt_present(idt->a, idt->b)) {
/* OK, mark it no longer pending and deliver it. */
@@ -183,6 +194,47 @@ void maybe_do_interrupt(struct lguest *lg)
* timer interrupt. */
write_timestamp(lg);
}
+/*:*/
+
+/* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent
+ * me a patch, so we support that too. It'd be a big step for lguest if half
+ * the Plan 9 user base were to start using it.
+ *
+ * Actually now I think of it, it's possible that Ron *is* half the Plan 9
+ * userbase. Oh well. */
+static bool could_be_syscall(unsigned int num)
+{
+ /* Normal Linux SYSCALL_VECTOR or reserved vector? */
+ return num == SYSCALL_VECTOR || num == syscall_vector;
+}
+
+/* The syscall vector it wants must be unused by the Host. */
+bool check_syscall_vector(struct lguest *lg)
+{
+ u32 vector;
+
+ if (get_user(vector, &lg->lguest_data->syscall_vec))
+ return false;
+
+ return could_be_syscall(vector);
+}
+
+int init_interrupts(void)
+{
+ /* If they want some strange system call vector, reserve it now */
+ if (syscall_vector != SYSCALL_VECTOR
+ && test_and_set_bit(syscall_vector, used_vectors)) {
+ printk("lg: couldn't reserve syscall %u\n", syscall_vector);
+ return -EBUSY;
+ }
+ return 0;
+}
+
+void free_interrupts(void)
+{
+ if (syscall_vector != SYSCALL_VECTOR)
+ clear_bit(syscall_vector, used_vectors);
+}
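(Editor's aside: the reservation above relies on test_and_set_bit() returning the previous bit value, so two users can never claim the same vector. A user-space stand-in with its own bitmap — unlike the kernel primitive, this sketch is not atomic:)

#include <stdio.h>

#define NR_VECTORS 256
#define BITS 32

static unsigned long vector_map[NR_VECTORS / BITS];

/* Non-atomic stand-in for test_and_set_bit(): returns the old bit. */
static int test_and_set(unsigned int nr)
{
	unsigned long mask = 1UL << (nr % BITS);
	unsigned long *w = &vector_map[nr / BITS];
	int old = !!(*w & mask);

	*w |= mask;
	return old;
}

int main(void)
{
	unsigned int vec = 64;	/* e.g. the Plan 9 syscall vector */

	if (test_and_set(vec))
		puts("vector already taken");
	else
		printf("reserved vector %u\n", vec);
	return 0;
}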
/*H:220 Now we've got the routines to deliver interrupts, delivering traps
* like page fault is easy. The only trick is that Intel decided that some
@@ -197,14 +249,14 @@ int deliver_trap(struct lguest *lg, unsigned int num)
{
/* Trap numbers are always 8 bit, but we set an impossible trap number
* for traps inside the Switcher, so check that here. */
- if (num >= ARRAY_SIZE(lg->idt))
+ if (num >= ARRAY_SIZE(lg->arch.idt))
return 0;
/* Early on the Guest hasn't set the IDT entries (or maybe it put a
* bogus one in): if we fail here, the Guest will be killed. */
- if (!idt_present(lg->idt[num].a, lg->idt[num].b))
+ if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
return 0;
- set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num));
+ set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b,
+		      has_err(num));
return 1;
}
@@ -218,28 +270,20 @@ int deliver_trap(struct lguest *lg, unsigned int num)
* system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all
* the other hypervisors would tease it.
*
- * This routine determines if a trap can be delivered directly. */
-static int direct_trap(const struct lguest *lg,
- const struct desc_struct *trap,
- unsigned int num)
+ * This routine indicates if a particular trap number could be delivered
+ * directly. */
+static int direct_trap(unsigned int num)
{
/* Hardware interrupts don't go to the Guest at all (except system
* call). */
- if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR)
+ if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num))
return 0;
/* The Host needs to see page faults (for shadow paging and to save the
* fault address), general protection faults (in/out emulation) and
* device not available (TS handling), and of course, the hypercall
* trap. */
- if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY)
- return 0;
-
- /* Only trap gates (type 15) can go direct to the Guest. Interrupt
- * gates (type 14) disable interrupts as they are entered, which we
- * never let the Guest do. Not present entries (type 0x0) also can't
- * go direct, of course 8) */
- return idt_type(trap->a, trap->b) == 0xF;
+ return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY;
}
/*:*/
@@ -348,15 +392,11 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
* to copy this again. */
lg->changed |= CHANGED_IDT;
- /* The IDT which we keep in "struct lguest" only contains 32 entries
- * for the traps and LGUEST_IRQS (32) entries for interrupts. We
- * ignore attempts to set handlers for higher interrupt numbers, except
- * for the system call "interrupt" at 128: we have a special IDT entry
- * for that. */
- if (num < ARRAY_SIZE(lg->idt))
- set_trap(lg, &lg->idt[num], num, lo, hi);
- else if (num == SYSCALL_VECTOR)
- set_trap(lg, &lg->syscall_idt, num, lo, hi);
+ /* Check that the Guest doesn't try to step outside the bounds. */
+ if (num >= ARRAY_SIZE(lg->arch.idt))
+ kill_guest(lg, "Setting idt entry %u", num);
+ else
+ set_trap(lg, &lg->arch.idt[num], num, lo, hi);
}
/* The default entry for each interrupt points into the Switcher routines which
@@ -399,20 +439,21 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
/* We can simply copy the direct traps, otherwise we use the default
* ones in the Switcher: they will return to the Host. */
- for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) {
- if (direct_trap(lg, &lg->idt[i], i))
- idt[i] = lg->idt[i];
+ for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
+ /* If no Guest can ever override this trap, leave it alone. */
+ if (!direct_trap(i))
+ continue;
+
+ /* Only trap gates (type 15) can go direct to the Guest.
+ * Interrupt gates (type 14) disable interrupts as they are
+ * entered, which we never let the Guest do. Not present
+ * entries (type 0x0) also can't go direct, of course. */
+ if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
+ idt[i] = lg->arch.idt[i];
else
+ /* Reset it to the default. */
default_idt_entry(&idt[i], i, def[i]);
}
-
- /* Don't forget the system call trap! The IDT entries for other
- * interrupts never change, so no need to copy them. */
- i = SYSCALL_VECTOR;
- if (direct_trap(lg, &lg->syscall_idt, i))
- idt[i] = lg->syscall_idt;
- else
- default_idt_entry(&idt[i], i, def[i]);
}
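(Editor's aside: the type check at the heart of copy_traps() reads the gate-type nibble out of the descriptor's high word — bits 8..11 in the i386 descriptor format, as lguest's idt_type() helper does. A small sketch of that extraction, with illustrative descriptor values:)

#include <stdio.h>
#include <stdint.h>

/* 0xF = 32-bit trap gate, 0xE = 32-bit interrupt gate, 0x0 = absent. */
static unsigned int idt_type(uint32_t lo, uint32_t hi)
{
	(void)lo;			/* type lives in the high word */
	return (hi >> 8) & 0xF;
}

int main(void)
{
	uint32_t hi_trap = 0x00008F00;	/* present, DPL 0, trap gate */
	uint32_t hi_intr = 0x00008E00;	/* present, DPL 0, interrupt gate */

	printf("%x %x\n", idt_type(0, hi_trap), idt_type(0, hi_intr));
	return 0;
}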
void guest_set_clockevent(struct lguest *lg, unsigned long delta)
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
deleted file mode 100644
index ea68613b43f..00000000000
--- a/drivers/lguest/io.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest
- * to talk to the Launcher or directly to another Guest. It uses familiar
- * concepts of DMA and interrupts, plus some neat code stolen from
- * futexes... :*/
-
-/* Copyright (C) 2006 Rusty Russell IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/types.h>
-#include <linux/futex.h>
-#include <linux/jhash.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/uaccess.h>
-#include "lg.h"
-
-/*L:300
- * I/O
- *
- * Getting data in and out of the Guest is quite an art. There are numerous
- * ways to do it, and they all suck differently. We try to keep things fairly
- * close to "real" hardware so our Guest's drivers don't look like an alien
- * visitation in the middle of the Linux code, and yet make sure that Guests
- * can talk directly to other Guests, not just the Launcher.
- *
- * To do this, the Guest gives us a key when it binds or sends DMA buffers.
- * The key corresponds to a "physical" address inside the Guest (ie. a virtual
- * address inside the Launcher process). We don't, however, use this key
- * directly.
- *
- * We want Guests which share memory to be able to DMA to each other: two
- * Launchers can mmap the same file, and then the Guests can communicate.
- * Fortunately, the futex code provides us with a way to get a "union
- * futex_key" corresponding to the memory lying at a virtual address: if the
- * two processes share memory, the "union futex_key" for that memory will match
- * even if the memory is mapped at different addresses in each. So we always
- * convert the keys to "union futex_key"s to compare them.
- *
- * Before we dive into this though, we need to look at another set of helper
- * routines used throughout the Host kernel code to access Guest memory.
- :*/
-static struct list_head dma_hash[61];
-
-/* An unfortunate side effect of the Linux double-linked list implementation is
- * that there's no good way to statically initialize an array of linked
- * lists. */
-void lguest_io_init(void)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(dma_hash); i++)
- INIT_LIST_HEAD(&dma_hash[i]);
-}
-
-/* FIXME: allow multi-page lengths. */
-static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma)
-{
- unsigned int i;
-
- for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
- if (!dma->len[i])
- return 1;
- if (!lguest_address_ok(lg, dma->addr[i], dma->len[i]))
- goto kill;
- if (dma->len[i] > PAGE_SIZE)
- goto kill;
- /* We could do over a page, but is it worth it? */
- if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
- goto kill;
- }
- return 1;
-
-kill:
- kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]);
- return 0;
-}
-
-/*L:330 This is our hash function, using the wonderful Jenkins hash.
- *
- * The futex key is a union with three parts: an unsigned long word, a pointer,
- * and an int "offset". We could use jhash_2words() which takes three u32s.
- * (Ok, the hash functions are great: the naming sucks though).
- *
- * It's nice to be portable to 64-bit platforms, so we use the more generic
- * jhash2(), which takes an array of u32, the number of u32s, and an initial
- * u32 to roll in. This is uglier, but breaks down to almost the same code on
- * 32-bit platforms like this one.
- *
- * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61).
- */
-static unsigned int hash(const union futex_key *key)
-{
- return jhash2((u32*)&key->both.word,
- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
- key->both.offset)
- % ARRAY_SIZE(dma_hash);
-}
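(Editor's aside: since jhash2() is kernel-internal, here is a hedged user-space sketch of the same bucket selection — mix the key words with a seed, then take the result modulo the prime table size. The mixer below is a simple multiplicative stand-in, not the real Jenkins hash:)

#include <stdio.h>
#include <stdint.h>

#define HASH_BUCKETS 61	/* prime, like ARRAY_SIZE(dma_hash) above */

static unsigned int mix(const uint32_t *k, unsigned int n, uint32_t seed)
{
	uint32_t h = seed;
	unsigned int i;

	for (i = 0; i < n; i++)
		h = (h ^ k[i]) * 2654435761u;	/* Knuth multiplicative step */
	return h;
}

int main(void)
{
	uint32_t key[3] = { 0x1000, 0xdead, 7 };	/* word, ptr, offset */

	/* Mix the first two words, seed with the offset, pick a bucket. */
	printf("bucket %u\n", mix(key, 2, key[2]) % HASH_BUCKETS);
	return 0;
}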
-
-/* This is a convenience routine to compare two keys. It's a much bemoaned C
- * weakness that it doesn't allow '==' on structures or unions, so we have to
- * open-code it like this. */
-static inline int key_eq(const union futex_key *a, const union futex_key *b)
-{
- return (a->both.word == b->both.word
- && a->both.ptr == b->both.ptr
- && a->both.offset == b->both.offset);
-}
-
-/*L:360 OK, when we need to actually free up a Guest's DMA array we do several
- * things, so we have a convenient function to do it.
- *
- * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem
- * for the drop_futex_key_refs(). */
-static void unlink_dma(struct lguest_dma_info *dmainfo)
-{
- /* You locked this too, right? */
- BUG_ON(!mutex_is_locked(&lguest_lock));
- /* This is how we know that the entry is free. */
- dmainfo->interrupt = 0;
- /* Remove it from the hash table. */
- list_del(&dmainfo->list);
- /* Drop the references we were holding (to the inode or mm). */
- drop_futex_key_refs(&dmainfo->key);
-}
-
-/*L:350 This is the routine which we call when the Guest asks to unregister a
- * DMA array attached to a given key. Returns true if the array was found. */
-static int unbind_dma(struct lguest *lg,
- const union futex_key *key,
- unsigned long dmas)
-{
- int i, ret = 0;
-
- /* We don't bother with the hash table, just look through all this
- * Guest's DMA arrays. */
- for (i = 0; i < LGUEST_MAX_DMA; i++) {
- /* In theory it could have more than one array on the same key,
- * or one array on multiple keys, so we check both */
- if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) {
- unlink_dma(&lg->dma[i]);
- ret = 1;
- break;
- }
- }
- return ret;
-}
-
-/*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct
- * lguest_dma" for receiving I/O.
- *
- * The Guest wants to bind an array of "struct lguest_dma"s to a particular key
- * to receive input. This only happens when the Guest is setting up a new
- * device, so it doesn't have to be very fast.
- *
- * It returns 1 on a successful registration (it can fail if we hit the limit
- * of registrations for this Guest).
- */
-int bind_dma(struct lguest *lg,
- unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt)
-{
- unsigned int i;
- int ret = 0;
- union futex_key key;
- /* Futex code needs the mmap_sem. */
- struct rw_semaphore *fshared = &current->mm->mmap_sem;
-
- /* Invalid interrupt? (We could kill the guest here). */
- if (interrupt >= LGUEST_IRQS)
- return 0;
-
- /* We need to grab the Big Lguest Lock, because other Guests may be
- * trying to look through this Guest's DMAs to send something while
- * we're doing this. */
- mutex_lock(&lguest_lock);
- down_read(fshared);
- if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
- kill_guest(lg, "bad dma key %#lx", ukey);
- goto unlock;
- }
-
- /* We want to keep this key valid once we drop mmap_sem, so we have to
- * hold a reference. */
- get_futex_key_refs(&key);
-
- /* If the Guest specified an interrupt of 0, that means they want to
- * unregister this array of "struct lguest_dma"s. */
- if (interrupt == 0)
- ret = unbind_dma(lg, &key, dmas);
- else {
- /* Look through this Guest's dma array for an unused entry. */
- for (i = 0; i < LGUEST_MAX_DMA; i++) {
- /* If the interrupt is non-zero, the entry is already
- * used. */
- if (lg->dma[i].interrupt)
- continue;
-
- /* OK, a free one! Fill in our details. */
- lg->dma[i].dmas = dmas;
- lg->dma[i].num_dmas = numdmas;
- lg->dma[i].next_dma = 0;
- lg->dma[i].key = key;
- lg->dma[i].guestid = lg->guestid;
- lg->dma[i].interrupt = interrupt;
-
- /* Now we add it to the hash table: the position
- * depends on the futex key that we got. */
- list_add(&lg->dma[i].list, &dma_hash[hash(&key)]);
- /* Success! */
- ret = 1;
- goto unlock;
- }
- }
- /* If we didn't find a slot to put the key in, drop the reference
- * again. */
- drop_futex_key_refs(&key);
-unlock:
- /* Unlock and out. */
- up_read(fshared);
- mutex_unlock(&lguest_lock);
- return ret;
-}
-
-/*L:385 Note that our routines to access a different Guest's memory are called
- * lgread_other() and lgwrite_other(): these names emphasize that they are only
- * used when the Guest is *not* the current Guest.
- *
- * The interface for copying from another process's memory is called
- * access_process_vm(), with a final argument of 0 for a read, and 1 for a
- * write.
- *
- * We need lgread_other() to read the destination Guest's "struct lguest_dma"
- * array. */
-static int lgread_other(struct lguest *lg,
- void *buf, u32 addr, unsigned bytes)
-{
- if (!lguest_address_ok(lg, addr, bytes)
- || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) {
- memset(buf, 0, bytes);
- kill_guest(lg, "bad address in registered DMA struct");
- return 0;
- }
- return 1;
-}
-
-/* "lgwrite()" to another Guest: used to update the destination "used_len" once
- * we've transferred data into the buffer. */
-static int lgwrite_other(struct lguest *lg, u32 addr,
- const void *buf, unsigned bytes)
-{
- if (!lguest_address_ok(lg, addr, bytes)
- || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1)
- != bytes)) {
- kill_guest(lg, "bad address writing to registered DMA");
- return 0;
- }
- return 1;
-}
-
-/*L:400 This is the generic engine which copies from a source "struct
- * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The
- * destination Guest's pages have already been mapped, as contained in the
- * pages array.
- *
- * If you're wondering if there's a nice "copy from one process to another"
- * routine, so was I. But Linux isn't really set up to copy between two
- * unrelated processes, so we have to write it ourselves.
- */
-static u32 copy_data(struct lguest *srclg,
- const struct lguest_dma *src,
- const struct lguest_dma *dst,
- struct page *pages[])
-{
- unsigned int totlen, si, di, srcoff, dstoff;
- void *maddr = NULL;
-
- /* We return the total length transferred. */
- totlen = 0;
-
- /* We keep indexes into the source and destination "struct lguest_dma",
- * and an offset within each region. */
- si = di = 0;
- srcoff = dstoff = 0;
-
- /* We loop until the source or destination is exhausted. */
- while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
- && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
- /* We can only transfer the rest of the src buffer, or as much
- * as will fit into the destination buffer. */
- u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
-
- /* For systems using "highmem" we need to use kmap() to access
- * the page we want. We often use the same page over and over,
- * so rather than kmap() it on every loop, we set the maddr
- * pointer to NULL when we need to move to the next
- * destination page. */
- if (!maddr)
- maddr = kmap(pages[di]);
-
- /* Copy directly from (this Guest's) source address to the
- * destination Guest's kmap()ed buffer. Note that maddr points
- * to the start of the page: we need to add the offset of the
- * destination address and offset within the buffer. */
-
- /* FIXME: This is not completely portable. I looked at
- * copy_to_user_page(), and some arch's seem to need special
- * flushes. x86 is fine. */
- if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
- (void __user *)src->addr[si], len) != 0) {
- /* If a copy failed, it's the source's fault. */
- kill_guest(srclg, "bad address in sending DMA");
- totlen = 0;
- break;
- }
-
- /* Increment the total and src & dst offsets */
- totlen += len;
- srcoff += len;
- dstoff += len;
-
- /* Presumably we reached the end of the src or dest buffers: */
- if (srcoff == src->len[si]) {
- /* Move to the next buffer at offset 0 */
- si++;
- srcoff = 0;
- }
- if (dstoff == dst->len[di]) {
- /* We need to unmap that destination page and reset
- * maddr ready for the next one. */
- kunmap(pages[di]);
- maddr = NULL;
- di++;
- dstoff = 0;
- }
- }
-
- /* If we still had a page mapped at the end, unmap now. */
- if (maddr)
- kunmap(pages[di]);
-
- return totlen;
-}
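(Editor's aside: the core of copy_data() is a two-cursor walk over two scatter-gather lists, copying min(remaining-in-src, remaining-in-dst) each step. A self-contained model of just that loop, without the kmap()/copy_from_user() machinery:)

#include <stdio.h>
#include <string.h>

#define SECTIONS 3

struct dma {
	char *addr[SECTIONS];
	unsigned int len[SECTIONS];	/* 0 terminates the list */
};

static unsigned int sg_copy(const struct dma *src, const struct dma *dst)
{
	unsigned int si = 0, di = 0, soff = 0, doff = 0, total = 0;

	while (si < SECTIONS && src->len[si] &&
	       di < SECTIONS && dst->len[di]) {
		/* Copy whatever is left in the smaller of the two. */
		unsigned int len = src->len[si] - soff;

		if (dst->len[di] - doff < len)
			len = dst->len[di] - doff;
		memcpy(dst->addr[di] + doff, src->addr[si] + soff, len);
		total += len;
		soff += len;
		doff += len;
		if (soff == src->len[si]) { si++; soff = 0; }
		if (doff == dst->len[di]) { di++; doff = 0; }
	}
	return total;
}

int main(void)
{
	char a[] = "hello ", b[] = "world", out1[8] = "", out2[8] = "";
	struct dma src = { { a, b, 0 }, { 6, 5, 0 } };
	struct dma dst = { { out1, out2, 0 }, { 7, 7, 0 } };

	printf("%u bytes: %s%s\n", sg_copy(&src, &dst), out1, out2);
	return 0;
}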
-
-/*L:390 This is how we transfer a "struct lguest_dma" from the source Guest
- * (the current Guest which called SEND_DMA) to another Guest. */
-static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
- struct lguest *dstlg, const struct lguest_dma *dst)
-{
- int i;
- u32 ret;
- struct page *pages[LGUEST_MAX_DMA_SECTIONS];
-
- /* We check that both source and destination "struct lguest_dma"s are
- * within the bounds of the source and destination Guests */
- if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
- return 0;
-
- /* We need to map the pages which correspond to each part of the
- * destination buffer. */
- for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
- if (dst->len[i] == 0)
- break;
- /* get_user_pages() is a complicated function, especially since
- * we only want a single page. But it works, and returns the
- * number of pages. Note that we're holding the destination's
- * mmap_sem, as get_user_pages() requires. */
- if (get_user_pages(dstlg->tsk, dstlg->mm,
- dst->addr[i], 1, 1, 1, pages+i, NULL)
- != 1) {
- /* This means the destination gave us a bogus buffer */
- kill_guest(dstlg, "Error mapping DMA pages");
- ret = 0;
- goto drop_pages;
- }
- }
-
- /* Now copy the data until we run out of src or dst. */
- ret = copy_data(srclg, src, dst, pages);
-
-drop_pages:
- while (--i >= 0)
- put_page(pages[i]);
- return ret;
-}
-
-/*L:380 Transferring data from one Guest to another is not as simple as I'd
- * like. We've found the "struct lguest_dma_info" bound to the same address as
- * the send; now we need to copy into it.
- *
- * This function returns true if the destination array was empty. */
-static int dma_transfer(struct lguest *srclg,
- unsigned long udma,
- struct lguest_dma_info *dst)
-{
- struct lguest_dma dst_dma, src_dma;
- struct lguest *dstlg;
- u32 i, dma = 0;
-
- /* From the "struct lguest_dma_info" we found in the hash, grab the
- * Guest. */
- dstlg = &lguests[dst->guestid];
- /* Read in the source "struct lguest_dma" handed to SEND_DMA. */
- lgread(srclg, &src_dma, udma, sizeof(src_dma));
-
- /* We need the destination's mmap_sem, and we already hold the source's
- * mmap_sem for the futex key lookup. Normally this would suggest that
- * we could deadlock if the destination Guest was trying to send to
- * this source Guest at the same time, which is another reason that all
- * I/O is done under the big lguest_lock. */
- down_read(&dstlg->mm->mmap_sem);
-
- /* Look through the destination DMA array for an available buffer. */
- for (i = 0; i < dst->num_dmas; i++) {
- /* We keep a "next_dma" pointer which often helps us avoid
- * looking at lots of previously-filled entries. */
- dma = (dst->next_dma + i) % dst->num_dmas;
- if (!lgread_other(dstlg, &dst_dma,
- dst->dmas + dma * sizeof(struct lguest_dma),
- sizeof(dst_dma))) {
- goto fail;
- }
- if (!dst_dma.used_len)
- break;
- }
-
- /* If we found a buffer, we do the actual data copy. */
- if (i != dst->num_dmas) {
- unsigned long used_lenp;
- unsigned int ret;
-
- ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
- /* Put used length in the source "struct lguest_dma"'s used_len
- * field. It's a little tricky to figure out where that is,
- * though. */
- lgwrite_u32(srclg,
- udma+offsetof(struct lguest_dma, used_len), ret);
- /* Transferring 0 bytes is OK if the source buffer was empty. */
- if (ret == 0 && src_dma.len[0] != 0)
- goto fail;
-
- /* The destination Guest might be running on a different CPU:
- * we have to make sure that it will see the "used_len" field
- * change to non-zero *after* it sees the data we copied into
- * the buffer. Hence a write memory barrier. */
- wmb();
- /* Figuring out where the destination's used_len field for this
- * "struct lguest_dma" in the array is also a little ugly. */
- used_lenp = dst->dmas
- + dma * sizeof(struct lguest_dma)
- + offsetof(struct lguest_dma, used_len);
- lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
- /* Move the cursor for next time. */
- dst->next_dma++;
- }
- up_read(&dstlg->mm->mmap_sem);
-
- /* We trigger the destination interrupt, even if the destination was
- * empty and we didn't transfer anything: this gives them a chance to
- * wake up and refill. */
- set_bit(dst->interrupt, dstlg->irqs_pending);
- /* Wake up the destination process. */
- wake_up_process(dstlg->tsk);
- /* If we passed the last "struct lguest_dma", the receiver had no
- * buffers left. */
- return i == dst->num_dmas;
-
-fail:
- up_read(&dstlg->mm->mmap_sem);
- return 0;
-}
-
-/*L:370 This is the counter-side to the BIND_DMA hypercall; the SEND_DMA
- * hypercall. We find out who's listening, and send to them. */
-void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
-{
- union futex_key key;
- int empty = 0;
- struct rw_semaphore *fshared = &current->mm->mmap_sem;
-
-again:
- mutex_lock(&lguest_lock);
- down_read(fshared);
- /* Get the futex key for the key the Guest gave us */
- if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
- kill_guest(lg, "bad sending DMA key");
- goto unlock;
- }
- /* Since the key must be a multiple of 4, the futex key uses the lower
- * bit of the "offset" field (which would always be 0) to indicate a
- * mapping which is shared with other processes (ie. Guests). */
- if (key.shared.offset & 1) {
- struct lguest_dma_info *i;
- /* Look through the hash for other Guests. */
- list_for_each_entry(i, &dma_hash[hash(&key)], list) {
- /* Don't send to ourselves. */
- if (i->guestid == lg->guestid)
- continue;
- if (!key_eq(&key, &i->key))
- continue;
-
- /* If dma_transfer() tells us the destination has no
- * available buffers, we increment "empty". */
- empty += dma_transfer(lg, udma, i);
- break;
- }
- /* If the destination is empty, we release our locks and
- * give the destination Guest a brief chance to restock. */
- if (empty == 1) {
- /* Give any recipients one chance to restock. */
- up_read(&current->mm->mmap_sem);
- mutex_unlock(&lguest_lock);
- /* Next time, we won't try again. */
- empty++;
- goto again;
- }
- } else {
- /* Private mapping: Guest is sending to its Launcher. We set
- * the "dma_is_pending" flag so that the main loop will exit
- * and the Launcher's read() from /dev/lguest will return. */
- lg->dma_is_pending = 1;
- lg->pending_dma = udma;
- lg->pending_key = ukey;
- }
-unlock:
- up_read(fshared);
- mutex_unlock(&lguest_lock);
-}
-/*:*/
-
-void release_all_dma(struct lguest *lg)
-{
- unsigned int i;
-
- BUG_ON(!mutex_is_locked(&lguest_lock));
-
- down_read(&lg->mm->mmap_sem);
- for (i = 0; i < LGUEST_MAX_DMA; i++) {
- if (lg->dma[i].interrupt)
- unlink_dma(&lg->dma[i]);
- }
- up_read(&lg->mm->mmap_sem);
-}
-
-/*M:007 We only return a single DMA buffer to the Launcher, but it would be
- * more efficient to return a pointer to the entire array of DMA buffers, which
- * it can cache and choose one whenever it wants.
- *
- * Currently the Launcher uses a write to /dev/lguest, and the return value is
- * the address of the DMA structure with the interrupt number placed in
- * dma->used_len. If we wanted to return the entire array, we would need to
- * the address, array size and interrupt number: this seems to require an
- * ioctl(). :*/
-
-/*L:320 This routine looks for a DMA buffer registered by the Guest on the
- * given key (using the BIND_DMA hypercall). */
-unsigned long get_dma_buffer(struct lguest *lg,
- unsigned long ukey, unsigned long *interrupt)
-{
- unsigned long ret = 0;
- union futex_key key;
- struct lguest_dma_info *i;
- struct rw_semaphore *fshared = &current->mm->mmap_sem;
-
- /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA
- * at the same time. */
- mutex_lock(&lguest_lock);
- /* To match between Guests sharing the same underlying memory we steal
- * code from the futex infrastructure. This requires that we hold the
- * "mmap_sem" for our process (the Launcher), and pass it to the futex
- * code. */
- down_read(fshared);
-
- /* This can fail if it's not a valid address, or if the address is not
- * divisible by 4 (the futex code needs that, we don't really). */
- if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
- kill_guest(lg, "bad registered DMA buffer");
- goto unlock;
- }
- /* Search the hash table for matching entries (the Launcher can only
- * send to its own Guest for the moment, so the entry must be for this
- * Guest) */
- list_for_each_entry(i, &dma_hash[hash(&key)], list) {
- if (key_eq(&key, &i->key) && i->guestid == lg->guestid) {
- unsigned int j;
- /* Look through the registered DMA array for an
- * available buffer. */
- for (j = 0; j < i->num_dmas; j++) {
- struct lguest_dma dma;
-
- ret = i->dmas + j * sizeof(struct lguest_dma);
- lgread(lg, &dma, ret, sizeof(dma));
- if (dma.used_len == 0)
- break;
- }
- /* Store the interrupt the Guest wants when the buffer
- * is used. */
- *interrupt = i->interrupt;
- break;
- }
- }
-unlock:
- up_read(fshared);
- mutex_unlock(&lguest_lock);
- return ret;
-}
-/*:*/
-
-/*L:410 That really does complete the Launcher. Not only have we now finished
- * the longest chapter in our journey, but it also means we are over halfway
- * through!
- *
- * Enough prevaricating around the bush: it is time for us to dive into the
- * core of the Host, in "make Host".
- */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 64f0abed317..d9144beca82 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -1,119 +1,25 @@
#ifndef _LGUEST_H
#define _LGUEST_H
-#include <asm/desc.h>
-
-#define GDT_ENTRY_LGUEST_CS 10
-#define GDT_ENTRY_LGUEST_DS 11
-#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8)
-#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8)
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/init.h>
#include <linux/stringify.h>
-#include <linux/binfmts.h>
-#include <linux/futex.h>
#include <linux/lguest.h>
#include <linux/lguest_launcher.h>
#include <linux/wait.h>
#include <linux/err.h>
#include <asm/semaphore.h>
-#include "irq_vectors.h"
-
-#define GUEST_PL 1
-struct lguest_regs
-{
- /* Manually saved part. */
- unsigned long ebx, ecx, edx;
- unsigned long esi, edi, ebp;
- unsigned long gs;
- unsigned long eax;
- unsigned long fs, ds, es;
- unsigned long trapnum, errcode;
- /* Trap pushed part */
- unsigned long eip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long esp;
- unsigned long ss;
-};
+#include <asm/lguest.h>
void free_pagetables(void);
int init_pagetables(struct page **switcher_page, unsigned int pages);
-/* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00})
-#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300})
-
-struct lguest_dma_info
-{
- struct list_head list;
- union futex_key key;
- unsigned long dmas;
- u16 next_dma;
- u16 num_dmas;
- u16 guestid;
- u8 interrupt; /* 0 when not registered */
-};
-
-/*H:310 The page-table code owes a great debt of gratitude to Andi Kleen. He
- * reviewed the original code which used "u32" for all page table entries, and
- * insisted that it would be far clearer with explicit typing. I thought it
- * was overkill, but he was right: it is much clearer than it was before.
- *
- * We have separate types for the Guest's ptes & pgds and the shadow ptes &
- * pgds. There's already a Linux type for these (pte_t and pgd_t) but they
- * change depending on kernel config options (PAE). */
-
-/* Each entry is identical: lower 12 bits of flags and upper 20 bits for the
- * "page frame number" (0 == first physical page, etc). They are different
- * types so the compiler will warn us if we mix them improperly. */
-typedef union {
- struct { unsigned flags:12, pfn:20; };
- struct { unsigned long val; } raw;
-} spgd_t;
-typedef union {
- struct { unsigned flags:12, pfn:20; };
- struct { unsigned long val; } raw;
-} spte_t;
-typedef union {
- struct { unsigned flags:12, pfn:20; };
- struct { unsigned long val; } raw;
-} gpgd_t;
-typedef union {
- struct { unsigned flags:12, pfn:20; };
- struct { unsigned long val; } raw;
-} gpte_t;
-
-/* We have two convenient macros to convert a "raw" value as handed to us by
- * the Guest into the correct Guest PGD or PTE type. */
-#define mkgpte(_val) ((gpte_t){.raw.val = _val})
-#define mkgpgd(_val) ((gpgd_t){.raw.val = _val})
-/*:*/
-
struct pgdir
{
- unsigned long cr3;
- spgd_t *pgdir;
-};
-
-/* This is a guest-specific page (mapped ro) into the guest. */
-struct lguest_ro_state
-{
- /* Host information we need to restore when we switch back. */
- u32 host_cr3;
- struct Xgt_desc_struct host_idt_desc;
- struct Xgt_desc_struct host_gdt_desc;
- u32 host_sp;
-
- /* Fields which are used when guest is running. */
- struct Xgt_desc_struct guest_idt_desc;
- struct Xgt_desc_struct guest_gdt_desc;
- struct i386_hw_tss guest_tss;
- struct desc_struct guest_idt[IDT_ENTRIES];
- struct desc_struct guest_gdt[GDT_ENTRIES];
+ unsigned long gpgdir;
+ pgd_t *pgdir;
};
/* We have two pages shared with guests, per cpu. */
@@ -141,9 +47,11 @@ struct lguest
struct lguest_data __user *lguest_data;
struct task_struct *tsk;
struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */
- u16 guestid;
u32 pfn_limit;
- u32 page_offset;
+ /* This provides the offset to the base of guest-physical
+ * memory in the Launcher. */
+ void __user *mem_base;
+ unsigned long kernel_address;
u32 cr2;
int halted;
int ts;
@@ -151,6 +59,9 @@ struct lguest
u32 esp1;
u8 ss1;
+ /* If a hypercall was asked for, this points to the arguments. */
+ struct hcall_args *hcall;
+
/* Do we need to stop what we're doing and return to userspace? */
int break_out;
wait_queue_head_t break_wq;
@@ -167,24 +78,15 @@ struct lguest
struct task_struct *wake;
unsigned long noirq_start, noirq_end;
- int dma_is_pending;
- unsigned long pending_dma; /* struct lguest_dma */
- unsigned long pending_key; /* address they're sending to */
+ unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
unsigned int stack_pages;
u32 tsc_khz;
- struct lguest_dma_info dma[LGUEST_MAX_DMA];
-
/* Dead? */
const char *dead;
- /* The GDT entries copied into lguest_ro_state when running. */
- struct desc_struct gdt[GDT_ENTRIES];
-
- /* The IDT entries: some copied into lguest_ro_state when running. */
- struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS];
- struct desc_struct syscall_idt;
+ struct lguest_arch arch;
/* Virtual clock device */
struct hrtimer hrt;
@@ -193,19 +95,38 @@ struct lguest
DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
};
-extern struct lguest lguests[];
extern struct mutex lguest_lock;
/* core.c: */
-u32 lgread_u32(struct lguest *lg, unsigned long addr);
-void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val);
-void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len);
-void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len);
-int find_free_guest(void);
int lguest_address_ok(const struct lguest *lg,
unsigned long addr, unsigned long len);
+void __lgread(struct lguest *, void *, unsigned long, unsigned);
+void __lgwrite(struct lguest *, unsigned long, const void *, unsigned);
+
+/*L:306 Using memory-copy operations like that is usually inconvenient, so we
+ * have the following helper macros which read and write a specific type (often
+ * an unsigned long).
+ *
+ * This reads into a variable of the given type then returns that. */
+#define lgread(lg, addr, type) \
+ ({ type _v; __lgread((lg), &_v, (addr), sizeof(_v)); _v; })
+
+/* This checks that the variable is of the given type, then writes it out. */
+#define lgwrite(lg, addr, type, val) \
+ do { \
+ typecheck(type, val); \
+ __lgwrite((lg), (addr), &(val), sizeof(val)); \
+ } while(0)
+/* (end of memory access helper routines) :*/
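(Editor's aside: the statement-expression trick in lgread() is worth seeing outside the kernel. A user-space sketch with memcpy() standing in for __lgread()/__lgwrite(); the typed temporary in the write macro plays the role the kernel's typecheck() plays above. Statement expressions are a GCC extension, as in the kernel itself:)

#include <stdio.h>
#include <string.h>

static unsigned char guest_mem[4096];	/* stand-in for Guest memory */

static void raw_read(void *buf, unsigned long addr, unsigned len)
{
	memcpy(buf, guest_mem + addr, len);
}

static void raw_write(unsigned long addr, const void *buf, unsigned len)
{
	memcpy(guest_mem + addr, buf, len);
}

/* Declare a temporary of the requested type, fill it, return it by value. */
#define mem_read(addr, type) \
	({ type _v; raw_read(&_v, (addr), sizeof(_v)); _v; })

#define mem_write(addr, type, val) \
	do { type _w = (val); raw_write((addr), &_w, sizeof(_w)); } while (0)

int main(void)
{
	mem_write(16, unsigned long, 0x1234UL);
	printf("%#lx\n", mem_read(16, unsigned long));
	return 0;
}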
+
int run_guest(struct lguest *lg, unsigned long __user *user);
+/* Helper macros to obtain the flags (low 12 bits) or the page frame number
+ * (top 20 bits); this is only the first step in the migration to the kernel
+ * types. pte_pfn is already defined in the kernel. */
+#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK)
+#define pte_flags(x) (pte_val(x) & ~PAGE_MASK)
+#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
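(Editor's aside: a worked example of the split these macros perform, assuming 4K pages, i.e. PAGE_SHIFT 12:)

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long entry = 0x12345067;	/* pfn 0x12345, flags 0x067 */

	printf("flags %#lx pfn %#lx\n",
	       entry & ~PAGE_MASK, entry >> PAGE_SHIFT);
	return 0;
}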
/* interrupts_and_traps.c: */
void maybe_do_interrupt(struct lguest *lg);
@@ -219,6 +140,9 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
const unsigned long *def);
void guest_set_clockevent(struct lguest *lg, unsigned long delta);
void init_clockdev(struct lguest *lg);
+bool check_syscall_vector(struct lguest *lg);
+int init_interrupts(void);
+void free_interrupts(void);
/* segments.c: */
void setup_default_gdt_entries(struct lguest_ro_state *state);
@@ -232,28 +156,33 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt);
int init_guest_pagetable(struct lguest *lg, unsigned long pgtable);
void free_guest_pagetable(struct lguest *lg);
void guest_new_pagetable(struct lguest *lg, unsigned long pgtable);
-void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i);
+void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
void guest_pagetable_clear_all(struct lguest *lg);
void guest_pagetable_flush_user(struct lguest *lg);
-void guest_set_pte(struct lguest *lg, unsigned long cr3,
- unsigned long vaddr, gpte_t val);
+void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
+ unsigned long vaddr, pte_t val);
void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages);
int demand_page(struct lguest *info, unsigned long cr2, int errcode);
void pin_page(struct lguest *lg, unsigned long vaddr);
+unsigned long guest_pa(struct lguest *lg, unsigned long vaddr);
+void page_table_guest_data_init(struct lguest *lg);
+
+/* <arch>/core.c: */
+void lguest_arch_host_init(void);
+void lguest_arch_host_fini(void);
+void lguest_arch_run_guest(struct lguest *lg);
+void lguest_arch_handle_trap(struct lguest *lg);
+int lguest_arch_init_hypercalls(struct lguest *lg);
+int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args);
+void lguest_arch_setup_regs(struct lguest *lg, unsigned long start);
+
+/* <arch>/switcher.S: */
+extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
/* lguest_user.c: */
int lguest_device_init(void);
void lguest_device_remove(void);
-/* io.c: */
-void lguest_io_init(void);
-int bind_dma(struct lguest *lg,
- unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt);
-void send_dma(struct lguest *info, unsigned long key, unsigned long udma);
-void release_all_dma(struct lguest *lg);
-unsigned long get_dma_buffer(struct lguest *lg, unsigned long key,
- unsigned long *interrupt);
-
/* hypercalls.c: */
void do_hypercalls(struct lguest *lg);
void write_timestamp(struct lguest *lg);
@@ -292,9 +221,5 @@ do { \
} while(0)
/* (End of aside) :*/
-static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
-{
- return vaddr - lg->page_offset;
-}
#endif /* __ASSEMBLY__ */
#endif /* _LGUEST_H */
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
deleted file mode 100644
index 3ba337dde85..00000000000
--- a/drivers/lguest/lguest.c
+++ /dev/null
@@ -1,1108 +0,0 @@
-/*P:010
- * A hypervisor allows multiple Operating Systems to run on a single machine.
- * To quote David Wheeler: "Any problem in computer science can be solved with
- * another layer of indirection."
- *
- * We keep things simple in two ways. First, we start with a normal Linux
- * kernel and insert a module (lg.ko) which allows us to run other Linux
- * kernels the same way we'd run processes. We call the first kernel the Host,
- * and the others the Guests. The program which sets up and configures Guests
- * (such as the example in Documentation/lguest/lguest.c) is called the
- * Launcher.
- *
- * Secondly, we only run specially modified Guests, not normal kernels. When
- * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
- * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
- * how to be a Guest. This means that you can use the same kernel you boot
- * normally (ie. as a Host) as a Guest.
- *
- * These Guests know that they cannot do privileged operations, such as disable
- * interrupts, and that they have to ask the Host to do such things explicitly.
- * This file consists of all the replacements for such low-level native
- * hardware operations: these special Guest versions call the Host.
- *
- * So how does the kernel know it's a Guest? The Guest starts at a special
- * entry point marked with a magic string, which sets up a few things then
- * calls here. We replace the native functions in the various "paravirt"
- * structures with our Guest versions, then boot like normal. :*/
-
-/*
- * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/kernel.h>
-#include <linux/start_kernel.h>
-#include <linux/string.h>
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/lguest.h>
-#include <linux/lguest_launcher.h>
-#include <linux/lguest_bus.h>
-#include <asm/paravirt.h>
-#include <asm/param.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/setup.h>
-#include <asm/e820.h>
-#include <asm/mce.h>
-#include <asm/io.h>
-
-/*G:010 Welcome to the Guest!
- *
- * The Guest in our tale is a simple creature: identical to the Host but
- * behaving in simplified but equivalent ways. In particular, the Guest is the
- * same kernel as the Host (or at least, built from the same source code). :*/
-
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
-struct lguest_data lguest_data = {
- .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
- .noirq_start = (u32)lguest_noirq_start,
- .noirq_end = (u32)lguest_noirq_end,
- .blocked_interrupts = { 1 }, /* Block timer interrupts */
-};
-struct lguest_device_desc *lguest_devices;
-static cycle_t clock_base;
-
-/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first
- * real optimization trick!
- *
- * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
- * them as a batch when lazy_mode is eventually turned off. Because hypercalls
- * are reasonably expensive, batching them up makes sense. For example, a
- * large mmap might update dozens of page table entries: that code calls
- * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
- * lguest_leave_lazy_mode().
- *
- * So, when we're in lazy mode, we call async_hcall() to store the call for
- * future processing. When lazy mode is turned off we issue a hypercall to
- * flush the stored calls.
- */
-static void lguest_leave_lazy_mode(void)
-{
- paravirt_leave_lazy(paravirt_get_lazy_mode());
- hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
-}
-
-static void lazy_hcall(unsigned long call,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3)
-{
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- hcall(call, arg1, arg2, arg3);
- else
- async_hcall(call, arg1, arg2, arg3);
-}
-
-/* async_hcall() is pretty simple: I'm quite proud of it really. We have a
- * ring buffer of stored hypercalls which the Host will run through next time we
- * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall
- * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
- * and 255 once the Host has finished with it.
- *
- * If we come around to a slot which hasn't been finished, then the table is
- * full and we just make the hypercall directly. This has the nice side
- * effect of causing the Host to run all the stored calls in the ring buffer
- * which empties it for next time! */
-void async_hcall(unsigned long call,
- unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
- /* Note: This code assumes we're uniprocessor. */
- static unsigned int next_call;
- unsigned long flags;
-
- /* Disable interrupts if not already disabled: we don't want an
- * interrupt handler making a hypercall while we're already doing
- * one! */
- local_irq_save(flags);
- if (lguest_data.hcall_status[next_call] != 0xFF) {
- /* Table full, so do normal hcall which will flush table. */
- hcall(call, arg1, arg2, arg3);
- } else {
- lguest_data.hcalls[next_call].eax = call;
- lguest_data.hcalls[next_call].edx = arg1;
- lguest_data.hcalls[next_call].ebx = arg2;
- lguest_data.hcalls[next_call].ecx = arg3;
- /* Arguments must all be written before we mark it to go */
- wmb();
- lguest_data.hcall_status[next_call] = 0;
- if (++next_call == LHCALL_RING_SIZE)
- next_call = 0;
- }
- local_irq_restore(flags);
-}
-/*:*/
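(Editor's aside: the guest side publishes in two steps — fill the slot, then flip the status byte, with a write barrier between so the Host never sees a ready status before the arguments. A user-space sketch using GCC builtins; the designated range initializer is the same GCC extension the kernel code above uses:)

#include <stdio.h>

#define RING_SIZE 4

struct slot { unsigned long call, a1, a2, a3; };

static struct slot ring[RING_SIZE];
static volatile unsigned char status[RING_SIZE] = {
	[0 ... RING_SIZE-1] = 0xFF		/* 0xFF = free, 0 = ready */
};
static unsigned int next_call;

static int queue_call(unsigned long call, unsigned long a1)
{
	if (status[next_call] != 0xFF)
		return -1;	/* ring full: caller would flush synchronously */
	ring[next_call] = (struct slot){ call, a1, 0, 0 };
	__sync_synchronize();	/* arguments visible before the status flips */
	status[next_call] = 0;
	if (++next_call == RING_SIZE)
		next_call = 0;
	return 0;
}

int main(void)
{
	return queue_call(7, 42) ? 1 : 0;
}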
-
-/* Wrappers for the SEND_DMA and BIND_DMA hypercalls. This is mainly because
- * Jeff Garzik complained that __pa() should never appear in drivers, and this
- * helps remove most of them. But also, it wraps some ugliness. */
-void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
-{
- /* The hcall might not write this if something goes wrong */
- dma->used_len = 0;
- hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
-}
-
-int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
- unsigned int num, u8 irq)
-{
- /* This is the only hypercall which actually wants 5 arguments, and we
- * only support 4. Fortunately the interrupt number is always less
- * than 256, so we can pack it with the number of dmas in the final
- * argument. */
- if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
- return -ENOMEM;
- return 0;
-}
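(Editor's aside: the pack-two-values-into-one-argument trick is simple enough to show in a few lines; unpacking reverses the shift and mask:)

#include <stdio.h>

int main(void)
{
	unsigned int num = 12, irq = 9;		/* irq is always < 256 */
	unsigned long packed = (num << 8) | irq;

	printf("num=%lu irq=%lu\n", packed >> 8, packed & 0xFF);
	return 0;
}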
-
-/* Unbinding is the same hypercall as binding, but with 0 num & irq. */
-void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
-{
- hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
-}
-
-/* For guests, device memory can be used as normal memory, so we cast away the
- * __iomem to quieten sparse. */
-void *lguest_map(unsigned long phys_addr, unsigned long pages)
-{
- return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages);
-}
-
-void lguest_unmap(void *addr)
-{
- iounmap((__force void __iomem *)addr);
-}
-
-/*G:033
- * Here are our first native-instruction replacements: four functions for
- * interrupt control.
- *
- * The simplest way of implementing these would be to have "turn interrupts
- * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
- * these are by far the most commonly called functions of those we override.
- *
- * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
- * which the Guest can update with a single instruction. The Host knows to
- * check there when it wants to deliver an interrupt.
- */
-
-/* save_flags() is expected to return the processor state (ie. "eflags"). The
- * eflags word contains all kind of stuff, but in practice Linux only cares
- * about the interrupt flag. Our "save_flags()" just returns that. */
-static unsigned long save_fl(void)
-{
- return lguest_data.irq_enabled;
-}
-
-/* "restore_flags" just sets the flags back to the value given. */
-static void restore_fl(unsigned long flags)
-{
- lguest_data.irq_enabled = flags;
-}
-
-/* Interrupts go off... */
-static void irq_disable(void)
-{
- lguest_data.irq_enabled = 0;
-}
-
-/* Interrupts go on... */
-static void irq_enable(void)
-{
- lguest_data.irq_enabled = X86_EFLAGS_IF;
-}
-/*:*/
-/*M:003 Note that we don't check for outstanding interrupts when we re-enable
- * them (or when we unmask an interrupt). This seems to work for the moment,
- * since interrupts are rare and we'll just get the interrupt on the next timer
- * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way
- * would be to put the "irq_enabled" field in a page by itself, and have the
- * Host write-protect it when an interrupt comes in when irqs are disabled.
- * There will then be a page fault as soon as interrupts are re-enabled. :*/
-
-/*G:034
- * The Interrupt Descriptor Table (IDT).
- *
- * The IDT tells the processor what to do when an interrupt comes in. Each
- * entry in the table is a 64-bit descriptor: this holds the privilege level,
- * address of the handler, and... well, who cares? The Guest just asks the
- * Host to make the change anyway, because the Host controls the real IDT.
- */
-static void lguest_write_idt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
-{
- /* Keep the local copy up to date. */
- write_dt_entry(dt, entrynum, low, high);
- /* Tell Host about this new entry. */
- hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
-}
-
-/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
- * time it is written, so we can simply loop through all entries and tell the
- * Host about them. */
-static void lguest_load_idt(const struct Xgt_desc_struct *desc)
-{
- unsigned int i;
- struct desc_struct *idt = (void *)desc->address;
-
- for (i = 0; i < (desc->size+1)/8; i++)
- hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
-}
-
-/*
- * The Global Descriptor Table.
- *
- * The Intel architecture defines another table, called the Global Descriptor
- * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt"
- * instruction, and then several other instructions refer to entries in the
- * table. There are three entries which the Switcher needs, so the Host simply
- * controls the entire thing and the Guest asks it to make changes using the
- * LOAD_GDT hypercall.
- *
- * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
- * hypercall and use that repeatedly to load a new IDT. I don't think it
- * really matters, but wouldn't it be nice if they were the same?
- */
-static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
-{
- BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
- hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
-}
-
-/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
- * then tell the Host to reload the entire thing. This operation is so rare
- * that this naive implementation is reasonable. */
-static void lguest_write_gdt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
-{
- write_dt_entry(dt, entrynum, low, high);
- hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
-}
-
-/* OK, I lied. There are three "thread local storage" GDT entries which change
- * on every context switch (these three entries are how glibc implements
- * __thread variables). So we have a hypercall specifically for this case. */
-static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- /* There's one problem which normal hardware doesn't have: the Host
- * can't handle us removing entries we're currently using. So we clear
- * the GS register here: if it's needed it'll be reloaded anyway. */
- loadsegment(gs, 0);
- lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
-}
-
-/*G:038 That's enough excitement for now, back to ploughing through each of
- * the different pv_ops structures (we're about 1/3 of the way through).
- *
- * This is the Local Descriptor Table, another weird Intel thingy. Linux only
- * uses this for some strange applications like Wine. We don't do anything
- * here, so they'll get an informative and friendly Segmentation Fault. */
-static void lguest_set_ldt(const void *addr, unsigned entries)
-{
-}
-
-/* This loads a GDT entry into the "Task Register": that entry points to a
- * structure called the Task State Segment. Some comments scattered through
- * the kernel code indicate that this was used for task switching in ages
- * past, along with blood sacrifice and astrology.
- *
- * Now there's nothing interesting in here that we don't get told elsewhere.
- * But the native version uses the "ltr" instruction, which makes the Host
- * complain to the Guest about a Segmentation Fault and it'll oops. So we
- * override the native version with a do-nothing version. */
-static void lguest_load_tr_desc(void)
-{
-}
-
-/* The "cpuid" instruction is a way of querying both the CPU identity
- * (manufacturer, model, etc) and its features. It was introduced before the
- * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you
- * might imagine, after a decade and a half of this treatment, it is now a
- * ball of hair. Its entry in the current Intel manual runs to 28 pages.
- *
- * This instruction even has its own Wikipedia entry. The Wikipedia entry
- * has been translated into 4 languages. I am not making this up!
- *
- * We could get funky here and identify ourselves as "GenuineLguest", but
- * instead we just use the real "cpuid" instruction. Then I pretty much turned
- * off feature bits until the Guest booted. (Don't say that: you'll damage
- * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is
- * hardly future-proof.) No one's listening! They don't like you anyway,
- * parenthetic weirdo!
- *
- * Replacing the cpuid so we can turn features off is great for the kernel, but
- * anyone (including userspace) can just use the raw "cpuid" instruction and
- * the Host won't even notice since it isn't privileged. So we try not to get
- * too worked up about it. */
-static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- int function = *eax;
-
- native_cpuid(eax, ebx, ecx, edx);
- switch (function) {
- case 1: /* Basic feature request. */
- /* We only allow the kernel to see SSE3, CMPXCHG16B and SSSE3 */
- *ecx &= 0x00002201;
- /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
- *edx &= 0x07808101;
- /* The Host can do a nice optimization if it knows that the
- * kernel mappings (addresses above 0xC0000000 or whatever
- * PAGE_OFFSET is set to) haven't changed. But Linux calls
- * flush_tlb_user() for both user and kernel mappings unless
- * the Page Global Enable (PGE) feature bit is set. */
- *edx |= 0x00002000;
- break;
- case 0x80000000:
- /* Futureproof this a little: if they ask how much extended
- * processor information there is, limit it to known fields. */
- if (*eax > 0x80000008)
- *eax = 0x80000008;
- break;
- }
-}
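For reference, those magic masks decode as the standard CPUID leaf 1 bit
positions: 0x00002201 keeps ECX bits 0 (SSE3), 9 (SSSE3) and 13 (CMPXCHG16B);
0x07808101 keeps EDX bits 0 (FPU), 8 (CMPXCHG8B), 15 (CMOV), 23 (MMX),
24 (FXSR), 25 (SSE) and 26 (SSE2); and the 0x00002000 we OR back in is EDX
bit 13, Page Global Enable.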
-
-/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
- * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
- * it. The Host needs to know when the Guest wants to change them, so we have
- * a whole series of functions like read_cr0() and write_cr0().
- *
- * We start with CR0. CR0 allows you to turn on and off all kinds of basic
- * features, but Linux only really cares about one: the horrifically-named Task
- * Switched (TS) bit at bit 3 (ie. 8)
- *
- * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
- * the floating point unit is used. Which allows us to restore FPU state
- * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
- * name like "FPUTRAP bit" be a little less cryptic?
- *
- * We store cr0 (and cr3) locally, because the Host never changes it. The
- * Guest sometimes wants to read it and we'd prefer not to bother the Host
- * unnecessarily. */
-static unsigned long current_cr0, current_cr3;
-static void lguest_write_cr0(unsigned long val)
-{
- /* 8 == TS bit. */
- lazy_hcall(LHCALL_TS, val & 8, 0, 0);
- current_cr0 = val;
-}
-
-static unsigned long lguest_read_cr0(void)
-{
- return current_cr0;
-}
-
-/* Intel provided a special instruction to clear the TS bit for people too cool
- * to use write_cr0() to do it. This "clts" instruction is faster, because all
- * the vowels have been optimized out. */
-static void lguest_clts(void)
-{
- lazy_hcall(LHCALL_TS, 0, 0, 0);
- current_cr0 &= ~8U;
-}
-
-/* CR2 is the virtual address of the last page fault, which the Guest only ever
- * reads. The Host kindly writes this into our "struct lguest_data", so we
- * just read it out of there. */
-static unsigned long lguest_read_cr2(void)
-{
- return lguest_data.cr2;
-}
-
-/* CR3 is the current toplevel pagetable page: the principle is the same as
- * cr0. Keep a local copy, and tell the Host when it changes. */
-static void lguest_write_cr3(unsigned long cr3)
-{
- lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
- current_cr3 = cr3;
-}
-
-static unsigned long lguest_read_cr3(void)
-{
- return current_cr3;
-}
-
-/* CR4 is used to enable and disable PGE, but we don't care. */
-static unsigned long lguest_read_cr4(void)
-{
- return 0;
-}
-
-static void lguest_write_cr4(unsigned long val)
-{
-}
-
-/*
- * Page Table Handling.
- *
- * Now would be a good time to take a rest and grab a coffee or similarly
- * relaxing stimulant. The easy parts are behind us, and the trek gradually
- * winds uphill from here.
- *
- * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU
- * maps virtual addresses to physical addresses using "page tables". We could
- * use one huge index of a million entries (2^20 4k pages cover the 4GB
- * address space): at 4 bytes per entry that's 4MB, or 1024 pages, just to
- * hold the page tables. But since most virtual addresses
- * are unused, we use a two level index which saves space. The CR3 register
- * contains the physical address of the top level "page directory" page, which
- * contains physical addresses of up to 1024 second-level pages. Each of these
- * second level pages contains up to 1024 physical addresses of actual pages,
- * or Page Table Entries (PTEs).
- *
- * Here's a diagram, where arrows indicate physical addresses:
- *
- * CR3 ---> +---------+
- *          |  --------->+---------+
- *          |         |  | PADDR1  |
- *        Top-level   |  | PADDR2  |
- *        (PMD) page  |  |         |
- *          |         |  Lower-level
- *          |         |  (PTE) page
- *          |         |  |         |
- *            ....            ....
- *
- * So to convert a virtual address to a physical address, we look up the top
- * level, which points us to the second level, which gives us the physical
- * address of that page. If the top level entry was not present, or the second
- * level entry was not present, then the virtual address is invalid (we
- * say "the page was not mapped").
- *
- * Put another way, a 32-bit virtual address is divided up like so:
- *
- *  1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
- *    Index into top     Index into second      Offset within page
- *  page directory page    pagetable page
- *
- * The kernel spends a lot of time changing both the top-level page directory
- * and lower-level pagetable pages. The Guest doesn't know physical addresses,
- * so while it maintains these page tables exactly like normal, it also needs
- * to keep the Host informed whenever it makes a change: the Host will create
- * the real page tables based on the Guest's.
- */
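To make that 10/10/12 split concrete, here is a minimal sketch (plain C,
nothing lguest-specific, the example address is arbitrary) of how a virtual
address breaks apart:

    #include <stdio.h>

    /* Decompose a 32-bit virtual address under the standard two-level
     * (non-PAE) i386 scheme: 10 bits of page-directory index, 10 bits of
     * pagetable index, 12 bits of offset within the 4096-byte page. */
    int main(void)
    {
            unsigned long vaddr = 0xC0100234UL;             /* arbitrary example */
            unsigned int pgd_index = (vaddr >> 22) & 0x3FF; /* 768 */
            unsigned int pte_index = (vaddr >> 12) & 0x3FF; /* 256 */
            unsigned int offset    = vaddr & 0xFFF;         /* 0x234 */

            printf("pgd=%u pte=%u offset=0x%x\n",
                   pgd_index, pte_index, offset);
            return 0;
    }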
-
-/* The Guest calls this to set a second-level entry (pte), ie. to map a page
- * into a process' address space. We set the entry then tell the Host the
- * toplevel and address this corresponds to. The Guest uses one pagetable per
- * process, so we need to tell the Host which one we're changing (mm->pgd). */
-static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- *ptep = pteval;
- lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low);
-}
-
-/* The Guest calls this to set a top-level entry. Again, we set the entry then
- * tell the Host which top-level page we changed, and the index of the entry we
- * changed. */
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- *pmdp = pmdval;
- lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
-}
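Those hypercall arguments deserve a second look: __pa(pmdp) & PAGE_MASK is the
physical address of the page-directory page itself, and the byte offset within
that page divided by 4 (each top-level entry being 4 bytes) is the index of
the entry we just changed.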
-
-/* There are a couple of legacy places where the kernel sets a PTE, but we
- * don't know the top level any more. This is useless for us, since we don't
- * know which pagetable is changing or what address, so we just tell the Host
- * to forget all of them. Fortunately, this is very rare.
- *
- * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow. So we don't even tell the Host
- * anything changed until we've done the first page table switch.
- */
-static void lguest_set_pte(pte_t *ptep, pte_t pteval)
-{
- *ptep = pteval;
- /* Don't bother with hypercall before initial setup. */
- if (current_cr3)
- lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
-}
-
-/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
- * native page table operations. On native hardware you can set a new page
- * table entry whenever you want, but if you want to remove one you have to do
- * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
- *
- * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
- * called when a valid entry is written, not when it's removed (ie. marked not
- * present). Instead, this is where we come when the Guest wants to remove a
- * page table entry: we tell the Host to set that entry to 0 (ie. the present
- * bit is zero). */
-static void lguest_flush_tlb_single(unsigned long addr)
-{
- /* Simply set it to zero: if it was not, it will fault back in. */
- lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
-}
-
-/* This is what happens after the Guest has removed a large number of entries.
- * This tells the Host that any of the page table entries for userspace might
- * have changed, ie. virtual addresses below PAGE_OFFSET. */
-static void lguest_flush_tlb_user(void)
-{
- lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
-}
-
-/* This is called when the kernel page tables have changed. That's not very
- * common (unless the Guest is using highmem, which makes the Guest extremely
- * slow), so it's worth separating this from the user flushing above. */
-static void lguest_flush_tlb_kernel(void)
-{
- lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
-}
-
-/*
- * The Unadvanced Programmable Interrupt Controller.
- *
- * This is an attempt to implement the simplest possible interrupt controller.
- * I spent some time looking through routines like set_irq_chip_and_handler,
- * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
- * I *think* this is as simple as it gets.
- *
- * We can tell the Host what interrupts we want blocked ready for using the
- * lguest_data.interrupts bitmap, so disabling (aka "masking") them is as
- * simple as setting a bit. We don't actually "ack" interrupts as such, we
- * just mask and unmask them. I wonder if we should be cleverer?
- */
-static void disable_lguest_irq(unsigned int irq)
-{
- set_bit(irq, lguest_data.blocked_interrupts);
-}
-
-static void enable_lguest_irq(unsigned int irq)
-{
- clear_bit(irq, lguest_data.blocked_interrupts);
-}
-
-/* This structure describes the lguest IRQ controller. */
-static struct irq_chip lguest_irq_controller = {
- .name = "lguest",
- .mask = disable_lguest_irq,
- .mask_ack = disable_lguest_irq,
- .unmask = enable_lguest_irq,
-};
-
-/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
- * interrupt (except 128, which is used for system calls), and then tells the
- * Linux infrastructure that each interrupt is controlled by our level-based
- * lguest interrupt controller. */
-static void __init lguest_init_IRQ(void)
-{
- unsigned int i;
-
- for (i = 0; i < LGUEST_IRQS; i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector, interrupt[i]);
- set_irq_chip_and_handler(i, &lguest_irq_controller,
- handle_level_irq);
- }
- }
- /* This call is required to set up for 4k stacks, where we have
- * separate stacks for hard and soft interrupts. */
- irq_ctx_init(smp_processor_id());
-}
-
-/*
- * Time.
- *
- * It would be far better for everyone if the Guest had its own clock, but
- * until then the Host gives us the time on every interrupt.
- */
-static unsigned long lguest_get_wallclock(void)
-{
- return lguest_data.time.tv_sec;
-}
-
-static cycle_t lguest_clock_read(void)
-{
- unsigned long sec, nsec;
-
- /* If the Host tells us the TSC speed, we can trust it. */
- if (lguest_data.tsc_khz)
- return native_read_tsc();
-
- /* If we can't use the TSC, we read the time value written by the Host.
- * Since it's in two parts (seconds and nanoseconds), we risk reading
- * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
- * getting 99 and 0. As Linux tends to come apart under the stress of
- * time travel, we must be careful: */
- do {
- /* First we read the seconds part. */
- sec = lguest_data.time.tv_sec;
- /* This read memory barrier tells the compiler and the CPU that
- * this can't be reordered: we have to complete the above
- * before going on. */
- rmb();
- /* Now we read the nanoseconds part. */
- nsec = lguest_data.time.tv_nsec;
- /* Make sure we've done that. */
- rmb();
- /* Now if the seconds part has changed, try again. */
- } while (unlikely(lguest_data.time.tv_sec != sec));
-
- /* Our non-TSC clock is in real nanoseconds. */
- return sec*1000000000ULL + nsec;
-}
-
-/* This is what we tell the kernel is our clocksource. */
-static struct clocksource lguest_clock = {
- .name = "lguest",
- .rating = 400,
- .read = lguest_clock_read,
- .mask = CLOCKSOURCE_MASK(64),
- .mult = 1 << 22,
- .shift = 22,
-};
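The mult/shift pair is how the clocksource core turns cycles into nanoseconds:
ns = (cycles * mult) >> shift. With mult = 1 << 22 and shift = 22 that works
out to ns = cycles, an identity conversion, which is exactly right for the
fallback clock since lguest_clock_read() already returns nanoseconds;
lguest_time_init() below recomputes mult when we are really counting TSC
cycles.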
-
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
- return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
-
-/* We also need a "struct clock_event_device": Linux asks us to set it to go
- * off some time in the future. Actually, James Morris figured all this out, I
- * just applied the patch. */
-static int lguest_clockevent_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- if (delta < LG_CLOCK_MIN_DELTA) {
- if (printk_ratelimit())
- printk(KERN_DEBUG "%s: small delta %lu ns\n",
- __FUNCTION__, delta);
- return -ETIME;
- }
- hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
- return 0;
-}
-
-static void lguest_clockevent_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- switch (mode) {
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- /* A 0 argument shuts the clock down. */
- hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0);
- break;
- case CLOCK_EVT_MODE_ONESHOT:
- /* This is what we expect. */
- break;
- case CLOCK_EVT_MODE_PERIODIC:
- BUG();
- case CLOCK_EVT_MODE_RESUME:
- break;
- }
-}
-
-/* This describes our primitive timer chip. */
-static struct clock_event_device lguest_clockevent = {
- .name = "lguest",
- .features = CLOCK_EVT_FEAT_ONESHOT,
- .set_next_event = lguest_clockevent_set_next_event,
- .set_mode = lguest_clockevent_set_mode,
- .rating = INT_MAX,
- .mult = 1,
- .shift = 0,
- .min_delta_ns = LG_CLOCK_MIN_DELTA,
- .max_delta_ns = LG_CLOCK_MAX_DELTA,
-};
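The same mult/shift trick runs in the other direction for clock events: the
core converts a nanosecond delta into device ticks as (ns * mult) >> shift, so
mult = 1 and shift = 0 mean the delta handed to
lguest_clockevent_set_next_event() is already in nanoseconds, which is what
the LHCALL_SET_CLOCKEVENT hypercall wants.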
-
-/* This is the Guest timer interrupt handler (hardware interrupt 0). We just
- * call the clockevent infrastructure and it does whatever needs doing. */
-static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
-{
- unsigned long flags;
-
- /* Don't interrupt us while this is running. */
- local_irq_save(flags);
- lguest_clockevent.event_handler(&lguest_clockevent);
- local_irq_restore(flags);
-}
-
-/* At some point in the boot process, we get asked to set up our timing
- * infrastructure. The kernel doesn't expect timer interrupts before this, but
- * we cleverly initialized the "blocked_interrupts" field of "struct
- * lguest_data" so that timer interrupts were blocked until now. */
-static void lguest_time_init(void)
-{
- /* Set up the timer interrupt (0) to go to our simple timer routine */
- set_irq_handler(0, lguest_time_irq);
-
- /* Our clock structure looks like arch/i386/kernel/tsc.c's if we can use
- * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either
- * way, the "rating" is initialized so high that it's always chosen
- * over any other clocksource. */
- if (lguest_data.tsc_khz) {
- lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
- lguest_clock.shift);
- lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
- }
- clock_base = lguest_clock_read();
- clocksource_register(&lguest_clock);
-
- /* Now we've set up our clock, we can use it as the scheduler clock */
- pv_time_ops.sched_clock = lguest_sched_clock;
-
- /* We can't set cpumask in the initializer: damn C limitations! Set it
- * here and register our timer device. */
- lguest_clockevent.cpumask = cpumask_of_cpu(0);
- clockevents_register_device(&lguest_clockevent);
-
- /* Finally, we unblock the timer interrupt. */
- enable_lguest_irq(0);
-}
-
-/*
- * Miscellaneous bits and pieces.
- *
- * Here is an oddball collection of functions which the Guest needs for things
- * to work. They're pretty simple.
- */
-
-/* The Guest needs to tell the host what stack it expects traps to use. For
- * native hardware, this is part of the Task State Segment mentioned above in
- * lguest_load_tr_desc(), but to help hypervisors there's this special call.
- *
- * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
- * segment), the privilege level (we're privilege level 1, the Host is 0 and
- * will not tolerate us trying to use that), the stack pointer, and the number
- * of pages in the stack. */
-static void lguest_load_esp0(struct tss_struct *tss,
- struct thread_struct *thread)
-{
- lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,
- THREAD_SIZE/PAGE_SIZE);
-}
-
-/* Let's just say, I wouldn't do debugging under a Guest. */
-static void lguest_set_debugreg(int regno, unsigned long value)
-{
- /* FIXME: Implement */
-}
-
-/* There are times when the kernel wants to make sure that no memory writes are
- * caught in the cache (that they've all reached real hardware devices). This
- * doesn't matter for the Guest which has virtual hardware.
- *
- * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
- * (clflush) instruction is available and the kernel uses that. Otherwise, it
- * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
- * Unlike clflush, wbinvd can only be run at privilege level 0. So we can
- * ignore clflush, but replace wbinvd.
- */
-static void lguest_wbinvd(void)
-{
-}
-
-/* If the Guest expects to have an Advanced Programmable Interrupt Controller,
- * we play dumb by ignoring writes and returning 0 for reads. So it's no
- * longer Programmable nor Controlling anything, and I don't think 8 lines of
- * code qualifies for Advanced. It will also never interrupt anything. It
- * does, however, allow us to get through the Linux boot code. */
-#ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, unsigned long v)
-{
-}
-
-static unsigned long lguest_apic_read(unsigned long reg)
-{
- return 0;
-}
-#endif
-
-/* STOP! Until an interrupt comes in. */
-static void lguest_safe_halt(void)
-{
- hcall(LHCALL_HALT, 0, 0, 0);
-}
-
-/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
- * message out when we're crashing as well as elegant termination like powering
- * off.
- *
- * Note that the Host always prefers that the Guest speak in physical addresses
- * rather than virtual addresses, so we use __pa() here. */
-static void lguest_power_off(void)
-{
- hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
-}
-
-/*
- * Panicking.
- *
- * Don't. But if you did, this is what happens.
- */
-static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
-{
- hcall(LHCALL_CRASH, __pa(p), 0, 0);
- /* The hcall won't return, but to keep gcc happy, we're "done". */
- return NOTIFY_DONE;
-}
-
-static struct notifier_block paniced = {
- .notifier_call = lguest_panic
-};
-
-/* Setting up memory is fairly easy. */
-static __init char *lguest_memory_setup(void)
-{
- /* We do this here and not earlier because lockcheck barfs if we do it
- * before start_kernel() */
- atomic_notifier_chain_register(&panic_notifier_list, &paniced);
-
- /* The Linux bootloader header contains an "e820" memory map: the
- * Launcher populated the first entry with our memory limit. */
- add_memory_region(boot_params.e820_map[0].addr,
- boot_params.e820_map[0].size,
- boot_params.e820_map[0].type);
-
- /* This string is for the boot messages. */
- return "LGUEST";
-}
-
-/*G:050
- * Patching (Powerfully Placating Performance Pedants)
- *
- * We have already seen that pv_ops structures let us replace simple
- * native instructions with calls to the appropriate back end all throughout
- * the kernel. This allows the same kernel to run as a Guest and as a native
- * kernel, but it's slow because of all the indirect branches.
- *
- * Remember that David Wheeler quote about "Any problem in computer science can
- * be solved with another layer of indirection"? The rest of that quote is
- * "... But that usually will create another problem." This is the first of
- * those problems.
- *
- * Our current solution is to allow the paravirt back end to optionally patch
- * over the indirect calls to replace them with something more efficient. We
- * patch the four most commonly called functions: disable interrupts, enable
- * interrupts, restore interrupts and save interrupts. We usually have 10
- * bytes to patch into: the Guest versions of these operations are small enough
- * that we can fit comfortably.
- *
- * First we need assembly templates of each of the patchable Guest operations,
- * and these are in lguest_asm.S. */
-
-/*G:060 We construct a table from the assembler templates: */
-static const struct lguest_insns
-{
- const char *start, *end;
-} lguest_insns[] = {
- [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
- [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
- [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
- [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
-};
-
-/* Now our patch routine is fairly simple (based on the native one in
- * paravirt.c). If we have a replacement, we copy it in and return how much of
- * the available space we used. */
-static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
- unsigned long addr, unsigned len)
-{
- unsigned int insn_len;
-
- /* Don't do anything special if we don't have a replacement */
- if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
- return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
- insn_len = lguest_insns[type].end - lguest_insns[type].start;
-
- /* Similarly if we can't fit the replacement (shouldn't happen, but
- * let's be thorough). */
- if (len < insn_len)
- return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
- /* Copy in our instructions. */
- memcpy(ibuf, lguest_insns[type].start, insn_len);
- return insn_len;
-}
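What actually gets copied in? The templates come from lguest_asm.S (they
appear later in this patch): the irq_disable replacement, for instance, is the
single instruction "movl $0, lguest_data+LGUEST_DATA_irq_enabled", so a
patched call site shrinks from an indirect function call to one direct memory
write.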
-
-/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
- * structures in the kernel provide points for (almost) every routine we have
- * to override to avoid privileged instructions. */
-__init void lguest_init(void *boot)
-{
- /* Copy boot parameters first: the Launcher put the physical location
- * in %esi, and head.S converted that to a virtual address and handed
- * it to us. We use "__memcpy" because "memcpy" sometimes tries to do
- * tricky things to go faster, and we're not ready for that. */
- __memcpy(&boot_params, boot, PARAM_SIZE);
- /* The boot parameters also tell us where the command-line is: save
- * that, too. */
- __memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr),
- COMMAND_LINE_SIZE);
-
- /* We're under lguest, paravirt is enabled, and we're running at
- * privilege level 1, not 0 as normal. */
- pv_info.name = "lguest";
- pv_info.paravirt_enabled = 1;
- pv_info.kernel_rpl = 1;
-
- /* We set up all the lguest overrides for sensitive operations. These
- * are detailed with the operations themselves. */
-
- /* interrupt-related operations */
- pv_irq_ops.init_IRQ = lguest_init_IRQ;
- pv_irq_ops.save_fl = save_fl;
- pv_irq_ops.restore_fl = restore_fl;
- pv_irq_ops.irq_disable = irq_disable;
- pv_irq_ops.irq_enable = irq_enable;
- pv_irq_ops.safe_halt = lguest_safe_halt;
-
- /* init-time operations */
- pv_init_ops.memory_setup = lguest_memory_setup;
- pv_init_ops.patch = lguest_patch;
-
- /* Intercepts of various cpu instructions */
- pv_cpu_ops.load_gdt = lguest_load_gdt;
- pv_cpu_ops.cpuid = lguest_cpuid;
- pv_cpu_ops.load_idt = lguest_load_idt;
- pv_cpu_ops.iret = lguest_iret;
- pv_cpu_ops.load_esp0 = lguest_load_esp0;
- pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
- pv_cpu_ops.set_ldt = lguest_set_ldt;
- pv_cpu_ops.load_tls = lguest_load_tls;
- pv_cpu_ops.set_debugreg = lguest_set_debugreg;
- pv_cpu_ops.clts = lguest_clts;
- pv_cpu_ops.read_cr0 = lguest_read_cr0;
- pv_cpu_ops.write_cr0 = lguest_write_cr0;
- pv_cpu_ops.read_cr4 = lguest_read_cr4;
- pv_cpu_ops.write_cr4 = lguest_write_cr4;
- pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
- pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
- pv_cpu_ops.wbinvd = lguest_wbinvd;
- pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
- pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
-
- /* pagetable management */
- pv_mmu_ops.write_cr3 = lguest_write_cr3;
- pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
- pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
- pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
- pv_mmu_ops.set_pte = lguest_set_pte;
- pv_mmu_ops.set_pte_at = lguest_set_pte_at;
- pv_mmu_ops.set_pmd = lguest_set_pmd;
- pv_mmu_ops.read_cr2 = lguest_read_cr2;
- pv_mmu_ops.read_cr3 = lguest_read_cr3;
- pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
- pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /* apic read/write intercepts */
- pv_apic_ops.apic_write = lguest_apic_write;
- pv_apic_ops.apic_write_atomic = lguest_apic_write;
- pv_apic_ops.apic_read = lguest_apic_read;
-#endif
-
- /* time operations */
- pv_time_ops.get_wallclock = lguest_get_wallclock;
- pv_time_ops.time_init = lguest_time_init;
-
- /* Now is a good time to look at the implementations of these functions
- * before returning to the rest of lguest_init(). */
-
- /*G:070 Now we've seen all the paravirt_ops, we return to
- * lguest_init() where the rest of the fairly chaotic boot setup
- * occurs.
- *
- * The Host expects our first hypercall to tell it where our "struct
- * lguest_data" is, so we do that first. */
- hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0);
-
- /* The native boot code sets up initial page tables immediately after
- * the kernel itself, and sets init_pg_tables_end so they're not
- * clobbered. The Launcher places our initial pagetables somewhere at
- * the top of our physical memory, so we don't need extra space: set
- * init_pg_tables_end to the end of the kernel. */
- init_pg_tables_end = __pa(pg0);
-
- /* Load the %fs segment register (the per-cpu segment register) with
- * the normal data segment to get through booting. */
- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
-
- /* Clear the part of the kernel data which is expected to be zero.
- * Normally it will be anyway, but if we're loading from a bzImage with
- * CONFIG_RELOCATABLE=y, the relocations will be sitting here. */
- memset(__bss_start, 0, __bss_stop - __bss_start);
-
- /* The Host uses the top of the Guest's virtual address space for the
- * Host<->Guest Switcher, and it tells us how much it needs in
- * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
- reserve_top_address(lguest_data.reserve_mem);
-
- /* If we don't initialize the lock dependency checker now, it crashes
- * paravirt_disable_iospace. */
- lockdep_init();
-
- /* The IDE code spends about 3 seconds probing for disks: if we reserve
- * all the I/O ports up front it can't get them and so doesn't probe.
- * Other device drivers are similar (but less severe). This cuts the
- * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */
- paravirt_disable_iospace();
-
- /* This is messy CPU setup stuff which the native boot code does before
- * start_kernel, so we have to do it, too: */
- cpu_detect(&new_cpu_data);
- /* head.S usually sets up the first capability word, so do it here. */
- new_cpu_data.x86_capability[0] = cpuid_edx(1);
-
- /* Math is always hard! */
- new_cpu_data.hard_math = 1;
-
-#ifdef CONFIG_X86_MCE
- mce_disabled = 1;
-#endif
-#ifdef CONFIG_ACPI
- acpi_disabled = 1;
- acpi_ht = 0;
-#endif
-
- /* We set the preferred console to "hvc". This is the "hypervisor
- * virtual console" driver written by the PowerPC people, which we also
- * adapted for lguest's use. */
- add_preferred_console("hvc", 0, NULL);
-
- /* Last of all, we set the power management poweroff hook to point to
- * the Guest routine to power off. */
- pm_power_off = lguest_power_off;
-
- /* Now we're set up, call start_kernel() in init/main.c and we proceed
- * to boot as normal. It never returns. */
- start_kernel();
-}
-/*
- * This marks the end of stage II of our journey, The Guest.
- *
- * It is now time for us to explore the nooks and crannies of the three Guest
- * devices and complete our understanding of the Guest in "make Drivers".
- */
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S
deleted file mode 100644
index 1ddcd5cd20f..00000000000
--- a/drivers/lguest/lguest_asm.S
+++ /dev/null
@@ -1,93 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/lguest.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-
-/*G:020 This is where we begin: we have a magic signature which the launcher
- * looks for. The plan is that the Linux boot protocol will be extended with a
- * "platform type" field which will guide us here from the normal entry point,
- * but for the moment this suffices. The normal boot code uses %esi for the
- * boot header, so we do too. We convert it to a virtual address by adding
- * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
- *
- * The .section line puts this code in .init.text so it will be discarded after
- * boot. */
-.section .init.text, "ax", @progbits
-.ascii "GenuineLguest"
- /* Set up initial stack. */
- movl $(init_thread_union+THREAD_SIZE),%esp
- movl %esi, %eax
- addl $__PAGE_OFFSET, %eax
- jmp lguest_init
-
-/*G:055 We create a macro which puts the assembler code between lgstart_ and
- * lgend_ markers. These templates are put in the .text section: they can't be
- * discarded after boot as we may need to patch modules, too. */
-.text
-#define LGUEST_PATCH(name, insns...) \
- lgstart_##name: insns; lgend_##name:; \
- .globl lgstart_##name; .globl lgend_##name
-
-LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-/*:*/
-
-/* These demarcate the EIP range where the Host should never deliver
- * interrupts. */
-.global lguest_noirq_start
-.global lguest_noirq_end
-
-/*M:004 When the Host reflects a trap or injects an interrupt into the Guest,
- * it sets the eflags interrupt bit on the stack based on
- * lguest_data.irq_enabled, so the Guest iret logic does the right thing when
- * restoring it. However, when the Host sets the Guest up for direct traps,
- * such as system calls, the processor is the one to push eflags onto the
- * stack, and the interrupt bit will be 1 (in reality, interrupts are always
- * enabled in the Guest).
- *
- * This turns out to be harmless: the only trap which should happen under Linux
- * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
- * regions), which has to be reflected through the Host anyway. If another
- * trap *does* go off when interrupts are disabled, the Guest will panic, and
- * we'll never get to this iret! :*/
-
-/*G:045 There is one final paravirt_op that the Guest implements, and glancing
- * at it you can see why I left it to last. It's *cool*! It's in *assembler*!
- *
- * The "iret" instruction is used to return from an interrupt or trap. The
- * stack looks like this:
- * old address
- * old code segment & privilege level
- * old processor flags ("eflags")
- *
- * The "iret" instruction pops those values off the stack and restores them all
- * at once. The only problem is that eflags includes the Interrupt Flag which
- * the Guest can't change: the CPU will simply ignore it when we do an "iret".
- * So we have to copy eflags from the stack to lguest_data.irq_enabled before
- * we do the "iret".
- *
- * There are two problems with this: firstly, we need to use a register to do
- * the copy and secondly, the whole thing needs to be atomic. The first
- * problem is easy to solve: push %eax on the stack so we can use it, and then
- * restore it at the end just before the real "iret".
- *
- * The second is harder: copying eflags to lguest_data.irq_enabled will turn
- * interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever. Our solution to this is to surround the
- * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
- * Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled. */
-ENTRY(lguest_iret)
- pushl %eax
- movl 12(%esp), %eax
-lguest_noirq_start:
- /* Note the %ss: segment prefix here. Normal data accesses use the
- * "ds" segment, but that will have already been restored for whatever
- * we're returning to (such as userspace): we can't trust it. The %ss:
- * prefix makes sure we use the stack segment, which is still valid. */
- movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
- popl %eax
- iret
-lguest_noirq_end:
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
deleted file mode 100644
index 57329788f8a..00000000000
--- a/drivers/lguest/lguest_bus.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*P:050 Lguest guests use a very simple bus for devices. It's a simple array
- * of device descriptors contained just above the top of normal memory. The
- * lguest bus is 80% tedious boilerplate code. :*/
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/lguest_bus.h>
-#include <asm/io.h>
-#include <asm/paravirt.h>
-
-static ssize_t type_show(struct device *_dev,
- struct device_attribute *attr, char *buf)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- return sprintf(buf, "%hu", lguest_devices[dev->index].type);
-}
-static ssize_t features_show(struct device *_dev,
- struct device_attribute *attr, char *buf)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- return sprintf(buf, "%hx", lguest_devices[dev->index].features);
-}
-static ssize_t pfn_show(struct device *_dev,
- struct device_attribute *attr, char *buf)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- return sprintf(buf, "%u", lguest_devices[dev->index].pfn);
-}
-static ssize_t status_show(struct device *_dev,
- struct device_attribute *attr, char *buf)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- return sprintf(buf, "%hx", lguest_devices[dev->index].status);
-}
-static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1)
- return -EINVAL;
- return count;
-}
-static struct device_attribute lguest_dev_attrs[] = {
- __ATTR_RO(type),
- __ATTR_RO(features),
- __ATTR_RO(pfn),
- __ATTR(status, 0644, status_show, status_store),
- __ATTR_NULL
-};
-
-/*D:130 The generic bus infrastructure requires a function which says whether a
- * device matches a driver. For us, it is simple: "struct lguest_driver"
- * contains a "device_type" field which indicates what type of device it can
- * handle, so we just cast the args and compare: */
-static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
-{
- struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
- struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv);
-
- return (drv->device_type == lguest_devices[dev->index].type);
-}
-/*:*/
-
-struct lguest_bus {
- struct bus_type bus;
- struct device dev;
-};
-
-static struct lguest_bus lguest_bus = {
- .bus = {
- .name = "lguest",
- .match = lguest_dev_match,
- .dev_attrs = lguest_dev_attrs,
- },
- .dev = {
- .parent = NULL,
- .bus_id = "lguest",
- }
-};
-
-/*D:140 This is the callback which occurs once the bus infrastructure matches
- * up a device and driver, ie. in response to add_lguest_device() calling
- * device_register(), or register_lguest_driver() calling driver_register().
- *
- * At the moment it's always the latter: the devices are added first, since
- * scan_devices() is called from a "core_initcall", and the drivers themselves
- * called later as a normal "initcall". But it would work the other way too.
- *
- * So now we have the happy couple, we add the status bit to indicate that we
- * found a driver. If the driver truly loves the device, it will return
- * happiness from its probe function (ok, perhaps this wasn't my greatest
- * analogy), and we set the final "driver ok" bit so the Host sees it's all
- * green. */
-static int lguest_dev_probe(struct device *_dev)
-{
- int ret;
- struct lguest_device*dev = container_of(_dev,struct lguest_device,dev);
- struct lguest_driver*drv = container_of(dev->dev.driver,
- struct lguest_driver, drv);
-
- lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
- ret = drv->probe(dev);
- if (ret == 0)
- lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
- return ret;
-}
-
-/* The last part of the bus infrastructure is the function lguest drivers use
- * to register themselves. Firstly, we do nothing if there's no lguest bus
- * (ie. this is not a Guest), otherwise we fill in the embedded generic "struct
- * driver" fields and call the generic driver_register(). */
-int register_lguest_driver(struct lguest_driver *drv)
-{
- if (!lguest_devices)
- return 0;
-
- drv->drv.bus = &lguest_bus.bus;
- drv->drv.name = drv->name;
- drv->drv.owner = drv->owner;
- drv->drv.probe = lguest_dev_probe;
-
- return driver_register(&drv->drv);
-}
-
-/* At the moment we build all the drivers into the kernel because they're so
- * simple: 8144 bytes for all three of them as I type this. And as the console
- * really needs to be built in, it's actually only 3527 bytes for the network
- * and block drivers.
- *
- * If they get complex it will make sense for them to be modularized, so we
- * need to explicitly export the symbol.
- *
- * I don't think non-GPL modules make sense, so it's a GPL-only export.
- */
-EXPORT_SYMBOL_GPL(register_lguest_driver);
-
-/*D:120 This is the core of the lguest bus: actually adding a new device.
- * It's a separate function because it's neater that way, and because an
- * earlier version of the code supported hotplug and unplug. They were removed
- * early on because they were never used.
- *
- * As Andrew Tridgell says, "Untested code is buggy code".
- *
- * It's worth reading this carefully: we start with an index into the array of
- * "struct lguest_device_desc"s indicating the device which is new: */
-static void add_lguest_device(unsigned int index)
-{
- struct lguest_device *new;
-
- /* Each "struct lguest_device_desc" has a "status" field, which the
- * Guest updates as the device is probed. In the worst case, the Host
- * can look at these bits to tell what part of device setup failed,
- * even if the console isn't available. */
- lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
- new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
- if (!new) {
- printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
- lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
- return;
- }
-
- /* The "struct lguest_device" setup is pretty straight-forward example
- * code. */
- new->index = index;
- new->private = NULL;
- memset(&new->dev, 0, sizeof(new->dev));
- new->dev.parent = &lguest_bus.dev;
- new->dev.bus = &lguest_bus.bus;
- sprintf(new->dev.bus_id, "%u", index);
-
- /* device_register() causes the bus infrastructure to look for a
- * matching driver. */
- if (device_register(&new->dev) != 0) {
- printk(KERN_EMERG "Cannot register lguest device %u\n", index);
- lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
- kfree(new);
- }
-}
-
-/*D:110 scan_devices() simply iterates through the device array. The type 0
- * is reserved to mean "no device", and anything else means we have found a
- * device: add it. */
-static void scan_devices(void)
-{
- unsigned int i;
-
- for (i = 0; i < LGUEST_MAX_DEVICES; i++)
- if (lguest_devices[i].type)
- add_lguest_device(i);
-}
-
-/*D:100 Fairly early in boot, lguest_bus_init() is called to set up the lguest
- * bus. We check that we are a Guest by checking pv_info.name: there are
- * other ways of checking, but this seems most obvious to me.
- *
- * So we can access the array of "struct lguest_device_desc"s easily, we map
- * that memory and store the pointer in the global "lguest_devices". Then we
- * register the bus with the core. Doing two registrations seems clunky to me,
- * but it seems to be the correct sysfs incantation.
- *
- * Finally we call scan_devices() which adds all the devices found in the
- * "struct lguest_device_desc" array. */
-static int __init lguest_bus_init(void)
-{
- if (strcmp(pv_info.name, "lguest") != 0)
- return 0;
-
- /* Devices are in a single page above top of "normal" mem */
- lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
-
- if (bus_register(&lguest_bus.bus) != 0
- || device_register(&lguest_bus.dev) != 0)
- panic("lguest bus registration failed");
-
- scan_devices();
- return 0;
-}
-/* Do this after core stuff, before devices. */
-postcore_initcall(lguest_bus_init);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
new file mode 100644
index 00000000000..71c64837b43
--- /dev/null
+++ b/drivers/lguest/lguest_device.c
@@ -0,0 +1,373 @@
+/*P:050 Lguest guests use a very simple method to describe devices. It's a
+ * series of device descriptors contained just above the top of normal
+ * memory.
+ *
+ * We use the standard "virtio" device infrastructure, which provides us with a
+ * console, a network and a block driver. Each one expects some configuration
+ * information and a "virtqueue" mechanism to send and receive data. :*/
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/lguest_launcher.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/interrupt.h>
+#include <linux/virtio_ring.h>
+#include <linux/err.h>
+#include <asm/io.h>
+#include <asm/paravirt.h>
+#include <asm/lguest_hcall.h>
+
+/* The pointer to our (page) of device descriptions. */
+static void *lguest_devices;
+
+/* Unique numbering for lguest devices. */
+static unsigned int dev_index;
+
+/* For Guests, device memory can be used as normal memory, so we cast away the
+ * __iomem to quieten sparse. */
+static inline void *lguest_map(unsigned long phys_addr, unsigned long pages)
+{
+ return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages);
+}
+
+static inline void lguest_unmap(void *addr)
+{
+ iounmap((__force void __iomem *)addr);
+}
+
+/*D:100 Each lguest device is just a virtio device plus a pointer to its entry
+ * in the lguest_devices page. */
+struct lguest_device {
+ struct virtio_device vdev;
+
+ /* The entry in the lguest_devices page for this device. */
+ struct lguest_device_desc *desc;
+};
+
+/* Since the virtio infrastructure hands us a pointer to the virtio_device all
+ * the time, it helps to have a curt macro to get a pointer to the struct
+ * lguest_device it's enclosed in. */
+#define to_lgdev(vdev) container_of(vdev, struct lguest_device, vdev)
+
+/*D:130
+ * Device configurations
+ *
+ * The configuration information for a device consists of a series of fields.
+ * The device will look for these fields during setup.
+ *
+ * For us these fields come immediately after that device's descriptor in the
+ * lguest_devices page.
+ *
+ * Each field starts with a "type" byte, a "length" byte, then that number of
+ * bytes of configuration information. The device descriptor tells us the
+ * total configuration length so we know when we've reached the last field. */
+
+/* type + length bytes */
+#define FHDR_LEN 2
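As a concrete (made-up) example of the layout: a device carrying one 8-bit
field of type 2 holding the value 0x42 would have config_len = 3 and the bytes
{ 0x02, 0x01, 0x42 } sitting after its descriptor; lg_find() below walks
exactly this structure, two header bytes then "length" bytes of payload, until
it finds the type it wants.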
+
+/* This finds the first field of a given type for a device's configuration. */
+static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len)
+{
+ struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+ int i;
+
+ for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) {
+ if (desc->config[i] == type) {
+ /* Mark it used, so Host can know we looked at it, and
+ * also so we won't find the same one twice. */
+ desc->config[i] |= 0x80;
+ /* Remember, the second byte is the length. */
+ *len = desc->config[i+1];
+ /* We return a pointer to the field header. */
+ return desc->config + i;
+ }
+ }
+
+ /* Not found: return NULL for failure. */
+ return NULL;
+}
+
+/* Once they've found a field, getting a copy of it is easy. */
+static void lg_get(struct virtio_device *vdev, void *token,
+ void *buf, unsigned len)
+{
+ /* Check they didn't ask for more than the length of the field! */
+ BUG_ON(len > ((u8 *)token)[1]);
+ memcpy(buf, token + FHDR_LEN, len);
+}
+
+/* Setting the contents is also trivial. */
+static void lg_set(struct virtio_device *vdev, void *token,
+ const void *buf, unsigned len)
+{
+ BUG_ON(len > ((u8 *)token)[1]);
+ memcpy(token + FHDR_LEN, buf, len);
+}
+
+/* The operations to get and set the status word just access the status field
+ * of the device descriptor. */
+static u8 lg_get_status(struct virtio_device *vdev)
+{
+ return to_lgdev(vdev)->desc->status;
+}
+
+static void lg_set_status(struct virtio_device *vdev, u8 status)
+{
+ to_lgdev(vdev)->desc->status = status;
+}
+
+/*
+ * Virtqueues
+ *
+ * The other piece of infrastructure virtio needs is a "virtqueue": a way of
+ * the Guest device registering buffers for the other side to read from or
+ * write into (ie. send and receive buffers). Each device can have multiple
+ * virtqueues: for example the console has one queue for sending and one for
+ * receiving.
+ *
+ * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue
+ * already exists in virtio_ring.c. We just need to connect it up.
+ *
+ * We start with the information we need to keep about each virtqueue.
+ */
+
+/*D:140 This is the information we remember about each virtqueue. */
+struct lguest_vq_info
+{
+ /* A copy of the information contained in the device config. */
+ struct lguest_vqconfig config;
+
+ /* The address where we mapped the virtio ring, so we can unmap it. */
+ void *pages;
+};
+
+/* When the virtio_ring code wants to prod the Host, it calls us here and we
+ * make a hypercall. We hand the page number of the virtqueue so the Host
+ * knows which virtqueue we're talking about. */
+static void lg_notify(struct virtqueue *vq)
+{
+ /* We store our virtqueue information in the "priv" pointer of the
+ * virtqueue structure. */
+ struct lguest_vq_info *lvq = vq->priv;
+
+ hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0);
+}
+
+/* This routine finds the first virtqueue described in the configuration of
+ * this device and sets it up.
+ *
+ * This is kind of an ugly duckling. It'd be nicer to have a standard
+ * representation of a virtqueue in the configuration space, but it seems that
+ * everyone wants to do it differently. The KVM guys want the Guest to
+ * allocate its own pages and tell the Host where they are, but for lguest it's
+ * simpler for the Host to simply tell us where the pages are.
+ *
+ * So we provide devices with a "find virtqueue and set it up" function. */
+static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
+ bool (*callback)(struct virtqueue *vq))
+{
+ struct lguest_vq_info *lvq;
+ struct virtqueue *vq;
+ unsigned int len;
+ void *token;
+ int err;
+
+ /* Look for a field of the correct type to mark a virtqueue. Note that
+ * if this succeeds, then the type will be changed so it won't be found
+ * again, and future lg_find_vq() calls will find the next
+ * virtqueue (if any). */
+ token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len);
+ if (!token)
+ return ERR_PTR(-ENOENT);
+
+ lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
+ if (!lvq)
+ return ERR_PTR(-ENOMEM);
+
+ /* Note: we could use a configuration space inside here, just like we
+ * do for the device. This would allow expansion in future, because
+ * our configuration system is designed to be expansible. But this is
+ * way easier. */
+ if (len != sizeof(lvq->config)) {
+ dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len);
+ err = -EIO;
+ goto free_lvq;
+ }
+ /* Make a copy of the "struct lguest_vqconfig" field. We need a copy
+ * because the config space might not be aligned correctly. */
+ vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config));
+
+ /* Figure out how many pages the ring will take, and map that memory */
+ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
+ DIV_ROUND_UP(vring_size(lvq->config.num),
+ PAGE_SIZE));
+ if (!lvq->pages) {
+ err = -ENOMEM;
+ goto free_lvq;
+ }
+
+ /* OK, tell virtio_ring.c to set up a virtqueue now we know its size
+ * and we've got a pointer to its pages. */
+ vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages,
+ lg_notify, callback);
+ if (!vq) {
+ err = -ENOMEM;
+ goto unmap;
+ }
+
+ /* Tell the interrupt for this virtqueue to go to the virtio_ring
+ * interrupt handler. */
+ /* FIXME: We used to have a flag for the Host to tell us we could use
+ * the interrupt as a source of randomness: it'd be nice to have that
+ * back.. */
+ err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED,
+ vdev->dev.bus_id, vq);
+ if (err)
+ goto destroy_vring;
+
+ /* Last of all we hook up our "struct lguest_vq_info" to the
+ * virtqueue's priv pointer. */
+ vq->priv = lvq;
+ return vq;
+
+destroy_vring:
+ vring_del_virtqueue(vq);
+unmap:
+ lguest_unmap(lvq->pages);
+free_lvq:
+ kfree(lvq);
+ return ERR_PTR(err);
+}
+/*:*/
+
+/* Cleaning up a virtqueue is easy */
+static void lg_del_vq(struct virtqueue *vq)
+{
+ struct lguest_vq_info *lvq = vq->priv;
+
+ /* Tell virtio_ring.c to free the virtqueue. */
+ vring_del_virtqueue(vq);
+ /* Unmap the pages containing the ring. */
+ lguest_unmap(lvq->pages);
+ /* Free our own queue information. */
+ kfree(lvq);
+}
+
+/* The ops structure which hooks everything together. */
+static struct virtio_config_ops lguest_config_ops = {
+ .find = lg_find,
+ .get = lg_get,
+ .set = lg_set,
+ .get_status = lg_get_status,
+ .set_status = lg_set_status,
+ .find_vq = lg_find_vq,
+ .del_vq = lg_del_vq,
+};
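To see these ops from the consumer's side, here is a minimal sketch of a
virtio driver probe routine (all the "example_" names are invented for
illustration):

    /* Callback run from vring_interrupt() when the Host has used our
     * buffers. */
    static bool example_recv_done(struct virtqueue *vq)
    {
            return true;
    }

    static int example_probe(struct virtio_device *vdev)
    {
            struct virtqueue *vq;

            /* For an lguest device, this ends up in lg_find_vq() above. */
            vq = vdev->config->find_vq(vdev, example_recv_done);
            if (IS_ERR(vq))
                    return PTR_ERR(vq);

            /* A real driver would now register buffers with
             * vq->vq_ops->add_buf() and prod the Host with
             * vq->vq_ops->kick(). */
            vdev->priv = vq;
            return 0;
    }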
+
+/* The root device for the lguest virtio devices. This makes them appear as
+ * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */
+static struct device lguest_root = {
+ .parent = NULL,
+ .bus_id = "lguest",
+};
+
+/*D:120 This is the core of the lguest bus: actually adding a new device.
+ * It's a separate function because it's neater that way, and because an
+ * earlier version of the code supported hotplug and unplug. They were removed
+ * early on because they were never used.
+ *
+ * As Andrew Tridgell says, "Untested code is buggy code".
+ *
+ * It's worth reading this carefully: we start with a pointer to the new device
+ * descriptor in the "lguest_devices" page. */
+static void add_lguest_device(struct lguest_device_desc *d)
+{
+ struct lguest_device *ldev;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev) {
+ printk(KERN_EMERG "Cannot allocate lguest dev %u\n",
+ dev_index++);
+ return;
+ }
+
+ /* This device's parent is the lguest/ dir. */
+ ldev->vdev.dev.parent = &lguest_root;
+ /* We have a unique device index thanks to the dev_index counter. */
+ ldev->vdev.index = dev_index++;
+ /* The device type comes straight from the descriptor. There's also a
+ * device vendor field in the virtio_device struct, which we leave as
+ * 0. */
+ ldev->vdev.id.device = d->type;
+ /* We have a simple set of routines for querying the device's
+ * configuration information and setting its status. */
+ ldev->vdev.config = &lguest_config_ops;
+ /* And we remember the device's descriptor for lguest_config_ops. */
+ ldev->desc = d;
+
+ /* register_virtio_device() sets up the generic fields for the struct
+ * virtio_device and calls device_register(). This makes the bus
+ * infrastructure look for a matching driver. */
+ if (register_virtio_device(&ldev->vdev) != 0) {
+ printk(KERN_ERR "Failed to register lguest device %u\n",
+ ldev->vdev.index);
+ kfree(ldev);
+ }
+}
+
+/*D:110 scan_devices() simply iterates through the device page. The type 0 is
+ * reserved to mean "end of devices". */
+static void scan_devices(void)
+{
+ unsigned int i;
+ struct lguest_device_desc *d;
+
+ /* We start at the page beginning, and skip over each entry. */
+ for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) {
+ d = lguest_devices + i;
+
+ /* Once we hit a zero, stop. */
+ if (d->type == 0)
+ break;
+
+ add_lguest_device(d);
+ }
+}
+
+/*D:105 Fairly early in boot, lguest_devices_init() is called to set up the
+ * lguest device infrastructure. We check that we are a Guest by checking
+ * pv_info.name: there are other ways of checking, but this seems most
+ * obvious to me.
+ *
+ * So we can access the "struct lguest_device_desc"s easily, we map that memory
+ * and store the pointer in the global "lguest_devices". Then we register a
+ * root device from which all our devices will hang (this seems to be the
+ * correct sysfs incantation).
+ *
+ * Finally we call scan_devices() which adds all the devices found in the
+ * lguest_devices page. */
+static int __init lguest_devices_init(void)
+{
+ if (strcmp(pv_info.name, "lguest") != 0)
+ return 0;
+
+ if (device_register(&lguest_root) != 0)
+ panic("Could not register lguest root");
+
+ /* Devices are in a single page above top of "normal" mem */
+ lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
+
+ scan_devices();
+ return 0;
+}
+/* We do this after core stuff, but before the drivers. */
+postcore_initcall(lguest_devices_init);
+
+/*D:150 At this point in the journey we used to wade through the lguest
+ * devices themselves: net, block and console. Since they're all now virtio
+ * devices rather than lguest-specific, I've decided to ignore them. Mostly,
+ * they're kind of boring. But this does mean you'll never experience the
+ * thrill of reading the forbidden love scene buried deep in the block driver.
+ *
+ * "make Launcher" beckons, where we answer questions like "Where do Guests
+ * come from?", and "What do you do when someone asks for optimization?". */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 80d1b58c769..ee405b38383 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,73 +1,17 @@
/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
* controls and communicates with the Guest. For example, the first write will
- * tell us the memory size, pagetable, entry point and kernel address offset.
- * A read will run the Guest until a signal is pending (-EINTR), or the Guest
- * does a DMA out to the Launcher. Writes are also used to get a DMA buffer
- * registered by the Guest and to send the Guest an interrupt. :*/
+ * tell us the Guest's memory layout, pagetable, entry point and kernel address
+ * offset. A read will run the Guest until something happens, such as a signal
+ * or the Guest doing a NOTIFY out to the Launcher. :*/
#include <linux/uaccess.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include "lg.h"
-/*L:030 setup_regs() doesn't really belong in this file, but it gives us an
- * early glimpse deeper into the Host so it's worth having here.
- *
- * Most of the Guest's registers are left alone: we used get_zeroed_page() to
- * allocate the structure, so they will be 0. */
-static void setup_regs(struct lguest_regs *regs, unsigned long start)
-{
- /* There are four "segment" registers which the Guest needs to boot:
- * The "code segment" register (cs) refers to the kernel code segment
- * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
- * refer to the kernel data segment __KERNEL_DS.
- *
- * The privilege level is packed into the lower bits. The Guest runs
- * at privilege level 1 (GUEST_PL).*/
- regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
- regs->cs = __KERNEL_CS|GUEST_PL;
-
- /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
- * is supposed to always be "1". Bit 9 (0x200) controls whether
- * interrupts are enabled. We always leave interrupts enabled while
- * running the Guest. */
- regs->eflags = 0x202;
-
- /* The "Extended Instruction Pointer" register says where the Guest is
- * running. */
- regs->eip = start;
-
- /* %esi points to our boot information, at physical address 0, so don't
- * touch it. */
-}
-
-/*L:310 To send DMA into the Guest, the Launcher needs to be able to ask for a
- * DMA buffer. This is done by writing LHREQ_GETDMA and the key to
- * /dev/lguest. */
-static long user_get_dma(struct lguest *lg, const u32 __user *input)
-{
- unsigned long key, udma, irq;
-
- /* Fetch the key they wrote to us. */
- if (get_user(key, input) != 0)
- return -EFAULT;
- /* Look for a free Guest DMA buffer bound to that key. */
- udma = get_dma_buffer(lg, key, &irq);
- if (!udma)
- return -ENOENT;
-
- /* We need to tell the Launcher what interrupt the Guest expects after
- * the buffer is filled. We stash it in udma->used_len. */
- lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
-
- /* The (guest-physical) address of the DMA buffer is returned from
- * the write(). */
- return udma;
-}
-
/*L:315 To force the Guest to stop running and return to the Launcher, the
 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest.  The
* Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
-static int break_guest_out(struct lguest *lg, const u32 __user *input)
+static int break_guest_out(struct lguest *lg, const unsigned long __user *input)
{
unsigned long on;
@@ -90,9 +34,9 @@ static int break_guest_out(struct lguest *lg, const u32 __user *input)
/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
* number to /dev/lguest. */
-static int user_send_irq(struct lguest *lg, const u32 __user *input)
+static int user_send_irq(struct lguest *lg, const unsigned long __user *input)
{
- u32 irq;
+ unsigned long irq;
if (get_user(irq, input) != 0)
return -EFAULT;
@@ -133,17 +77,19 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
return len;
}
- /* If we returned from read() last time because the Guest sent DMA,
+ /* If we returned from read() last time because the Guest notified,
* clear the flag. */
- if (lg->dma_is_pending)
- lg->dma_is_pending = 0;
+ if (lg->pending_notify)
+ lg->pending_notify = 0;
/* Run the Guest until something interesting happens. */
return run_guest(lg, (unsigned long __user *)user);
}
-/*L:020 The initialization write supplies 4 32-bit values (in addition to the
- * 32-bit LHREQ_INITIALIZE value). These are:
+/*L:020 The initialization write supplies 4 pointer-sized (32- or 64-bit)
+ * values (in addition to the LHREQ_INITIALIZE value). These are:
+ *
+ * base: The start of the Guest-physical memory inside the Launcher memory.
*
* pfnlimit: The highest (Guest-physical) page number the Guest should be
* allowed to access. The Launcher has to live in Guest memory, so it sets
@@ -153,23 +99,17 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
* pagetables (which are set up by the Launcher).
*
* start: The first instruction to execute ("eip" in x86-speak).
- *
- * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should
- * probably wean the code off this, but it's a very useful constant! Any
- * address above this is within the Guest kernel, and any kernel address can
- * quickly converted from physical to virtual by adding PAGE_OFFSET. It's
- * 0xC0000000 (3G) by default, but it's configurable at kernel build time.
*/
-static int initialize(struct file *file, const u32 __user *input)
+static int initialize(struct file *file, const unsigned long __user *input)
{
/* "struct lguest" contains everything we (the Host) know about a
* Guest. */
struct lguest *lg;
- int err, i;
- u32 args[4];
+ int err;
+ unsigned long args[4];
- /* We grab the Big Lguest lock, which protects the global array
- * "lguests" and multiple simultaneous initializations. */
+ /* We grab the Big Lguest lock, which protects against multiple
+ * simultaneous initializations. */
mutex_lock(&lguest_lock);
/* You can't initialize twice! Close the device and start again... */
if (file->private_data) {
@@ -182,20 +122,15 @@ static int initialize(struct file *file, const u32 __user *input)
goto unlock;
}
- /* Find an unused guest. */
- i = find_free_guest();
- if (i < 0) {
- err = -ENOSPC;
+ lg = kzalloc(sizeof(*lg), GFP_KERNEL);
+ if (!lg) {
+ err = -ENOMEM;
goto unlock;
}
- /* OK, we have an index into the "lguest" array: "lg" is a convenient
- * pointer. */
- lg = &lguests[i];
/* Populate the easy fields of our "struct lguest" */
- lg->guestid = i;
- lg->pfn_limit = args[0];
- lg->page_offset = args[3];
+ lg->mem_base = (void __user *)(long)args[0];
+ lg->pfn_limit = args[1];
/* We need a complete page for the Guest registers: they are accessible
* to the Guest and we can only grant it access to whole pages. */
@@ -210,17 +145,13 @@ static int initialize(struct file *file, const u32 __user *input)
/* Initialize the Guest's shadow page tables, using the toplevel
* address the Launcher gave us. This allocates memory, so can
* fail. */
- err = init_guest_pagetable(lg, args[1]);
+ err = init_guest_pagetable(lg, args[2]);
if (err)
goto free_regs;
/* Now we initialize the Guest's registers, handing it the start
* address. */
- setup_regs(lg->regs, args[2]);
-
- /* There are a couple of GDT entries the Guest expects when first
- * booting. */
- setup_guest_gdt(lg);
+ lguest_arch_setup_regs(lg, args[3]);
/* The timer for lguest's clock needs initialization. */
init_clockdev(lg);
@@ -260,18 +191,19 @@ unlock:
/*L:010 The first operation the Launcher does must be a write. All writes
 * start with an unsigned long: for the first write this must be
* LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use
- * writes of other values to get DMA buffers and send interrupts. */
-static ssize_t write(struct file *file, const char __user *input,
+ * writes of other values to send interrupts. */
+static ssize_t write(struct file *file, const char __user *in,
size_t size, loff_t *off)
{
/* Once the guest is initialized, we hold the "struct lguest" in the
* file private data. */
struct lguest *lg = file->private_data;
- u32 req;
+ const unsigned long __user *input = (const unsigned long __user *)in;
+ unsigned long req;
if (get_user(req, input) != 0)
return -EFAULT;
- input += sizeof(req);
+ input++;
/* If you haven't initialized, you must do that first. */
if (req != LHREQ_INITIALIZE && !lg)
@@ -287,13 +219,11 @@ static ssize_t write(struct file *file, const char __user *input,
switch (req) {
case LHREQ_INITIALIZE:
- return initialize(file, (const u32 __user *)input);
- case LHREQ_GETDMA:
- return user_get_dma(lg, (const u32 __user *)input);
+ return initialize(file, input);
case LHREQ_IRQ:
- return user_send_irq(lg, (const u32 __user *)input);
+ return user_send_irq(lg, input);
case LHREQ_BREAK:
- return break_guest_out(lg, (const u32 __user *)input);
+ return break_guest_out(lg, input);
default:
return -EINVAL;
}
@@ -319,8 +249,6 @@ static int close(struct inode *inode, struct file *file)
mutex_lock(&lguest_lock);
/* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
hrtimer_cancel(&lg->hrt);
- /* Free any DMA buffers the Guest had bound. */
- release_all_dma(lg);
/* Free up the shadow page tables for the Guest. */
free_guest_pagetable(lg);
/* Now all the memory cleanups are done, it's safe to release the
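
Stepping back, the whole /dev/lguest protocol from the Launcher's side is: one LHREQ_INITIALIZE write, then a read() loop that runs the Guest, with LHREQ_IRQ writes to inject interrupts. A hedged sketch of that skeleton (request constants come from linux/lguest_launcher.h; mapping the Guest's memory and loading its kernel image, which a real Launcher must do first, are elided):

    #include <fcntl.h>
    #include <unistd.h>
    #include <linux/lguest_launcher.h>      /* LHREQ_INITIALIZE, LHREQ_IRQ */

    static int launch(void *base, unsigned long pfnlimit,
                      unsigned long pgdir, unsigned long start)
    {
            /* The first write: LHREQ_INITIALIZE plus the four
             * pointer-sized values described in initialize() above. */
            unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)base,
                                     pfnlimit, pgdir, start };
            int fd = open("/dev/lguest", O_RDWR);

            if (fd < 0 || write(fd, args, sizeof(args)) < 0)
                    return -1;

            /* Each read() runs the Guest until a signal arrives or the
             * Guest does a NOTIFY; the notify value lands in the buffer
             * and a real Launcher would service it here. */
            for (;;) {
                    unsigned long notify_addr;

                    if (read(fd, &notify_addr, sizeof(notify_addr)) < 0)
                            return -1;
            }
    }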
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index b7a924ace68..2a45f0691c9 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -13,6 +13,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
#include "lg.h"
/*M:008 We hold reference to pages, which prevents them from being swapped.
@@ -44,44 +45,32 @@
* (vii) Setting up the page tables initially.
:*/
-/* Pages a 4k long, and each page table entry is 4 bytes long, giving us 1024
- * (or 2^10) entries per page. */
-#define PTES_PER_PAGE_SHIFT 10
-#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is
* conveniently placed at the top 4MB, so it uses a separate, complete PTE
* page. */
-#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)
+#define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
/* We actually need a separate PTE page for each CPU. Remember that after the
* Switcher code itself comes two pages for each CPU, and we don't want this
* CPU's guest to see the pages of any other CPU. */
-static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
+static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
/*H:320 With our shadow and Guest types established, we need to deal with
* them: the page table code is curly enough to need helper functions to keep
* it clear and clean.
*
- * The first helper takes a virtual address, and says which entry in the top
- * level page table deals with that address. Since each top level entry deals
- * with 4M, this effectively divides by 4M. */
-static unsigned vaddr_to_pgd_index(unsigned long vaddr)
-{
- return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
-}
-
-/* There are two functions which return pointers to the shadow (aka "real")
+ * There are two functions which return pointers to the shadow (aka "real")
* page tables.
*
* spgd_addr() takes the virtual address and returns a pointer to the top-level
* page directory entry for that address. Since we keep track of several page
* tables, the "i" argument tells us which one we're interested in (it's
* usually the current one). */
-static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
+static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
{
- unsigned int index = vaddr_to_pgd_index(vaddr);
+ unsigned int index = pgd_index(vaddr);
/* We kill any Guest trying to touch the Switcher addresses. */
if (index >= SWITCHER_PGD_INDEX) {
@@ -95,28 +84,28 @@ static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
/* This routine then takes the PGD entry given above, which contains the
* address of the PTE page. It then returns a pointer to the PTE entry for the
* given address. */
-static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr)
+static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
{
- spte_t *page = __va(spgd.pfn << PAGE_SHIFT);
+ pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
/* You should never call this if the PGD entry wasn't valid */
- BUG_ON(!(spgd.flags & _PAGE_PRESENT));
- return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE];
+ BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
+ return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
}
/* These two functions just like the above two, except they access the Guest
* page tables. Hence they return a Guest address. */
static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
{
- unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
- return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t);
+ unsigned int index = vaddr >> (PGDIR_SHIFT);
+ return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t);
}
static unsigned long gpte_addr(struct lguest *lg,
- gpgd_t gpgd, unsigned long vaddr)
+ pgd_t gpgd, unsigned long vaddr)
{
- unsigned long gpage = gpgd.pfn << PAGE_SHIFT;
- BUG_ON(!(gpgd.flags & _PAGE_PRESENT));
- return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t);
+ unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
+ BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
+ return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
}
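
All four helpers are doing the standard i386 two-level split of a 32-bit virtual address: the top 10 bits index the page directory, the next 10 the page table, and the low 12 are the byte offset. A worked example of the arithmetic, in plain C so it can be checked independently of the kernel macros:

    #include <stdio.h>

    int main(void)
    {
            unsigned long vaddr = 0xC0001234UL; /* a typical kernel address */
            unsigned long pgd = vaddr >> 22;           /* top 10 bits */
            unsigned long pte = (vaddr >> 12) & 1023;  /* next 10 bits */
            unsigned long off = vaddr & 0xFFF;         /* low 12 bits */

            /* Prints: pgd 768, pte 1, offset 0x234 */
            printf("pgd %lu, pte %lu, offset %#lx\n", pgd, pte, off);
            return 0;
    }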
/*H:350 This routine takes a page number given by the Guest and converts it to
@@ -149,53 +138,55 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
* entry can be a little tricky. The flags are (almost) the same, but the
* Guest PTE contains a virtual page number: the CPU needs the real page
* number. */
-static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
+static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
{
- spte_t spte;
- unsigned long pfn;
+ unsigned long pfn, base, flags;
/* The Guest sets the global flag, because it thinks that it is using
* PGE. We only told it to use PGE so it would tell us whether it was
* flushing a kernel mapping or a userspace mapping. We don't actually
* use the global bit, so throw it away. */
- spte.flags = (gpte.flags & ~_PAGE_GLOBAL);
+ flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);
+
+ /* The Guest's pages are offset inside the Launcher. */
+ base = (unsigned long)lg->mem_base / PAGE_SIZE;
/* We need a temporary "unsigned long" variable to hold the answer from
* get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
* fit in spte.pfn. get_pfn() finds the real physical number of the
* page, given the virtual number. */
- pfn = get_pfn(gpte.pfn, write);
+ pfn = get_pfn(base + pte_pfn(gpte), write);
if (pfn == -1UL) {
- kill_guest(lg, "failed to get page %u", gpte.pfn);
+ kill_guest(lg, "failed to get page %lu", pte_pfn(gpte));
/* When we destroy the Guest, we'll go through the shadow page
* tables and release_pte() them. Make sure we don't think
* this one is valid! */
- spte.flags = 0;
+ flags = 0;
}
- /* Now we assign the page number, and our shadow PTE is complete. */
- spte.pfn = pfn;
- return spte;
+ /* Now we assemble our shadow PTE from the page number and flags. */
+ return pfn_pte(pfn, __pgprot(flags));
}
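
The one subtlety in gpte_to_spte() is the new "base" term: Guest-physical page N now lives at Launcher virtual address mem_base + N*PAGE_SIZE, so the page get_pfn() must look up and pin is mem_base/PAGE_SIZE + N. A tiny worked example with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            /* Say the Launcher mapped Guest memory at 0x8000000 (128MB)
             * and the Guest references its physical page 5 (4k pages). */
            unsigned long mem_base = 0x8000000UL;
            unsigned long guest_pfn = 5;
            unsigned long base = mem_base / 4096;       /* 0x8000 */

            /* get_pfn() pins this Launcher-virtual page; the real
             * physical page behind it is what fills the shadow PTE. */
            printf("launcher pfn %#lx\n", base + guest_pfn); /* 0x8005 */
            return 0;
    }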
/*H:460 And to complete the chain, release_pte() looks like this: */
-static void release_pte(spte_t pte)
+static void release_pte(pte_t pte)
{
/* Remember that get_user_pages() took a reference to the page, in
* get_pfn()? We have to put it back now. */
- if (pte.flags & _PAGE_PRESENT)
- put_page(pfn_to_page(pte.pfn));
+ if (pte_flags(pte) & _PAGE_PRESENT)
+ put_page(pfn_to_page(pte_pfn(pte)));
}
/*:*/
-static void check_gpte(struct lguest *lg, gpte_t gpte)
+static void check_gpte(struct lguest *lg, pte_t gpte)
{
- if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit)
+ if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
+ || pte_pfn(gpte) >= lg->pfn_limit)
kill_guest(lg, "bad page table entry");
}
-static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
+static void check_gpgd(struct lguest *lg, pgd_t gpgd)
{
- if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit)
+ if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit)
kill_guest(lg, "bad page directory entry");
}
@@ -211,21 +202,21 @@ static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
* true. */
int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
{
- gpgd_t gpgd;
- spgd_t *spgd;
+ pgd_t gpgd;
+ pgd_t *spgd;
unsigned long gpte_ptr;
- gpte_t gpte;
- spte_t *spte;
+ pte_t gpte;
+ pte_t *spte;
/* First step: get the top-level Guest page table entry. */
- gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr)));
+ gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
/* Toplevel not present? We can't map it in. */
- if (!(gpgd.flags & _PAGE_PRESENT))
+ if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
return 0;
/* Now look at the matching shadow entry. */
spgd = spgd_addr(lg, lg->pgdidx, vaddr);
- if (!(spgd->flags & _PAGE_PRESENT)) {
+ if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
/* No shadow entry: allocate a new shadow PTE page. */
unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
/* This is not really the Guest's fault, but killing it is
@@ -238,34 +229,35 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
check_gpgd(lg, gpgd);
/* And we copy the flags to the shadow PGD entry. The page
* number in the shadow PGD is the page we just allocated. */
- spgd->raw.val = (__pa(ptepage) | gpgd.flags);
+ *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
}
/* OK, now we look at the lower level in the Guest page table: keep its
* address, because we might update it later. */
gpte_ptr = gpte_addr(lg, gpgd, vaddr);
- gpte = mkgpte(lgread_u32(lg, gpte_ptr));
+ gpte = lgread(lg, gpte_ptr, pte_t);
/* If this page isn't in the Guest page tables, we can't page it in. */
- if (!(gpte.flags & _PAGE_PRESENT))
+ if (!(pte_flags(gpte) & _PAGE_PRESENT))
return 0;
/* Check they're not trying to write to a page the Guest wants
* read-only (bit 2 of errcode == write). */
- if ((errcode & 2) && !(gpte.flags & _PAGE_RW))
+ if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
return 0;
/* User access to a kernel page? (bit 3 == user access) */
- if ((errcode & 4) && !(gpte.flags & _PAGE_USER))
+ if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
return 0;
/* Check that the Guest PTE flags are OK, and the page number is below
* the pfn_limit (ie. not mapping the Launcher binary). */
check_gpte(lg, gpte);
/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
- gpte.flags |= _PAGE_ACCESSED;
+ gpte = pte_mkyoung(gpte);
+
if (errcode & 2)
- gpte.flags |= _PAGE_DIRTY;
+ gpte = pte_mkdirty(gpte);
/* Get the pointer to the shadow PTE entry we're going to set. */
spte = spte_addr(lg, *spgd, vaddr);
@@ -275,21 +267,18 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
/* If this is a write, we insist that the Guest page is writable (the
* final arg to gpte_to_spte()). */
- if (gpte.flags & _PAGE_DIRTY)
+ if (pte_dirty(gpte))
*spte = gpte_to_spte(lg, gpte, 1);
- else {
+ else
/* If this is a read, don't set the "writable" bit in the page
* table entry, even if the Guest says it's writable. That way
 * we come back here when a write does actually occur, so we can
* update the Guest's _PAGE_DIRTY flag. */
- gpte_t ro_gpte = gpte;
- ro_gpte.flags &= ~_PAGE_RW;
- *spte = gpte_to_spte(lg, ro_gpte, 0);
- }
+ *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0);
/* Finally, we write the Guest PTE entry back: we've set the
* _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
- lgwrite_u32(lg, gpte_ptr, gpte.raw.val);
+ lgwrite(lg, gpte_ptr, pte_t, gpte);
/* We succeeded in mapping the page! */
return 1;
@@ -305,17 +294,18 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
* mapped by the shadow page tables, and is it writable? */
static int page_writable(struct lguest *lg, unsigned long vaddr)
{
- spgd_t *spgd;
+ pgd_t *spgd;
unsigned long flags;
/* Look at the top level entry: is it present? */
spgd = spgd_addr(lg, lg->pgdidx, vaddr);
- if (!(spgd->flags & _PAGE_PRESENT))
+ if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
return 0;
/* Check the flags on the pte entry itself: it must be present and
* writable. */
- flags = spte_addr(lg, *spgd, vaddr)->flags;
+ flags = pte_flags(*(spte_addr(lg, *spgd, vaddr)));
+
return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
}
@@ -329,22 +319,22 @@ void pin_page(struct lguest *lg, unsigned long vaddr)
}
/*H:450 If we chase down the release_pgd() code, it looks like this: */
-static void release_pgd(struct lguest *lg, spgd_t *spgd)
+static void release_pgd(struct lguest *lg, pgd_t *spgd)
{
/* If the entry's not present, there's nothing to release. */
- if (spgd->flags & _PAGE_PRESENT) {
+ if (pgd_flags(*spgd) & _PAGE_PRESENT) {
unsigned int i;
/* Converting the pfn to find the actual PTE page is easy: turn
* the page number into a physical address, then convert to a
* virtual address (easy for kernel pages like this one). */
- spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT);
+ pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
/* For each entry in the page, we might need to release it. */
- for (i = 0; i < PTES_PER_PAGE; i++)
+ for (i = 0; i < PTRS_PER_PTE; i++)
release_pte(ptepage[i]);
/* Now we can free the page of PTEs */
free_page((long)ptepage);
 /* And zero out the PGD entry so we never release it twice. */
- spgd->raw.val = 0;
+ *spgd = __pgd(0);
}
}
@@ -356,7 +346,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
{
unsigned int i;
/* Release every pgd entry up to the kernel's address. */
- for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
+ for (i = 0; i < pgd_index(lg->kernel_address); i++)
release_pgd(lg, lg->pgdirs[idx].pgdir + i);
}
@@ -369,6 +359,25 @@ void guest_pagetable_flush_user(struct lguest *lg)
}
/*:*/
+/* We walk down the Guest's page tables to get a guest-physical address. */
+unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
+{
+ pgd_t gpgd;
+ pte_t gpte;
+
+ /* First step: get the top-level Guest page table entry. */
+ gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
+ /* Toplevel not present? The address is bad, so kill the Guest. */
+ if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+ kill_guest(lg, "Bad address %#lx", vaddr);
+
+ gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t);
+ if (!(pte_flags(gpte) & _PAGE_PRESENT))
+ kill_guest(lg, "Bad address %#lx", vaddr);
+
+ return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
+}
+
/* We keep several page tables. This is a simple routine to find the page
* table (if any) corresponding to this top-level address the Guest has given
* us. */
@@ -376,7 +385,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
- if (lg->pgdirs[i].cr3 == pgtable)
+ if (lg->pgdirs[i].gpgdir == pgtable)
break;
return i;
}
@@ -385,7 +394,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
* allocate a new one (and so the kernel parts are not there), we set
* blank_pgdir. */
static unsigned int new_pgdir(struct lguest *lg,
- unsigned long cr3,
+ unsigned long gpgdir,
int *blank_pgdir)
{
unsigned int next;
@@ -395,7 +404,7 @@ static unsigned int new_pgdir(struct lguest *lg,
next = random32() % ARRAY_SIZE(lg->pgdirs);
/* If it's never been allocated at all before, try now. */
if (!lg->pgdirs[next].pgdir) {
- lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
+ lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
/* If the allocation fails, just keep using the one we have */
if (!lg->pgdirs[next].pgdir)
next = lg->pgdidx;
@@ -405,7 +414,7 @@ static unsigned int new_pgdir(struct lguest *lg,
*blank_pgdir = 1;
}
/* Record which Guest toplevel this shadows. */
- lg->pgdirs[next].cr3 = cr3;
+ lg->pgdirs[next].gpgdir = gpgdir;
/* Release all the non-kernel mappings. */
flush_user_mappings(lg, next);
@@ -472,26 +481,27 @@ void guest_pagetable_clear_all(struct lguest *lg)
* they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
*/
static void do_set_pte(struct lguest *lg, int idx,
- unsigned long vaddr, gpte_t gpte)
+ unsigned long vaddr, pte_t gpte)
{
 /* Look up the matching shadow page directory entry. */
- spgd_t *spgd = spgd_addr(lg, idx, vaddr);
+ pgd_t *spgd = spgd_addr(lg, idx, vaddr);
/* If the top level isn't present, there's no entry to update. */
- if (spgd->flags & _PAGE_PRESENT) {
+ if (pgd_flags(*spgd) & _PAGE_PRESENT) {
/* Otherwise, we start by releasing the existing entry. */
- spte_t *spte = spte_addr(lg, *spgd, vaddr);
+ pte_t *spte = spte_addr(lg, *spgd, vaddr);
release_pte(*spte);
/* If they're setting this entry as dirty or accessed, we might
* as well put that entry they've given us in now. This shaves
* 10% off a copy-on-write micro-benchmark. */
- if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
check_gpte(lg, gpte);
- *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY);
+ *spte = gpte_to_spte(lg, gpte,
+ pte_flags(gpte) & _PAGE_DIRTY);
} else
/* Otherwise we can demand_page() it in later. */
- spte->raw.val = 0;
+ *spte = __pte(0);
}
}
@@ -506,18 +516,18 @@ static void do_set_pte(struct lguest *lg, int idx,
 * The benefit is that when we have to track a new page table, we can keep all
 * the kernel mappings.  This speeds up context switches immensely.
void guest_set_pte(struct lguest *lg,
- unsigned long cr3, unsigned long vaddr, gpte_t gpte)
+ unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
{
/* Kernel mappings must be changed on all top levels. Slow, but
* doesn't happen often. */
- if (vaddr >= lg->page_offset) {
+ if (vaddr >= lg->kernel_address) {
unsigned int i;
for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
if (lg->pgdirs[i].pgdir)
do_set_pte(lg, i, vaddr, gpte);
} else {
/* Is this page table one we have a shadow for? */
- int pgdir = find_pgdir(lg, cr3);
+ int pgdir = find_pgdir(lg, gpgdir);
if (pgdir != ARRAY_SIZE(lg->pgdirs))
/* If so, do the update. */
do_set_pte(lg, pgdir, vaddr, gpte);
@@ -538,7 +548,7 @@ void guest_set_pte(struct lguest *lg,
*
 * So with that in mind, here's our code to update a (top-level) PGD entry:
*/
-void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
+void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
{
int pgdir;
@@ -548,7 +558,7 @@ void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
return;
/* If they're talking about a page table we have a shadow for... */
- pgdir = find_pgdir(lg, cr3);
+ pgdir = find_pgdir(lg, gpgdir);
if (pgdir < ARRAY_SIZE(lg->pgdirs))
/* ... throw it away. */
release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
@@ -560,21 +570,34 @@ void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
* its first page table is. We set some things up here: */
int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
{
- /* In flush_user_mappings() we loop from 0 to
- * "vaddr_to_pgd_index(lg->page_offset)". This assumes it won't hit
- * the Switcher mappings, so check that now. */
- if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
- return -EINVAL;
/* We start on the first shadow page table, and give it a blank PGD
* page. */
lg->pgdidx = 0;
- lg->pgdirs[lg->pgdidx].cr3 = pgtable;
- lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
+ lg->pgdirs[lg->pgdidx].gpgdir = pgtable;
+ lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL);
if (!lg->pgdirs[lg->pgdidx].pgdir)
return -ENOMEM;
return 0;
}
+/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
+void page_table_guest_data_init(struct lguest *lg)
+{
+ /* We get the kernel address: above this is all kernel memory. */
+ if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address)
+ /* We tell the Guest that it can't use the top 4MB of virtual
+ * addresses used by the Switcher. */
+ || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
+ || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir))
+ kill_guest(lg, "bad guest page %p", lg->lguest_data);
+
+ /* In flush_user_mappings() we loop from 0 to
+ * "pgd_index(lg->kernel_address)". This assumes it won't hit the
+ * Switcher mappings, so check that now. */
+ if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX)
+ kill_guest(lg, "bad kernel address %#lx", lg->kernel_address);
+}
+
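The 4MB reserve_mem and the SWITCHER_PGD_INDEX check are two views of one fact: the Switcher owns the top 4MB of virtual address space, which is exactly the last of the 1024 top-level entries. The arithmetic, as a checkable sketch (i386, 4k pages, no PAE assumed):

    #include <assert.h>

    int main(void)
    {
            unsigned long ptrs_per_pgd = 1024;
            unsigned long switcher_pgd_index = ptrs_per_pgd - 1; /* 1023 */
            unsigned long switcher_base = 0xFFC00000UL;          /* top 4MB */

            /* pgd_index() is just vaddr >> 22 in this configuration. */
            assert(switcher_base >> 22 == switcher_pgd_index);
            /* So a kernel_address whose pgd index reached 1023 would let
             * flush_user_mappings() free the Switcher mapping: hence the
             * kill_guest() above. */
            return 0;
    }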
/* When a Guest dies, our cleanup is fairly simple. */
void free_guest_pagetable(struct lguest *lg)
{
@@ -594,14 +617,14 @@ void free_guest_pagetable(struct lguest *lg)
* for each CPU already set up, we just need to hook them in. */
void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
{
- spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
- spgd_t switcher_pgd;
- spte_t regs_pte;
+ pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
+ pgd_t switcher_pgd;
+ pte_t regs_pte;
/* Make the last PGD entry for this Guest point to the Switcher's PTE
* page for this CPU (with appropriate flags). */
- switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
- switcher_pgd.flags = _PAGE_KERNEL;
+ switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL);
+
lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
/* We also change the Switcher PTE page. When we're running the Guest,
@@ -611,10 +634,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
* CPU's "struct lguest_pages": if we make sure the Guest's register
* page is already mapped there, we don't have to copy them out
* again. */
- regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT;
- regs_pte.flags = _PAGE_KERNEL;
- switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
- = regs_pte;
+ regs_pte = pfn_pte(__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL));
+ switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
}
/*:*/
@@ -635,24 +656,25 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
unsigned int pages)
{
unsigned int i;
- spte_t *pte = switcher_pte_page(cpu);
+ pte_t *pte = switcher_pte_page(cpu);
/* The first entries are easy: they map the Switcher code. */
for (i = 0; i < pages; i++) {
- pte[i].pfn = page_to_pfn(switcher_page[i]);
- pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
+ pte[i] = mk_pte(switcher_page[i],
+ __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
}
/* The only other thing we map is this CPU's pair of pages. */
i = pages + cpu*2;
/* First page (Guest registers) is writable from the Guest */
- pte[i].pfn = page_to_pfn(switcher_page[i]);
- pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW;
+ pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
+ __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
+
/* The second page contains the "struct lguest_ro_state", and is
* read-only. */
- pte[i+1].pfn = page_to_pfn(switcher_page[i+1]);
- pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
+ pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
+ __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
}
/*H:510 At boot or module load time, init_pagetables() allocates and populates
@@ -662,7 +684,7 @@ __init int init_pagetables(struct page **switcher_page, unsigned int pages)
unsigned int i;
for_each_possible_cpu(i) {
- switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL);
+ switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
if (!switcher_pte_page(i)) {
free_switcher_pte_pages();
return -ENOMEM;
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 9b81119f46e..c2434ec99f7 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -73,14 +73,14 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
/* Segment descriptors contain a privilege level: the Guest is
* sometimes careless and leaves this as 0, even though it's
* running at privilege level 1. If so, we fix it here. */
- if ((lg->gdt[i].b & 0x00006000) == 0)
- lg->gdt[i].b |= (GUEST_PL << 13);
+ if ((lg->arch.gdt[i].b & 0x00006000) == 0)
+ lg->arch.gdt[i].b |= (GUEST_PL << 13);
/* Each descriptor has an "accessed" bit. If we don't set it
* now, the CPU will try to set it when the Guest first loads
* that entry into a segment register. But the GDT isn't
* writable by the Guest, so bad things can happen. */
- lg->gdt[i].b |= 0x00000100;
+ lg->arch.gdt[i].b |= 0x00000100;
}
}
@@ -106,12 +106,12 @@ void setup_default_gdt_entries(struct lguest_ro_state *state)
void setup_guest_gdt(struct lguest *lg)
{
/* Start with full 0-4G segments... */
- lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
- lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
+ lg->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
+ lg->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
/* ...except the Guest is allowed to use them, so set the privilege
* level appropriately in the flags. */
- lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
- lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
+ lg->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
+ lg->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
}
/* Like the IDT, we never simply use the GDT the Guest gives us. We set up the
@@ -126,7 +126,7 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
unsigned int i;
for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
- gdt[i] = lg->gdt[i];
+ gdt[i] = lg->arch.gdt[i];
}
/* This is the full version */
@@ -138,7 +138,7 @@ void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
* replaced. See ignored_gdt() above. */
for (i = 0; i < GDT_ENTRIES; i++)
if (!ignored_gdt(i))
- gdt[i] = lg->gdt[i];
+ gdt[i] = lg->arch.gdt[i];
}
/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
@@ -146,12 +146,12 @@ void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
{
/* We assume the Guest has the same number of GDT entries as the
* Host, otherwise we'd have to dynamically allocate the Guest GDT. */
- if (num > ARRAY_SIZE(lg->gdt))
+ if (num > ARRAY_SIZE(lg->arch.gdt))
kill_guest(lg, "too many gdt entries %i", num);
/* We read the whole thing in, then fix it up. */
- lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
- fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
+ __lgread(lg, lg->arch.gdt, table, num * sizeof(lg->arch.gdt[0]));
+ fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->arch.gdt));
/* Mark that the GDT changed so the core knows it has to copy it again,
* even if the Guest is run on the same CPU. */
lg->changed |= CHANGED_GDT;
@@ -159,9 +159,9 @@ void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
void guest_load_tls(struct lguest *lg, unsigned long gtls)
{
- struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN];
+ struct desc_struct *tls = &lg->arch.gdt[GDT_ENTRY_TLS_MIN];
- lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
+ __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
lg->changed |= CHANGED_GDT_TLS;
}
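
fixup_gdt_table() pokes raw descriptor bits, which are worth decoding once: in the high word ("b") of an i386 segment descriptor, bits 13-14 hold the descriptor privilege level and bit 8 is the accessed flag, exactly the 0x00006000 and 0x00000100 masks above. A standalone sketch of the same bit-twiddling (0x00CF9A00 is a flat 4GB DPL-0 code segment; GUEST_PL is 1):

    #include <assert.h>

    int main(void)
    {
            unsigned int guest_pl = 1;      /* lguest's GUEST_PL */
            unsigned int b = 0x00CF9A00;    /* flags word: DPL 0, code */

            if ((b & 0x00006000) == 0)      /* DPL sits in bits 13-14 */
                    b |= guest_pl << 13;    /* now DPL 1 */
            b |= 0x00000100;                /* pre-set the accessed bit */

            assert((b & 0x00006000) == 0x2000);
            assert(b & 0x00000100);
            return 0;
    }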
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
new file mode 100644
index 00000000000..9eed12d5a39
--- /dev/null
+++ b/drivers/lguest/x86/core.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
+ * Copyright (C) 2007, Jes Sorensen <jes@sgi.com> SGI.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/kernel.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+#include <linux/console.h>
+#include <linux/screen_info.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/lguest.h>
+#include <linux/lguest_launcher.h>
+#include <asm/paravirt.h>
+#include <asm/param.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/setup.h>
+#include <asm/lguest.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include "../lg.h"
+
+static int cpu_had_pge;
+
+static struct {
+ unsigned long offset;
+ unsigned short segment;
+} lguest_entry;
+
+/* Offset from where switcher.S was compiled to where we've copied it */
+static unsigned long switcher_offset(void)
+{
+ return SWITCHER_ADDR - (unsigned long)start_switcher_text;
+}
+
+/* This cpu's struct lguest_pages. */
+static struct lguest_pages *lguest_pages(unsigned int cpu)
+{
+ return &(((struct lguest_pages *)
+ (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
+}
+
+static DEFINE_PER_CPU(struct lguest *, last_guest);
+
+/*S:010
+ * We are getting close to the Switcher.
+ *
+ * Remember that each CPU has two pages which are visible to the Guest when it
+ * runs on that CPU.  These pages hold the state for that Guest: we copy the
+ * state in just before we run the Guest.
+ *
+ * Each Guest has "changed" flags which indicate what has changed in the Guest
+ * since it last ran. We saw this set in interrupts_and_traps.c and
+ * segments.c.
+ */
+static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
+{
+ /* Copying all this data can be quite expensive. We usually run the
+ * same Guest we ran last time (and that Guest hasn't run anywhere else
+ * meanwhile). If that's not the case, we pretend everything in the
+ * Guest has changed. */
+ if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
+ __get_cpu_var(last_guest) = lg;
+ lg->last_pages = pages;
+ lg->changed = CHANGED_ALL;
+ }
+
+ /* These copies are pretty cheap, so we do them unconditionally: */
+ /* Save the current Host top-level page directory. */
+ pages->state.host_cr3 = __pa(current->mm->pgd);
+ /* Set up the Guest's page tables to see this CPU's pages (and no
+ * other CPU's pages). */
+ map_switcher_in_guest(lg, pages);
+ /* Set up the two "TSS" members which tell the CPU what stack to use
+ * for traps which do directly into the Guest (ie. traps at privilege
+ * level 1). */
+ pages->state.guest_tss.esp1 = lg->esp1;
+ pages->state.guest_tss.ss1 = lg->ss1;
+
+ /* Copy direct-to-Guest trap entries. */
+ if (lg->changed & CHANGED_IDT)
+ copy_traps(lg, pages->state.guest_idt, default_idt_entries);
+
+ /* Copy all GDT entries which the Guest can change. */
+ if (lg->changed & CHANGED_GDT)
+ copy_gdt(lg, pages->state.guest_gdt);
+ /* If only the TLS entries have changed, copy them. */
+ else if (lg->changed & CHANGED_GDT_TLS)
+ copy_gdt_tls(lg, pages->state.guest_gdt);
+
+ /* Mark the Guest as unchanged for next time. */
+ lg->changed = 0;
+}
+
+/* Finally: the code to actually call into the Switcher to run the Guest. */
+static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
+{
+ /* This is a dummy value we need for GCC's sake. */
+ unsigned int clobber;
+
+ /* Copy the guest-specific information into this CPU's "struct
+ * lguest_pages". */
+ copy_in_guest_info(lg, pages);
+
+ /* Set the trap number to 256 (impossible value). If we fault while
+ * switching to the Guest (bad segment registers or bug), this will
+ * cause us to abort the Guest. */
+ lg->regs->trapnum = 256;
+
+ /* Now: we push the "eflags" register on the stack, then do an "lcall".
+ * This is how we change from using the kernel code segment to using
+ * the dedicated lguest code segment, as well as jumping into the
+ * Switcher.
+ *
+ * The lcall also pushes the old code segment (KERNEL_CS) onto the
+ * stack, then the address of this call. This stack layout happens to
+ * exactly match the stack of an interrupt... */
+ asm volatile("pushf; lcall *lguest_entry"
+ /* This is how we tell GCC that %eax ("a") and %ebx ("b")
+ * are changed by this routine. The "=" means output. */
+ : "=a"(clobber), "=b"(clobber)
+ /* %eax contains the pages pointer. ("0" refers to the
+ * 0-th argument above, ie "a"). %ebx contains the
+ * physical address of the Guest's top-level page
+ * directory. */
+ : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
+ /* We tell gcc that all these registers could change,
+ * which means we don't have to save and restore them in
+ * the Switcher. */
+ : "memory", "%edx", "%ecx", "%edi", "%esi");
+}
+/*:*/
+
+/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts
+ * are disabled: we own the CPU. */
+void lguest_arch_run_guest(struct lguest *lg)
+{
+ /* Remember the awfully-named TS bit? If the Guest has asked
+ * to set it we set it now, so we can trap and pass that trap
+ * to the Guest if it uses the FPU. */
+ if (lg->ts)
+ lguest_set_ts();
+
+ /* SYSENTER is an optimized way of doing system calls. We
+ * can't allow it because it always jumps to privilege level 0.
+ * A normal Guest won't try it because we don't advertise it in
+ * CPUID, but a malicious Guest (or malicious Guest userspace
+ * program) could, so we tell the CPU to disable it before
+ * running the Guest. */
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
+
+ /* Now we actually run the Guest. It will pop back out when
+ * something interesting happens, and we can examine its
+ * registers to see what it was doing. */
+ run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
+
+ /* The "regs" pointer contains two extra entries which are not
+ * really registers: a trap number which says what interrupt or
+ * trap made the switcher code come back, and an error code
+ * which some traps set. */
+
+ /* If the Guest page faulted, then the cr2 register will tell
+ * us the bad virtual address. We have to grab this now,
+ * because once we re-enable interrupts an interrupt could
+ * fault and thus overwrite cr2, or we could even move off to a
+ * different CPU. */
+ if (lg->regs->trapnum == 14)
+ lg->arch.last_pagefault = read_cr2();
+ /* Similarly, if we took a trap because the Guest used the FPU,
+ * we have to restore the FPU it expects to see. */
+ else if (lg->regs->trapnum == 7)
+ math_state_restore();
+
+ /* Restore SYSENTER if it's supposed to be on. */
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+}
+
+/*H:130 Our Guest is usually so well behaved; it never tries to do things it
+ * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
+ * quite complete, because it doesn't contain replacements for the Intel I/O
+ * instructions. As a result, the Guest sometimes fumbles across one during
+ * the boot process as it probes for various things which are usually attached
+ * to a PC.
+ *
+ * When the Guest uses one of these instructions, we get trap #13 (General
+ * Protection Fault) and come here. We see if it's one of those troublesome
+ * instructions and skip over it. We return true if we did. */
+static int emulate_insn(struct lguest *lg)
+{
+ u8 insn;
+ unsigned int insnlen = 0, in = 0, shift = 0;
+ /* The eip contains the *virtual* address of the Guest's instruction:
+ * guest_pa() walks the Guest's page tables to find the physical address. */
+ unsigned long physaddr = guest_pa(lg, lg->regs->eip);
+
+ /* This must be the Guest kernel trying to do something, not userspace!
+ * The bottom two bits of the CS segment register are the privilege
+ * level. */
+ if ((lg->regs->cs & 3) != GUEST_PL)
+ return 0;
+
+ /* Decoding x86 instructions is icky. */
+ insn = lgread(lg, physaddr, u8);
+
+ /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
+ of the eax register. */
+ if (insn == 0x66) {
+ shift = 16;
+ /* The instruction is 1 byte so far, read the next byte. */
+ insnlen = 1;
+ insn = lgread(lg, physaddr + insnlen, u8);
+ }
+
+ /* We can ignore the lower bit for the moment and decode the 4 opcodes
+ * we need to emulate. */
+ switch (insn & 0xFE) {
+ case 0xE4: /* in <next byte>,%al */
+ insnlen += 2;
+ in = 1;
+ break;
+ case 0xEC: /* in (%dx),%al */
+ insnlen += 1;
+ in = 1;
+ break;
+ case 0xE6: /* out %al,<next byte> */
+ insnlen += 2;
+ break;
+ case 0xEE: /* out %al,(%dx) */
+ insnlen += 1;
+ break;
+ default:
+ /* OK, we don't know what this is, can't emulate. */
+ return 0;
+ }
+
+ /* If it was an "IN" instruction, they expect the result to be read
+ * into %eax, so we change %eax. We always return all-ones, which
+ * traditionally means "there's nothing there". */
+ if (in) {
+ /* Lower bit tells us whether it's a 16 or 32 bit access */
+ if (insn & 0x1)
+ lg->regs->eax = 0xFFFFFFFF;
+ else
+ lg->regs->eax |= (0xFFFF << shift);
+ }
+ /* Finally, we've "done" the instruction, so move past it. */
+ lg->regs->eip += insnlen;
+ /* Success! */
+ return 1;
+}
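
The opcode mask in emulate_insn() leans on a regularity of the x86 encoding: each of the four I/O opcodes differs from its 16/32-bit twin only in the bottom bit. The same trick as a standalone decoder:

    #include <stdio.h>

    /* Returns 1 for the IN forms, 0 for OUT, -1 for anything else. */
    static int decode_io(unsigned char insn)
    {
            switch (insn & 0xFE) {  /* low bit: 8-bit vs 16/32-bit operand */
            case 0xE4:              /* in  imm8,%al   / in  imm8,%(e)ax   */
            case 0xEC:              /* in  (%dx),%al  / in  (%dx),%(e)ax  */
                    return 1;
            case 0xE6:              /* out %al,imm8   / out %(e)ax,imm8   */
            case 0xEE:              /* out %al,(%dx)  / out %(e)ax,(%dx)  */
                    return 0;
            default:
                    return -1;
            }
    }

    int main(void)
    {
            /* Prints: 1 0 -1 */
            printf("%d %d %d\n", decode_io(0xE5), decode_io(0xEF),
                   decode_io(0x90));
            return 0;
    }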
+
+/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
+void lguest_arch_handle_trap(struct lguest *lg)
+{
+ switch (lg->regs->trapnum) {
+ case 13: /* We've intercepted a GPF. */
+ /* Check if this was one of those annoying IN or OUT
+ * instructions which we need to emulate. If so, we
+ * just go back into the Guest after we've done it. */
+ if (lg->regs->errcode == 0) {
+ if (emulate_insn(lg))
+ return;
+ }
+ break;
+ case 14: /* We've intercepted a page fault. */
+ /* The Guest accessed a virtual address that wasn't
+ * mapped. This happens a lot: we don't actually set
+ * up most of the page tables for the Guest at all when
+ * we start: as it runs it asks for more and more, and
+ * we set them up as required. In this case, we don't
+ * even tell the Guest that the fault happened.
+ *
+ * The errcode tells whether this was a read or a
+ * write, and whether kernel or userspace code. */
+ if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
+ return;
+
+ /* OK, it's really not there (or not OK): the Guest
+ * needs to know. We write out the cr2 value so it
+ * knows where the fault occurred.
+ *
+ * Note that if the Guest were really messed up, this
+ * could happen before it's done the INITIALIZE
+ * hypercall, so lg->lguest_data will be NULL */
+ if (lg->lguest_data &&
+ put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
+ kill_guest(lg, "Writing cr2");
+ break;
+ case 7: /* We've intercepted a Device Not Available fault. */
+ /* If the Guest doesn't want to know, we already
+ * restored the Floating Point Unit, so we just
+ * continue without telling it. */
+ if (!lg->ts)
+ return;
+ break;
+ case 32 ... 255:
+ /* These values mean a real interrupt occurred, in which case
+ * the Host handler has already been run. We just do a
+ * friendly check if another process should now be run, then
+ * return to run the Guest again */
+ cond_resched();
+ return;
+ case LGUEST_TRAP_ENTRY:
+ /* Our 'struct hcall_args' maps directly over our regs: we set
+ * up the pointer now to indicate a hypercall is pending. */
+ lg->hcall = (struct hcall_args *)lg->regs;
+ return;
+ }
+
+ /* We didn't handle the trap, so it needs to go to the Guest. */
+ if (!deliver_trap(lg, lg->regs->trapnum))
+ /* If the Guest doesn't have a handler (either it hasn't
+ * registered any yet, or it's one of the faults we don't let
+ * it handle), it dies with a cryptic error message. */
+ kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
+ lg->regs->trapnum, lg->regs->eip,
+ lg->regs->trapnum == 14 ? lg->arch.last_pagefault
+ : lg->regs->errcode);
+}
+
+/* Now we can look at each of the routines this calls, in increasing order of
+ * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
+ * deliver_trap() and demand_page(). After all those, we'll be ready to
+ * examine the Switcher, and our philosophical understanding of the Host/Guest
+ * duality will be complete. :*/
+static void adjust_pge(void *on)
+{
+ if (on)
+ write_cr4(read_cr4() | X86_CR4_PGE);
+ else
+ write_cr4(read_cr4() & ~X86_CR4_PGE);
+}
+
+/*H:020 Now the Switcher is mapped and everything else is ready, we need to do
+ * some more i386-specific initialization. */
+void __init lguest_arch_host_init(void)
+{
+ int i;
+
+ /* Most of the i386/switcher.S doesn't care that it's been moved; on
+ * Intel, jumps are relative, and it doesn't access any references to
+ * external code or data.
+ *
+ * The only exception is the interrupt handlers in switcher.S: their
+ * addresses are placed in a table (default_idt_entries), so we need to
+ * update the table with the new addresses. switcher_offset() is a
+ * convenience function which returns the distance between the builtin
+ * switcher code and the high-mapped copy we just made. */
+ for (i = 0; i < IDT_ENTRIES; i++)
+ default_idt_entries[i] += switcher_offset();
+
+ /*
+ * Set up the Switcher's per-cpu areas.
+ *
+ * Each CPU gets two pages of its own within the high-mapped region
+ * (aka. "struct lguest_pages"). Much of this can be initialized now,
+ * but some depends on what Guest we are running (which is set up in
+ * copy_in_guest_info()).
+ */
+ for_each_possible_cpu(i) {
+ /* lguest_pages() returns this CPU's two pages. */
+ struct lguest_pages *pages = lguest_pages(i);
+ /* This is a convenience pointer to make the code fit one
+ * statement to a line. */
+ struct lguest_ro_state *state = &pages->state;
+
+ /* The Global Descriptor Table: the Host has a different one
+ * for each CPU. We keep a descriptor for the GDT which says
+ * where it is and how big it is (the size field actually holds
+ * the offset of the last byte, hence the "-1"). */
+ state->host_gdt_desc.size = GDT_SIZE-1;
+ state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+
+ /* All CPUs on the Host use the same Interrupt Descriptor
+ * Table, so we just use store_idt(), which gets this CPU's IDT
+ * descriptor. */
+ store_idt(&state->host_idt_desc);
+
+ /* The descriptors for the Guest's GDT and IDT can be filled
+ * out now, too. We copy the GDT & IDT into ->guest_gdt and
+ * ->guest_idt before actually running the Guest. */
+ state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
+ state->guest_idt_desc.address = (long)&state->guest_idt;
+ state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
+ state->guest_gdt_desc.address = (long)&state->guest_gdt;
+
+ /* We know where we want the stack to be when the Guest enters
+ * the switcher: in pages->regs. The stack grows downwards, so
+ * we start it at the end of that structure. */
+ state->guest_tss.esp0 = (long)(&pages->regs + 1);
+ /* And this is the GDT entry to use for the stack: we keep a
+ * couple of special LGUEST entries. */
+ state->guest_tss.ss0 = LGUEST_DS;
+
+ /* x86 can have a fine-grained bitmap which indicates what I/O
+ * ports the process can use. We set it to the end of our
+ * structure, meaning "none". */
+ state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
+
+ /* Some GDT entries are the same across all Guests, so we can
+ * set them up now. */
+ setup_default_gdt_entries(state);
+ /* Most IDT entries are the same for all Guests, too.*/
+ setup_default_idt_entries(state, default_idt_entries);
+
+ /* The Host needs to be able to use the LGUEST segments on this
+ * CPU, too, so put them in the Host GDT. */
+ get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+ get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+ }
+
+ /* In the Switcher, we want the %cs segment register to use the
+ * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
+ * it will be undisturbed when we switch. To change %cs and jump we
+ * need this structure to feed to Intel's "lcall" instruction. */
+ lguest_entry.offset = (long)switch_to_guest + switcher_offset();
+ lguest_entry.segment = LGUEST_CS;
+
+ /* Finally, we need to turn off "Page Global Enable". PGE is an
+ * optimization where page table entries are specially marked to show
+ * they never change. The Host kernel marks all the kernel pages this
+ * way because it's always present, even when userspace is running.
+ *
+ * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
+ * switch to the Guest kernel. If you don't disable this on all CPUs,
+ * you'll get really weird bugs that you'll chase for two days.
+ *
+ * I used to turn PGE off every time we switched to the Guest and back
+ * on when we return, but that slowed the Switcher down noticeably. */
+
+ /* We don't need the complexity of CPUs coming and going while we're
+ * doing this. */
+ lock_cpu_hotplug();
+ if (cpu_has_pge) { /* We have a broader idea of "global". */
+ /* Remember that this was originally set (for cleanup). */
+ cpu_had_pge = 1;
+ /* adjust_pge is a helper function which sets or unsets the PGE
+ * bit on its CPU, depending on the argument (0 == unset). */
+ on_each_cpu(adjust_pge, (void *)0, 0, 1);
+ /* Turn off the feature in the global feature set. */
+ clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
+ }
+ unlock_cpu_hotplug();
+}
+/*:*/
+
+void __exit lguest_arch_host_fini(void)
+{
+ /* If we had PGE before we started, turn it back on now. */
+ lock_cpu_hotplug();
+ if (cpu_had_pge) {
+ set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
+ /* adjust_pge's argument "1" means set PGE. */
+ on_each_cpu(adjust_pge, (void *)1, 0, 1);
+ }
+ unlock_cpu_hotplug();
+}
+
+
+/*H:122 The i386-specific hypercalls simply farm out to the right functions. */
+int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
+{
+ switch (args->arg0) {
+ case LHCALL_LOAD_GDT:
+ load_guest_gdt(lg, args->arg1, args->arg2);
+ break;
+ case LHCALL_LOAD_IDT_ENTRY:
+ load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3);
+ break;
+ case LHCALL_LOAD_TLS:
+ guest_load_tls(lg, args->arg1);
+ break;
+ default:
+ /* Bad Guest. Bad! */
+ return -EIO;
+ }
+ return 0;
+}
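
For completeness, the other half of this dispatch lives in the Guest: a hypercall is just a software interrupt on vector LGUEST_TRAP_ENTRY (0x1F) with the call number and arguments in registers, which is why lguest_arch_handle_trap() can overlay a struct hcall_args on the saved register frame. A hedged sketch of the Guest-side stub; the register assignments are my reading of the i386 lguest ABI of this era, not something this patch itself spells out:

    /* Illustrative Guest-side hypercall stub (i386, gcc inline asm). */
    static unsigned long hcall(unsigned long call, unsigned long arg1,
                               unsigned long arg2, unsigned long arg3)
    {
            /* Trap to the Host; it returns the result in %eax. */
            asm volatile("int $0x1f"
                         : "=a"(call)
                         : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
                         : "memory");
            return call;
    }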
+
+/*H:126 i386-specific hypercall initialization: */
+int lguest_arch_init_hypercalls(struct lguest *lg)
+{
+ u32 tsc_speed;
+
+ /* The pointer to the Guest's "struct lguest_data" is the only
+ * argument. We check that address now. */
+ if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data)))
+ return -EFAULT;
+
+ /* Having checked it, we simply set lg->lguest_data to point straight
+ * into the Launcher's memory at the right place and then use
+ * copy_to_user/from_user from now on, instead of lgread/write. I put
+ * this in to show that I'm not immune to writing stupid
+ * optimizations. */
+ lg->lguest_data = lg->mem_base + lg->hcall->arg1;
+
+ /* We insist that the Time Stamp Counter exist and doesn't change with
+ * cpu frequency. Some devious chip manufacturers decided that TSC
+ * changes could be handled in software. I decided that time going
+ * backwards might be good for benchmarks, but it's bad for users.
+ *
+ * We also insist that the TSC be stable: the kernel detects unreliable
+ * TSCs for its own purposes, and we use that here. */
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
+ tsc_speed = tsc_khz;
+ else
+ tsc_speed = 0;
+ if (put_user(tsc_speed, &lg->lguest_data->tsc_khz))
+ return -EFAULT;
+
+ /* The interrupt code might not like the system call vector. */
+ if (!check_syscall_vector(lg))
+ kill_guest(lg, "bad syscall vector");
+
+ return 0;
+}
+/* Now we've examined the hypercall code; our Guest can make requests. There
+ * is one other way we can do things for the Guest, as we see in
+ * emulate_insn(). :*/
+
+/*L:030 lguest_arch_setup_regs()
+ *
+ * Most of the Guest's registers are left alone: we used get_zeroed_page() to
+ * allocate the structure, so they will be 0. */
+void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
+{
+ struct lguest_regs *regs = lg->regs;
+
+ /* There are four "segment" registers which the Guest needs to boot:
+ * The "code segment" register (cs) refers to the kernel code segment
+ * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
+ * refer to the kernel data segment __KERNEL_DS.
+ *
+ * The privilege level is packed into the lower bits. The Guest runs
+ * at privilege level 1 (GUEST_PL).*/
+ regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
+ regs->cs = __KERNEL_CS|GUEST_PL;
+
+ /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
+ * is supposed to always be "1". Bit 9 (0x200) controls whether
+ * interrupts are enabled. We always leave interrupts enabled while
+ * running the Guest. */
+ regs->eflags = 0x202;
+
+ /* The "Extended Instruction Pointer" register says where the Guest is
+ * running. */
+ regs->eip = start;
+
+ /* %esi points to our boot information, at physical address 0, so don't
+ * touch it. */
+ /* There are a couple of GDT entries the Guest expects when first
+ * booting. */
+
+ setup_guest_gdt(lg);
+}
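
That eflags = 0x202 packs two facts: bit 1 is the always-one reserved bit and bit 9 (IF) enables interrupts, just as the comment says. A one-line sanity check, as a sketch:

    #include <assert.h>

    int main(void)
    {
            unsigned long eflags = 0x202;

            assert(eflags & (1UL << 1));    /* reserved bit, always 1 */
            assert(eflags & (1UL << 9));    /* IF: interrupts enabled */
            return 0;
    }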
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/x86/switcher_32.S
index 7c9c230cc84..1010b90b11f 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/x86/switcher_32.S
@@ -48,7 +48,8 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/page.h>
-#include "lg.h"
+#include <asm/segment.h>
+#include <asm/lguest.h>
// We mark the start of the code to copy
// It's placed in .text tho it's never run here
@@ -132,6 +133,7 @@ ENTRY(switch_to_guest)
// The Guest's register page has been mapped
// Writable onto our %esp (stack) --
// We can simply pop off all Guest regs.
+ popl %eax
popl %ebx
popl %ecx
popl %edx
@@ -139,7 +141,6 @@ ENTRY(switch_to_guest)
popl %edi
popl %ebp
popl %gs
- popl %eax
popl %fs
popl %ds
popl %es
@@ -167,7 +168,6 @@ ENTRY(switch_to_guest)
pushl %es; \
pushl %ds; \
pushl %fs; \
- pushl %eax; \
pushl %gs; \
pushl %ebp; \
pushl %edi; \
@@ -175,6 +175,7 @@ ENTRY(switch_to_guest)
pushl %edx; \
pushl %ecx; \
pushl %ebx; \
+ pushl %eax; \
/* Our stack and our code are using segments \
* Set in the TSS and IDT \
* Yet if we were to touch data we'd use \
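The push/pop reshuffle only makes sense against the register frame it walks: the Switcher pops straight off a page mapped over struct lguest_regs, so the pop order must be the struct's field order, lowest address first. Moving %eax to the front of the pops (and the end of the pushes) matches a layout roughly like this — a sketch of the 2.6.24-era x86 struct, for orientation only:

	struct lguest_regs {
		/* Manually saved part, in pop order: */
		unsigned long eax, ebx, ecx, edx;
		unsigned long esi, edi, ebp;
		unsigned long gs;
		unsigned long fs, ds, es;
		unsigned long trapnum, errcode;
		/* Pushed by the CPU on trap: */
		unsigned long eip, cs, eflags, esp, ss;
	};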
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 927cb34c480..7c426d07a55 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -274,7 +274,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
if (bitmap->offset < 0) {
/* DATA BITMAP METADATA */
if (bitmap->offset
- + page->index * (PAGE_SIZE/512)
+ + (long)(page->index * (PAGE_SIZE/512))
+ size/512 > 0)
/* bitmap runs in to metadata */
return -EINVAL;
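The cast matters because page->index is unsigned (pgoff_t): adding the possibly-negative bitmap->offset to an unsigned product converts the whole expression to unsigned, so a bitmap stored before the data would always appear to run into the metadata. A standalone illustration of the promotion, with hypothetical values:

	#include <stdio.h>

	int main(void)
	{
		long offset = -8;		/* bitmap sits before the data   */
		unsigned long sectors = 0;	/* page->index * (PAGE_SIZE/512) */
		long size = 4;			/* size/512                      */

		/* -8 converts to a huge unsigned value: check misfires. */
		printf("%d\n", offset + sectors + size > 0);		/* 1 */
		/* The cast keeps the arithmetic signed: -8+0+4 = -4.   */
		printf("%d\n", offset + (long)sectors + size > 0);	/* 0 */
		return 0;
	}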
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0eb5416798b..ac54f697c50 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -348,16 +348,17 @@ static int crypt_convert(struct crypt_config *cc,
ctx->idx_out < ctx->bio_out->bi_vcnt) {
struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
- struct scatterlist sg_in = {
- .page = bv_in->bv_page,
- .offset = bv_in->bv_offset + ctx->offset_in,
- .length = 1 << SECTOR_SHIFT
- };
- struct scatterlist sg_out = {
- .page = bv_out->bv_page,
- .offset = bv_out->bv_offset + ctx->offset_out,
- .length = 1 << SECTOR_SHIFT
- };
+ struct scatterlist sg_in, sg_out;
+
+ sg_init_table(&sg_in, 1);
+ sg_set_page(&sg_in, bv_in->bv_page);
+ sg_in.offset = bv_in->bv_offset + ctx->offset_in;
+ sg_in.length = 1 << SECTOR_SHIFT;
+
+ sg_init_table(&sg_out, 1);
+ sg_set_page(&sg_out, bv_out->bv_page);
+ sg_out.offset = bv_out->bv_offset + ctx->offset_out;
+ sg_out.length = 1 << SECTOR_SHIFT;
ctx->offset_in += sg_in.length;
if (ctx->offset_in >= bv_in->bv_len) {
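This is the same conversion applied throughout this series: with scatterlist chaining, the low bits of the entry double as flag bits, so entries must be built through the accessors rather than by aggregate initialization. The pattern, in the 2.6.24-era API where sg_set_page() takes only the page (later kernels fold offset and length in); some_page is a stand-in:

	struct scatterlist sg;

	sg_init_table(&sg, 1);		/* zero the entry, mark end of table */
	sg_set_page(&sg, some_page);	/* set the page without clobbering   */
					/* the chain/termination marker bits */
	sg.offset = 0;
	sg.length = PAGE_SIZE;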
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8ee181a01f5..80a67d789b7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -376,7 +376,12 @@ static unsigned long get_stripe_work(struct stripe_head *sh)
ack++;
sh->ops.count -= ack;
- BUG_ON(sh->ops.count < 0);
+ if (unlikely(sh->ops.count < 0)) {
+ printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
+ "ops.complete: %#lx\n", pending, sh->ops.pending,
+ sh->ops.ack, sh->ops.complete);
+ BUG();
+ }
return pending;
}
@@ -550,8 +555,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
}
}
}
- clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
- clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
return_io(return_bi);
@@ -2893,6 +2897,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
/* Now to look around and see what can be done */
+ /* clean-up completed biofill operations */
+ if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) {
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
+ }
+
rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c
index aefcf28da1c..185e8a860c1 100644
--- a/drivers/media/common/ir-keymaps.c
+++ b/drivers/media/common/ir-keymaps.c
@@ -1074,41 +1074,41 @@ EXPORT_SYMBOL_GPL(ir_codes_manli);
/* Mike Baikov <mike@baikov.com> */
IR_KEYTAB_TYPE ir_codes_gotview7135[IR_KEYTAB_SIZE] = {
- [ 0x21 ] = KEY_POWER,
- [ 0x69 ] = KEY_TV,
- [ 0x33 ] = KEY_0,
- [ 0x51 ] = KEY_1,
- [ 0x31 ] = KEY_2,
- [ 0x71 ] = KEY_3,
- [ 0x3b ] = KEY_4,
- [ 0x58 ] = KEY_5,
- [ 0x41 ] = KEY_6,
- [ 0x48 ] = KEY_7,
- [ 0x30 ] = KEY_8,
- [ 0x53 ] = KEY_9,
- [ 0x73 ] = KEY_AGAIN, /* LOOP */
- [ 0x0a ] = KEY_AUDIO,
- [ 0x61 ] = KEY_PRINT, /* PREVIEW */
- [ 0x7a ] = KEY_VIDEO,
- [ 0x20 ] = KEY_CHANNELUP,
- [ 0x40 ] = KEY_CHANNELDOWN,
- [ 0x18 ] = KEY_VOLUMEDOWN,
- [ 0x50 ] = KEY_VOLUMEUP,
- [ 0x10 ] = KEY_MUTE,
- [ 0x4a ] = KEY_SEARCH,
- [ 0x7b ] = KEY_SHUFFLE, /* SNAPSHOT */
- [ 0x22 ] = KEY_RECORD,
- [ 0x62 ] = KEY_STOP,
- [ 0x78 ] = KEY_PLAY,
- [ 0x39 ] = KEY_REWIND,
- [ 0x59 ] = KEY_PAUSE,
- [ 0x19 ] = KEY_FORWARD,
- [ 0x09 ] = KEY_ZOOM,
-
- [ 0x52 ] = KEY_F21, /* LIVE TIMESHIFT */
- [ 0x1a ] = KEY_F22, /* MIN TIMESHIFT */
- [ 0x3a ] = KEY_F23, /* TIMESHIFT */
- [ 0x70 ] = KEY_F24, /* NORMAL TIMESHIFT */
+ [ 0x11 ] = KEY_POWER,
+ [ 0x35 ] = KEY_TV,
+ [ 0x1b ] = KEY_0,
+ [ 0x29 ] = KEY_1,
+ [ 0x19 ] = KEY_2,
+ [ 0x39 ] = KEY_3,
+ [ 0x1f ] = KEY_4,
+ [ 0x2c ] = KEY_5,
+ [ 0x21 ] = KEY_6,
+ [ 0x24 ] = KEY_7,
+ [ 0x18 ] = KEY_8,
+ [ 0x2b ] = KEY_9,
+ [ 0x3b ] = KEY_AGAIN, /* LOOP */
+ [ 0x06 ] = KEY_AUDIO,
+ [ 0x31 ] = KEY_PRINT, /* PREVIEW */
+ [ 0x3e ] = KEY_VIDEO,
+ [ 0x10 ] = KEY_CHANNELUP,
+ [ 0x20 ] = KEY_CHANNELDOWN,
+ [ 0x0c ] = KEY_VOLUMEDOWN,
+ [ 0x28 ] = KEY_VOLUMEUP,
+ [ 0x08 ] = KEY_MUTE,
+ [ 0x26 ] = KEY_SEARCH, /*SCAN*/
+ [ 0x3f ] = KEY_SHUFFLE, /* SNAPSHOT */
+ [ 0x12 ] = KEY_RECORD,
+ [ 0x32 ] = KEY_STOP,
+ [ 0x3c ] = KEY_PLAY,
+ [ 0x1d ] = KEY_REWIND,
+ [ 0x2d ] = KEY_PAUSE,
+ [ 0x0d ] = KEY_FORWARD,
+ [ 0x05 ] = KEY_ZOOM, /*FULL*/
+
+ [ 0x2a ] = KEY_F21, /* LIVE TIMESHIFT */
+ [ 0x0e ] = KEY_F22, /* MIN TIMESHIFT */
+ [ 0x1e ] = KEY_F23, /* TIMESHIFT */
+ [ 0x38 ] = KEY_F24, /* NORMAL TIMESHIFT */
};
EXPORT_SYMBOL_GPL(ir_codes_gotview7135);
diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c
index 365a22118a0..2b1f8b4be00 100644
--- a/drivers/media/common/saa7146_core.c
+++ b/drivers/media/common/saa7146_core.c
@@ -112,12 +112,13 @@ static struct scatterlist* vmalloc_to_sg(unsigned char *virt, int nr_pages)
sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);
for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
pg = vmalloc_to_page(virt);
if (NULL == pg)
goto err;
BUG_ON(PageHighMem(pg));
- sglist[i].page = pg;
+ sg_set_page(&sglist[i], pg);
sglist[i].length = PAGE_SIZE;
}
return sglist;
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index a05e5c18228..db08b0a8888 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -345,7 +345,9 @@ static int cinergyt2_start_feed(struct dvb_demux_feed *dvbdmxfeed)
struct dvb_demux *demux = dvbdmxfeed->demux;
struct cinergyt2 *cinergyt2 = demux->priv;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem))
+ if (cinergyt2->disconnect_pending)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&cinergyt2->sem))
return -ERESTARTSYS;
if (cinergyt2->streaming == 0)
@@ -361,7 +363,9 @@ static int cinergyt2_stop_feed(struct dvb_demux_feed *dvbdmxfeed)
struct dvb_demux *demux = dvbdmxfeed->demux;
struct cinergyt2 *cinergyt2 = demux->priv;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem))
+ if (cinergyt2->disconnect_pending)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&cinergyt2->sem))
return -ERESTARTSYS;
if (--cinergyt2->streaming == 0)
@@ -481,12 +485,16 @@ static int cinergyt2_open (struct inode *inode, struct file *file)
{
struct dvb_device *dvbdev = file->private_data;
struct cinergyt2 *cinergyt2 = dvbdev->priv;
- int err = -ERESTARTSYS;
+ int err = -EAGAIN;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem))
+ if (cinergyt2->disconnect_pending)
+ goto out;
+ err = mutex_lock_interruptible(&cinergyt2->wq_sem);
+ if (err)
goto out;
- if (mutex_lock_interruptible(&cinergyt2->sem))
+ err = mutex_lock_interruptible(&cinergyt2->sem);
+ if (err)
goto out_unlock1;
if ((err = dvb_generic_open(inode, file)))
@@ -550,7 +558,9 @@ static unsigned int cinergyt2_poll (struct file *file, struct poll_table_struct
struct cinergyt2 *cinergyt2 = dvbdev->priv;
unsigned int mask = 0;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem))
+ if (cinergyt2->disconnect_pending)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&cinergyt2->sem))
return -ERESTARTSYS;
poll_wait(file, &cinergyt2->poll_wq, wait);
@@ -625,7 +635,9 @@ static int cinergyt2_ioctl (struct inode *inode, struct file *file,
if (copy_from_user(&p, (void __user*) arg, sizeof(p)))
return -EFAULT;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem))
+ if (cinergyt2->disconnect_pending)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&cinergyt2->sem))
return -ERESTARTSYS;
param->cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS;
@@ -996,7 +1008,9 @@ static int cinergyt2_suspend (struct usb_interface *intf, pm_message_t state)
{
struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf);
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem))
+ if (cinergyt2->disconnect_pending)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&cinergyt2->wq_sem))
return -ERESTARTSYS;
cinergyt2_suspend_rc(cinergyt2);
@@ -1017,16 +1031,18 @@ static int cinergyt2_resume (struct usb_interface *intf)
{
struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf);
struct dvbt_set_parameters_msg *param = &cinergyt2->param;
- int err = -ERESTARTSYS;
+ int err = -EAGAIN;
- if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem))
+ if (cinergyt2->disconnect_pending)
+ goto out;
+ err = mutex_lock_interruptible(&cinergyt2->wq_sem);
+ if (err)
goto out;
- if (mutex_lock_interruptible(&cinergyt2->sem))
+ err = mutex_lock_interruptible(&cinergyt2->sem);
+ if (err)
goto out_unlock1;
- err = 0;
-
if (!cinergyt2->sleeping) {
cinergyt2_sleep(cinergyt2, 0);
cinergyt2_command(cinergyt2, (char *) param, sizeof(*param), NULL, 0);
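Every hunk in this cinergyT2 patch makes the same change: the old code folded two distinct failures — the device already unplugged, and a signal arriving while waiting for the mutex — into one test and returned -ERESTARTSYS for both. Split apart, a disconnect reports -EAGAIN while a signal still restarts the syscall. The shape of the conversion (priv is a stand-in):

	/* Before: one errno for two unrelated failures. */
	if (priv->disconnect_pending || mutex_lock_interruptible(&priv->sem))
		return -ERESTARTSYS;

	/* After: each failure reports its own errno. */
	if (priv->disconnect_pending)
		return -EAGAIN;
	if (mutex_lock_interruptible(&priv->sem))
		return -ERESTARTSYS;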
diff --git a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
index 084a508a03d..89437fdab8b 100644
--- a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
@@ -972,7 +972,7 @@ static int dvb_ca_en50221_thread(void *data)
/* main loop */
while (!kthread_should_stop()) {
/* sleep for a bit */
- while (!ca->wakeup) {
+ if (!ca->wakeup) {
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(ca->delay);
if (kthread_should_stop())
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index e8c4a869453..58452b52002 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -828,7 +828,7 @@ MODULE_DEVICE_TABLE(usb, dib0700_usb_id_table);
#define DIB0700_DEFAULT_DEVICE_PROPERTIES \
.caps = DVB_USB_IS_AN_I2C_ADAPTER, \
.usb_ctrl = DEVICE_SPECIFIC, \
- .firmware = "dvb-usb-dib0700-03-pre1.fw", \
+ .firmware = "dvb-usb-dib0700-1.10.fw", \
.download_firmware = dib0700_download_firmware, \
.no_reconnect = 1, \
.size_of_priv = sizeof(struct dib0700_state), \
diff --git a/drivers/media/radio/miropcm20-radio.c b/drivers/media/radio/miropcm20-radio.c
index c7c9d1dc069..3ae56fef8c9 100644
--- a/drivers/media/radio/miropcm20-radio.c
+++ b/drivers/media/radio/miropcm20-radio.c
@@ -229,7 +229,6 @@ static struct video_device pcm20_radio = {
.owner = THIS_MODULE,
.name = "Miro PCM 20 radio",
.type = VID_TYPE_TUNER,
- .hardware = VID_HARDWARE_RTRACK,
.fops = &pcm20_fops,
.priv = &pcm20_unit
};
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index 0c963db0361..5e4b9ddb23c 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -554,7 +554,6 @@ static struct video_device gemtek_radio = {
.owner = THIS_MODULE,
.name = "GemTek Radio card",
.type = VID_TYPE_TUNER,
- .hardware = VID_HARDWARE_GEMTEK,
.fops = &gemtek_fops,
.vidioc_querycap = vidioc_querycap,
.vidioc_g_tuner = vidioc_g_tuner,
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index 19e9929ffa0..c94a4d0f280 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -755,7 +755,6 @@ static struct video_device ar_template = {
.owner = THIS_MODULE,
.name = "Colour AR VGA",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_ARV,
.fops = &ar_fops,
.release = ar_release,
.minor = -1,
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 7a332b3efe5..9feeb636ff9 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -3877,7 +3877,6 @@ static struct video_device bttv_video_template =
.name = "UNSET",
.type = VID_TYPE_CAPTURE|VID_TYPE_TUNER|
VID_TYPE_CLIPPING|VID_TYPE_SCALES,
- .hardware = VID_HARDWARE_BT848,
.fops = &bttv_fops,
.minor = -1,
};
@@ -3886,7 +3885,6 @@ static struct video_device bttv_vbi_template =
{
.name = "bt848/878 vbi",
.type = VID_TYPE_TUNER|VID_TYPE_TELETEXT,
- .hardware = VID_HARDWARE_BT848,
.fops = &bttv_fops,
.minor = -1,
};
@@ -4034,7 +4032,6 @@ static struct video_device radio_template =
{
.name = "bt848/878 radio",
.type = VID_TYPE_TUNER,
- .hardware = VID_HARDWARE_BT848,
.fops = &radio_fops,
.minor = -1,
};
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index 7f7e3d3398d..58423525591 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -899,7 +899,6 @@ static struct video_device qcam_template=
.owner = THIS_MODULE,
.name = "Connectix Quickcam",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_QCAM_BW,
.fops = &qcam_fops,
};
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index f76c6a6c376..cf1546b5a7f 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -699,7 +699,6 @@ static struct video_device qcam_template=
.owner = THIS_MODULE,
.name = "Colour QuickCam",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_QCAM_C,
.fops = &qcam_fops,
};
diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c
index a1d02e5ce0f..7c630f5ee72 100644
--- a/drivers/media/video/cpia.c
+++ b/drivers/media/video/cpia.c
@@ -65,10 +65,6 @@ MODULE_PARM_DESC(colorspace_conv,
#define ABOUT "V4L-Driver for Vision CPiA based cameras"
-#ifndef VID_HARDWARE_CPIA
-#define VID_HARDWARE_CPIA 24 /* FIXME -> from linux/videodev.h */
-#endif
-
#define CPIA_MODULE_CPIA (0<<5)
#define CPIA_MODULE_SYSTEM (1<<5)
#define CPIA_MODULE_VP_CTRL (5<<5)
@@ -3804,7 +3800,6 @@ static struct video_device cpia_template = {
.owner = THIS_MODULE,
.name = "CPiA Camera",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_CPIA,
.fops = &cpia_fops,
};
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index e3aaba1e0e0..e378abec806 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -86,10 +86,6 @@ MODULE_LICENSE("GPL");
#define ABOUT "V4L-Driver for Vision CPiA2 based cameras"
-#ifndef VID_HARDWARE_CPIA2
-#error "VID_HARDWARE_CPIA2 should have been defined in linux/videodev.h"
-#endif
-
struct control_menu_info {
int value;
char name[32];
@@ -1942,7 +1938,6 @@ static struct video_device cpia2_template = {
.type= VID_TYPE_CAPTURE,
.type2 = V4L2_CAP_VIDEO_CAPTURE |
V4L2_CAP_STREAMING,
- .hardware= VID_HARDWARE_CPIA2,
.minor= -1,
.fops= &fops_template,
.release= video_device_release,
diff --git a/drivers/media/video/cx23885/cx23885-core.c b/drivers/media/video/cx23885/cx23885-core.c
index af16505bd2e..3cdd136477e 100644
--- a/drivers/media/video/cx23885/cx23885-core.c
+++ b/drivers/media/video/cx23885/cx23885-core.c
@@ -793,7 +793,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
dev->pci->subsystem_device);
cx23885_devcount--;
- goto fail_free;
+ return -ENODEV;
}
/* PCIe stuff */
@@ -835,10 +835,6 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
}
return 0;
-
-fail_free:
- kfree(dev);
- return -ENODEV;
}
void cx23885_dev_unregister(struct cx23885_dev *dev)
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 141dadf7cf1..40ffd7a5579 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -39,6 +39,7 @@
#include <sound/pcm_params.h>
#include <sound/control.h>
#include <sound/initval.h>
+#include <sound/tlv.h>
#include "cx88.h"
#include "cx88-reg.h"
@@ -82,6 +83,7 @@ typedef struct cx88_audio_dev snd_cx88_card_t;
+
/****************************************************************************
Module global static vars
****************************************************************************/
@@ -545,8 +547,8 @@ static int __devinit snd_cx88_pcm(snd_cx88_card_t *chip, int device, char *name)
/****************************************************************************
CONTROL INTERFACE
****************************************************************************/
-static int snd_cx88_capture_volume_info(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_info *info)
+static int snd_cx88_volume_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *info)
{
info->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
info->count = 2;
@@ -556,9 +558,8 @@ static int snd_cx88_capture_volume_info(struct snd_kcontrol *kcontrol,
return 0;
}
-/* OK - TODO: test it */
-static int snd_cx88_capture_volume_get(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *value)
+static int snd_cx88_volume_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *value)
{
snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
struct cx88_core *core=chip->core;
@@ -573,8 +574,8 @@ static int snd_cx88_capture_volume_get(struct snd_kcontrol *kcontrol,
}
/* OK - TODO: test it */
-static int snd_cx88_capture_volume_put(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *value)
+static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *value)
{
snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
struct cx88_core *core=chip->core;
@@ -605,14 +606,67 @@ static int snd_cx88_capture_volume_put(struct snd_kcontrol *kcontrol,
return changed;
}
-static struct snd_kcontrol_new snd_cx88_capture_volume = {
+static const DECLARE_TLV_DB_SCALE(snd_cx88_db_scale, -6300, 100, 0);
+
+static struct snd_kcontrol_new snd_cx88_volume = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+ SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+ .name = "Playback Volume",
+ .info = snd_cx88_volume_info,
+ .get = snd_cx88_volume_get,
+ .put = snd_cx88_volume_put,
+ .tlv.p = snd_cx88_db_scale,
+};
+
+static int snd_cx88_switch_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *value)
+{
+ snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
+ struct cx88_core *core = chip->core;
+ u32 bit = kcontrol->private_value;
+
+ value->value.integer.value[0] = !(cx_read(AUD_VOL_CTL) & bit);
+ return 0;
+}
+
+static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *value)
+{
+ snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
+ struct cx88_core *core = chip->core;
+ u32 bit = kcontrol->private_value;
+ int ret = 0;
+ u32 vol;
+
+ spin_lock_irq(&chip->reg_lock);
+ vol = cx_read(AUD_VOL_CTL);
+ if (value->value.integer.value[0] != !(vol & bit)) {
+ vol ^= bit;
+ cx_write(AUD_VOL_CTL, vol);
+ ret = 1;
+ }
+ spin_unlock_irq(&chip->reg_lock);
+ return ret;
+}
+
+static struct snd_kcontrol_new snd_cx88_dac_switch = {
.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
- .name = "Capture Volume",
- .info = snd_cx88_capture_volume_info,
- .get = snd_cx88_capture_volume_get,
- .put = snd_cx88_capture_volume_put,
+ .name = "Playback Switch",
+ .info = snd_ctl_boolean_mono_info,
+ .get = snd_cx88_switch_get,
+ .put = snd_cx88_switch_put,
+ .private_value = (1<<8),
};
+static struct snd_kcontrol_new snd_cx88_source_switch = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .name = "Capture Switch",
+ .info = snd_ctl_boolean_mono_info,
+ .get = snd_cx88_switch_get,
+ .put = snd_cx88_switch_put,
+ .private_value = (1<<6),
+};
/****************************************************************************
Basic Flow for Sound Devices
@@ -762,7 +816,13 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
if (err < 0)
goto error;
- err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_capture_volume, chip));
+ err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_volume, chip));
+ if (err < 0)
+ goto error;
+ err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_dac_switch, chip));
+ if (err < 0)
+ goto error;
+ err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_source_switch, chip));
if (err < 0)
goto error;
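The DECLARE_TLV_DB_SCALE() used above is what lets userspace translate raw control values into decibels: its arguments are hundredths of a dB, so (-6300, 100, 0) advertises a -63.00 dB to 0 dB range in 1 dB steps with no mute point. Roughly, the macro expands to an array like this (a sketch; the real macro also encodes the mute flag in the step word):

	static const unsigned int snd_cx88_db_scale[] = {
		SNDRV_CTL_TLVT_DB_SCALE,	/* type: linear dB scale      */
		2 * sizeof(unsigned int),	/* payload length in bytes    */
		-6300,				/* value 0  maps to -63.00 dB */
		100,				/* each step adds   +1.00 dB  */
	};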
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 6d6f5048d76..f33f0b47142 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -527,44 +527,6 @@ static void blackbird_codec_settings(struct cx8802_dev *dev)
cx2341x_update(dev, blackbird_mbox_func, NULL, &dev->params);
}
-static struct v4l2_mpeg_compression default_mpeg_params = {
- .st_type = V4L2_MPEG_PS_2,
- .st_bitrate = {
- .mode = V4L2_BITRATE_CBR,
- .min = 0,
- .target = 0,
- .max = 0
- },
- .ts_pid_pmt = 16,
- .ts_pid_audio = 260,
- .ts_pid_video = 256,
- .ts_pid_pcr = 259,
- .ps_size = 0,
- .au_type = V4L2_MPEG_AU_2_II,
- .au_bitrate = {
- .mode = V4L2_BITRATE_CBR,
- .min = 224,
- .target = 224,
- .max = 224
- },
- .au_sample_rate = 48000,
- .au_pesid = 0,
- .vi_type = V4L2_MPEG_VI_2,
- .vi_aspect_ratio = V4L2_MPEG_ASPECT_4_3,
- .vi_bitrate = {
- .mode = V4L2_BITRATE_CBR,
- .min = 4000,
- .target = 4500,
- .max = 6000
- },
- .vi_frame_rate = 25,
- .vi_frames_per_gop = 12,
- .vi_bframes_count = 2,
- .vi_pesid = 0,
- .closed_gops = 1,
- .pulldown = 0
-};
-
static int blackbird_initialize_codec(struct cx8802_dev *dev)
{
struct cx88_core *core = dev->core;
@@ -852,23 +814,6 @@ static int vidioc_streamoff(struct file *file, void *priv, enum v4l2_buf_type i)
return videobuf_streamoff(&fh->mpegq);
}
-static int vidioc_g_mpegcomp (struct file *file, void *fh,
- struct v4l2_mpeg_compression *f)
-{
- printk(KERN_WARNING "VIDIOC_G_MPEGCOMP is obsolete. "
- "Replace with VIDIOC_G_EXT_CTRLS!");
- memcpy(f,&default_mpeg_params,sizeof(*f));
- return 0;
-}
-
-static int vidioc_s_mpegcomp (struct file *file, void *fh,
- struct v4l2_mpeg_compression *f)
-{
- printk(KERN_WARNING "VIDIOC_S_MPEGCOMP is obsolete. "
- "Replace with VIDIOC_S_EXT_CTRLS!");
- return 0;
-}
-
static int vidioc_g_ext_ctrls (struct file *file, void *priv,
struct v4l2_ext_controls *f)
{
@@ -1216,8 +1161,6 @@ static struct video_device cx8802_mpeg_template =
.vidioc_dqbuf = vidioc_dqbuf,
.vidioc_streamon = vidioc_streamon,
.vidioc_streamoff = vidioc_streamoff,
- .vidioc_g_mpegcomp = vidioc_g_mpegcomp,
- .vidioc_s_mpegcomp = vidioc_s_mpegcomp,
.vidioc_g_ext_ctrls = vidioc_g_ext_ctrls,
.vidioc_s_ext_ctrls = vidioc_s_ext_ctrls,
.vidioc_try_ext_ctrls = vidioc_try_ext_ctrls,
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index d16e5c6d21c..fce19caf9d0 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -475,8 +475,9 @@ static int dvb_register(struct cx8802_dev *dev)
break;
case CX88_BOARD_DNTV_LIVE_DVB_T_PRO:
#if defined(CONFIG_VIDEO_CX88_VP3054) || (defined(CONFIG_VIDEO_CX88_VP3054_MODULE) && defined(MODULE))
+ /* MT352 is on a secondary I2C bus made from some GPIO lines */
dev->dvb.frontend = dvb_attach(mt352_attach, &dntv_live_dvbt_pro_config,
- &((struct vp3054_i2c_state *)dev->card_priv)->adap);
+ &dev->vp3054->adap);
if (dev->dvb.frontend != NULL) {
dvb_attach(dvb_pll_attach, dev->dvb.frontend, 0x61,
&dev->core->i2c_adap, DVB_PLL_FMD1216ME);
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index a652f294d23..448c6738094 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -79,7 +79,8 @@ static int cx8802_start_dma(struct cx8802_dev *dev,
{
struct cx88_core *core = dev->core;
- dprintk(1, "cx8802_start_dma w: %d, h: %d, f: %d\n", dev->width, dev->height, buf->vb.field);
+ dprintk(1, "cx8802_start_dma w: %d, h: %d, f: %d\n",
+ buf->vb.width, buf->vb.height, buf->vb.field);
/* setup fifo + format */
cx88_sram_channel_setup(core, &cx88_sram_channels[SRAM_CH28],
@@ -177,7 +178,6 @@ static int cx8802_restart_queue(struct cx8802_dev *dev,
struct cx88_dmaqueue *q)
{
struct cx88_buffer *buf;
- struct list_head *item;
dprintk( 1, "cx8802_restart_queue\n" );
if (list_empty(&q->active))
@@ -223,10 +223,8 @@ static int cx8802_restart_queue(struct cx8802_dev *dev,
dprintk(2,"restart_queue [%p/%d]: restart dma\n",
buf, buf->vb.i);
cx8802_start_dma(dev, q, buf);
- list_for_each(item,&q->active) {
- buf = list_entry(item, struct cx88_buffer, vb.queue);
+ list_for_each_entry(buf, &q->active, vb.queue)
buf->count = q->count++;
- }
mod_timer(&q->timeout, jiffies+BUFFER_TIMEOUT);
return 0;
}
@@ -572,42 +570,29 @@ int cx8802_resume_common(struct pci_dev *pci_dev)
return 0;
}
+#if defined(CONFIG_VIDEO_CX88_BLACKBIRD) || \
+ defined(CONFIG_VIDEO_CX88_BLACKBIRD_MODULE)
struct cx8802_dev * cx8802_get_device(struct inode *inode)
{
int minor = iminor(inode);
- struct cx8802_dev *h = NULL;
- struct list_head *list;
+ struct cx8802_dev *dev;
- list_for_each(list,&cx8802_devlist) {
- h = list_entry(list, struct cx8802_dev, devlist);
- if (h->mpeg_dev && h->mpeg_dev->minor == minor)
- return h;
- }
+ list_for_each_entry(dev, &cx8802_devlist, devlist)
+ if (dev->mpeg_dev && dev->mpeg_dev->minor == minor)
+ return dev;
return NULL;
}
+EXPORT_SYMBOL(cx8802_get_device);
+#endif
struct cx8802_driver * cx8802_get_driver(struct cx8802_dev *dev, enum cx88_board_type btype)
{
- struct cx8802_dev *h = NULL;
- struct cx8802_driver *d = NULL;
- struct list_head *list;
- struct list_head *list2;
-
- list_for_each(list,&cx8802_devlist) {
- h = list_entry(list, struct cx8802_dev, devlist);
- if (h != dev)
- continue;
-
- list_for_each(list2, &h->drvlist.devlist) {
- d = list_entry(list2, struct cx8802_driver, devlist);
+ struct cx8802_driver *d;
- /* only unregister the correct driver type */
- if (d->type_id == btype) {
- return d;
- }
- }
- }
+ list_for_each_entry(d, &dev->drvlist, drvlist)
+ if (d->type_id == btype)
+ return d;
return NULL;
}
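Most of the churn in cx88-mpeg.c is this one mechanical conversion: a list_for_each() plus manual list_entry() pair becomes list_for_each_entry(), which folds the container_of() arithmetic into the iterator. Schematically, with a hypothetical struct foo keeping its list_head in a member called node and a list head called head:

	struct foo { int id; struct list_head node; };
	struct list_head *pos;
	struct foo *f;

	/* Old form: walk raw nodes, recover the container by hand. */
	list_for_each(pos, &head) {
		f = list_entry(pos, struct foo, node);
		/* ... use f ... */
	}

	/* New form: the iterator hands back the container directly. */
	list_for_each_entry(f, &head, node) {
		/* ... use f ... */
	}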
@@ -671,10 +656,9 @@ static int cx8802_check_driver(struct cx8802_driver *drv)
int cx8802_register_driver(struct cx8802_driver *drv)
{
- struct cx8802_dev *h;
+ struct cx8802_dev *dev;
struct cx8802_driver *driver;
- struct list_head *list;
- int err = 0, i = 0;
+ int err, i = 0;
printk(KERN_INFO
"cx88/2: registering cx8802 driver, type: %s access: %s\n",
@@ -686,14 +670,12 @@ int cx8802_register_driver(struct cx8802_driver *drv)
return err;
}
- list_for_each(list,&cx8802_devlist) {
- h = list_entry(list, struct cx8802_dev, devlist);
-
+ list_for_each_entry(dev, &cx8802_devlist, devlist) {
printk(KERN_INFO
"%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n",
- h->core->name, h->pci->subsystem_vendor,
- h->pci->subsystem_device, h->core->board.name,
- h->core->boardnr);
+ dev->core->name, dev->pci->subsystem_vendor,
+ dev->pci->subsystem_device, dev->core->board.name,
+ dev->core->boardnr);
/* Bring up a new struct for each driver instance */
driver = kzalloc(sizeof(*drv),GFP_KERNEL);
@@ -701,7 +683,7 @@ int cx8802_register_driver(struct cx8802_driver *drv)
return -ENOMEM;
/* Snapshot of the driver registration data */
- drv->core = h->core;
+ drv->core = dev->core;
drv->suspend = cx8802_suspend_common;
drv->resume = cx8802_resume_common;
drv->request_acquire = cx8802_request_acquire;
@@ -712,49 +694,38 @@ int cx8802_register_driver(struct cx8802_driver *drv)
if (err == 0) {
i++;
mutex_lock(&drv->core->lock);
- list_add_tail(&driver->devlist,&h->drvlist.devlist);
+ list_add_tail(&driver->drvlist, &dev->drvlist);
mutex_unlock(&drv->core->lock);
} else {
printk(KERN_ERR
"%s/2: cx8802 probe failed, err = %d\n",
- h->core->name, err);
+ dev->core->name, err);
}
}
- if (i == 0)
- err = -ENODEV;
- else
- err = 0;
- return err;
+ return i ? 0 : -ENODEV;
}
int cx8802_unregister_driver(struct cx8802_driver *drv)
{
- struct cx8802_dev *h;
- struct cx8802_driver *d;
- struct list_head *list;
- struct list_head *list2, *q;
- int err = 0, i = 0;
+ struct cx8802_dev *dev;
+ struct cx8802_driver *d, *dtmp;
+ int err = 0;
printk(KERN_INFO
"cx88/2: unregistering cx8802 driver, type: %s access: %s\n",
drv->type_id == CX88_MPEG_DVB ? "dvb" : "blackbird",
drv->hw_access == CX8802_DRVCTL_SHARED ? "shared" : "exclusive");
- list_for_each(list,&cx8802_devlist) {
- i++;
- h = list_entry(list, struct cx8802_dev, devlist);
-
+ list_for_each_entry(dev, &cx8802_devlist, devlist) {
printk(KERN_INFO
"%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n",
- h->core->name, h->pci->subsystem_vendor,
- h->pci->subsystem_device, h->core->board.name,
- h->core->boardnr);
-
- list_for_each_safe(list2, q, &h->drvlist.devlist) {
- d = list_entry(list2, struct cx8802_driver, devlist);
+ dev->core->name, dev->pci->subsystem_vendor,
+ dev->pci->subsystem_device, dev->core->board.name,
+ dev->core->boardnr);
+ list_for_each_entry_safe(d, dtmp, &dev->drvlist, drvlist) {
/* only unregister the correct driver type */
if (d->type_id != drv->type_id)
continue;
@@ -762,12 +733,12 @@ int cx8802_unregister_driver(struct cx8802_driver *drv)
err = d->remove(d);
if (err == 0) {
mutex_lock(&drv->core->lock);
- list_del(list2);
+ list_del(&d->drvlist);
mutex_unlock(&drv->core->lock);
+ kfree(d);
} else
printk(KERN_ERR "%s/2: cx8802 driver remove "
- "failed (%d)\n", h->core->name, err);
-
+ "failed (%d)\n", dev->core->name, err);
}
}
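Where the loop body unlinks and frees the current element, as above, the plain iterator would chase a freed next pointer; list_for_each_entry_safe() caches the successor in a second cursor before the body runs. Continuing the struct foo sketch:

	struct foo *f, *tmp;

	list_for_each_entry_safe(f, tmp, &head, node) {
		list_del(&f->node);	/* safe: tmp already points past f */
		kfree(f);
	}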
@@ -805,7 +776,7 @@ static int __devinit cx8802_probe(struct pci_dev *pci_dev,
if (err != 0)
goto fail_free;
- INIT_LIST_HEAD(&dev->drvlist.devlist);
+ INIT_LIST_HEAD(&dev->drvlist);
list_add_tail(&dev->devlist,&cx8802_devlist);
/* Maintain a reference so cx88-video can query the 8802 device. */
@@ -825,23 +796,30 @@ static int __devinit cx8802_probe(struct pci_dev *pci_dev,
static void __devexit cx8802_remove(struct pci_dev *pci_dev)
{
struct cx8802_dev *dev;
- struct cx8802_driver *h;
- struct list_head *list;
dev = pci_get_drvdata(pci_dev);
dprintk( 1, "%s\n", __FUNCTION__);
- list_for_each(list,&dev->drvlist.devlist) {
- h = list_entry(list, struct cx8802_driver, devlist);
- dprintk( 1, " ->driver\n");
- if (h->remove == NULL) {
- printk(KERN_ERR "%s .. skipping driver, no probe function\n", __FUNCTION__);
- continue;
+ if (!list_empty(&dev->drvlist)) {
+ struct cx8802_driver *drv, *tmp;
+ int err;
+
+ printk(KERN_WARNING "%s/2: Trying to remove cx8802 driver "
+ "while cx8802 sub-drivers still loaded?!\n",
+ dev->core->name);
+
+ list_for_each_entry_safe(drv, tmp, &dev->drvlist, drvlist) {
+ err = drv->remove(drv);
+ if (err == 0) {
+ mutex_lock(&drv->core->lock);
+ list_del(&drv->drvlist);
+ mutex_unlock(&drv->core->lock);
+ } else
+ printk(KERN_ERR "%s/2: cx8802 driver remove "
+ "failed (%d)\n", dev->core->name, err);
+ kfree(drv);
}
- printk(KERN_INFO "%s .. Removing driver type %d\n", __FUNCTION__, h->type_id);
- cx8802_unregister_driver(h);
- list_del(&dev->drvlist.devlist);
}
/* Destroy any 8802 reference. */
@@ -901,7 +879,6 @@ EXPORT_SYMBOL(cx8802_fini_common);
EXPORT_SYMBOL(cx8802_register_driver);
EXPORT_SYMBOL(cx8802_unregister_driver);
-EXPORT_SYMBOL(cx8802_get_device);
EXPORT_SYMBOL(cx8802_get_driver);
/* ----------------------------------------------------------- */
/*
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 231ae6c4dd2..5ee05f8f3fa 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1675,7 +1675,6 @@ static struct video_device cx8800_radio_template =
{
.name = "cx8800-radio",
.type = VID_TYPE_TUNER,
- .hardware = 0,
.fops = &radio_fops,
.minor = -1,
.vidioc_querycap = radio_querycap,
diff --git a/drivers/media/video/cx88/cx88-vp3054-i2c.c b/drivers/media/video/cx88/cx88-vp3054-i2c.c
index 77c37889232..6ce5af48847 100644
--- a/drivers/media/video/cx88/cx88-vp3054-i2c.c
+++ b/drivers/media/video/cx88/cx88-vp3054-i2c.c
@@ -41,7 +41,7 @@ static void vp3054_bit_setscl(void *data, int state)
{
struct cx8802_dev *dev = data;
struct cx88_core *core = dev->core;
- struct vp3054_i2c_state *vp3054_i2c = dev->card_priv;
+ struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
if (state) {
vp3054_i2c->state |= 0x0001; /* SCL high */
@@ -58,7 +58,7 @@ static void vp3054_bit_setsda(void *data, int state)
{
struct cx8802_dev *dev = data;
struct cx88_core *core = dev->core;
- struct vp3054_i2c_state *vp3054_i2c = dev->card_priv;
+ struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
if (state) {
vp3054_i2c->state |= 0x0002; /* SDA high */
@@ -113,10 +113,10 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
if (core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO)
return 0;
- dev->card_priv = kzalloc(sizeof(*vp3054_i2c), GFP_KERNEL);
- if (dev->card_priv == NULL)
+ vp3054_i2c = kzalloc(sizeof(*vp3054_i2c), GFP_KERNEL);
+ if (vp3054_i2c == NULL)
return -ENOMEM;
- vp3054_i2c = dev->card_priv;
+ dev->vp3054 = vp3054_i2c;
memcpy(&vp3054_i2c->algo, &vp3054_i2c_algo_template,
sizeof(vp3054_i2c->algo));
@@ -139,8 +139,8 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
if (0 != rc) {
printk("%s: vp3054_i2c register FAILED\n", core->name);
- kfree(dev->card_priv);
- dev->card_priv = NULL;
+ kfree(dev->vp3054);
+ dev->vp3054 = NULL;
}
return rc;
@@ -148,7 +148,7 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
void vp3054_i2c_remove(struct cx8802_dev *dev)
{
- struct vp3054_i2c_state *vp3054_i2c = dev->card_priv;
+ struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
if (vp3054_i2c == NULL ||
dev->core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO)
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index 42e0a9b8c55..eb296bdecb1 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -412,7 +412,9 @@ struct cx8802_suspend_state {
struct cx8802_driver {
struct cx88_core *core;
- struct list_head devlist;
+
+ /* List of drivers attached to device */
+ struct list_head drvlist;
/* Type of driver and access required */
enum cx88_board_type type_id;
@@ -453,27 +455,33 @@ struct cx8802_dev {
/* for blackbird only */
struct list_head devlist;
+#if defined(CONFIG_VIDEO_CX88_BLACKBIRD) || \
+ defined(CONFIG_VIDEO_CX88_BLACKBIRD_MODULE)
struct video_device *mpeg_dev;
u32 mailbox;
int width;
int height;
+ /* mpeg params */
+ struct cx2341x_mpeg_params params;
+#endif
+
#if defined(CONFIG_VIDEO_CX88_DVB) || defined(CONFIG_VIDEO_CX88_DVB_MODULE)
/* for dvb only */
struct videobuf_dvb dvb;
+#endif
- void *card_priv;
+#if defined(CONFIG_VIDEO_CX88_VP3054) || \
+ defined(CONFIG_VIDEO_CX88_VP3054_MODULE)
+ /* For VP3054 secondary I2C bus support */
+ struct vp3054_i2c_state *vp3054;
#endif
/* for switching modulation types */
unsigned char ts_gen_cntrl;
- /* mpeg params */
- struct cx2341x_mpeg_params params;
-
/* List of attached drivers */
- struct cx8802_driver drvlist;
- struct work_struct request_module_wk;
-
+ struct list_head drvlist;
+ struct work_struct request_module_wk;
};
/* ----------------------------------------------------------- */
diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index d3282ec62c5..d56484f2046 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -648,7 +648,7 @@ void em28xx_uninit_isoc(struct em28xx *dev)
*/
int em28xx_init_isoc(struct em28xx *dev)
{
- /* change interface to 3 which allowes the biggest packet sizes */
+ /* change interface to 3 which allows the biggest packet sizes */
int i, errCode;
const int sb_size = EM28XX_NUM_PACKETS * dev->max_pkt_size;
@@ -673,6 +673,7 @@ int em28xx_init_isoc(struct em28xx *dev)
("unable to allocate %i bytes for transfer buffer %i\n",
sb_size, i);
em28xx_uninit_isoc(dev);
+ usb_free_urb(urb);
return -ENOMEM;
}
memset(dev->transfer_buffer[i], 0, sb_size);
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index e467682aabd..a4c2a907124 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1617,7 +1617,6 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev,
/* Fills VBI device info */
dev->vbi_dev->type = VFL_TYPE_VBI;
- dev->vbi_dev->hardware = 0;
dev->vbi_dev->fops = &em28xx_v4l_fops;
dev->vbi_dev->minor = -1;
dev->vbi_dev->dev = &dev->udev->dev;
@@ -1629,7 +1628,6 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev,
dev->vdev->type = VID_TYPE_CAPTURE;
if (dev->has_tuner)
dev->vdev->type |= VID_TYPE_TUNER;
- dev->vdev->hardware = 0;
dev->vdev->fops = &em28xx_v4l_fops;
dev->vdev->minor = -1;
dev->vdev->dev = &dev->udev->dev;
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index d5fef4c01c8..d19d73b81ed 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -2585,7 +2585,6 @@ et61x251_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
strcpy(cam->v4ldev->name, "ET61X[12]51 PC Camera");
cam->v4ldev->owner = THIS_MODULE;
cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
- cam->v4ldev->hardware = 0;
cam->v4ldev->fops = &et61x251_fops;
cam->v4ldev->minor = video_nr[dev_nr];
cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index d98dd0d1e37..29779d8bf7f 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -528,6 +528,7 @@ static int ir_probe(struct i2c_adapter *adap)
break;
case I2C_HW_B_CX2388x:
probe = probe_cx88;
+ break;
case I2C_HW_B_CX23885:
probe = probe_cx23885;
break;
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index fd7a932e1d3..6d2dd8764f8 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -1003,8 +1003,6 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
IVTV_DEBUG_INFO("base addr: 0x%08x\n", itv->base_addr);
- mutex_lock(&itv->serialize_lock);
-
/* PCI Device Setup */
if ((retval = ivtv_setup_pci(itv, dev, pci_id)) != 0) {
if (retval == -EIO)
@@ -1064,7 +1062,7 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
IVTV_DEBUG_INFO("activating i2c...\n");
if (init_ivtv_i2c(itv)) {
IVTV_ERR("Could not initialize i2c\n");
- goto free_irq;
+ goto free_io;
}
IVTV_DEBUG_INFO("Active card count: %d.\n", ivtv_cards_active);
@@ -1176,7 +1174,11 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
IVTV_ERR("Failed to register irq %d\n", retval);
goto free_streams;
}
- mutex_unlock(&itv->serialize_lock);
+ retval = ivtv_streams_register(itv);
+ if (retval) {
+ IVTV_ERR("Error %d registering devices\n", retval);
+ goto free_irq;
+ }
IVTV_INFO("Initialized card #%d: %s\n", itv->num, itv->card_name);
return 0;
@@ -1195,7 +1197,6 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
release_mem_region(itv->base_addr + IVTV_DECODER_OFFSET, IVTV_DECODER_SIZE);
free_workqueue:
destroy_workqueue(itv->irq_work_queues);
- mutex_unlock(&itv->serialize_lock);
err:
if (retval == 0)
retval = -ENODEV;
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index 3bda1df63cb..49ce14d14a5 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -51,6 +51,7 @@
#include <linux/unistd.h>
#include <linux/byteorder/swab.h>
#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
diff --git a/drivers/media/video/ivtv/ivtv-fileops.c b/drivers/media/video/ivtv/ivtv-fileops.c
index da50fa4a72a..a200a8a95a2 100644
--- a/drivers/media/video/ivtv/ivtv-fileops.c
+++ b/drivers/media/video/ivtv/ivtv-fileops.c
@@ -822,6 +822,11 @@ int ivtv_v4l2_close(struct inode *inode, struct file *filp)
crystal_freq.flags = 0;
ivtv_saa7115(itv, VIDIOC_INT_S_CRYSTAL_FREQ, &crystal_freq);
}
+ if (atomic_read(&itv->capturing) > 0) {
+ /* Undo video mute */
+ ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1,
+ itv->params.video_mute | (itv->params.video_mute_yuv << 8));
+ }
/* Done! Unmute and continue. */
ivtv_unmute(itv);
ivtv_release_stream(s);
@@ -892,6 +897,7 @@ static int ivtv_serialized_open(struct ivtv_stream *s, struct file *filp)
if (atomic_read(&itv->capturing) > 0) {
/* switching to radio while capture is
in progress is not polite */
+ ivtv_release_stream(s);
kfree(item);
return -EBUSY;
}
@@ -947,7 +953,7 @@ int ivtv_v4l2_open(struct inode *inode, struct file *filp)
if (itv == NULL) {
/* Couldn't find a device registered
on that minor, shouldn't happen! */
- IVTV_WARN("No ivtv device found on minor %d\n", minor);
+ printk(KERN_WARNING "No ivtv device found on minor %d\n", minor);
return -ENXIO;
}
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index 206eee7542d..fd6826f472e 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -555,6 +555,7 @@ static int ivtv_try_or_set_fmt(struct ivtv *itv, int streamtype,
/* set window size */
if (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+ struct cx2341x_mpeg_params *p = &itv->params;
int w = fmt->fmt.pix.width;
int h = fmt->fmt.pix.height;
@@ -566,17 +567,19 @@ static int ivtv_try_or_set_fmt(struct ivtv *itv, int streamtype,
fmt->fmt.pix.width = w;
fmt->fmt.pix.height = h;
- if (!set_fmt || (itv->params.width == w && itv->params.height == h))
+ if (!set_fmt || (p->width == w && p->height == h))
return 0;
if (atomic_read(&itv->capturing) > 0)
return -EBUSY;
- itv->params.width = w;
- itv->params.height = h;
+ p->width = w;
+ p->height = h;
if (w != 720 || h != (itv->is_50hz ? 576 : 480))
- itv->params.video_temporal_filter = 0;
+ p->video_temporal_filter = 0;
else
- itv->params.video_temporal_filter = 8;
+ p->video_temporal_filter = 8;
+ if (p->video_encoding == V4L2_MPEG_VIDEO_ENCODING_MPEG_1)
+ fmt->fmt.pix.width /= 2;
itv->video_dec_func(itv, VIDIOC_S_FMT, fmt);
return ivtv_get_fmt(itv, streamtype, fmt);
}
diff --git a/drivers/media/video/ivtv/ivtv-streams.c b/drivers/media/video/ivtv/ivtv-streams.c
index fd135985e70..aa03e61ef31 100644
--- a/drivers/media/video/ivtv/ivtv-streams.c
+++ b/drivers/media/video/ivtv/ivtv-streams.c
@@ -166,10 +166,9 @@ static void ivtv_stream_init(struct ivtv *itv, int type)
ivtv_queue_init(&s->q_io);
}
-static int ivtv_reg_dev(struct ivtv *itv, int type)
+static int ivtv_prep_dev(struct ivtv *itv, int type)
{
struct ivtv_stream *s = &itv->streams[type];
- int vfl_type = ivtv_stream_info[type].vfl_type;
int minor_offset = ivtv_stream_info[type].minor_offset;
int minor;
@@ -187,15 +186,12 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
if (type >= IVTV_DEC_STREAM_TYPE_MPG && !(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
return 0;
- if (minor_offset >= 0)
- /* card number + user defined offset + device offset */
- minor = itv->num + ivtv_first_minor + minor_offset;
- else
- minor = -1;
+ /* card number + user defined offset + device offset */
+ minor = itv->num + ivtv_first_minor + minor_offset;
/* User explicitly selected 0 buffers for these streams, so don't
create them. */
- if (minor >= 0 && ivtv_stream_info[type].dma != PCI_DMA_NONE &&
+ if (ivtv_stream_info[type].dma != PCI_DMA_NONE &&
itv->options.kilobytes[type] == 0) {
IVTV_INFO("Disabled %s device\n", ivtv_stream_info[type].name);
return 0;
@@ -223,21 +219,53 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
s->v4l2dev->fops = ivtv_stream_info[type].fops;
s->v4l2dev->release = video_device_release;
- if (minor >= 0) {
- /* Register device. First try the desired minor, then any free one. */
- if (video_register_device(s->v4l2dev, vfl_type, minor) &&
- video_register_device(s->v4l2dev, vfl_type, -1)) {
- IVTV_ERR("Couldn't register v4l2 device for %s minor %d\n",
- s->name, minor);
- video_device_release(s->v4l2dev);
- s->v4l2dev = NULL;
- return -ENOMEM;
- }
+ return 0;
+}
+
+/* Initialize v4l2 variables and prepare v4l2 devices */
+int ivtv_streams_setup(struct ivtv *itv)
+{
+ int type;
+
+ /* Setup V4L2 Devices */
+ for (type = 0; type < IVTV_MAX_STREAMS; type++) {
+ /* Prepare device */
+ if (ivtv_prep_dev(itv, type))
+ break;
+
+ if (itv->streams[type].v4l2dev == NULL)
+ continue;
+
+ /* Allocate Stream */
+ if (ivtv_stream_alloc(&itv->streams[type]))
+ break;
}
- else {
- /* Don't register a 'hidden' stream (OSD) */
- IVTV_INFO("Created framebuffer stream for %s\n", s->name);
+ if (type == IVTV_MAX_STREAMS)
return 0;
+
+ /* One or more streams could not be initialized. Clean 'em all up. */
+ ivtv_streams_cleanup(itv);
+ return -ENOMEM;
+}
+
+static int ivtv_reg_dev(struct ivtv *itv, int type)
+{
+ struct ivtv_stream *s = &itv->streams[type];
+ int vfl_type = ivtv_stream_info[type].vfl_type;
+ int minor;
+
+ if (s->v4l2dev == NULL)
+ return 0;
+
+ minor = s->v4l2dev->minor;
+ /* Register device. First try the desired minor, then any free one. */
+ if (video_register_device(s->v4l2dev, vfl_type, minor) &&
+ video_register_device(s->v4l2dev, vfl_type, -1)) {
+ IVTV_ERR("Couldn't register v4l2 device for %s minor %d\n",
+ s->name, minor);
+ video_device_release(s->v4l2dev);
+ s->v4l2dev = NULL;
+ return -ENOMEM;
}
switch (vfl_type) {
@@ -262,27 +290,18 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
return 0;
}
-/* Initialize v4l2 variables and register v4l2 devices */
-int ivtv_streams_setup(struct ivtv *itv)
+/* Register v4l2 devices */
+int ivtv_streams_register(struct ivtv *itv)
{
int type;
+ int err = 0;
- /* Setup V4L2 Devices */
- for (type = 0; type < IVTV_MAX_STREAMS; type++) {
- /* Register Device */
- if (ivtv_reg_dev(itv, type))
- break;
-
- if (itv->streams[type].v4l2dev == NULL)
- continue;
+ /* Register V4L2 devices */
+ for (type = 0; type < IVTV_MAX_STREAMS; type++)
+ err |= ivtv_reg_dev(itv, type);
- /* Allocate Stream */
- if (ivtv_stream_alloc(&itv->streams[type]))
- break;
- }
- if (type == IVTV_MAX_STREAMS) {
+ if (err == 0)
return 0;
- }
/* One or more streams could not be initialized. Clean 'em all up. */
ivtv_streams_cleanup(itv);
@@ -303,11 +322,8 @@ void ivtv_streams_cleanup(struct ivtv *itv)
continue;
ivtv_stream_free(&itv->streams[type]);
- /* Free Device */
- if (vdev->minor == -1) /* 'Hidden' never registered stream (OSD) */
- video_device_release(vdev);
- else /* All others, just unregister. */
- video_unregister_device(vdev);
+ /* Unregister device */
+ video_unregister_device(vdev);
}
}
@@ -425,6 +441,7 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
{
u32 data[CX2341X_MBOX_MAX_DATA];
struct ivtv *itv = s->itv;
+ struct cx2341x_mpeg_params *p = &itv->params;
int captype = 0, subtype = 0;
int enable_passthrough = 0;
@@ -445,7 +462,7 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
}
itv->mpg_data_received = itv->vbi_data_inserted = 0;
itv->dualwatch_jiffies = jiffies;
- itv->dualwatch_stereo_mode = itv->params.audio_properties & 0x0300;
+ itv->dualwatch_stereo_mode = p->audio_properties & 0x0300;
itv->search_pack_header = 0;
break;
@@ -477,9 +494,6 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
s->subtype = subtype;
s->buffers_stolen = 0;
- /* mute/unmute video */
- ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1, test_bit(IVTV_F_I_RADIO_USER, &itv->i_flags) ? 1 : 0);
-
/* Clear Streamoff flags in case left from last capture */
clear_bit(IVTV_F_S_STREAMOFF, &s->s_flags);
@@ -536,7 +550,12 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
itv->pgm_info_offset, itv->pgm_info_num);
/* Setup API for Stream */
- cx2341x_update(itv, ivtv_api_func, NULL, &itv->params);
+ cx2341x_update(itv, ivtv_api_func, NULL, p);
+
+ /* mute if capturing radio */
+ if (test_bit(IVTV_F_I_RADIO_USER, &itv->i_flags))
+ ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1,
+ 1 | (p->video_mute_yuv << 8));
}
/* Vsync Setup */
@@ -585,6 +604,7 @@ static int ivtv_setup_v4l2_decode_stream(struct ivtv_stream *s)
{
u32 data[CX2341X_MBOX_MAX_DATA];
struct ivtv *itv = s->itv;
+ struct cx2341x_mpeg_params *p = &itv->params;
int datatype;
if (s->v4l2dev == NULL)
@@ -623,7 +643,7 @@ static int ivtv_setup_v4l2_decode_stream(struct ivtv_stream *s)
break;
}
if (ivtv_vapi(itv, CX2341X_DEC_SET_DECODER_SOURCE, 4, datatype,
- itv->params.width, itv->params.height, itv->params.audio_properties)) {
+ p->width, p->height, p->audio_properties)) {
IVTV_DEBUG_WARN("Couldn't initialize decoder source\n");
}
return 0;
diff --git a/drivers/media/video/ivtv/ivtv-streams.h b/drivers/media/video/ivtv/ivtv-streams.h
index 8f5f5b1c7c8..3d76a415fbd 100644
--- a/drivers/media/video/ivtv/ivtv-streams.h
+++ b/drivers/media/video/ivtv/ivtv-streams.h
@@ -22,6 +22,7 @@
#define IVTV_STREAMS_H
int ivtv_streams_setup(struct ivtv *itv);
+int ivtv_streams_register(struct ivtv *itv);
void ivtv_streams_cleanup(struct ivtv *itv);
/* Capture related */
diff --git a/drivers/media/video/ivtv/ivtv-udma.c b/drivers/media/video/ivtv/ivtv-udma.c
index c4626d1cdf4..912b424e520 100644
--- a/drivers/media/video/ivtv/ivtv-udma.c
+++ b/drivers/media/video/ivtv/ivtv-udma.c
@@ -63,10 +63,10 @@ int ivtv_udma_fill_sg_list (struct ivtv_user_dma *dma, struct ivtv_dma_page_info
memcpy(page_address(dma->bouncemap[map_offset]) + offset, src, len);
kunmap_atomic(src, KM_BOUNCE_READ);
local_irq_restore(flags);
- dma->SGlist[map_offset].page = dma->bouncemap[map_offset];
+ sg_set_page(&dma->SGlist[map_offset], dma->bouncemap[map_offset]);
}
else {
- dma->SGlist[map_offset].page = dma->map[map_offset];
+ sg_set_page(&dma->SGlist[map_offset], dma->map[map_offset]);
}
offset = 0;
map_offset++;
diff --git a/drivers/media/video/ivtv/ivtv-yuv.c b/drivers/media/video/ivtv/ivtv-yuv.c
index e2288f224ab..9091c4837bb 100644
--- a/drivers/media/video/ivtv/ivtv-yuv.c
+++ b/drivers/media/video/ivtv/ivtv-yuv.c
@@ -710,7 +710,7 @@ static u32 ivtv_yuv_window_setup (struct ivtv *itv, struct yuv_frame_info *windo
/* If there's nothing safe to display, we may as well stop now */
if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) {
- return 0;
+ return IVTV_YUV_UPDATE_INVALID;
}
/* Ensure video remains inside OSD area */
@@ -791,7 +791,7 @@ static u32 ivtv_yuv_window_setup (struct ivtv *itv, struct yuv_frame_info *windo
/* Check again. If there's nothing safe to display, stop now */
if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) {
- return 0;
+ return IVTV_YUV_UPDATE_INVALID;
}
/* Both x offset & width are linked, so they have to be done together */
@@ -840,110 +840,118 @@ void ivtv_yuv_work_handler (struct ivtv *itv)
if (!(yuv_update = ivtv_yuv_window_setup (itv, &window)))
return;
- /* Update horizontal settings */
- if (yuv_update & IVTV_YUV_UPDATE_HORIZONTAL)
- ivtv_yuv_handle_horizontal(itv, &window);
+ if (yuv_update & IVTV_YUV_UPDATE_INVALID) {
+ write_reg(0x01008080, 0x2898);
+ } else if (yuv_update) {
+ write_reg(0x00108080, 0x2898);
- if (yuv_update & IVTV_YUV_UPDATE_VERTICAL)
- ivtv_yuv_handle_vertical(itv, &window);
+ if (yuv_update & IVTV_YUV_UPDATE_HORIZONTAL)
+ ivtv_yuv_handle_horizontal(itv, &window);
+
+ if (yuv_update & IVTV_YUV_UPDATE_VERTICAL)
+ ivtv_yuv_handle_vertical(itv, &window);
+ }
memcpy(&itv->yuv_info.old_frame_info, &window, sizeof (itv->yuv_info.old_frame_info));
}
static void ivtv_yuv_init (struct ivtv *itv)
{
+ struct yuv_playback_info *yi = &itv->yuv_info;
+
IVTV_DEBUG_YUV("ivtv_yuv_init\n");
/* Take a snapshot of the current register settings */
- itv->yuv_info.reg_2834 = read_reg(0x02834);
- itv->yuv_info.reg_2838 = read_reg(0x02838);
- itv->yuv_info.reg_283c = read_reg(0x0283c);
- itv->yuv_info.reg_2840 = read_reg(0x02840);
- itv->yuv_info.reg_2844 = read_reg(0x02844);
- itv->yuv_info.reg_2848 = read_reg(0x02848);
- itv->yuv_info.reg_2854 = read_reg(0x02854);
- itv->yuv_info.reg_285c = read_reg(0x0285c);
- itv->yuv_info.reg_2864 = read_reg(0x02864);
- itv->yuv_info.reg_2870 = read_reg(0x02870);
- itv->yuv_info.reg_2874 = read_reg(0x02874);
- itv->yuv_info.reg_2898 = read_reg(0x02898);
- itv->yuv_info.reg_2890 = read_reg(0x02890);
-
- itv->yuv_info.reg_289c = read_reg(0x0289c);
- itv->yuv_info.reg_2918 = read_reg(0x02918);
- itv->yuv_info.reg_291c = read_reg(0x0291c);
- itv->yuv_info.reg_2920 = read_reg(0x02920);
- itv->yuv_info.reg_2924 = read_reg(0x02924);
- itv->yuv_info.reg_2928 = read_reg(0x02928);
- itv->yuv_info.reg_292c = read_reg(0x0292c);
- itv->yuv_info.reg_2930 = read_reg(0x02930);
- itv->yuv_info.reg_2934 = read_reg(0x02934);
- itv->yuv_info.reg_2938 = read_reg(0x02938);
- itv->yuv_info.reg_293c = read_reg(0x0293c);
- itv->yuv_info.reg_2940 = read_reg(0x02940);
- itv->yuv_info.reg_2944 = read_reg(0x02944);
- itv->yuv_info.reg_2948 = read_reg(0x02948);
- itv->yuv_info.reg_294c = read_reg(0x0294c);
- itv->yuv_info.reg_2950 = read_reg(0x02950);
- itv->yuv_info.reg_2954 = read_reg(0x02954);
- itv->yuv_info.reg_2958 = read_reg(0x02958);
- itv->yuv_info.reg_295c = read_reg(0x0295c);
- itv->yuv_info.reg_2960 = read_reg(0x02960);
- itv->yuv_info.reg_2964 = read_reg(0x02964);
- itv->yuv_info.reg_2968 = read_reg(0x02968);
- itv->yuv_info.reg_296c = read_reg(0x0296c);
- itv->yuv_info.reg_2970 = read_reg(0x02970);
-
- itv->yuv_info.v_filter_1 = -1;
- itv->yuv_info.v_filter_2 = -1;
- itv->yuv_info.h_filter = -1;
+ yi->reg_2834 = read_reg(0x02834);
+ yi->reg_2838 = read_reg(0x02838);
+ yi->reg_283c = read_reg(0x0283c);
+ yi->reg_2840 = read_reg(0x02840);
+ yi->reg_2844 = read_reg(0x02844);
+ yi->reg_2848 = read_reg(0x02848);
+ yi->reg_2854 = read_reg(0x02854);
+ yi->reg_285c = read_reg(0x0285c);
+ yi->reg_2864 = read_reg(0x02864);
+ yi->reg_2870 = read_reg(0x02870);
+ yi->reg_2874 = read_reg(0x02874);
+ yi->reg_2898 = read_reg(0x02898);
+ yi->reg_2890 = read_reg(0x02890);
+
+ yi->reg_289c = read_reg(0x0289c);
+ yi->reg_2918 = read_reg(0x02918);
+ yi->reg_291c = read_reg(0x0291c);
+ yi->reg_2920 = read_reg(0x02920);
+ yi->reg_2924 = read_reg(0x02924);
+ yi->reg_2928 = read_reg(0x02928);
+ yi->reg_292c = read_reg(0x0292c);
+ yi->reg_2930 = read_reg(0x02930);
+ yi->reg_2934 = read_reg(0x02934);
+ yi->reg_2938 = read_reg(0x02938);
+ yi->reg_293c = read_reg(0x0293c);
+ yi->reg_2940 = read_reg(0x02940);
+ yi->reg_2944 = read_reg(0x02944);
+ yi->reg_2948 = read_reg(0x02948);
+ yi->reg_294c = read_reg(0x0294c);
+ yi->reg_2950 = read_reg(0x02950);
+ yi->reg_2954 = read_reg(0x02954);
+ yi->reg_2958 = read_reg(0x02958);
+ yi->reg_295c = read_reg(0x0295c);
+ yi->reg_2960 = read_reg(0x02960);
+ yi->reg_2964 = read_reg(0x02964);
+ yi->reg_2968 = read_reg(0x02968);
+ yi->reg_296c = read_reg(0x0296c);
+ yi->reg_2970 = read_reg(0x02970);
+
+ yi->v_filter_1 = -1;
+ yi->v_filter_2 = -1;
+ yi->h_filter = -1;
/* Set some valid size info */
- itv->yuv_info.osd_x_offset = read_reg(0x02a04) & 0x00000FFF;
- itv->yuv_info.osd_y_offset = (read_reg(0x02a04) >> 16) & 0x00000FFF;
+ yi->osd_x_offset = read_reg(0x02a04) & 0x00000FFF;
+ yi->osd_y_offset = (read_reg(0x02a04) >> 16) & 0x00000FFF;
/* Bit 2 of reg 2878 indicates current decoder output format
0 : NTSC 1 : PAL */
if (read_reg(0x2878) & 4)
- itv->yuv_info.decode_height = 576;
+ yi->decode_height = 576;
else
- itv->yuv_info.decode_height = 480;
+ yi->decode_height = 480;
- /* If no visible size set, assume full size */
- if (!itv->yuv_info.osd_vis_w)
- itv->yuv_info.osd_vis_w = 720 - itv->yuv_info.osd_x_offset;
-
- if (!itv->yuv_info.osd_vis_h) {
- itv->yuv_info.osd_vis_h = itv->yuv_info.decode_height - itv->yuv_info.osd_y_offset;
+ if (!itv->osd_info) {
+ yi->osd_vis_w = 720 - yi->osd_x_offset;
+ yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
} else {
- /* If output video standard has changed, requested height may
- not be legal */
- if (itv->yuv_info.osd_vis_h + itv->yuv_info.osd_y_offset > itv->yuv_info.decode_height) {
- IVTV_DEBUG_WARN("Clipping yuv output - fb size (%d) exceeds video standard limit (%d)\n",
- itv->yuv_info.osd_vis_h + itv->yuv_info.osd_y_offset,
- itv->yuv_info.decode_height);
- itv->yuv_info.osd_vis_h = itv->yuv_info.decode_height - itv->yuv_info.osd_y_offset;
+ /* If no visible size set, assume full size */
+ if (!yi->osd_vis_w)
+ yi->osd_vis_w = 720 - yi->osd_x_offset;
+
+ if (!yi->osd_vis_h)
+ yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
+ else {
+ /* If output video standard has changed, requested height may
+ not be legal */
+ if (yi->osd_vis_h + yi->osd_y_offset > yi->decode_height) {
+ IVTV_DEBUG_WARN("Clipping yuv output - fb size (%d) exceeds video standard limit (%d)\n",
+ yi->osd_vis_h + yi->osd_y_offset,
+ yi->decode_height);
+ yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
+ }
}
}
/* We need a buffer for blanking when Y plane is offset - non-fatal if we can't get one */
- itv->yuv_info.blanking_ptr = kzalloc(720*16,GFP_KERNEL);
- if (itv->yuv_info.blanking_ptr) {
- itv->yuv_info.blanking_dmaptr = pci_map_single(itv->dev, itv->yuv_info.blanking_ptr, 720*16, PCI_DMA_TODEVICE);
- }
+ yi->blanking_ptr = kzalloc(720*16, GFP_KERNEL);
+ if (yi->blanking_ptr)
+ yi->blanking_dmaptr = pci_map_single(itv->dev, yi->blanking_ptr, 720*16, PCI_DMA_TODEVICE);
else {
- itv->yuv_info.blanking_dmaptr = 0;
- IVTV_DEBUG_WARN ("Failed to allocate yuv blanking buffer\n");
+ yi->blanking_dmaptr = 0;
+ IVTV_DEBUG_WARN("Failed to allocate yuv blanking buffer\n");
}
- IVTV_DEBUG_WARN("Enable video output\n");
- write_reg_sync(0x00108080, 0x2898);
-
/* Enable YUV decoder output */
write_reg_sync(0x01, IVTV_REG_VDM);
set_bit(IVTV_F_I_DECODING_YUV, &itv->i_flags);
- atomic_set(&itv->yuv_info.next_dma_frame,0);
+ atomic_set(&yi->next_dma_frame, 0);
}
int ivtv_yuv_prep_frame(struct ivtv *itv, struct ivtv_dma_frame *args)
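The ivtv-yuv.c hunk above is mostly mechanical: it caches &itv->yuv_info in a local pointer once and rewrites every access through it. A minimal userspace sketch of that aliasing pattern (struct and field names here are illustrative, not the driver's):

#include <stdio.h>

struct yuv_playback_info {
	int osd_x_offset;
	int osd_y_offset;
};

struct card {
	struct yuv_playback_info yuv_info;
};

static void init_yuv(struct card *itv)
{
	/* One dereference computed up front; every later access is
	 * shorter to read and identical in behaviour. */
	struct yuv_playback_info *yi = &itv->yuv_info;

	yi->osd_x_offset = 0;
	yi->osd_y_offset = 16;
}

int main(void)
{
	struct card c = { { -1, -1 } };

	init_yuv(&c);
	printf("%d %d\n", c.yuv_info.osd_x_offset, c.yuv_info.osd_y_offset);
	return 0;
}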
diff --git a/drivers/media/video/ivtv/ivtv-yuv.h b/drivers/media/video/ivtv/ivtv-yuv.h
index f7215eeca01..3b966f0a204 100644
--- a/drivers/media/video/ivtv/ivtv-yuv.h
+++ b/drivers/media/video/ivtv/ivtv-yuv.h
@@ -34,6 +34,7 @@
#define IVTV_YUV_UPDATE_HORIZONTAL 0x01
#define IVTV_YUV_UPDATE_VERTICAL 0x02
+#define IVTV_YUV_UPDATE_INVALID 0x04
extern const u32 yuv_offset[4];
diff --git a/drivers/media/video/ivtv/ivtvfb.c b/drivers/media/video/ivtv/ivtvfb.c
index 9684048fe56..52ffd154a3d 100644
--- a/drivers/media/video/ivtv/ivtvfb.c
+++ b/drivers/media/video/ivtv/ivtvfb.c
@@ -55,7 +55,6 @@
static int ivtvfb_card_id = -1;
static int ivtvfb_debug = 0;
static int osd_laced;
-static int osd_compat;
static int osd_depth;
static int osd_upper;
static int osd_left;
@@ -65,7 +64,6 @@ static int osd_xres;
module_param(ivtvfb_card_id, int, 0444);
module_param_named(debug,ivtvfb_debug, int, 0644);
module_param(osd_laced, bool, 0444);
-module_param(osd_compat, bool, 0444);
module_param(osd_depth, int, 0444);
module_param(osd_upper, int, 0444);
module_param(osd_left, int, 0444);
@@ -80,12 +78,6 @@ MODULE_PARM_DESC(debug,
"Debug level (bitmask). Default: errors only\n"
"\t\t\t(debug = 3 gives full debugging)");
-MODULE_PARM_DESC(osd_compat,
- "Compatibility mode - Display size is locked (use for old X drivers)\n"
- "\t\t\t0=off\n"
- "\t\t\t1=on\n"
- "\t\t\tdefault off");
-
/* Why upper, left, xres, yres, depth, laced ? To match terminology used
by fbset.
Why start at 1 for left & upper coordinate ? Because X doesn't allow 0 */
@@ -166,9 +158,6 @@ struct osd_info {
unsigned long fb_end_aligned_physaddr;
#endif
- /* Current osd mode */
- int osd_mode;
-
/* Store the buffer offset */
int set_osd_coords_x;
int set_osd_coords_y;
@@ -470,13 +459,11 @@ static int ivtvfb_set_var(struct ivtv *itv, struct fb_var_screeninfo *var)
IVTVFB_DEBUG_WARN("ivtvfb_set_var - Invalid bpp\n");
}
- /* Change osd mode if needed.
- Although rare, things can go wrong. The extra mode
- change seems to help... */
- if (osd_mode != -1 && osd_mode != oi->osd_mode) {
+ /* Set video mode. Although rare, the display can become scrambled even
+ if we don't change mode. Always 'bounce' to osd_mode via mode 0 */
+ if (osd_mode != -1) {
ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, 0);
ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, osd_mode);
- oi->osd_mode = osd_mode;
}
oi->bits_per_pixel = var->bits_per_pixel;
@@ -579,14 +566,6 @@ static int _ivtvfb_check_var(struct fb_var_screeninfo *var, struct ivtv *itv)
osd_height_limit = 480;
}
- /* Check the bits per pixel */
- if (osd_compat) {
- if (var->bits_per_pixel != 32) {
- IVTVFB_DEBUG_WARN("Invalid colour mode: %d\n", var->bits_per_pixel);
- return -EINVAL;
- }
- }
-
if (var->bits_per_pixel == 8 || var->bits_per_pixel == 32) {
var->transp.offset = 24;
var->transp.length = 8;
@@ -638,32 +617,20 @@ static int _ivtvfb_check_var(struct fb_var_screeninfo *var, struct ivtv *itv)
}
/* Check the resolution */
- if (osd_compat) {
- if (var->xres != oi->ivtvfb_defined.xres ||
- var->yres != oi->ivtvfb_defined.yres ||
- var->xres_virtual != oi->ivtvfb_defined.xres_virtual ||
- var->yres_virtual != oi->ivtvfb_defined.yres_virtual) {
- IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d (virtual %dx%d)\n",
- var->xres, var->yres, var->xres_virtual, var->yres_virtual);
- return -EINVAL;
- }
+ if (var->xres > IVTV_OSD_MAX_WIDTH || var->yres > osd_height_limit) {
+ IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d\n",
+ var->xres, var->yres);
+ return -EINVAL;
}
- else {
- if (var->xres > IVTV_OSD_MAX_WIDTH || var->yres > osd_height_limit) {
- IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d\n",
- var->xres, var->yres);
- return -EINVAL;
- }
- /* Max horizontal size is 1023 @ 32bpp, 2046 & 16bpp, 4092 @ 8bpp */
- if (var->xres_virtual > 4095 / (var->bits_per_pixel / 8) ||
- var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8) > oi->video_buffer_size ||
- var->xres_virtual < var->xres ||
- var->yres_virtual < var->yres) {
- IVTVFB_DEBUG_WARN("Invalid virtual resolution: %dx%d\n",
- var->xres_virtual, var->yres_virtual);
- return -EINVAL;
- }
+ /* Max horizontal size is 1023 @ 32bpp, 2046 & 16bpp, 4092 @ 8bpp */
+ if (var->xres_virtual > 4095 / (var->bits_per_pixel / 8) ||
+ var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8) > oi->video_buffer_size ||
+ var->xres_virtual < var->xres ||
+ var->yres_virtual < var->yres) {
+ IVTVFB_DEBUG_WARN("Invalid virtual resolution: %dx%d\n",
+ var->xres_virtual, var->yres_virtual);
+ return -EINVAL;
}
/* Some extra checks if in 8 bit mode */
@@ -877,17 +844,15 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
/* Color mode */
- if (osd_compat) osd_depth = 32;
- if (osd_depth != 8 && osd_depth != 16 && osd_depth != 32) osd_depth = 8;
+ if (osd_depth != 8 && osd_depth != 16 && osd_depth != 32)
+ osd_depth = 8;
oi->bits_per_pixel = osd_depth;
oi->bytes_per_pixel = oi->bits_per_pixel / 8;
- /* Invalidate current osd mode to force a mode switch later */
- oi->osd_mode = -1;
-
/* Horizontal size & position */
- if (osd_xres > 720) osd_xres = 720;
+ if (osd_xres > 720)
+ osd_xres = 720;
/* Must be a multiple of 4 for 8bpp & 2 for 16bpp */
if (osd_depth == 8)
@@ -895,10 +860,7 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
else if (osd_depth == 16)
osd_xres &= ~1;
- if (osd_xres)
- start_window.width = osd_xres;
- else
- start_window.width = osd_compat ? 720: 640;
+ start_window.width = osd_xres ? osd_xres : 640;
/* Check horizontal start (osd_left). */
if (osd_left && osd_left + start_window.width > 721) {
@@ -921,10 +883,7 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
if (osd_yres > max_height)
osd_yres = max_height;
- if (osd_yres)
- start_window.height = osd_yres;
- else
- start_window.height = osd_compat ? max_height : (itv->is_50hz ? 480 : 400);
+ start_window.height = osd_yres ? osd_yres : itv->is_50hz ? 480 : 400;
/* Check vertical start (osd_upper). */
if (osd_upper + start_window.height > max_height + 1) {
@@ -1127,10 +1086,6 @@ static int ivtvfb_init_card(struct ivtv *itv)
/* Enable the osd */
ivtvfb_blank(FB_BLANK_UNBLANK, &itv->osd_info->ivtvfb_info);
- /* Note if we're running in compatibility mode */
- if (osd_compat)
- IVTVFB_INFO("Running in compatibility mode. Display resize & mode change disabled\n");
-
/* Allocate DMA */
ivtv_udma_alloc(itv);
return 0;
@@ -1177,9 +1132,12 @@ static void ivtvfb_cleanup(void)
for (i = 0; i < ivtv_cards_active; i++) {
itv = ivtv_cards[i];
if (itv && (itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT) && itv->osd_info) {
+ if (unregister_framebuffer(&itv->osd_info->ivtvfb_info)) {
+ IVTVFB_WARN("Framebuffer %d is in use, cannot unload\n", i);
+ return;
+ }
IVTVFB_DEBUG_INFO("Unregister framebuffer %d\n", i);
ivtvfb_blank(FB_BLANK_POWERDOWN, &itv->osd_info->ivtvfb_info);
- unregister_framebuffer(&itv->osd_info->ivtvfb_info);
ivtvfb_release_buffers(itv);
itv->osd_video_pbase = 0;
}
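The ivtvfb_cleanup() hunk reorders tear-down so unregister_framebuffer() runs first and its return value is checked; buffers are only released once no userspace mapping can reach them. A sketch of that ordering, assuming a hypothetical driver-private release helper:

#include <linux/fb.h>
#include <linux/errno.h>

/* Sketch only: my_release_buffers() stands in for the driver's
 * real buffer/DMA tear-down. */
static void my_release_buffers(struct fb_info *info)
{
	/* free DMA areas, video memory, ... */
}

static int my_fb_cleanup(struct fb_info *info)
{
	if (unregister_framebuffer(info)) {
		/* Still open somewhere: freeing now would leave users
		 * with dangling mappings, so refuse to unload. */
		return -EBUSY;
	}
	my_release_buffers(info);	/* safe: device is gone */
	return 0;
}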
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 69283926a8d..c3116329043 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -1762,7 +1762,6 @@ static struct video_device meye_template = {
.owner = THIS_MODULE,
.name = "meye",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_MEYE,
.fops = &meye_fops,
.release = video_device_release,
.minor = -1,
diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index b8d4ac0d938..d55d5800efb 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c
@@ -4668,7 +4668,6 @@ static struct video_device vdev_template = {
.owner = THIS_MODULE,
.name = "OV511 USB Camera",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_OV511,
.fops = &ov511_fops,
.release = video_device_release,
.minor = -1,
diff --git a/drivers/media/video/planb.c b/drivers/media/video/planb.c
index 0ef73d9d584..ce4b2f9791e 100644
--- a/drivers/media/video/planb.c
+++ b/drivers/media/video/planb.c
@@ -2013,7 +2013,6 @@ static struct video_device planb_template=
.owner = THIS_MODULE,
.name = PLANB_DEVICE_NAME,
.type = VID_TYPE_OVERLAY,
- .hardware = VID_HARDWARE_PLANB,
.open = planb_open,
.close = planb_close,
.read = planb_read,
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index b5a67f0dd19..6820c2aabd3 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -895,7 +895,6 @@ static struct video_device pms_template=
.owner = THIS_MODULE,
.name = "Mediavision PMS",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_PMS,
.fops = &pms_fops,
};
diff --git a/drivers/media/video/pvrusb2/pvrusb2-encoder.c b/drivers/media/video/pvrusb2/pvrusb2-encoder.c
index 20b614436d2..205087a3e13 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-encoder.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-encoder.c
@@ -209,6 +209,11 @@ static int pvr2_encoder_cmd(void *ctxt,
LOCK_TAKE(hdw->ctl_lock); do {
+ if (!hdw->flag_encoder_ok) {
+ ret = -EIO;
+ break;
+ }
+
retry_flag = 0;
try_count++;
ret = 0;
@@ -273,6 +278,7 @@ static int pvr2_encoder_cmd(void *ctxt,
ret = -EBUSY;
}
if (ret) {
+ hdw->flag_encoder_ok = 0;
pvr2_trace(
PVR2_TRACE_ERROR_LEGS,
"Giving up on command."
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
index 985d9ae7f5e..f873994b088 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
@@ -225,11 +225,12 @@ struct pvr2_hdw {
unsigned int cmd_debug_write_len; //
unsigned int cmd_debug_read_len; //
- int flag_ok; // device in known good state
- int flag_disconnected; // flag_ok == 0 due to disconnect
- int flag_init_ok; // true if structure is fully initialized
- int flag_streaming_enabled; // true if streaming should be on
- int fw1_state; // current situation with fw1
+ int flag_ok; /* device in known good state */
+ int flag_disconnected; /* flag_ok == 0 due to disconnect */
+ int flag_init_ok; /* true if structure is fully initialized */
+ int flag_streaming_enabled; /* true if streaming should be on */
+ int fw1_state; /* current situation with fw1 */
+ int flag_encoder_ok; /* True if encoder is healthy */
int flag_decoder_is_tuned;
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
index 27b12b4b5c8..402c5948825 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
@@ -1248,6 +1248,8 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw)
time we configure the encoder, then we'll fully configure it. */
hdw->enc_cur_valid = 0;
+ hdw->flag_encoder_ok = 0;
+
/* First prepare firmware loading */
ret |= pvr2_write_register(hdw, 0x0048, 0xffffffff); /*interrupt mask*/
ret |= pvr2_hdw_gpio_chg_dir(hdw,0xffffffff,0x00000088); /*gpio dir*/
@@ -1346,6 +1348,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw)
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
"firmware2 upload post-proc failure");
} else {
+ hdw->flag_encoder_ok = !0;
hdw->subsys_enabled_mask |= (1<<PVR2_SUBSYS_B_ENC_FIRMWARE);
}
return ret;
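flag_encoder_ok implements a simple health latch: cleared before firmware upload, set only after the upload post-processing succeeds, checked before every encoder command, and cleared again when a command gives up. A compact sketch of the pattern (all names here are illustrative, not the pvrusb2 API):

#include <linux/errno.h>

struct enc_state {
	int encoder_ok;		/* health latch */
};

/* Hypothetical low-level helpers, for the sketch only. */
static int hw_send_cmd(struct enc_state *s) { return 0; }
static int hw_upload_fw(struct enc_state *s) { return 0; }

static int enc_cmd(struct enc_state *s)
{
	if (!s->encoder_ok)
		return -EIO;		/* don't poke a known-bad encoder */
	if (hw_send_cmd(s)) {
		s->encoder_ok = 0;	/* latch the failure */
		return -EIO;
	}
	return 0;
}

static int enc_load_firmware(struct enc_state *s)
{
	s->encoder_ok = 0;		/* pessimistic until proven good */
	if (hw_upload_fw(s))
		return -EIO;
	s->encoder_ok = 1;		/* only a fresh upload clears the latch */
	return 0;
}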
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index 4563b3df8a0..7a596ea7cfe 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -1121,15 +1121,12 @@ static const struct file_operations vdev_fops = {
};
-#define VID_HARDWARE_PVRUSB2 38 /* FIXME : need a good value */
-
static struct video_device vdev_template = {
.owner = THIS_MODULE,
.type = VID_TYPE_CAPTURE | VID_TYPE_TUNER,
.type2 = (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VBI_CAPTURE
| V4L2_CAP_TUNER | V4L2_CAP_AUDIO
| V4L2_CAP_READWRITE),
- .hardware = VID_HARDWARE_PVRUSB2,
.fops = &vdev_fops,
};
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index 950da254214..7300ace8f44 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -166,7 +166,6 @@ static struct video_device pwc_template = {
.owner = THIS_MODULE,
.name = "Philips Webcam", /* Filled in later */
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_PWC,
.release = video_device_release,
.fops = &pwc_fops,
.minor = -1,
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index 57f1f5d409e..002e70a33a4 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -71,7 +71,6 @@ static const struct v4l2_format v4l2_format_table[] =
struct saa6752hs_state {
struct i2c_client client;
- struct v4l2_mpeg_compression old_params;
struct saa6752hs_mpeg_params params;
enum saa6752hs_videoformat video_format;
v4l2_std_id standard;
@@ -161,35 +160,6 @@ static struct saa6752hs_mpeg_params param_defaults =
.au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_256K,
};
-static struct v4l2_mpeg_compression old_param_defaults =
-{
- .st_type = V4L2_MPEG_TS_2,
- .st_bitrate = {
- .mode = V4L2_BITRATE_CBR,
- .target = 7000,
- },
-
- .ts_pid_pmt = 16,
- .ts_pid_video = 260,
- .ts_pid_audio = 256,
- .ts_pid_pcr = 259,
-
- .vi_type = V4L2_MPEG_VI_2,
- .vi_aspect_ratio = V4L2_MPEG_ASPECT_4_3,
- .vi_bitrate = {
- .mode = V4L2_BITRATE_VBR,
- .target = 4000,
- .max = 6000,
- },
-
- .au_type = V4L2_MPEG_AU_2_II,
- .au_bitrate = {
- .mode = V4L2_BITRATE_CBR,
- .target = 256,
- },
-
-};
-
/* ---------------------------------------------------------------------- */
static int saa6752hs_chip_command(struct i2c_client* client,
@@ -362,74 +332,6 @@ static void saa6752hs_set_subsampling(struct i2c_client* client,
}
-static void saa6752hs_old_set_params(struct i2c_client* client,
- struct v4l2_mpeg_compression* params)
-{
- struct saa6752hs_state *h = i2c_get_clientdata(client);
-
- /* check PIDs */
- if (params->ts_pid_pmt <= MPEG_PID_MAX) {
- h->old_params.ts_pid_pmt = params->ts_pid_pmt;
- h->params.ts_pid_pmt = params->ts_pid_pmt;
- }
- if (params->ts_pid_pcr <= MPEG_PID_MAX) {
- h->old_params.ts_pid_pcr = params->ts_pid_pcr;
- h->params.ts_pid_pcr = params->ts_pid_pcr;
- }
- if (params->ts_pid_video <= MPEG_PID_MAX) {
- h->old_params.ts_pid_video = params->ts_pid_video;
- h->params.ts_pid_video = params->ts_pid_video;
- }
- if (params->ts_pid_audio <= MPEG_PID_MAX) {
- h->old_params.ts_pid_audio = params->ts_pid_audio;
- h->params.ts_pid_audio = params->ts_pid_audio;
- }
-
- /* check bitrate parameters */
- if ((params->vi_bitrate.mode == V4L2_BITRATE_CBR) ||
- (params->vi_bitrate.mode == V4L2_BITRATE_VBR)) {
- h->old_params.vi_bitrate.mode = params->vi_bitrate.mode;
- h->params.vi_bitrate_mode = (params->vi_bitrate.mode == V4L2_BITRATE_VBR) ?
- V4L2_MPEG_VIDEO_BITRATE_MODE_VBR : V4L2_MPEG_VIDEO_BITRATE_MODE_CBR;
- }
- if (params->vi_bitrate.mode != V4L2_BITRATE_NONE)
- h->old_params.st_bitrate.target = params->st_bitrate.target;
- if (params->vi_bitrate.mode != V4L2_BITRATE_NONE)
- h->old_params.vi_bitrate.target = params->vi_bitrate.target;
- if (params->vi_bitrate.mode == V4L2_BITRATE_VBR)
- h->old_params.vi_bitrate.max = params->vi_bitrate.max;
- if (params->au_bitrate.mode != V4L2_BITRATE_NONE)
- h->old_params.au_bitrate.target = params->au_bitrate.target;
-
- /* aspect ratio */
- if (params->vi_aspect_ratio == V4L2_MPEG_ASPECT_4_3 ||
- params->vi_aspect_ratio == V4L2_MPEG_ASPECT_16_9) {
- h->old_params.vi_aspect_ratio = params->vi_aspect_ratio;
- if (params->vi_aspect_ratio == V4L2_MPEG_ASPECT_4_3)
- h->params.vi_aspect = V4L2_MPEG_VIDEO_ASPECT_4x3;
- else
- h->params.vi_aspect = V4L2_MPEG_VIDEO_ASPECT_16x9;
- }
-
- /* range checks */
- if (h->old_params.st_bitrate.target > MPEG_TOTAL_TARGET_BITRATE_MAX)
- h->old_params.st_bitrate.target = MPEG_TOTAL_TARGET_BITRATE_MAX;
- if (h->old_params.vi_bitrate.target > MPEG_VIDEO_TARGET_BITRATE_MAX)
- h->old_params.vi_bitrate.target = MPEG_VIDEO_TARGET_BITRATE_MAX;
- if (h->old_params.vi_bitrate.max > MPEG_VIDEO_MAX_BITRATE_MAX)
- h->old_params.vi_bitrate.max = MPEG_VIDEO_MAX_BITRATE_MAX;
- h->params.vi_bitrate = params->vi_bitrate.target;
- h->params.vi_bitrate_peak = params->vi_bitrate.max;
- if (h->old_params.au_bitrate.target <= 256) {
- h->old_params.au_bitrate.target = 256;
- h->params.au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_256K;
- }
- else {
- h->old_params.au_bitrate.target = 384;
- h->params.au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_384K;
- }
-}
-
static int handle_ctrl(struct saa6752hs_mpeg_params *params,
struct v4l2_ext_control *ctrl, unsigned int cmd)
{
@@ -697,7 +599,6 @@ static int saa6752hs_attach(struct i2c_adapter *adap, int addr, int kind)
return -ENOMEM;
h->client = client_template;
h->params = param_defaults;
- h->old_params = old_param_defaults;
h->client.adapter = adap;
h->client.addr = addr;
@@ -734,23 +635,11 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg)
{
struct saa6752hs_state *h = i2c_get_clientdata(client);
struct v4l2_ext_controls *ctrls = arg;
- struct v4l2_mpeg_compression *old_params = arg;
struct saa6752hs_mpeg_params params;
int err = 0;
int i;
switch (cmd) {
- case VIDIOC_S_MPEGCOMP:
- if (NULL == old_params) {
- /* apply settings and start encoder */
- saa6752hs_init(client);
- break;
- }
- saa6752hs_old_set_params(client, old_params);
- /* fall through */
- case VIDIOC_G_MPEGCOMP:
- *old_params = h->old_params;
- break;
case VIDIOC_S_EXT_CTRLS:
if (ctrls->ctrl_class != V4L2_CTRL_CLASS_MPEG)
return -EINVAL;
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 1a4a24471f2..a499eea379e 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -429,7 +429,7 @@ int saa7134_set_dmabits(struct saa7134_dev *dev)
assert_spin_locked(&dev->slock);
- if (dev->inresume)
+ if (dev->insuspend)
return 0;
/* video capture -- dma 0 + video task A */
@@ -563,6 +563,9 @@ static irqreturn_t saa7134_irq(int irq, void *dev_id)
unsigned long report,status;
int loop, handled = 0;
+ if (dev->insuspend)
+ goto out;
+
for (loop = 0; loop < 10; loop++) {
report = saa_readl(SAA7134_IRQ_REPORT);
status = saa_readl(SAA7134_IRQ_STATUS);
@@ -1163,6 +1166,7 @@ static void __devexit saa7134_finidev(struct pci_dev *pci_dev)
kfree(dev);
}
+#ifdef CONFIG_PM
static int saa7134_suspend(struct pci_dev *pci_dev , pm_message_t state)
{
@@ -1176,6 +1180,19 @@ static int saa7134_suspend(struct pci_dev *pci_dev , pm_message_t state)
saa_writel(SAA7134_IRQ2, 0);
saa_writel(SAA7134_MAIN_CTRL, 0);
+ synchronize_irq(pci_dev->irq);
+ dev->insuspend = 1;
+
+	/* Disable timeout timers - if we have active buffers, we will
+	   fill them on resume */
+
+ del_timer(&dev->video_q.timeout);
+ del_timer(&dev->vbi_q.timeout);
+ del_timer(&dev->ts_q.timeout);
+
+ if (dev->remote)
+ saa7134_ir_stop(dev);
+
pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state));
pci_save_state(pci_dev);
@@ -1194,24 +1211,27 @@ static int saa7134_resume(struct pci_dev *pci_dev)
	/* Do things that are done in saa7134_initdev,
	   except for initializing memory structures. */
- dev->inresume = 1;
saa7134_board_init1(dev);
+ /* saa7134_hwinit1 */
if (saa7134_boards[dev->board].video_out)
saa7134_videoport_init(dev);
-
if (card_has_mpeg(dev))
saa7134_ts_init_hw(dev);
-
+ if (dev->remote)
+ saa7134_ir_start(dev, dev->remote);
saa7134_hw_enable1(dev);
- saa7134_set_decoder(dev);
- saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &dev->tvnorm->id);
+
+
saa7134_board_init2(dev);
- saa7134_hw_enable2(dev);
+	/* saa7134_hwinit2 */
+ saa7134_set_tvnorm_hw(dev);
saa7134_tvaudio_setmute(dev);
saa7134_tvaudio_setvolume(dev, dev->ctl_volume);
+ saa7134_tvaudio_do_scan(dev);
saa7134_enable_i2s(dev);
+ saa7134_hw_enable2(dev);
/*resume unfinished buffer(s)*/
spin_lock_irqsave(&dev->slock, flags);
@@ -1219,13 +1239,19 @@ static int saa7134_resume(struct pci_dev *pci_dev)
saa7134_buffer_requeue(dev, &dev->vbi_q);
saa7134_buffer_requeue(dev, &dev->ts_q);
+	/* FIXME: Disable DMA audio sound - temporary, until proper
+	   support is implemented */
+
+ dev->dmasound.dma_running = 0;
+
/* start DMA now*/
- dev->inresume = 0;
+ dev->insuspend = 0;
saa7134_set_dmabits(dev);
spin_unlock_irqrestore(&dev->slock, flags);
return 0;
}
+#endif
/* ----------------------------------------------------------- */
@@ -1262,8 +1288,10 @@ static struct pci_driver saa7134_pci_driver = {
.id_table = saa7134_pci_tbl,
.probe = saa7134_initdev,
.remove = __devexit_p(saa7134_finidev),
+#ifdef CONFIG_PM
.suspend = saa7134_suspend,
.resume = saa7134_resume
+#endif
};
static int saa7134_init(void)
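Both new callbacks and their entries in the pci_driver are wrapped in CONFIG_PM, so kernels built without power management compile none of this code. The skeleton, reduced to its moving parts (driver names are placeholders):

#include <linux/pci.h>

#ifdef CONFIG_PM
static int my_suspend(struct pci_dev *pci_dev, pm_message_t state)
{
	/* quiesce the device: mask IRQs, stop timers, ... */
	pci_save_state(pci_dev);
	pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state));
	return 0;
}

static int my_resume(struct pci_dev *pci_dev)
{
	pci_set_power_state(pci_dev, PCI_D0);
	pci_restore_state(pci_dev);
	/* reprogram the hardware, requeue buffers, ... */
	return 0;
}
#endif

static struct pci_driver my_pci_driver = {
	.name    = "my-driver",
	/* .id_table, .probe, .remove elided */
#ifdef CONFIG_PM
	.suspend = my_suspend,
	.resume  = my_resume,
#endif
};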
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 34ca874dd7f..75d0c5bf46d 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -284,17 +284,6 @@ static int ts_do_ioctl(struct inode *inode, struct file *file,
case VIDIOC_S_CTRL:
return saa7134_common_ioctl(dev, cmd, arg);
- case VIDIOC_S_MPEGCOMP:
- printk(KERN_WARNING "VIDIOC_S_MPEGCOMP is obsolete. "
- "Replace with VIDIOC_S_EXT_CTRLS!");
- saa7134_i2c_call_clients(dev, VIDIOC_S_MPEGCOMP, arg);
- ts_init_encoder(dev);
- return 0;
- case VIDIOC_G_MPEGCOMP:
- printk(KERN_WARNING "VIDIOC_G_MPEGCOMP is obsolete. "
- "Replace with VIDIOC_G_EXT_CTRLS!");
- saa7134_i2c_call_clients(dev, VIDIOC_G_MPEGCOMP, arg);
- return 0;
case VIDIOC_S_EXT_CTRLS:
/* count == 0 is abused in saa6752hs.c, so that special
case is handled here explicitly. */
@@ -342,7 +331,6 @@ static struct video_device saa7134_empress_template =
.name = "saa7134-empress",
.type = 0 /* FIXME */,
.type2 = 0 /* FIXME */,
- .hardware = 0,
.fops = &ts_fops,
.minor = -1,
};
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 80d2644f765..3abaa1b8ac9 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -44,6 +44,14 @@ module_param(ir_rc5_remote_gap, int, 0644);
static int ir_rc5_key_timeout = 115;
module_param(ir_rc5_key_timeout, int, 0644);
+static int repeat_delay = 500;
+module_param(repeat_delay, int, 0644);
+MODULE_PARM_DESC(repeat_delay, "delay before key repeat starts");
+static int repeat_period = 33;
+module_param(repeat_period, int, 0644);
+MODULE_PARM_DESC(repeat_period, "repeat period between "
+		 "keypresses when key is down");
+
#define dprintk(fmt, arg...) if (ir_debug) \
printk(KERN_DEBUG "%s/ir: " fmt, dev->name , ## arg)
#define i2cdprintk(fmt, arg...) if (ir_debug) \
@@ -59,6 +67,13 @@ static int build_key(struct saa7134_dev *dev)
struct card_ir *ir = dev->remote;
u32 gpio, data;
+ /* here comes the additional handshake steps for some cards */
+ switch (dev->board) {
+ case SAA7134_BOARD_GOTVIEW_7135:
+ saa_setb(SAA7134_GPIO_GPSTATUS1, 0x80);
+ saa_clearb(SAA7134_GPIO_GPSTATUS1, 0x80);
+ break;
+ }
/* rising SAA7134_GPIO_GPRESCAN reads the status */
saa_clearb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN);
saa_setb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN);
@@ -159,7 +174,7 @@ static void saa7134_input_timer(unsigned long data)
mod_timer(&ir->timer, jiffies + msecs_to_jiffies(ir->polling));
}
-static void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir)
+void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir)
{
if (ir->polling) {
setup_timer(&ir->timer, saa7134_input_timer,
@@ -182,7 +197,7 @@ static void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir)
}
}
-static void saa7134_ir_stop(struct saa7134_dev *dev)
+void saa7134_ir_stop(struct saa7134_dev *dev)
{
if (dev->remote->polling)
del_timer_sync(&dev->remote->timer);
@@ -285,10 +300,10 @@ int saa7134_input_init1(struct saa7134_dev *dev)
break;
case SAA7134_BOARD_GOTVIEW_7135:
ir_codes = ir_codes_gotview7135;
- mask_keycode = 0x0003EC;
- mask_keyup = 0x008000;
+ mask_keycode = 0x0003CC;
mask_keydown = 0x000010;
- polling = 50; // ms
+ polling = 5; /* ms */
+ saa_setb(SAA7134_GPIO_GPMODE1, 0x80);
break;
case SAA7134_BOARD_VIDEOMATE_TV_PVR:
case SAA7134_BOARD_VIDEOMATE_GOLD_PLUS:
@@ -386,6 +401,10 @@ int saa7134_input_init1(struct saa7134_dev *dev)
if (err)
goto err_out_stop;
+ /* the remote isn't as bouncy as a keyboard */
+ ir->dev->rep[REP_DELAY] = repeat_delay;
+ ir->dev->rep[REP_PERIOD] = repeat_period;
+
return 0;
err_out_stop:
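The rep[] assignments above override the input core's default autorepeat timing per device. A sketch of the knob, using the new module parameters' defaults (500 ms delay, 33 ms period, roughly 30 repeats a second):

#include <linux/input.h>

static void tune_ir_autorepeat(struct input_dev *dev)
{
	/* Values are in milliseconds; the input core applies them
	 * when EV_REP is set on the device. */
	dev->rep[REP_DELAY]  = 500;	/* before the first repeat */
	dev->rep[REP_PERIOD] = 33;	/* between repeats */
}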
diff --git a/drivers/media/video/saa7134/saa7134-tvaudio.c b/drivers/media/video/saa7134/saa7134-tvaudio.c
index 1b9e39a5ea4..f8e304c7623 100644
--- a/drivers/media/video/saa7134/saa7134-tvaudio.c
+++ b/drivers/media/video/saa7134/saa7134-tvaudio.c
@@ -27,6 +27,7 @@
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/freezer.h>
#include <asm/div64.h>
#include "saa7134-reg.h"
@@ -231,7 +232,7 @@ static void mute_input_7134(struct saa7134_dev *dev)
}
if (dev->hw_mute == mute &&
- dev->hw_input == in && !dev->inresume) {
+ dev->hw_input == in && !dev->insuspend) {
dprintk("mute/input: nothing to do [mute=%d,input=%s]\n",
mute,in->name);
return;
@@ -502,13 +503,17 @@ static int tvaudio_thread(void *data)
unsigned int i, audio, nscan;
int max1,max2,carrier,rx,mode,lastmode,default_carrier;
- allow_signal(SIGTERM);
+
+ set_freezable();
+
for (;;) {
tvaudio_sleep(dev,-1);
- if (kthread_should_stop() || signal_pending(current))
+ if (kthread_should_stop())
goto done;
restart:
+ try_to_freeze();
+
dev->thread.scan1 = dev->thread.scan2;
dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1);
dev->tvaudio = NULL;
@@ -612,9 +617,12 @@ static int tvaudio_thread(void *data)
lastmode = 42;
for (;;) {
+
+ try_to_freeze();
+
if (tvaudio_sleep(dev,5000))
goto restart;
- if (kthread_should_stop() || signal_pending(current))
+ if (kthread_should_stop())
break;
if (UNSET == dev->thread.mode) {
rx = tvaudio_getstereo(dev,&tvaudio[i]);
@@ -630,6 +638,7 @@ static int tvaudio_thread(void *data)
}
done:
+ dev->thread.stopped = 1;
return 0;
}
@@ -777,7 +786,8 @@ static int tvaudio_thread_ddep(void *data)
struct saa7134_dev *dev = data;
u32 value, norms, clock;
- allow_signal(SIGTERM);
+
+ set_freezable();
clock = saa7134_boards[dev->board].audio_clock;
if (UNSET != audio_clock_override)
@@ -790,10 +800,13 @@ static int tvaudio_thread_ddep(void *data)
for (;;) {
tvaudio_sleep(dev,-1);
- if (kthread_should_stop() || signal_pending(current))
+ if (kthread_should_stop())
goto done;
restart:
+
+ try_to_freeze();
+
dev->thread.scan1 = dev->thread.scan2;
dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1);
@@ -870,6 +883,7 @@ static int tvaudio_thread_ddep(void *data)
}
done:
+ dev->thread.stopped = 1;
return 0;
}
@@ -997,7 +1011,7 @@ int saa7134_tvaudio_init2(struct saa7134_dev *dev)
int saa7134_tvaudio_fini(struct saa7134_dev *dev)
{
/* shutdown tvaudio thread */
- if (dev->thread.thread)
+ if (dev->thread.thread && !dev->thread.stopped)
kthread_stop(dev->thread.thread);
saa_andorb(SAA7134_ANALOG_IO_SELECT, 0x07, 0x00); /* LINE1 */
@@ -1013,7 +1027,9 @@ int saa7134_tvaudio_do_scan(struct saa7134_dev *dev)
} else if (dev->thread.thread) {
dev->thread.mode = UNSET;
dev->thread.scan2++;
- wake_up_process(dev->thread.thread);
+
+ if (!dev->insuspend && !dev->thread.stopped)
+ wake_up_process(dev->thread.thread);
} else {
dev->automute = 0;
saa7134_tvaudio_setmute(dev);
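The tvaudio threads drop the old allow_signal(SIGTERM)/signal_pending() scheme for the freezer API: set_freezable() opts the kthread into suspend freezing and try_to_freeze() parks it at safe points, with kthread_should_stop() as the only exit condition. The resulting loop shape, as a sketch:

#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/sched.h>

static int my_worker(void *data)
{
	set_freezable();	/* allow the suspend freezer to stop us */

	while (!kthread_should_stop()) {
		try_to_freeze();	/* parks here across suspend/resume */

		/* ... one unit of work ... */

		schedule_timeout_interruptible(HZ);
	}
	return 0;
}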
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index 471b92793c1..3b9ffb4b648 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -560,15 +560,8 @@ void set_tvnorm(struct saa7134_dev *dev, struct saa7134_tvnorm *norm)
dev->crop_current = dev->crop_defrect;
- saa7134_set_decoder(dev);
+ saa7134_set_tvnorm_hw(dev);
- if (card_in(dev, dev->ctl_input).tv) {
- if ((card(dev).tuner_type == TUNER_PHILIPS_TDA8290)
- && ((card(dev).tuner_config == 1)
- || (card(dev).tuner_config == 2)))
- saa7134_set_gpio(dev, 22, 5);
- saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &norm->id);
- }
}
static void video_mux(struct saa7134_dev *dev, int input)
@@ -579,7 +572,8 @@ static void video_mux(struct saa7134_dev *dev, int input)
saa7134_tvaudio_setinput(dev, &card_in(dev, input));
}
-void saa7134_set_decoder(struct saa7134_dev *dev)
+
+static void saa7134_set_decoder(struct saa7134_dev *dev)
{
int luma_control, sync_control, mux;
@@ -630,6 +624,19 @@ void saa7134_set_decoder(struct saa7134_dev *dev)
saa_writeb(SAA7134_RAW_DATA_OFFSET, 0x80);
}
+void saa7134_set_tvnorm_hw(struct saa7134_dev *dev)
+{
+ saa7134_set_decoder(dev);
+
+ if (card_in(dev, dev->ctl_input).tv) {
+ if ((card(dev).tuner_type == TUNER_PHILIPS_TDA8290)
+ && ((card(dev).tuner_config == 1)
+ || (card(dev).tuner_config == 2)))
+ saa7134_set_gpio(dev, 22, 5);
+ saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &dev->tvnorm->id);
+ }
+}
+
static void set_h_prescale(struct saa7134_dev *dev, int task, int prescale)
{
static const struct {
@@ -2352,7 +2359,6 @@ struct video_device saa7134_video_template =
.name = "saa7134-video",
.type = VID_TYPE_CAPTURE|VID_TYPE_TUNER|
VID_TYPE_CLIPPING|VID_TYPE_SCALES,
- .hardware = 0,
.fops = &video_fops,
.minor = -1,
};
@@ -2361,7 +2367,6 @@ struct video_device saa7134_vbi_template =
{
.name = "saa7134-vbi",
.type = VID_TYPE_TUNER|VID_TYPE_TELETEXT,
- .hardware = 0,
.fops = &video_fops,
.minor = -1,
};
@@ -2370,7 +2375,6 @@ struct video_device saa7134_radio_template =
{
.name = "saa7134-radio",
.type = VID_TYPE_TUNER,
- .hardware = 0,
.fops = &radio_fops,
.minor = -1,
};
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index 28ec6804bd5..66a390c321a 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -333,6 +333,7 @@ struct saa7134_thread {
unsigned int scan1;
unsigned int scan2;
unsigned int mode;
+ unsigned int stopped;
};
/* buffer for one video/vbi/ts frame */
@@ -524,7 +525,7 @@ struct saa7134_dev {
unsigned int hw_mute;
int last_carrier;
int nosignal;
- unsigned int inresume;
+ unsigned int insuspend;
/* SAA7134_MPEG_* */
struct saa7134_ts ts;
@@ -632,7 +633,7 @@ extern struct video_device saa7134_radio_template;
void set_tvnorm(struct saa7134_dev *dev, struct saa7134_tvnorm *norm);
int saa7134_videoport_init(struct saa7134_dev *dev);
-void saa7134_set_decoder(struct saa7134_dev *dev);
+void saa7134_set_tvnorm_hw(struct saa7134_dev *dev);
int saa7134_common_ioctl(struct saa7134_dev *dev,
unsigned int cmd, void *arg);
@@ -706,6 +707,8 @@ int saa7134_input_init1(struct saa7134_dev *dev);
void saa7134_input_fini(struct saa7134_dev *dev);
void saa7134_input_irq(struct saa7134_dev *dev);
void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir);
+void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir);
+void saa7134_ir_stop(struct saa7134_dev *dev);
/*
diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c
index 93fb04ed99a..d5d7d6cf734 100644
--- a/drivers/media/video/se401.c
+++ b/drivers/media/video/se401.c
@@ -1231,7 +1231,6 @@ static struct video_device se401_template = {
.owner = THIS_MODULE,
.name = "se401 USB camera",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_SE401,
.fops = &se401_fops,
};
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 6991e06f765..511847912c4 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -3319,7 +3319,6 @@ sn9c102_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
strcpy(cam->v4ldev->name, "SN9C1xx PC Camera");
cam->v4ldev->owner = THIS_MODULE;
cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
- cam->v4ldev->hardware = 0;
cam->v4ldev->fops = &sn9c102_fops;
cam->v4ldev->minor = video_nr[dev_nr];
cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c
index eb220461ac7..3fb85af5d1f 100644
--- a/drivers/media/video/stradis.c
+++ b/drivers/media/video/stradis.c
@@ -1917,7 +1917,6 @@ static const struct file_operations saa_fops = {
static struct video_device saa_template = {
.name = "SAA7146A",
.type = VID_TYPE_CAPTURE | VID_TYPE_OVERLAY,
- .hardware = VID_HARDWARE_SAA7146,
.fops = &saa_fops,
.minor = -1,
};
diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c
index 9e009a7ab86..afc32aa56fd 100644
--- a/drivers/media/video/stv680.c
+++ b/drivers/media/video/stv680.c
@@ -1398,7 +1398,6 @@ static struct video_device stv680_template = {
.owner = THIS_MODULE,
.name = "STV0680 USB camera",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_SE401,
.fops = &stv680_fops,
.release = video_device_release,
.minor = -1,
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 94843086cda..6a777604f07 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -113,7 +113,7 @@ static void fe_standby(struct tuner *t)
static int fe_has_signal(struct tuner *t)
{
struct dvb_tuner_ops *fe_tuner_ops = &t->fe.ops.tuner_ops;
- u16 strength;
+ u16 strength = 0;
if (fe_tuner_ops->get_rf_strength)
fe_tuner_ops->get_rf_strength(&t->fe, &strength);
diff --git a/drivers/media/video/usbvideo/usbvideo.c b/drivers/media/video/usbvideo/usbvideo.c
index 37ce36b9e58..fb434b5602a 100644
--- a/drivers/media/video/usbvideo/usbvideo.c
+++ b/drivers/media/video/usbvideo/usbvideo.c
@@ -952,7 +952,6 @@ static const struct file_operations usbvideo_fops = {
static const struct video_device usbvideo_template = {
.owner = THIS_MODULE,
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_CPIA,
.fops = &usbvideo_fops,
};
diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c
index db3c9e3deb2..da1ba021110 100644
--- a/drivers/media/video/usbvideo/vicam.c
+++ b/drivers/media/video/usbvideo/vicam.c
@@ -1074,7 +1074,6 @@ static struct video_device vicam_template = {
.owner = THIS_MODULE,
.name = "ViCam-based USB Camera",
.type = VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_VICAM,
.fops = &vicam_fops,
.minor = -1,
};
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index e2f3c01cfa1..36e689fa16c 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -1400,7 +1400,6 @@ static const struct file_operations usbvision_fops = {
static struct video_device usbvision_video_template = {
.owner = THIS_MODULE,
.type = VID_TYPE_TUNER | VID_TYPE_CAPTURE,
- .hardware = VID_HARDWARE_USBVISION,
.fops = &usbvision_fops,
.name = "usbvision-video",
.release = video_device_release,
@@ -1455,7 +1454,6 @@ static struct video_device usbvision_radio_template=
{
.owner = THIS_MODULE,
.type = VID_TYPE_TUNER,
- .hardware = VID_HARDWARE_USBVISION,
.fops = &usbvision_radio_fops,
.name = "usbvision-radio",
.release = video_device_release,
@@ -1492,7 +1490,6 @@ static struct video_device usbvision_vbi_template=
{
.owner = THIS_MODULE,
.type = VID_TYPE_TUNER,
- .hardware = VID_HARDWARE_USBVISION,
.fops = &usbvision_vbi_fops,
.release = video_device_release,
.name = "usbvision-vbi",
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index 321249240d0..1141b4bf41c 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -317,8 +317,6 @@ static const char *v4l2_ioctls[] = {
[_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT",
[_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT",
[_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT",
- [_IOC_NR(VIDIOC_G_MPEGCOMP)] = "VIDIOC_G_MPEGCOMP",
- [_IOC_NR(VIDIOC_S_MPEGCOMP)] = "VIDIOC_S_MPEGCOMP",
[_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS",
[_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF",
[_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF",
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 5599a36490f..89a44f16f0b 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -967,6 +967,7 @@ int videobuf_cgmbuf(struct videobuf_queue *q,
return 0;
}
+EXPORT_SYMBOL_GPL(videobuf_cgmbuf);
#endif
/* --------------------------------------------------------------------- */
@@ -985,7 +986,6 @@ EXPORT_SYMBOL_GPL(videobuf_reqbufs);
EXPORT_SYMBOL_GPL(videobuf_querybuf);
EXPORT_SYMBOL_GPL(videobuf_qbuf);
EXPORT_SYMBOL_GPL(videobuf_dqbuf);
-EXPORT_SYMBOL_GPL(videobuf_cgmbuf);
EXPORT_SYMBOL_GPL(videobuf_streamon);
EXPORT_SYMBOL_GPL(videobuf_streamoff);
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 3eb6123227b..9ab94a749d8 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -27,6 +27,7 @@
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -60,12 +61,13 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);
for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
pg = vmalloc_to_page(virt);
if (NULL == pg)
goto err;
BUG_ON(PageHighMem(pg));
- sglist[i].page = pg;
+ sg_set_page(&sglist[i], pg);
sglist[i].length = PAGE_SIZE;
}
return sglist;
@@ -86,13 +88,14 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL);
if (NULL == sglist)
return NULL;
+ sg_init_table(sglist, nr_pages);
if (NULL == pages[0])
goto nopage;
if (PageHighMem(pages[0]))
/* DMA to highmem pages might not work */
goto highmem;
- sglist[0].page = pages[0];
+ sg_set_page(&sglist[0], pages[0]);
sglist[0].offset = offset;
sglist[0].length = PAGE_SIZE - offset;
for (i = 1; i < nr_pages; i++) {
@@ -100,7 +103,7 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
goto nopage;
if (PageHighMem(pages[i]))
goto highmem;
- sglist[i].page = pages[i];
+ sg_set_page(&sglist[i], pages[i]);
sglist[i].length = PAGE_SIZE;
}
return sglist;
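All the sg conversions in this series follow the same mapping: initialize fresh lists with sg_init_table(), stop writing sg->page directly in favour of sg_set_page()/sg_page(), and replace the open-coded page_address(sg->page) + sg->offset with sg_virt(). Conceptually the accessors stand for the following (a sketch, not the real helper implementations, which also carry debug checks and chain-aware bit handling):

#include <linux/scatterlist.h>
#include <linux/mm.h>

static inline void sketch_sg_set_page(struct scatterlist *sg,
				      struct page *page)
{
	sg->page = page;			/* old open-coded form */
}

static inline struct page *sketch_sg_page(struct scatterlist *sg)
{
	return sg->page;
}

static inline void *sketch_sg_virt(struct scatterlist *sg)
{
	return page_address(sketch_sg_page(sg)) + sg->offset;
}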
diff --git a/drivers/media/video/videocodec.c b/drivers/media/video/videocodec.c
index f2bbd7a4d56..87951ec8254 100644
--- a/drivers/media/video/videocodec.c
+++ b/drivers/media/video/videocodec.c
@@ -86,8 +86,8 @@ videocodec_attach (struct videocodec_master *master)
}
dprintk(2,
- "videocodec_attach: '%s', type: %x, flags %lx, magic %lx\n",
- master->name, master->type, master->flags, master->magic);
+ "videocodec_attach: '%s', flags %lx, magic %lx\n",
+ master->name, master->flags, master->magic);
if (!h) {
dprintk(1,
diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c
index 8d8e517b344..9611c399028 100644
--- a/drivers/media/video/videodev.c
+++ b/drivers/media/video/videodev.c
@@ -1313,48 +1313,6 @@ static int __video_do_ioctl(struct inode *inode, struct file *file,
ret=vfd->vidioc_cropcap(file, fh, p);
break;
}
- case VIDIOC_G_MPEGCOMP:
- {
- struct v4l2_mpeg_compression *p=arg;
-
- /*FIXME: Several fields not shown */
- if (!vfd->vidioc_g_mpegcomp)
- break;
- ret=vfd->vidioc_g_mpegcomp(file, fh, p);
- if (!ret)
- dbgarg (cmd, "ts_pid_pmt=%d, ts_pid_audio=%d,"
- " ts_pid_video=%d, ts_pid_pcr=%d, "
- "ps_size=%d, au_sample_rate=%d, "
- "au_pesid=%c, vi_frame_rate=%d, "
- "vi_frames_per_gop=%d, "
- "vi_bframes_count=%d, vi_pesid=%c\n",
- p->ts_pid_pmt,p->ts_pid_audio,
- p->ts_pid_video,p->ts_pid_pcr,
- p->ps_size, p->au_sample_rate,
- p->au_pesid, p->vi_frame_rate,
- p->vi_frames_per_gop,
- p->vi_bframes_count, p->vi_pesid);
- break;
- }
- case VIDIOC_S_MPEGCOMP:
- {
- struct v4l2_mpeg_compression *p=arg;
- /*FIXME: Several fields not shown */
- if (!vfd->vidioc_s_mpegcomp)
- break;
- dbgarg (cmd, "ts_pid_pmt=%d, ts_pid_audio=%d, "
- "ts_pid_video=%d, ts_pid_pcr=%d, ps_size=%d, "
- "au_sample_rate=%d, au_pesid=%c, "
- "vi_frame_rate=%d, vi_frames_per_gop=%d, "
- "vi_bframes_count=%d, vi_pesid=%c\n",
- p->ts_pid_pmt,p->ts_pid_audio, p->ts_pid_video,
- p->ts_pid_pcr, p->ps_size, p->au_sample_rate,
- p->au_pesid, p->vi_frame_rate,
- p->vi_frames_per_gop, p->vi_bframes_count,
- p->vi_pesid);
- ret=vfd->vidioc_s_mpegcomp(file, fh, p);
- break;
- }
case VIDIOC_G_JPEGCOMP:
{
struct v4l2_jpegcompression *p=arg;
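With VIDIOC_{G,S}_MPEGCOMP gone, applications set MPEG encoding parameters through the extended-controls interface instead. A minimal userspace sketch of the replacement call (the control ID is standard V4L2; the bitrate value is arbitrary):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

int set_bitrate(int fd, int bps)
{
	struct v4l2_ext_control ctrl;
	struct v4l2_ext_controls ctrls;

	memset(&ctrl, 0, sizeof(ctrl));
	ctrl.id = V4L2_CID_MPEG_VIDEO_BITRATE;
	ctrl.value = bps;

	memset(&ctrls, 0, sizeof(ctrls));
	ctrls.ctrl_class = V4L2_CTRL_CLASS_MPEG;
	ctrls.count = 1;
	ctrls.controls = &ctrl;

	return ioctl(fd, VIDIOC_S_EXT_CTRLS, &ctrls);
}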
diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c
index b532aa280a1..ee73dc75131 100644
--- a/drivers/media/video/vivi.c
+++ b/drivers/media/video/vivi.c
@@ -1119,7 +1119,6 @@ static const struct file_operations vivi_fops = {
static struct video_device vivi = {
.name = "vivi",
.type = VID_TYPE_CAPTURE,
- .hardware = 0,
.fops = &vivi_fops,
.minor = -1,
// .release = video_device_release,
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index 47366408637..08aaae07c7e 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -196,7 +196,6 @@ static struct video_device w9966_template = {
.owner = THIS_MODULE,
.name = W9966_DRIVERNAME,
.type = VID_TYPE_CAPTURE | VID_TYPE_SCALES,
- .hardware = VID_HARDWARE_W9966,
.fops = &w9966_fops,
};
diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 9e7f3e685d7..2ae1430f5f7 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c
@@ -3549,7 +3549,6 @@ w9968cf_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
strcpy(cam->v4ldev->name, symbolic(camlist, mod_id));
cam->v4ldev->owner = THIS_MODULE;
cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
- cam->v4ldev->hardware = VID_HARDWARE_W9968CF;
cam->v4ldev->fops = &w9968cf_fops;
cam->v4ldev->minor = video_nr[dev_nr];
cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/zc0301/zc0301_core.c b/drivers/media/video/zc0301/zc0301_core.c
index 08a93c31c0a..2c5665c8244 100644
--- a/drivers/media/video/zc0301/zc0301_core.c
+++ b/drivers/media/video/zc0301/zc0301_core.c
@@ -1985,7 +1985,6 @@ zc0301_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
strcpy(cam->v4ldev->name, "ZC0301[P] PC Camera");
cam->v4ldev->owner = THIS_MODULE;
cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
- cam->v4ldev->hardware = 0;
cam->v4ldev->fops = &zc0301_fops;
cam->v4ldev->minor = video_nr[dev_nr];
cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c
index 48da36a15fc..6e0ac4c5c37 100644
--- a/drivers/media/video/zoran_card.c
+++ b/drivers/media/video/zoran_card.c
@@ -1235,8 +1235,14 @@ zoran_setup_videocodec (struct zoran *zr,
return m;
}
- m->magic = 0L; /* magic not used */
- m->type = VID_HARDWARE_ZR36067;
+	/* magic and type are unused for the master struct; they make
+	   sense only for codec structs.  In the past, .type was
+	   initialized to the old V4L1 .hardware value,
+	   VID_HARDWARE_ZR36067. */
+ m->magic = 0L;
+ m->type = 0;
+
m->flags = CODEC_FLAG_ENCODER | CODEC_FLAG_DECODER;
strncpy(m->name, ZR_DEVNAME(zr), sizeof(m->name));
m->data = zr;
diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index 419e5af7853..dd3d7d2c8b0 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c
@@ -60,7 +60,6 @@
#include <linux/spinlock.h>
#define MAP_NR(x) virt_to_page(x)
-#define ZORAN_HARDWARE VID_HARDWARE_ZR36067
#define ZORAN_VID_TYPE ( \
VID_TYPE_CAPTURE | \
VID_TYPE_OVERLAY | \
@@ -4659,7 +4658,6 @@ struct video_device zoran_template __devinitdata = {
#ifdef CONFIG_VIDEO_V4L2
.type2 = ZORAN_V4L2_VID_FLAGS,
#endif
- .hardware = ZORAN_HARDWARE,
.fops = &zoran_fops,
.release = &zoran_vdev_release,
.minor = -1
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index a5d0354bbbd..9203a0b221b 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/scatterlist.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -153,19 +154,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
blk_queue_max_segment_size(mq->queue, bouncesz);
- mq->sg = kzalloc(sizeof(struct scatterlist),
+ mq->sg = kmalloc(sizeof(struct scatterlist),
GFP_KERNEL);
if (!mq->sg) {
ret = -ENOMEM;
goto cleanup_queue;
}
+ sg_init_table(mq->sg, 1);
- mq->bounce_sg = kzalloc(sizeof(struct scatterlist) *
+ mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
bouncesz / 512, GFP_KERNEL);
if (!mq->bounce_sg) {
ret = -ENOMEM;
goto cleanup_queue;
}
+ sg_init_table(mq->bounce_sg, bouncesz / 512);
}
}
#endif
@@ -302,12 +305,12 @@ static void copy_sg(struct scatterlist *dst, unsigned int dst_len,
BUG_ON(dst_len == 0);
if (dst_size == 0) {
- dst_buf = page_address(dst->page) + dst->offset;
+ dst_buf = sg_virt(dst);
dst_size = dst->length;
}
if (src_size == 0) {
- src_buf = page_address(src->page) + src->offset;
+		src_buf = sg_virt(src);
src_size = src->length;
}
@@ -353,9 +356,7 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
return 1;
}
- mq->sg[0].page = virt_to_page(mq->bounce_buf);
- mq->sg[0].offset = offset_in_page(mq->bounce_buf);
- mq->sg[0].length = 0;
+ sg_init_one(mq->sg, mq->bounce_buf, 0);
while (sg_len) {
mq->sg[0].length += mq->bounce_sg[sg_len - 1].length;
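The bounce-buffer path above now describes the whole buffer with a single sg entry: sg_init_one() fills in page, offset and a zero length, and the loop then grows the length to the exact size of the request. As a sketch:

#include <linux/scatterlist.h>

static unsigned int map_bounce_sg(struct scatterlist *sg, void *bounce_buf,
				  struct scatterlist *orig, unsigned int n)
{
	unsigned int i;

	sg_init_one(sg, bounce_buf, 0);		/* page + offset set, length 0 */
	for (i = 0; i < n; i++)
		sg->length += orig[i].length;	/* total bytes to transfer */

	return 1;				/* one segment after bouncing */
}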
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 7a452c2ad1f..b1edcefdd4f 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -149,7 +149,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data
sg = &data->sg[i];
- sgbuffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset;
+ sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
amount = min(size, sg->length);
size -= amount;
@@ -226,7 +226,7 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host)
sg = &data->sg[host->transfer_index++];
pr_debug("sg = %p\n", sg);
- sg->dma_address = dma_map_page(NULL, sg->page, sg->offset, sg->length, DMA_FROM_DEVICE);
+ sg->dma_address = dma_map_page(NULL, sg_page(sg), sg->offset, sg->length, DMA_FROM_DEVICE);
pr_debug("dma address = %08X, length = %d\n", sg->dma_address, sg->length);
@@ -283,7 +283,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
int index;
/* Swap the contents of the buffer */
- buffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
pr_debug("buffer = %p, length = %d\n", buffer, sg->length);
for (index = 0; index < (sg->length / 4); index++)
@@ -292,7 +292,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
}
- flush_dcache_page(sg->page);
+ flush_dcache_page(sg_page(sg));
}
/* Is there another transfer to trigger? */
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 92c4d0dfee4..bcbb6d247bf 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -340,7 +340,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)
/* This is the pointer to the data buffer */
sg = &data->sg[host->pio.index];
- sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset;
+ sg_ptr = sg_virt(sg) + host->pio.offset;
/* This is the space left inside the buffer */
sg_len = data->sg[host->pio.index].length - host->pio.offset;
@@ -400,7 +400,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
if (host->pio.index < host->dma.len) {
sg = &data->sg[host->pio.index];
- sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset;
+ sg_ptr = sg_virt(sg) + host->pio.offset;
/* This is the space left inside the buffer */
sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset;
@@ -613,14 +613,11 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
if (host->flags & HOST_F_XMIT){
ret = au1xxx_dbdma_put_source_flags(channel,
- (void *) (page_address(sg->page) +
- sg->offset),
- len, flags);
+ (void *) sg_virt(sg), len, flags);
}
else {
ret = au1xxx_dbdma_put_dest_flags(channel,
- (void *) (page_address(sg->page) +
- sg->offset),
+ (void *) sg_virt(sg),
len, flags);
}
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c
index 6ebc41e7592..fc72e1fadb6 100644
--- a/drivers/mmc/host/imxmmc.c
+++ b/drivers/mmc/host/imxmmc.c
@@ -262,7 +262,7 @@ static void imxmci_setup_data(struct imxmci_host *host, struct mmc_data *data)
}
/* Convert back to virtual address */
- host->data_ptr = (u16*)(page_address(data->sg->page) + data->sg->offset);
+	host->data_ptr = (u16*)sg_virt(data->sg);
host->data_cnt = 0;
clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events);
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 7ae18eaed6c..12c2d807c14 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -813,7 +813,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
&& dir == DMA_FROM_DEVICE)
dir = DMA_BIDIRECTIONAL;
- dma_addr = dma_map_page(dma_dev, sg->page, 0,
+ dma_addr = dma_map_page(dma_dev, sg_page(sg), 0,
PAGE_SIZE, dir);
if (direction == DMA_TO_DEVICE)
t->tx_dma = dma_addr + sg->offset;
@@ -822,7 +822,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
}
/* allow pio too; we don't allow highmem */
- kmap_addr = kmap(sg->page);
+ kmap_addr = kmap(sg_page(sg));
if (direction == DMA_TO_DEVICE)
t->tx_buf = kmap_addr + sg->offset;
else
@@ -855,8 +855,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
/* discard mappings */
if (direction == DMA_FROM_DEVICE)
- flush_kernel_dcache_page(sg->page);
- kunmap(sg->page);
+ flush_kernel_dcache_page(sg_page(sg));
+ kunmap(sg_page(sg));
if (dma_dev)
dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);
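mmc_spi keeps two ways of reaching each sg segment, and both now go through sg_page(): dma_map_page() when the SPI master is DMA-capable, kmap() for the PIO fallback. Both paths side by side, as a sketch:

#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

static void touch_segment(struct device *dma_dev, struct scatterlist *sg)
{
	/* DMA path: map the backing page, then offset into it. */
	dma_addr_t dma = dma_map_page(dma_dev, sg_page(sg), 0,
				      PAGE_SIZE, DMA_TO_DEVICE);

	/* PIO path: temporary kernel mapping of the same page. */
	void *va = kmap(sg_page(sg));

	/* ... use dma + sg->offset, or va + sg->offset ... */

	kunmap(sg_page(sg));
	dma_unmap_page(dma_dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
}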
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 000e6a91978..0f39c490f02 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -169,7 +169,7 @@ static inline char *mmci_kmap_atomic(struct mmci_host *host, unsigned long *flag
struct scatterlist *sg = host->sg_ptr;
local_irq_save(*flags);
- return kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset;
+ return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
}
static inline void mmci_kunmap_atomic(struct mmci_host *host, void *buffer, unsigned long *flags)
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 60a67dfcda6..971e18b91f4 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -24,10 +24,10 @@
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
#include <linux/clk.h>
+#include <linux/scatterlist.h>
#include <asm/io.h>
#include <asm/irq.h>
-#include <asm/scatterlist.h>
#include <asm/mach-types.h>
#include <asm/arch/board.h>
@@ -383,7 +383,7 @@ mmc_omap_sg_to_buf(struct mmc_omap_host *host)
sg = host->data->sg + host->sg_idx;
host->buffer_bytes_left = sg->length;
- host->buffer = page_address(sg->page) + sg->offset;
+ host->buffer = sg_virt(sg);
if (host->buffer_bytes_left > host->total_bytes_left)
host->buffer_bytes_left = host->total_bytes_left;
}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b397121b947..d7c5b94d8c5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -13,6 +13,7 @@
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
#include <linux/mmc/host.h>
@@ -231,7 +232,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host)
static inline char* sdhci_sg_to_buffer(struct sdhci_host* host)
{
- return page_address(host->cur_sg->page) + host->cur_sg->offset;
+ return sg_virt(host->cur_sg);
}
static inline int sdhci_next_sg(struct sdhci_host* host)
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 9b904795eb7..c11a3d25605 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -192,7 +192,7 @@ static void tifm_sd_transfer_data(struct tifm_sd *host)
}
off = sg[host->sg_pos].offset + host->block_pos;
- pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT);
+ pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
p_off = offset_in_page(off);
p_cnt = PAGE_SIZE - p_off;
p_cnt = min(p_cnt, cnt);
@@ -241,18 +241,18 @@ static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data)
}
off = sg[host->sg_pos].offset + host->block_pos;
- pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT);
+ pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
p_off = offset_in_page(off);
p_cnt = PAGE_SIZE - p_off;
p_cnt = min(p_cnt, cnt);
p_cnt = min(p_cnt, t_size);
if (r_data->flags & MMC_DATA_WRITE)
- tifm_sd_copy_page(host->bounce_buf.page,
+ tifm_sd_copy_page(sg_page(&host->bounce_buf),
r_data->blksz - t_size,
pg, p_off, p_cnt);
else if (r_data->flags & MMC_DATA_READ)
- tifm_sd_copy_page(pg, p_off, host->bounce_buf.page,
+ tifm_sd_copy_page(pg, p_off, sg_page(&host->bounce_buf),
r_data->blksz - t_size, p_cnt);
t_size -= p_cnt;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 80db11c05f2..fa4c8c53cc7 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -269,7 +269,7 @@ static inline int wbsd_next_sg(struct wbsd_host *host)
static inline char *wbsd_sg_to_buffer(struct wbsd_host *host)
{
- return page_address(host->cur_sg->page) + host->cur_sg->offset;
+ return sg_virt(host->cur_sg);
}
static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
@@ -283,7 +283,7 @@ static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
len = data->sg_len;
for (i = 0; i < len; i++) {
- sgbuf = page_address(sg[i].page) + sg[i].offset;
+ sgbuf = sg_virt(&sg[i]);
memcpy(dmabuf, sgbuf, sg[i].length);
dmabuf += sg[i].length;
}
@@ -300,7 +300,7 @@ static inline void wbsd_dma_to_sg(struct wbsd_host *host, struct mmc_data *data)
len = data->sg_len;
for (i = 0; i < len; i++) {
- sgbuf = page_address(sg[i].page) + sg[i].offset;
+ sgbuf = sg_virt(&sg[i]);
memcpy(sgbuf, dmabuf, sg[i].length);
dmabuf += sg[i].length;
}
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 3aa3dca56ae..a9eb1c51624 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -85,6 +85,7 @@ static int cfi_intelext_point (struct mtd_info *mtd, loff_t from, size_t len,
static void cfi_intelext_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from,
size_t len);
+static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
#include "fwh_lock.h"
@@ -641,73 +642,13 @@ static int cfi_intelext_partition_fixup(struct mtd_info *mtd,
/*
* *********** CHIP ACCESS FUNCTIONS ***********
*/
-
-static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
+static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
{
DECLARE_WAITQUEUE(wait, current);
struct cfi_private *cfi = map->fldrv_priv;
map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01);
- unsigned long timeo;
struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
-
- resettime:
- timeo = jiffies + HZ;
- retry:
- if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
- /*
- * OK. We have possibility for contension on the write/erase
- * operations which are global to the real chip and not per
- * partition. So let's fight it over in the partition which
- * currently has authority on the operation.
- *
- * The rules are as follows:
- *
- * - any write operation must own shared->writing.
- *
- * - any erase operation must own _both_ shared->writing and
- * shared->erasing.
- *
- * - contension arbitration is handled in the owner's context.
- *
- * The 'shared' struct can be read and/or written only when
- * its lock is taken.
- */
- struct flchip_shared *shared = chip->priv;
- struct flchip *contender;
- spin_lock(&shared->lock);
- contender = shared->writing;
- if (contender && contender != chip) {
- /*
- * The engine to perform desired operation on this
- * partition is already in use by someone else.
- * Let's fight over it in the context of the chip
- * currently using it. If it is possible to suspend,
- * that other partition will do just that, otherwise
- * it'll happily send us to sleep. In any case, when
- * get_chip returns success we're clear to go ahead.
- */
- int ret = spin_trylock(contender->mutex);
- spin_unlock(&shared->lock);
- if (!ret)
- goto retry;
- spin_unlock(chip->mutex);
- ret = get_chip(map, contender, contender->start, mode);
- spin_lock(chip->mutex);
- if (ret) {
- spin_unlock(contender->mutex);
- return ret;
- }
- timeo = jiffies + HZ;
- spin_lock(&shared->lock);
- spin_unlock(contender->mutex);
- }
-
- /* We now own it */
- shared->writing = chip;
- if (mode == FL_ERASING)
- shared->erasing = chip;
- spin_unlock(&shared->lock);
- }
+ unsigned long timeo = jiffies + HZ;
switch (chip->state) {
@@ -722,16 +663,11 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
if (chip->priv && map_word_andequal(map, status, status_PWS, status_PWS))
break;
- if (time_after(jiffies, timeo)) {
- printk(KERN_ERR "%s: Waiting for chip to be ready timed out. Status %lx\n",
- map->name, status.x[0]);
- return -EIO;
- }
spin_unlock(chip->mutex);
cfi_udelay(1);
spin_lock(chip->mutex);
/* Someone else might have been playing with it. */
- goto retry;
+ return -EAGAIN;
}
case FL_READY:
@@ -809,10 +745,82 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
schedule();
remove_wait_queue(&chip->wq, &wait);
spin_lock(chip->mutex);
- goto resettime;
+ return -EAGAIN;
}
}
+static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
+{
+ int ret;
+
+ retry:
+ if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
+ || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
+ /*
+ * OK. We have possibility for contention on the write/erase
+ * operations which are global to the real chip and not per
+ * partition. So let's fight it over in the partition which
+ * currently has authority on the operation.
+ *
+ * The rules are as follows:
+ *
+ * - any write operation must own shared->writing.
+ *
+ * - any erase operation must own _both_ shared->writing and
+ * shared->erasing.
+ *
+ * - contention arbitration is handled in the owner's context.
+ *
+ * The 'shared' struct can be read and/or written only when
+ * its lock is taken.
+ */
+ struct flchip_shared *shared = chip->priv;
+ struct flchip *contender;
+ spin_lock(&shared->lock);
+ contender = shared->writing;
+ if (contender && contender != chip) {
+ /*
+ * The engine to perform desired operation on this
+ * partition is already in use by someone else.
+ * Let's fight over it in the context of the chip
+ * currently using it. If it is possible to suspend,
+ * that other partition will do just that, otherwise
+ * it'll happily send us to sleep. In any case, when
+ * get_chip returns success we're clear to go ahead.
+ */
+ ret = spin_trylock(contender->mutex);
+ spin_unlock(&shared->lock);
+ if (!ret)
+ goto retry;
+ spin_unlock(chip->mutex);
+ ret = chip_ready(map, contender, contender->start, mode);
+ spin_lock(chip->mutex);
+
+ if (ret == -EAGAIN) {
+ spin_unlock(contender->mutex);
+ goto retry;
+ }
+ if (ret) {
+ spin_unlock(contender->mutex);
+ return ret;
+ }
+ spin_lock(&shared->lock);
+ spin_unlock(contender->mutex);
+ }
+
+ /* We now own it */
+ shared->writing = chip;
+ if (mode == FL_ERASING)
+ shared->erasing = chip;
+ spin_unlock(&shared->lock);
+ }
+ ret = chip_ready(map, chip, adr, mode);
+ if (ret == -EAGAIN)
+ goto retry;
+
+ return ret;
+}
+
static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr)
{
struct cfi_private *cfi = map->fldrv_priv;
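
The refactor above splits the old self-looping get_chip() in two: chip_ready() performs a single state check and returns -EAGAIN whenever the chip mutex had to be dropped, while get_chip() owns the one retry loop, including the partition-contention dance. A sketch of the resulting control flow, not taken verbatim from the patch:

	/* get_chip() is now the only place that loops; every transient
	 * condition inside chip_ready() surfaces as -EAGAIN. */
	do {
		ret = chip_ready(map, chip, adr, mode);
		/* chip->mutex may have been dropped and retaken, so the
		 * chip state must be re-evaluated from scratch. */
	} while (ret == -EAGAIN);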
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 8f9c3baeb38..246d4512f64 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -300,7 +300,7 @@ config MTD_NAND_PLATFORM
via platform_data.
config MTD_ALAUDA
- tristate "MTD driver for Olympus MAUSB-10 and Fijufilm DPC-R1"
+ tristate "MTD driver for Olympus MAUSB-10 and Fujifilm DPC-R1"
depends on MTD_NAND && USB
help
These two (and possibly other) Alauda-based cardreaders for
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index ab9f5c5db38..0e72153b329 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -220,7 +220,7 @@ static int doc_ecc_decode(struct rs_control *rs, uint8_t *data, uint8_t *ecc)
}
}
/* If the parity is wrong, no rescue possible */
- return parity ? -1 : nerr;
+ return parity ? -EBADMSG : nerr;
}
static void DoC_Delay(struct doc_priv *doc, unsigned short cycles)
@@ -1034,7 +1034,7 @@ static int doc200x_correct_data(struct mtd_info *mtd, u_char *dat,
WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf);
else
WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
- if (no_ecc_failures && (ret == -1)) {
+ if (no_ecc_failures && (ret == -EBADMSG)) {
printk(KERN_ERR "suppressing ECC failure\n");
ret = 0;
}
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b4e0e772389..e29c1da7f56 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -789,7 +789,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
int stat;
stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
- if (stat == -1)
+ if (stat < 0)
mtd->ecc_stats.failed++;
else
mtd->ecc_stats.corrected += stat;
@@ -833,7 +833,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
int stat;
stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
- if (stat == -1)
+ if (stat < 0)
mtd->ecc_stats.failed++;
else
mtd->ecc_stats.corrected += stat;
@@ -874,7 +874,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
chip->read_buf(mtd, oob, eccbytes);
stat = chip->ecc.correct(mtd, p, oob, NULL);
- if (stat == -1)
+ if (stat < 0)
mtd->ecc_stats.failed++;
else
mtd->ecc_stats.corrected += stat;
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index fde593e5e63..9003a135e05 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -189,7 +189,7 @@ int nand_correct_data(struct mtd_info *mtd, u_char *dat,
if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
return 1;
- return -1;
+ return -EBADMSG;
}
EXPORT_SYMBOL(nand_correct_data);
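
The diskonchip, nand_base and nand_ecc hunks above all implement one convention change: ECC correction routines return the number of corrected bitflips (>= 0) or a negative errno (-EBADMSG) for an uncorrectable unit, instead of a bare -1. That lets callers use the generic "stat < 0" test and keeps a meaningful errno if the failure ever propagates further. The caller-side pattern, as nand_base now applies it:

	stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
	if (stat < 0)			/* e.g. -EBADMSG */
		mtd->ecc_stats.failed++;
	else
		mtd->ecc_stats.corrected += stat;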
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a7574807dc4..10490b48d9f 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -511,7 +511,7 @@ static int init_nandsim(struct mtd_info *mtd)
}
if (ns->options & OPT_SMALLPAGE) {
- if (ns->geom.totsz < (64 << 20)) {
+ if (ns->geom.totsz < (32 << 20)) {
ns->geom.pgaddrbytes = 3;
ns->geom.secaddrbytes = 2;
} else {
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 21b921dd6aa..66f76e9618d 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -488,12 +488,24 @@ static void s3c2410_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
readsb(this->IO_ADDR_R, buf, len);
}
+static void s3c2440_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+ struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+ readsl(info->regs + S3C2440_NFDATA, buf, len / 4);
+}
+
static void s3c2410_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
{
struct nand_chip *this = mtd->priv;
writesb(this->IO_ADDR_W, buf, len);
}
+static void s3c2440_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+ struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+ writesl(info->regs + S3C2440_NFDATA, buf, len / 4);
+}
+
/* device management functions */
static int s3c2410_nand_remove(struct platform_device *pdev)
@@ -604,6 +616,8 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
info->sel_bit = S3C2440_NFCONT_nFCE;
chip->cmd_ctrl = s3c2440_nand_hwcontrol;
chip->dev_ready = s3c2440_nand_devready;
+ chip->read_buf = s3c2440_nand_read_buf;
+ chip->write_buf = s3c2440_nand_write_buf;
break;
case TYPE_S3C2412:
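
The new s3c2440 buffer helpers use 32-bit FIFO accesses (readsl/writesl) against S3C2440_NFDATA; the len / 4 arithmetic silently assumes len is a multiple of four, which holds for the page and OOB sizes this driver is handed. A hedged sketch of a defensive variant that would drain a non-aligned tail with byte accesses — hypothetical, not part of the patch:

	static void s3c2440_nand_read_buf_safe(struct mtd_info *mtd,
					       u_char *buf, int len)
	{
		struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);

		readsl(info->regs + S3C2440_NFDATA, buf, len >> 2);
		if (len & 3)	/* drain the 1-3 trailing bytes, if any */
			readsb(info->regs + S3C2440_NFDATA,
			       buf + (len & ~3), len & 3);
	}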
diff --git a/drivers/mtd/onenand/onenand_sim.c b/drivers/mtd/onenand/onenand_sim.c
index 0d89ad5776f..d64200b7c94 100644
--- a/drivers/mtd/onenand/onenand_sim.c
+++ b/drivers/mtd/onenand/onenand_sim.c
@@ -88,11 +88,11 @@ do { \
/**
* onenand_lock_handle - Handle Lock scheme
- * @param this OneNAND device structure
- * @param cmd The command to be sent
+ * @this: OneNAND device structure
+ * @cmd: The command to be sent
*
* Send lock command to OneNAND device.
- * The lock scheme is depends on chip type.
+ * The lock scheme depends on chip type.
*/
static void onenand_lock_handle(struct onenand_chip *this, int cmd)
{
@@ -131,8 +131,8 @@ static void onenand_lock_handle(struct onenand_chip *this, int cmd)
/**
* onenand_bootram_handle - Handle BootRAM area
- * @param this OneNAND device structure
- * @param cmd The command to be sent
+ * @this: OneNAND device structure
+ * @cmd: The command to be sent
*
* Emulate BootRAM area. It is possible to do basic operation using BootRAM.
*/
@@ -153,10 +153,10 @@ static void onenand_bootram_handle(struct onenand_chip *this, int cmd)
/**
* onenand_update_interrupt - Set interrupt register
- * @param this OneNAND device structure
- * @param cmd The command to be sent
+ * @this: OneNAND device structure
+ * @cmd: The command to be sent
*
- * Update interrupt register. The status is depends on command.
+ * Update interrupt register. The status depends on command.
*/
static void onenand_update_interrupt(struct onenand_chip *this, int cmd)
{
@@ -189,11 +189,12 @@ static void onenand_update_interrupt(struct onenand_chip *this, int cmd)
}
/**
- * onenand_check_overwrite - Check over-write if happend
- * @param dest The destination pointer
- * @param src The source pointer
- * @param count The length to be check
- * @return 0 on same, otherwise 1
+ * onenand_check_overwrite - Check if over-write happened
+ * @dest: The destination pointer
+ * @src: The source pointer
+ * @count: The length to be checked
+ *
+ * Returns: 0 if the contents match, otherwise 1
*
* Compare the source with destination
*/
@@ -213,10 +214,10 @@ static int onenand_check_overwrite(void *dest, void *src, size_t count)
/**
* onenand_data_handle - Handle OneNAND Core and DataRAM
- * @param this OneNAND device structure
- * @param cmd The command to be sent
- * @param dataram Which dataram used
- * @param offset The offset to OneNAND Core
+ * @this: OneNAND device structure
+ * @cmd: The command to be sent
+ * @dataram: Which dataram used
+ * @offset: The offset to OneNAND Core
*
* Copy data from OneNAND Core to DataRAM (read)
* Copy data from DataRAM to OneNAND Core (write)
@@ -295,8 +296,8 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
/**
* onenand_command_handle - Handle command
- * @param this OneNAND device structure
- * @param cmd The command to be sent
+ * @this: OneNAND device structure
+ * @cmd: The command to be sent
*
* Emulate OneNAND command.
*/
@@ -350,8 +351,8 @@ static void onenand_command_handle(struct onenand_chip *this, int cmd)
/**
* onenand_writew - [OneNAND Interface] Emulate write operation
- * @param value value to write
- * @param addr address to write
+ * @value: value to write
+ * @addr: address to write
*
* Write OneNAND register with value
*/
@@ -373,7 +374,7 @@ static void onenand_writew(unsigned short value, void __iomem * addr)
/**
* flash_init - Initialize OneNAND simulator
- * @param flash OneNAND simulaotr data strucutres
+ * @flash: OneNAND simulator data structures
*
* Initialize OneNAND simulator.
*/
@@ -416,7 +417,7 @@ static int __init flash_init(struct onenand_flash *flash)
/**
* flash_exit - Clean up OneNAND simulator
- * @param flash OneNAND simulaotr data strucutres
+ * @flash: OneNAND simulator data structures
*
* Clean up OneNAND simulator.
*/
@@ -424,7 +425,6 @@ static void flash_exit(struct onenand_flash *flash)
{
vfree(ONENAND_CORE(flash));
kfree(flash->base);
- kfree(flash);
}
static int __init onenand_sim_init(void)
@@ -449,7 +449,7 @@ static int __init onenand_sim_init(void)
info->onenand.write_word = onenand_writew;
if (flash_init(&info->flash)) {
- printk(KERN_ERR "Unable to allocat flash.\n");
+ printk(KERN_ERR "Unable to allocate flash.\n");
kfree(ffchars);
kfree(info);
return -ENOMEM;
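
The onenand_sim hunks above are pure kernel-doc conversions: the old "@param name description" lines are rewritten to the "@name: description" form that scripts/kernel-doc actually parses, alongside the spelling fixes. For reference, a minimal well-formed block in the converted style:

	/**
	 * onenand_lock_handle - Handle lock scheme
	 * @this:	OneNAND device structure
	 * @cmd:	The command to be sent
	 *
	 * Send lock command to the OneNAND device.
	 */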
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index eb75773a9eb..86b8641b466 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3103,4 +3103,10 @@ config NETPOLL_TRAP
config NET_POLL_CONTROLLER
def_bool NETPOLL
+config VIRTIO_NET
+ tristate "Virtio network driver (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && VIRTIO
+ ---help---
+ This is the virtual network driver for lguest. Say Y or M.
+
endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 22f78cbd126..593262065c9 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -183,7 +183,6 @@ obj-$(CONFIG_ZORRO8390) += zorro8390.o
obj-$(CONFIG_HPLANCE) += hplance.o 7990.o
obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o
obj-$(CONFIG_EQUALIZER) += eql.o
-obj-$(CONFIG_LGUEST_NET) += lguest_net.o
obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o
obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o
obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o
@@ -243,3 +242,4 @@ obj-$(CONFIG_FS_ENET) += fs_enet/
obj-$(CONFIG_NETXEN_NIC) += netxen/
obj-$(CONFIG_NIU) += niu.o
+obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index ed53aaab4c0..ae419736158 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -471,7 +471,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
len = max(skb->len, ETH_ZLEN);
- queue = skb->queue_mapping;
+ queue = skb_get_queue_mapping(skb);
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
netif_stop_subqueue(dev, queue);
#else
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2b5782056dd..0fbf1bbbaee 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -751,13 +751,11 @@ mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_devi
if (mii_head) {
mii_tail->mii_next = mip;
mii_tail = mip;
- }
- else {
+ } else {
mii_head = mii_tail = mip;
fep->hwp->fec_mii_data = regval;
}
- }
- else {
+ } else {
retval = 1;
}
@@ -768,14 +766,11 @@ mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_devi
static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c)
{
- int k;
-
if(!c)
return;
- for(k = 0; (c+k)->mii_data != mk_mii_end; k++) {
- mii_queue(dev, (c+k)->mii_data, (c+k)->funct);
- }
+ for (; c->mii_data != mk_mii_end; c++)
+ mii_queue(dev, c->mii_data, c->funct);
}
static void mii_parse_sr(uint mii_reg, struct net_device *dev)
@@ -792,7 +787,6 @@ static void mii_parse_sr(uint mii_reg, struct net_device *dev)
status |= PHY_STAT_FAULT;
if (mii_reg & 0x0020)
status |= PHY_STAT_ANC;
-
*s = status;
}
@@ -1239,7 +1233,6 @@ mii_link_interrupt(int irq, void * dev_id);
#endif
#if defined(CONFIG_M5272)
-
/*
* Code specific to Coldfire 5272 setup.
*/
@@ -2020,8 +2013,7 @@ static void mii_relink(struct work_struct *work)
& (PHY_STAT_100FDX | PHY_STAT_10FDX))
duplex = 1;
fec_restart(dev, duplex);
- }
- else
+ } else
fec_stop(dev);
#if 0
@@ -2119,8 +2111,7 @@ mii_discover_phy(uint mii_reg, struct net_device *dev)
fep->phy_id = phytype << 16;
mii_queue(dev, mk_mii_read(MII_REG_PHYIR2),
mii_discover_phy3);
- }
- else {
+ } else {
fep->phy_addr++;
mii_queue(dev, mk_mii_read(MII_REG_PHYIR1),
mii_discover_phy);
@@ -2574,8 +2565,7 @@ fec_restart(struct net_device *dev, int duplex)
if (duplex) {
fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x04;/* MII enable */
fecp->fec_x_cntrl = 0x04; /* FD enable */
- }
- else {
+ } else {
/* MII enable|No Rcv on Xmit */
fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x06;
fecp->fec_x_cntrl = 0x00;
diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c
deleted file mode 100644
index abce2ee8430..00000000000
--- a/drivers/net/lguest_net.c
+++ /dev/null
@@ -1,555 +0,0 @@
-/*D:500
- * The Guest network driver.
- *
- * This is very simple a virtual network driver, and our last Guest driver.
- * The only trick is that it can talk directly to multiple other recipients
- * (ie. other Guests on the same network). It can also be used with only the
- * Host on the network.
- :*/
-
-/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-//#define DEBUG
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/module.h>
-#include <linux/mm_types.h>
-#include <linux/io.h>
-#include <linux/lguest_bus.h>
-
-#define SHARED_SIZE PAGE_SIZE
-#define MAX_LANS 4
-#define NUM_SKBS 8
-
-/*M:011 Network code master Jeff Garzik points out numerous shortcomings in
- * this driver if it aspires to greatness.
- *
- * Firstly, it doesn't use "NAPI": the networking's New API, and is poorer for
- * it. As he says "NAPI means system-wide load leveling, across multiple
- * network interfaces. Lack of NAPI can mean competition at higher loads."
- *
- * He also points out that we don't implement set_mac_address, so users cannot
- * change the devices hardware address. When I asked why one would want to:
- * "Bonding, and situations where you /do/ want the MAC address to "leak" out
- * of the host onto the wider net."
- *
- * Finally, he would like module unloading: "It is not unrealistic to think of
- * [un|re|]loading the net support module in an lguest guest. And, adding
- * module support makes the programmer more responsible, because they now have
- * to learn to clean up after themselves. Any driver that cannot clean up
- * after itself is an incomplete driver in my book."
- :*/
-
-/*D:530 The "struct lguestnet_info" contains all the information we need to
- * know about the network device. */
-struct lguestnet_info
-{
- /* The mapped device page(s) (an array of "struct lguest_net"). */
- struct lguest_net *peer;
- /* The physical address of the device page(s) */
- unsigned long peer_phys;
- /* The size of the device page(s). */
- unsigned long mapsize;
-
- /* The lguest_device I come from */
- struct lguest_device *lgdev;
-
- /* My peerid (ie. my slot in the array). */
- unsigned int me;
-
- /* Receive queue: the network packets waiting to be filled. */
- struct sk_buff *skb[NUM_SKBS];
- struct lguest_dma dma[NUM_SKBS];
-};
-/*:*/
-
-/* How many bytes left in this page. */
-static unsigned int rest_of_page(void *data)
-{
- return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
-}
-
-/*D:570 Each peer (ie. Guest or Host) on the network binds their receive
- * buffers to a different key: we simply use the physical address of the
- * device's memory page plus the peer number. The Host insists that all keys
- * be a multiple of 4, so we multiply the peer number by 4. */
-static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum)
-{
- return info->peer_phys + 4 * peernum;
-}
-
-/* This is the routine which sets up a "struct lguest_dma" to point to a
- * network packet, similar to req_to_dma() in lguest_blk.c. The structure of a
- * "struct sk_buff" has grown complex over the years: it consists of a "head"
- * linear section pointed to by "skb->data", and possibly an array of
- * "fragments" in the case of a non-linear packet.
- *
- * Our receive buffers don't use fragments at all but outgoing skbs might, so
- * we handle it. */
-static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen,
- struct lguest_dma *dma)
-{
- unsigned int i, seg;
-
- /* First, we put the linear region into the "struct lguest_dma". Each
- * entry can't go over a page boundary, so even though all our packets
- * are 1514 bytes or less, we might need to use two entries here: */
- for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) {
- dma->addr[seg] = virt_to_phys(skb->data + i);
- dma->len[seg] = min((unsigned)(headlen - i),
- rest_of_page(skb->data + i));
- }
-
- /* Now we handle the fragments: at least they're guaranteed not to go
- * over a page. skb_shinfo(skb) returns a pointer to the structure
- * which tells us about the number of fragments and the fragment
- * array. */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) {
- const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
- /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */
- if (seg == LGUEST_MAX_DMA_SECTIONS) {
- /* We will end up sending a truncated packet should
- * this ever happen. Plus, a cool log message! */
- printk("Woah dude! Megapacket!\n");
- break;
- }
- dma->addr[seg] = page_to_phys(f->page) + f->page_offset;
- dma->len[seg] = f->size;
- }
-
- /* If after all that we didn't use the entire "struct lguest_dma"
- * array, we terminate it with a 0 length. */
- if (seg < LGUEST_MAX_DMA_SECTIONS)
- dma->len[seg] = 0;
-}
-
-/*
- * Packet transmission.
- *
- * Our packet transmission is a little unusual. A real network card would just
- * send out the packet and leave the receivers to decide if they're interested.
- * Instead, we look through the network device memory page and see if any of
- * the ethernet addresses match the packet destination, and if so we send it to
- * that Guest.
- *
- * This is made a little more complicated in two cases. The first case is
- * broadcast packets: for that we send the packet to all Guests on the network,
- * one at a time. The second case is "promiscuous" mode, where a Guest wants
- * to see all the packets on the network. We need a way for the Guest to tell
- * us it wants to see all packets, so it sets the "multicast" bit on its
- * published MAC address, which is never valid in a real ethernet address.
- */
-#define PROMISC_BIT 0x01
-
-/* This is the callback which is summoned whenever the network device's
- * multicast or promiscuous state changes. If the card is in promiscuous mode,
- * we advertise that in our ethernet address in the device's memory. We do the
- * same if Linux wants any or all multicast traffic. */
-static void lguestnet_set_multicast(struct net_device *dev)
-{
- struct lguestnet_info *info = netdev_priv(dev);
-
- if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count)
- info->peer[info->me].mac[0] |= PROMISC_BIT;
- else
- info->peer[info->me].mac[0] &= ~PROMISC_BIT;
-}
-
-/* A simple test function to see if a peer wants to see all packets.*/
-static int promisc(struct lguestnet_info *info, unsigned int peer)
-{
- return info->peer[peer].mac[0] & PROMISC_BIT;
-}
-
-/* Another simple function to see if a peer's advertised ethernet address
- * matches a packet's destination ethernet address. */
-static int mac_eq(const unsigned char mac[ETH_ALEN],
- struct lguestnet_info *info, unsigned int peer)
-{
- /* Ignore multicast bit, which peer turns on to mean promisc. */
- if ((info->peer[peer].mac[0] & (~PROMISC_BIT)) != mac[0])
- return 0;
- return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0;
-}
-
-/* This is the function which actually sends a packet once we've decided a
- * peer wants it: */
-static void transfer_packet(struct net_device *dev,
- struct sk_buff *skb,
- unsigned int peernum)
-{
- struct lguestnet_info *info = netdev_priv(dev);
- struct lguest_dma dma;
-
- /* We use our handy "struct lguest_dma" packing function to prepare
- * the skb for sending. */
- skb_to_dma(skb, skb_headlen(skb), &dma);
- pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);
-
- /* This is the actual send call which copies the packet. */
- lguest_send_dma(peer_key(info, peernum), &dma);
-
- /* Check that the entire packet was transmitted. If not, it could mean
- * that the other Guest registered a short receive buffer, but this
- * driver should never do that. More likely, the peer is dead. */
- if (dma.used_len != skb->len) {
- dev->stats.tx_carrier_errors++;
- pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
- peernum, dma.used_len, skb->len,
- (void *)dma.addr[0], dma.len[0]);
- } else {
- /* On success we update the stats. */
- dev->stats.tx_bytes += skb->len;
- dev->stats.tx_packets++;
- }
-}
-
-/* Another helper function to tell is if a slot in the device memory is unused.
- * Since we always set the Local Assignment bit in the ethernet address, the
- * first byte can never be 0. */
-static int unused_peer(const struct lguest_net peer[], unsigned int num)
-{
- return peer[num].mac[0] == 0;
-}
-
-/* Finally, here is the routine which handles an outgoing packet. It's called
- * "start_xmit" for traditional reasons. */
-static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- unsigned int i;
- int broadcast;
- struct lguestnet_info *info = netdev_priv(dev);
- /* Extract the destination ethernet address from the packet. */
- const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
- DECLARE_MAC_BUF(mac);
-
- pr_debug("%s: xmit %s\n", dev->name, print_mac(mac, dest));
-
- /* If it's a multicast packet, we broadcast to everyone. That's not
- * very efficient, but there are very few applications which actually
- * use multicast, which is a shame really.
- *
- * As etherdevice.h points out: "By definition the broadcast address is
- * also a multicast address." So we don't have to test for broadcast
- * packets separately. */
- broadcast = is_multicast_ether_addr(dest);
-
- /* Look through all the published ethernet addresses to see if we
- * should send this packet. */
- for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) {
- /* We don't send to ourselves (we actually can't SEND_DMA to
- * ourselves anyway), and don't send to unused slots.*/
- if (i == info->me || unused_peer(info->peer, i))
- continue;
-
- /* If it's broadcast we send it. If they want every packet we
- * send it. If the destination matches their address we send
- * it. Otherwise we go to the next peer. */
- if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i))
- continue;
-
- pr_debug("lguestnet %s: sending from %i to %i\n",
- dev->name, info->me, i);
- /* Our routine which actually does the transfer. */
- transfer_packet(dev, skb, i);
- }
-
- /* An xmit routine is expected to dispose of the packet, so we do. */
- dev_kfree_skb(skb);
-
- /* As per kernel convention, 0 means success. This is why I love
- * networking: even if we never sent to anyone, that's still
- * success! */
- return 0;
-}
-
-/*D:560
- * Packet receiving.
- *
- * First, here's a helper routine which fills one of our array of receive
- * buffers: */
-static int fill_slot(struct net_device *dev, unsigned int slot)
-{
- struct lguestnet_info *info = netdev_priv(dev);
-
- /* We can receive ETH_DATA_LEN (1500) byte packets, plus a standard
- * ethernet header of ETH_HLEN (14) bytes. */
- info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
- if (!info->skb[slot]) {
- printk("%s: could not fill slot %i\n", dev->name, slot);
- return -ENOMEM;
- }
-
- /* skb_to_dma() is a helper which sets up the "struct lguest_dma" to
- * point to the data in the skb: we also use it for sending out a
- * packet. */
- skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]);
-
- /* This is a Write Memory Barrier: it ensures that the entry in the
- * receive buffer array is written *before* we set the "used_len" entry
- * to 0. If the Host were looking at the receive buffer array from a
- * different CPU, it could potentially see "used_len = 0" and not see
- * the updated receive buffer information. This would be a horribly
- * nasty bug, so make sure the compiler and CPU know this has to happen
- * first. */
- wmb();
- /* Writing 0 to "used_len" tells the Host it can use this receive
- * buffer now. */
- info->dma[slot].used_len = 0;
- return 0;
-}
-
-/* This is the actual receive routine. When we receive an interrupt from the
- * Host to tell us a packet has been delivered, we arrive here: */
-static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
-{
- struct net_device *dev = dev_id;
- struct lguestnet_info *info = netdev_priv(dev);
- unsigned int i, done = 0;
-
- /* Look through our entire receive array for an entry which has data
- * in it. */
- for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
- unsigned int length;
- struct sk_buff *skb;
-
- length = info->dma[i].used_len;
- if (length == 0)
- continue;
-
- /* We've found one! Remember the skb (we grabbed the length
- * above), and immediately refill the slot we've taken it
- * from. */
- done++;
- skb = info->skb[i];
- fill_slot(dev, i);
-
- /* This shouldn't happen: micropackets could be sent by a
- * badly-behaved Guest on the network, but the Host will never
- * stuff more data in the buffer than the buffer length. */
- if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) {
- pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n",
- dev->name, length);
- dev_kfree_skb(skb);
- continue;
- }
-
- /* skb_put(), what a great function! I've ranted about this
- * function before (http://lkml.org/lkml/1999/9/26/24). You
- * call it after you've added data to the end of an skb (in
- * this case, it was the Host which wrote the data). */
- skb_put(skb, length);
-
- /* The ethernet header contains a protocol field: we use the
- * standard helper to extract it, and place the result in
- * skb->protocol. The helper also sets up skb->pkt_type and
- * eats up the ethernet header from the front of the packet. */
- skb->protocol = eth_type_trans(skb, dev);
-
- /* If this device doesn't need checksums for sending, we also
- * don't need to check the packets when they come in. */
- if (dev->features & NETIF_F_NO_CSUM)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* As a last resort for debugging the driver or the lguest I/O
- * subsystem, you can uncomment the "#define DEBUG" at the top
- * of this file, which turns all the pr_debug() into printk()
- * and floods the logs. */
- pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
- ntohs(skb->protocol), skb->len, skb->pkt_type);
-
- /* Update the packet and byte counts (visible from ifconfig,
- * and good for debugging). */
- dev->stats.rx_bytes += skb->len;
- dev->stats.rx_packets++;
-
- /* Hand our fresh network packet into the stack's "network
- * interface receive" routine. That will free the packet
- * itself when it's finished. */
- netif_rx(skb);
- }
-
- /* If we found any packets, we assume the interrupt was for us. */
- return done ? IRQ_HANDLED : IRQ_NONE;
-}
-
-/*D:550 This is where we start: when the device is brought up by dhcpd or
- * ifconfig. At this point we advertise our MAC address to the rest of the
- * network, and register receive buffers ready for incoming packets. */
-static int lguestnet_open(struct net_device *dev)
-{
- int i;
- struct lguestnet_info *info = netdev_priv(dev);
-
- /* Copy our MAC address into the device page, so others on the network
- * can find us. */
- memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
-
- /* We might already be in promisc mode (dev->flags & IFF_PROMISC). Our
- * set_multicast callback handles this already, so we call it now. */
- lguestnet_set_multicast(dev);
-
- /* Allocate packets and put them into our "struct lguest_dma" array.
- * If we fail to allocate all the packets we could still limp along,
- * but it's a sign of real stress so we should probably give up now. */
- for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
- if (fill_slot(dev, i) != 0)
- goto cleanup;
- }
-
- /* Finally we tell the Host where our array of "struct lguest_dma"
- * receive buffers is, binding it to the key corresponding to the
- * device's physical memory plus our peerid. */
- if (lguest_bind_dma(peer_key(info,info->me), info->dma,
- NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
- goto cleanup;
- return 0;
-
-cleanup:
- while (--i >= 0)
- dev_kfree_skb(info->skb[i]);
- return -ENOMEM;
-}
-/*:*/
-
-/* The close routine is called when the device is no longer in use: we clean up
- * elegantly. */
-static int lguestnet_close(struct net_device *dev)
-{
- unsigned int i;
- struct lguestnet_info *info = netdev_priv(dev);
-
- /* Clear all trace of our existence out of the device memory by setting
- * the slot which held our MAC address to 0 (unused). */
- memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));
-
- /* Unregister our array of receive buffers */
- lguest_unbind_dma(peer_key(info, info->me), info->dma);
- for (i = 0; i < ARRAY_SIZE(info->dma); i++)
- dev_kfree_skb(info->skb[i]);
- return 0;
-}
-
-/*D:510 The network device probe function is basically a standard ethernet
- * device setup. It reads the "struct lguest_device_desc" and sets the "struct
- * net_device". Oh, the line-by-line excitement! Let's skip over it. :*/
-static int lguestnet_probe(struct lguest_device *lgdev)
-{
- int err, irqf = IRQF_SHARED;
- struct net_device *dev;
- struct lguestnet_info *info;
- struct lguest_device_desc *desc = &lguest_devices[lgdev->index];
-
- pr_debug("lguest_net: probing for device %i\n", lgdev->index);
-
- dev = alloc_etherdev(sizeof(struct lguestnet_info));
- if (!dev)
- return -ENOMEM;
-
- /* Ethernet defaults with some changes */
- ether_setup(dev);
- dev->set_mac_address = NULL;
-
- dev->dev_addr[0] = 0x02; /* set local assignment bit (IEEE802) */
- dev->dev_addr[1] = 0x00;
- memcpy(&dev->dev_addr[2], &lguest_data.guestid, 2);
- dev->dev_addr[4] = 0x00;
- dev->dev_addr[5] = 0x00;
-
- dev->open = lguestnet_open;
- dev->stop = lguestnet_close;
- dev->hard_start_xmit = lguestnet_start_xmit;
-
- /* We don't actually support multicast yet, but turning on/off
- * promisc also calls dev->set_multicast_list. */
- dev->set_multicast_list = lguestnet_set_multicast;
- SET_NETDEV_DEV(dev, &lgdev->dev);
-
- /* The network code complains if you have "scatter-gather" capability
- * if you don't also handle checksums (it seem that would be
- * "illogical"). So we use a lie of omission and don't tell it that we
- * can handle scattered packets unless we also don't want checksums,
- * even though to us they're completely independent. */
- if (desc->features & LGUEST_NET_F_NOCSUM)
- dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
-
- info = netdev_priv(dev);
- info->mapsize = PAGE_SIZE * desc->num_pages;
- info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT);
- info->lgdev = lgdev;
- info->peer = lguest_map(info->peer_phys, desc->num_pages);
- if (!info->peer) {
- err = -ENOMEM;
- goto free;
- }
-
- /* This stores our peerid (upper bits reserved for future). */
- info->me = (desc->features & (info->mapsize-1));
-
- err = register_netdev(dev);
- if (err) {
- pr_debug("lguestnet: registering device failed\n");
- goto unmap;
- }
-
- if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
- irqf |= IRQF_SAMPLE_RANDOM;
- if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet",
- dev) != 0) {
- pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev));
- goto unregister;
- }
-
- pr_debug("lguestnet: registered device %s\n", dev->name);
- /* Finally, we put the "struct net_device" in the generic "struct
- * lguest_device"s private pointer. Again, it's not necessary, but
- * makes sure the cool kernel kids don't tease us. */
- lgdev->private = dev;
- return 0;
-
-unregister:
- unregister_netdev(dev);
-unmap:
- lguest_unmap(info->peer);
-free:
- free_netdev(dev);
- return err;
-}
-
-static struct lguest_driver lguestnet_drv = {
- .name = "lguestnet",
- .owner = THIS_MODULE,
- .device_type = LGUEST_DEVICE_T_NET,
- .probe = lguestnet_probe,
-};
-
-static __init int lguestnet_init(void)
-{
- return register_lguest_driver(&lguestnet_drv);
-}
-module_init(lguestnet_init);
-
-MODULE_DESCRIPTION("Lguest network driver");
-MODULE_LICENSE("GPL");
-
-/*D:580
- * This is the last of the Drivers, and with this we have covered the many and
- * wonderous and fine (and boring) details of the Guest.
- *
- * "make Launcher" beckons, where we answer questions like "Where do Guests
- * come from?", and "What do you do when someone asks for optimization?"
- */
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 6471d33afb7..50648738d67 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -736,7 +736,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
- err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000);
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
if (err)
mlx4_err(dev, "INIT_HCA returns %d\n", err);
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index 4b3c109d5ea..887633b207d 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -60,7 +60,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
+ __free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}
@@ -70,7 +70,7 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
for (i = 0; i < chunk->npages; ++i)
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
- lowmem_page_address(chunk->mem[i].page),
+ lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
@@ -95,10 +95,13 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
- mem->page = alloc_pages(gfp_mask, order);
- if (!mem->page)
+ struct page *page;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
return -ENOMEM;
+ sg_set_page(mem, page);
mem->length = PAGE_SIZE << order;
mem->offset = 0;
return 0;
@@ -145,6 +148,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (!chunk)
goto fail;
+ sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
@@ -334,7 +338,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
* been assigned to.
*/
if (chunk->mem[i].length > offset) {
- page = chunk->mem[i].page;
+ page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
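
The mlx4 ICM hunks follow the same scatterlist rework: sg_init_table() must run before any entry is touched so the end-of-table marker in page_link is valid, and pages are stored via sg_set_page() (the two-argument form in this kernel) rather than by assigning ->page directly. A sketch of the allocation pattern the patch establishes:

	sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);	/* set end markers */
	...
	page = alloc_pages(gfp_mask, order);
	if (!page)
		return -ENOMEM;
	sg_set_page(mem, page);		/* encodes page into page_link */
	mem->length = PAGE_SIZE << order;
	mem->offset = 0;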
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 5b41e8bdd6b..651c2699d5e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -3274,6 +3274,7 @@ static const struct ethtool_ops mv643xx_ethtool_ops = {
.get_drvinfo = mv643xx_get_drvinfo,
.get_link = mv643xx_eth_get_link,
.set_sg = ethtool_op_set_sg,
+ .get_sset_count = mv643xx_get_sset_count,
.get_ethtool_stats = mv643xx_get_ethtool_stats,
.get_strings = mv643xx_get_strings,
.nway_reset = mv643xx_eth_nway_restart,
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index ed1f9bbb2a3..112ab079ce7 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3103,31 +3103,12 @@ static int niu_alloc_tx_ring_info(struct niu *np,
static void niu_size_rbr(struct niu *np, struct rx_ring_info *rp)
{
- u16 bs;
+ u16 bss;
- switch (PAGE_SIZE) {
- case 4 * 1024:
- case 8 * 1024:
- case 16 * 1024:
- case 32 * 1024:
- rp->rbr_block_size = PAGE_SIZE;
- rp->rbr_blocks_per_page = 1;
- break;
+ bss = min(PAGE_SHIFT, 15);
- default:
- if (PAGE_SIZE % (32 * 1024) == 0)
- bs = 32 * 1024;
- else if (PAGE_SIZE % (16 * 1024) == 0)
- bs = 16 * 1024;
- else if (PAGE_SIZE % (8 * 1024) == 0)
- bs = 8 * 1024;
- else if (PAGE_SIZE % (4 * 1024) == 0)
- bs = 4 * 1024;
- else
- BUG();
- rp->rbr_block_size = bs;
- rp->rbr_blocks_per_page = PAGE_SIZE / bs;
- }
+ rp->rbr_block_size = 1 << bss;
+ rp->rbr_blocks_per_page = 1 << (PAGE_SHIFT-bss);
rp->rbr_sizes[0] = 256;
rp->rbr_sizes[1] = 1024;
@@ -7902,12 +7883,7 @@ static int __init niu_init(void)
{
int err = 0;
- BUILD_BUG_ON((PAGE_SIZE < 4 * 1024) ||
- ((PAGE_SIZE > 32 * 1024) &&
- ((PAGE_SIZE % (32 * 1024)) != 0 &&
- (PAGE_SIZE % (16 * 1024)) != 0 &&
- (PAGE_SIZE % (8 * 1024)) != 0 &&
- (PAGE_SIZE % (4 * 1024)) != 0)));
+ BUILD_BUG_ON(PAGE_SIZE < 4 * 1024);
niu_debug = netif_msg_init(debug, NIU_MSG_DEFAULT);
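
Worked through, the new computation reproduces the old switch table for every power-of-two page size of at least 4 KB, which is why the BUILD_BUG_ON shrinks to a single lower bound:

	/* bss = min(PAGE_SHIFT, 15) caps the RBR block size at 32 KB:
	 *   PAGE_SHIFT = 12 (4 KB):  block  4 KB, 1 block/page
	 *   PAGE_SHIFT = 14 (16 KB): block 16 KB, 1 block/page
	 *   PAGE_SHIFT = 16 (64 KB): block 32 KB, 2 blocks/page
	 */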
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index c0b6d19d145..bcb0885011c 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -55,7 +55,7 @@
#include <linux/mm.h>
#include <linux/ppp_defs.h>
#include <linux/ppp-comp.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include "ppp_mppe.h"
@@ -68,9 +68,7 @@ MODULE_VERSION("1.0.2");
static unsigned int
setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
{
- sg[0].page = virt_to_page(address);
- sg[0].offset = offset_in_page(address);
- sg[0].length = length;
+ sg_init_one(sg, address, length);
return length;
}
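
sg_init_one() replaces the open-coded page/offset/length triple and also sets up the list markers that the chained-scatterlist code now requires. Roughly, for the single-entry case (assuming the 2.6.24-era helpers):

	sg_init_table(sg, 1);		/* zero the entry, mark it as last */
	sg_set_buf(sg, address, length);/* derive page + offset from the
					 * virtual address */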
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 419c00cbe6e..e8960f294a6 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -44,7 +44,8 @@
printk( "Assertion failed! %s,%s,%s,line=%d\n", \
#expr,__FILE__,__FUNCTION__,__LINE__); \
}
-#define dprintk(fmt, args...) do { printk(PFX fmt, ## args); } while (0)
+#define dprintk(fmt, args...) \
+ do { printk(KERN_DEBUG PFX fmt, ## args); } while (0)
#else
#define assert(expr) do {} while (0)
#define dprintk(fmt, args...) do {} while (0)
@@ -111,19 +112,15 @@ enum mac_version {
RTL_GIGA_MAC_VER_05 = 0x05, // 8110SCd
RTL_GIGA_MAC_VER_06 = 0x06, // 8110SCe
RTL_GIGA_MAC_VER_11 = 0x0b, // 8168Bb
- RTL_GIGA_MAC_VER_12 = 0x0c, // 8168Be 8168Bf
- RTL_GIGA_MAC_VER_13 = 0x0d, // 8101Eb 8101Ec
- RTL_GIGA_MAC_VER_14 = 0x0e, // 8101
- RTL_GIGA_MAC_VER_15 = 0x0f // 8101
-};
-
-enum phy_version {
- RTL_GIGA_PHY_VER_C = 0x03, /* PHY Reg 0x03 bit0-3 == 0x0000 */
- RTL_GIGA_PHY_VER_D = 0x04, /* PHY Reg 0x03 bit0-3 == 0x0000 */
- RTL_GIGA_PHY_VER_E = 0x05, /* PHY Reg 0x03 bit0-3 == 0x0000 */
- RTL_GIGA_PHY_VER_F = 0x06, /* PHY Reg 0x03 bit0-3 == 0x0001 */
- RTL_GIGA_PHY_VER_G = 0x07, /* PHY Reg 0x03 bit0-3 == 0x0002 */
- RTL_GIGA_PHY_VER_H = 0x08, /* PHY Reg 0x03 bit0-3 == 0x0003 */
+ RTL_GIGA_MAC_VER_12 = 0x0c, // 8168Be
+ RTL_GIGA_MAC_VER_13 = 0x0d, // 8101Eb
+ RTL_GIGA_MAC_VER_14 = 0x0e, // 8101 ?
+ RTL_GIGA_MAC_VER_15 = 0x0f, // 8101 ?
+ RTL_GIGA_MAC_VER_16 = 0x11, // 8101Ec
+ RTL_GIGA_MAC_VER_17 = 0x10, // 8168Bf
+ RTL_GIGA_MAC_VER_18 = 0x12, // 8168CP
+ RTL_GIGA_MAC_VER_19 = 0x13, // 8168C
+ RTL_GIGA_MAC_VER_20 = 0x14 // 8168C
};
#define _R(NAME,MAC,MASK) \
@@ -144,7 +141,12 @@ static const struct {
_R("RTL8168b/8111b", RTL_GIGA_MAC_VER_12, 0xff7e1880), // PCI-E
_R("RTL8101e", RTL_GIGA_MAC_VER_13, 0xff7e1880), // PCI-E 8139
_R("RTL8100e", RTL_GIGA_MAC_VER_14, 0xff7e1880), // PCI-E 8139
- _R("RTL8100e", RTL_GIGA_MAC_VER_15, 0xff7e1880) // PCI-E 8139
+ _R("RTL8100e", RTL_GIGA_MAC_VER_15, 0xff7e1880), // PCI-E 8139
+ _R("RTL8168b/8111b", RTL_GIGA_MAC_VER_17, 0xff7e1880), // PCI-E
+ _R("RTL8101e", RTL_GIGA_MAC_VER_16, 0xff7e1880), // PCI-E
+ _R("RTL8168cp/8111cp", RTL_GIGA_MAC_VER_18, 0xff7e1880), // PCI-E
+ _R("RTL8168c/8111c", RTL_GIGA_MAC_VER_19, 0xff7e1880), // PCI-E
+ _R("RTL8168c/8111c", RTL_GIGA_MAC_VER_20, 0xff7e1880) // PCI-E
};
#undef _R
@@ -165,7 +167,7 @@ static struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 },
- { PCI_DEVICE(0x1259, 0xc107), 0, 0, RTL_CFG_0 },
+ { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(0x16ec, 0x0116), 0, 0, RTL_CFG_0 },
{ PCI_VENDOR_ID_LINKSYS, 0x1032,
PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 },
@@ -277,6 +279,7 @@ enum rtl_register_content {
TxDMAShift = 8, /* DMA burst value (0-7) is shift this many bits */
/* Config1 register p.24 */
+ MSIEnable = (1 << 5), /* Enable Message Signaled Interrupt */
PMEnable = (1 << 0), /* Power Management Enable */
/* Config2 register p. 25 */
@@ -380,17 +383,20 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
+enum features {
+ RTL_FEATURE_WOL = (1 << 0),
+ RTL_FEATURE_MSI = (1 << 1),
+};
+
struct rtl8169_private {
void __iomem *mmio_addr; /* memory map physical address */
struct pci_dev *pci_dev; /* Index of PCI device */
struct net_device *dev;
struct napi_struct napi;
- struct net_device_stats stats; /* statistics of net device */
spinlock_t lock; /* spin lock flag */
u32 msg_enable;
int chipset;
int mac_version;
- int phy_version;
u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
u32 dirty_rx;
@@ -420,7 +426,7 @@ struct rtl8169_private {
unsigned int (*phy_reset_pending)(void __iomem *);
unsigned int (*link_ok)(void __iomem *);
struct delayed_work task;
- unsigned wol_enabled : 1;
+ unsigned features;
};
MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
@@ -626,7 +632,10 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
RTL_W8(Cfg9346, Cfg9346_Lock);
- tp->wol_enabled = (wol->wolopts) ? 1 : 0;
+ if (wol->wolopts)
+ tp->features |= RTL_FEATURE_WOL;
+ else
+ tp->features &= ~RTL_FEATURE_WOL;
spin_unlock_irq(&tp->lock);
@@ -707,7 +716,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
/* This tweak comes straight from Realtek's driver. */
if ((speed == SPEED_100) && (duplex == DUPLEX_HALF) &&
- (tp->mac_version == RTL_GIGA_MAC_VER_13)) {
+ ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_16))) {
auto_nego = ADVERTISE_100HALF | ADVERTISE_CSMA;
}
}
@@ -715,7 +725,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
/* The 8100e/8101e do Fast Ethernet only. */
if ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
(tp->mac_version == RTL_GIGA_MAC_VER_14) ||
- (tp->mac_version == RTL_GIGA_MAC_VER_15)) {
+ (tp->mac_version == RTL_GIGA_MAC_VER_15) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_16)) {
if ((giga_ctrl & (ADVERTISE_1000FULL | ADVERTISE_1000HALF)) &&
netif_msg_link(tp)) {
printk(KERN_INFO "%s: PHY does not support 1000Mbps.\n",
@@ -726,7 +737,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
- if (tp->mac_version == RTL_GIGA_MAC_VER_12) {
+ if ((tp->mac_version == RTL_GIGA_MAC_VER_12) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_17)) {
/* Vendor specific (0x1f) and reserved (0x0e) MII registers. */
mdio_write(ioaddr, 0x1f, 0x0000);
mdio_write(ioaddr, 0x0e, 0x0000);
@@ -1104,26 +1116,51 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
*/
const struct {
u32 mask;
+ u32 val;
int mac_version;
} mac_info[] = {
- { 0x38800000, RTL_GIGA_MAC_VER_15 },
- { 0x38000000, RTL_GIGA_MAC_VER_12 },
- { 0x34000000, RTL_GIGA_MAC_VER_13 },
- { 0x30800000, RTL_GIGA_MAC_VER_14 },
- { 0x30000000, RTL_GIGA_MAC_VER_11 },
- { 0x98000000, RTL_GIGA_MAC_VER_06 },
- { 0x18000000, RTL_GIGA_MAC_VER_05 },
- { 0x10000000, RTL_GIGA_MAC_VER_04 },
- { 0x04000000, RTL_GIGA_MAC_VER_03 },
- { 0x00800000, RTL_GIGA_MAC_VER_02 },
- { 0x00000000, RTL_GIGA_MAC_VER_01 } /* Catch-all */
+ /* 8168C family. */
+ { 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_18 },
+ { 0x7cf00000, 0x3c000000, RTL_GIGA_MAC_VER_19 },
+ { 0x7cf00000, 0x3c200000, RTL_GIGA_MAC_VER_20 },
+ { 0x7c800000, 0x3c000000, RTL_GIGA_MAC_VER_20 },
+
+ /* 8168B family. */
+ { 0x7cf00000, 0x38000000, RTL_GIGA_MAC_VER_12 },
+ { 0x7cf00000, 0x38500000, RTL_GIGA_MAC_VER_17 },
+ { 0x7c800000, 0x38000000, RTL_GIGA_MAC_VER_17 },
+ { 0x7c800000, 0x30000000, RTL_GIGA_MAC_VER_11 },
+
+ /* 8101 family. */
+ { 0x7cf00000, 0x34000000, RTL_GIGA_MAC_VER_13 },
+ { 0x7cf00000, 0x34200000, RTL_GIGA_MAC_VER_16 },
+ { 0x7c800000, 0x34000000, RTL_GIGA_MAC_VER_16 },
+ /* FIXME: where did these entries come from ? -- FR */
+ { 0xfc800000, 0x38800000, RTL_GIGA_MAC_VER_15 },
+ { 0xfc800000, 0x30800000, RTL_GIGA_MAC_VER_14 },
+
+ /* 8110 family. */
+ { 0xfc800000, 0x98000000, RTL_GIGA_MAC_VER_06 },
+ { 0xfc800000, 0x18000000, RTL_GIGA_MAC_VER_05 },
+ { 0xfc800000, 0x10000000, RTL_GIGA_MAC_VER_04 },
+ { 0xfc800000, 0x04000000, RTL_GIGA_MAC_VER_03 },
+ { 0xfc800000, 0x00800000, RTL_GIGA_MAC_VER_02 },
+ { 0xfc800000, 0x00000000, RTL_GIGA_MAC_VER_01 },
+
+ { 0x00000000, 0x00000000, RTL_GIGA_MAC_VER_01 } /* Catch-all */
}, *p = mac_info;
u32 reg;
- reg = RTL_R32(TxConfig) & 0xfc800000;
- while ((reg & p->mask) != p->mask)
+ reg = RTL_R32(TxConfig);
+ while ((reg & p->mask) != p->val)
p++;
tp->mac_version = p->mac_version;
+
+ if (p->mask == 0x00000000) {
+ struct pci_dev *pdev = tp->pci_dev;
+
+ dev_info(&pdev->dev, "unknown MAC (%08x)\n", reg);
+ }
}
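
MAC version detection thus moves from a shared 0xfc800000 prefix mask to per-entry (mask, val) pairs, so families with different significant TxConfig bits can coexist in one table. A worked example of the walk, using the 8168C values above:

	/* reg = 0x3c200000 (TxConfig):
	 *   reg & 0x7c800000 = 0x3c000000 != 0x3c800000  -> skip VER_18
	 *   reg & 0x7cf00000 = 0x3c200000 != 0x3c000000  -> skip VER_19
	 *   reg & 0x7cf00000 = 0x3c200000 == 0x3c200000  -> VER_20
	 * The final all-zero mask matches any value, so the loop always
	 * terminates at the VER_01 catch-all. */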
static void rtl8169_print_mac_version(struct rtl8169_private *tp)
@@ -1131,54 +1168,21 @@ static void rtl8169_print_mac_version(struct rtl8169_private *tp)
dprintk("mac_version = 0x%02x\n", tp->mac_version);
}
-static void rtl8169_get_phy_version(struct rtl8169_private *tp,
- void __iomem *ioaddr)
-{
- const struct {
- u16 mask;
- u16 set;
- int phy_version;
- } phy_info[] = {
- { 0x000f, 0x0002, RTL_GIGA_PHY_VER_G },
- { 0x000f, 0x0001, RTL_GIGA_PHY_VER_F },
- { 0x000f, 0x0000, RTL_GIGA_PHY_VER_E },
- { 0x0000, 0x0000, RTL_GIGA_PHY_VER_D } /* Catch-all */
- }, *p = phy_info;
+struct phy_reg {
u16 reg;
+ u16 val;
+};
- reg = mdio_read(ioaddr, MII_PHYSID2) & 0xffff;
- while ((reg & p->mask) != p->set)
- p++;
- tp->phy_version = p->phy_version;
-}
-
-static void rtl8169_print_phy_version(struct rtl8169_private *tp)
+static void rtl_phy_write(void __iomem *ioaddr, struct phy_reg *regs, int len)
{
- struct {
- int version;
- char *msg;
- u32 reg;
- } phy_print[] = {
- { RTL_GIGA_PHY_VER_G, "RTL_GIGA_PHY_VER_G", 0x0002 },
- { RTL_GIGA_PHY_VER_F, "RTL_GIGA_PHY_VER_F", 0x0001 },
- { RTL_GIGA_PHY_VER_E, "RTL_GIGA_PHY_VER_E", 0x0000 },
- { RTL_GIGA_PHY_VER_D, "RTL_GIGA_PHY_VER_D", 0x0000 },
- { 0, NULL, 0x0000 }
- }, *p;
-
- for (p = phy_print; p->msg; p++) {
- if (tp->phy_version == p->version) {
- dprintk("phy_version == %s (%04x)\n", p->msg, p->reg);
- return;
- }
+ while (len-- > 0) {
+ mdio_write(ioaddr, regs->reg, regs->val);
+ regs++;
}
- dprintk("phy_version == Unknown\n");
}
-static void rtl8169_hw_phy_config(struct net_device *dev)
+static void rtl8169s_hw_phy_config(void __iomem *ioaddr)
{
- struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct {
u16 regs[5]; /* Beware of bit-sign propagation */
} phy_magic[5] = { {
@@ -1211,33 +1215,9 @@ static void rtl8169_hw_phy_config(struct net_device *dev)
}, *p = phy_magic;
unsigned int i;
- rtl8169_print_mac_version(tp);
- rtl8169_print_phy_version(tp);
-
- if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
- return;
- if (tp->phy_version >= RTL_GIGA_PHY_VER_H)
- return;
-
- dprintk("MAC version != 0 && PHY version == 0 or 1\n");
- dprintk("Do final_reg2.cfg\n");
-
- /* Shazam ! */
-
- if (tp->mac_version == RTL_GIGA_MAC_VER_04) {
- mdio_write(ioaddr, 31, 0x0002);
- mdio_write(ioaddr, 1, 0x90d0);
- mdio_write(ioaddr, 31, 0x0000);
- return;
- }
-
- if ((tp->mac_version != RTL_GIGA_MAC_VER_02) &&
- (tp->mac_version != RTL_GIGA_MAC_VER_03))
- return;
-
- mdio_write(ioaddr, 31, 0x0001); //w 31 2 0 1
- mdio_write(ioaddr, 21, 0x1000); //w 21 15 0 1000
- mdio_write(ioaddr, 24, 0x65c7); //w 24 15 0 65c7
+ mdio_write(ioaddr, 0x1f, 0x0001); //w 31 2 0 1
+ mdio_write(ioaddr, 0x15, 0x1000); //w 21 15 0 1000
+ mdio_write(ioaddr, 0x18, 0x65c7); //w 24 15 0 65c7
rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0); //w 4 11 11 0
for (i = 0; i < ARRAY_SIZE(phy_magic); i++, p++) {
@@ -1250,7 +1230,115 @@ static void rtl8169_hw_phy_config(struct net_device *dev)
rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 1); //w 4 11 11 1
rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0); //w 4 11 11 0
}
- mdio_write(ioaddr, 31, 0x0000); //w 31 2 0 0
+ mdio_write(ioaddr, 0x1f, 0x0000); //w 31 2 0 0
+}
+
+static void rtl8169sb_hw_phy_config(void __iomem *ioaddr)
+{
+ struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0002 },
+ { 0x01, 0x90d0 },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+static void rtl8168b_hw_phy_config(void __iomem *ioaddr)
+{
+ struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0000 },
+ { 0x10, 0xf41b },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168cp_hw_phy_config(void __iomem *ioaddr)
+{
+ struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0000 },
+ { 0x1d, 0x0f00 },
+ { 0x1f, 0x0002 },
+ { 0x0c, 0x1ec8 },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168c_hw_phy_config(void __iomem *ioaddr)
+{
+ struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x12, 0x2300 },
+ { 0x1f, 0x0002 },
+ { 0x00, 0x88d4 },
+ { 0x01, 0x82b1 },
+ { 0x03, 0x7002 },
+ { 0x08, 0x9e30 },
+ { 0x09, 0x01f0 },
+ { 0x0a, 0x5500 },
+ { 0x0c, 0x00c8 },
+ { 0x1f, 0x0003 },
+ { 0x12, 0xc096 },
+ { 0x16, 0x000a },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168cx_hw_phy_config(void __iomem *ioaddr)
+{
+ struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0000 },
+ { 0x12, 0x2300 },
+ { 0x1f, 0x0003 },
+ { 0x16, 0x0f0a },
+ { 0x1f, 0x0000 },
+ { 0x1f, 0x0002 },
+ { 0x0c, 0x7eb8 },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl_hw_phy_config(struct net_device *dev)
+{
+ struct rtl8169_private *tp = netdev_priv(dev);
+ void __iomem *ioaddr = tp->mmio_addr;
+
+ rtl8169_print_mac_version(tp);
+
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_01:
+ break;
+ case RTL_GIGA_MAC_VER_02:
+ case RTL_GIGA_MAC_VER_03:
+ rtl8169s_hw_phy_config(ioaddr);
+ break;
+ case RTL_GIGA_MAC_VER_04:
+ rtl8169sb_hw_phy_config(ioaddr);
+ break;
+ case RTL_GIGA_MAC_VER_11:
+ case RTL_GIGA_MAC_VER_12:
+ case RTL_GIGA_MAC_VER_17:
+ rtl8168b_hw_phy_config(ioaddr);
+ break;
+ case RTL_GIGA_MAC_VER_18:
+ rtl8168cp_hw_phy_config(ioaddr);
+ break;
+ case RTL_GIGA_MAC_VER_19:
+ rtl8168c_hw_phy_config(ioaddr);
+ break;
+ case RTL_GIGA_MAC_VER_20:
+ rtl8168cx_hw_phy_config(ioaddr);
+ break;
+ default:
+ break;
+ }
}
static void rtl8169_phy_timer(unsigned long __opaque)
@@ -1262,7 +1350,6 @@ static void rtl8169_phy_timer(unsigned long __opaque)
unsigned long timeout = RTL8169_PHY_TIMEOUT;
assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
- assert(tp->phy_version < RTL_GIGA_PHY_VER_H);
if (!(tp->phy_1000_ctrl_reg & ADVERTISE_1000FULL))
return;
@@ -1297,8 +1384,7 @@ static inline void rtl8169_delete_timer(struct net_device *dev)
struct rtl8169_private *tp = netdev_priv(dev);
struct timer_list *timer = &tp->timer;
- if ((tp->mac_version <= RTL_GIGA_MAC_VER_01) ||
- (tp->phy_version >= RTL_GIGA_PHY_VER_H))
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
return;
del_timer_sync(timer);
@@ -1309,8 +1395,7 @@ static inline void rtl8169_request_timer(struct net_device *dev)
struct rtl8169_private *tp = netdev_priv(dev);
struct timer_list *timer = &tp->timer;
- if ((tp->mac_version <= RTL_GIGA_MAC_VER_01) ||
- (tp->phy_version >= RTL_GIGA_PHY_VER_H))
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
return;
mod_timer(timer, jiffies + RTL8169_PHY_TIMEOUT);
@@ -1362,7 +1447,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
{
void __iomem *ioaddr = tp->mmio_addr;
- rtl8169_hw_phy_config(dev);
+ rtl_hw_phy_config(dev);
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
RTL_W8(0x82, 0x01);
@@ -1457,6 +1542,7 @@ static const struct rtl_cfg_info {
unsigned int align;
u16 intr_event;
u16 napi_event;
+ unsigned msi;
} rtl_cfg_infos [] = {
[RTL_CFG_0] = {
.hw_start = rtl_hw_start_8169,
@@ -1464,7 +1550,8 @@ static const struct rtl_cfg_info {
.align = 0,
.intr_event = SYSErr | LinkChg | RxOverflow |
RxFIFOOver | TxErr | TxOK | RxOK | RxErr,
- .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow
+ .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow,
+ .msi = 0
},
[RTL_CFG_1] = {
.hw_start = rtl_hw_start_8168,
@@ -1472,7 +1559,8 @@ static const struct rtl_cfg_info {
.align = 8,
.intr_event = SYSErr | LinkChg | RxOverflow |
TxErr | TxOK | RxOK | RxErr,
- .napi_event = TxErr | TxOK | RxOK | RxOverflow
+ .napi_event = TxErr | TxOK | RxOK | RxOverflow,
+ .msi = RTL_FEATURE_MSI
},
[RTL_CFG_2] = {
.hw_start = rtl_hw_start_8101,
@@ -1480,10 +1568,39 @@ static const struct rtl_cfg_info {
.align = 8,
.intr_event = SYSErr | LinkChg | RxOverflow | PCSTimeout |
RxFIFOOver | TxErr | TxOK | RxOK | RxErr,
- .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow
+ .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow,
+ .msi = RTL_FEATURE_MSI
}
};
+/* Cfg9346_Unlock assumed. */
+static unsigned rtl_try_msi(struct pci_dev *pdev, void __iomem *ioaddr,
+ const struct rtl_cfg_info *cfg)
+{
+ unsigned msi = 0;
+ u8 cfg2;
+
+ cfg2 = RTL_R8(Config2) & ~MSIEnable;
+ if (cfg->msi) {
+ if (pci_enable_msi(pdev)) {
+ dev_info(&pdev->dev, "no MSI. Back to INTx.\n");
+ } else {
+ cfg2 |= MSIEnable;
+ msi = RTL_FEATURE_MSI;
+ }
+ }
+ RTL_W8(Config2, cfg2);
+ return msi;
+}
+
+static void rtl_disable_msi(struct pci_dev *pdev, struct rtl8169_private *tp)
+{
+ if (tp->features & RTL_FEATURE_MSI) {
+ pci_disable_msi(pdev);
+ tp->features &= ~RTL_FEATURE_MSI;
+ }
+}
+
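rtl_try_msi() and rtl_disable_msi() above capture a common probe-time
idiom: attempt pci_enable_msi(), fall back to legacy INTx on failure,
and record the outcome in a feature flag so request_irq() can drop
IRQF_SHARED (an MSI vector is never shared) and the error and remove
paths know to disable MSI again. A compile-only userspace sketch of
that control flow, with hypothetical stand-ins for the PCI calls:

    #include <stdio.h>

    #define FEATURE_MSI 0x1

    /* Stand-in returning 0 on success, like pci_enable_msi(). */
    static int fake_pci_enable_msi(void)
    {
        return -1;    /* pretend the platform has no MSI */
    }

    static void fake_pci_disable_msi(void)
    {
    }

    static unsigned try_msi(unsigned wanted)
    {
        if (!wanted)
            return 0;
        if (fake_pci_enable_msi()) {
            printf("no MSI, staying on INTx\n");
            return 0;
        }
        return FEATURE_MSI;
    }

    int main(void)
    {
        unsigned features = try_msi(FEATURE_MSI);

        /* request_irq() analogue: the shared flag is only needed
         * on the INTx path. */
        printf("irq flags: %s\n",
               (features & FEATURE_MSI) ? "0" : "IRQF_SHARED");

        if (features & FEATURE_MSI)
            fake_pci_disable_msi();    /* mirrored on remove/error */
        return 0;
    }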
static int __devinit
rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
@@ -1596,10 +1713,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Identify chip attached to board */
rtl8169_get_mac_version(tp, ioaddr);
- rtl8169_get_phy_version(tp, ioaddr);
rtl8169_print_mac_version(tp);
- rtl8169_print_phy_version(tp);
for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) {
if (tp->mac_version == rtl_chip_info[i].mac_version)
@@ -1619,6 +1734,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
RTL_W8(Cfg9346, Cfg9346_Unlock);
RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
RTL_W8(Config5, RTL_R8(Config5) & PMEStatus);
+ tp->features |= rtl_try_msi(pdev, ioaddr, cfg);
RTL_W8(Cfg9346, Cfg9346_Lock);
if (RTL_R8(PHYstatus) & TBI_Enable) {
@@ -1686,7 +1802,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = register_netdev(dev);
if (rc < 0)
- goto err_out_unmap_5;
+ goto err_out_msi_5;
pci_set_drvdata(pdev, dev);
@@ -1709,7 +1825,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
out:
return rc;
-err_out_unmap_5:
+err_out_msi_5:
+ rtl_disable_msi(pdev, tp);
iounmap(ioaddr);
err_out_free_res_4:
pci_release_regions(pdev);
@@ -1730,6 +1847,7 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
flush_scheduled_work();
unregister_netdev(dev);
+ rtl_disable_msi(pdev, tp);
rtl8169_release_board(pdev, dev, tp->mmio_addr);
pci_set_drvdata(pdev, NULL);
}
@@ -1773,7 +1891,8 @@ static int rtl8169_open(struct net_device *dev)
smp_mb();
- retval = request_irq(dev->irq, rtl8169_interrupt, IRQF_SHARED,
+ retval = request_irq(dev->irq, rtl8169_interrupt,
+ (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
dev->name, dev);
if (retval < 0)
goto err_release_ring_2;
@@ -1933,7 +2052,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
if ((tp->mac_version == RTL_GIGA_MAC_VER_02) ||
(tp->mac_version == RTL_GIGA_MAC_VER_03)) {
- dprintk(KERN_INFO PFX "Set MAC Reg C+CR Offset 0xE0. "
+ dprintk("Set MAC Reg C+CR Offset 0xE0. "
"Bit-3 and bit-14 MUST be 1\n");
tp->cp_cmd |= (1 << 14);
}
@@ -2029,7 +2148,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
- if (tp->mac_version == RTL_GIGA_MAC_VER_13) {
+ if ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_16)) {
pci_write_config_word(pdev, 0x68, 0x00);
pci_write_config_word(pdev, 0x69, 0x08);
}
@@ -2259,7 +2379,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
dev_kfree_skb(skb);
tx_skb->skb = NULL;
}
- tp->stats.tx_dropped++;
+ tp->dev->stats.tx_dropped++;
}
}
tp->cur_tx = tp->dirty_tx = 0;
@@ -2310,7 +2430,7 @@ static void rtl8169_reinit_task(struct work_struct *work)
ret = rtl8169_open(dev);
if (unlikely(ret < 0)) {
if (net_ratelimit() && netif_msg_drv(tp)) {
- printk(PFX KERN_ERR "%s: reinit failure (status = %d)."
+ printk(KERN_ERR PFX "%s: reinit failure (status = %d)."
" Rescheduling.\n", dev->name, ret);
}
rtl8169_schedule_work(dev, rtl8169_reinit_task);
@@ -2340,9 +2460,10 @@ static void rtl8169_reset_task(struct work_struct *work)
rtl8169_init_ring_indexes(tp);
rtl_hw_start(dev);
netif_wake_queue(dev);
+ rtl8169_check_link_status(dev, tp, tp->mmio_addr);
} else {
if (net_ratelimit() && netif_msg_intr(tp)) {
- printk(PFX KERN_EMERG "%s: Rx buffers shortage\n",
+ printk(KERN_EMERG PFX "%s: Rx buffers shortage\n",
dev->name);
}
rtl8169_schedule_work(dev, rtl8169_reset_task);
@@ -2496,7 +2617,7 @@ err_stop:
netif_stop_queue(dev);
ret = NETDEV_TX_BUSY;
err_update_stats:
- tp->stats.tx_dropped++;
+ dev->stats.tx_dropped++;
goto out;
}
@@ -2571,8 +2692,8 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
if (status & DescOwn)
break;
- tp->stats.tx_bytes += len;
- tp->stats.tx_packets++;
+ dev->stats.tx_bytes += len;
+ dev->stats.tx_packets++;
rtl8169_unmap_tx_skb(tp->pci_dev, tx_skb, tp->TxDescArray + entry);
@@ -2672,14 +2793,14 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
"%s: Rx ERROR. status = %08x\n",
dev->name, status);
}
- tp->stats.rx_errors++;
+ dev->stats.rx_errors++;
if (status & (RxRWT | RxRUNT))
- tp->stats.rx_length_errors++;
+ dev->stats.rx_length_errors++;
if (status & RxCRC)
- tp->stats.rx_crc_errors++;
+ dev->stats.rx_crc_errors++;
if (status & RxFOVF) {
rtl8169_schedule_work(dev, rtl8169_reset_task);
- tp->stats.rx_fifo_errors++;
+ dev->stats.rx_fifo_errors++;
}
rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
} else {
@@ -2694,8 +2815,8 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
* sized frames.
*/
if (unlikely(rtl8169_fragmented_frame(status))) {
- tp->stats.rx_dropped++;
- tp->stats.rx_length_errors++;
+ dev->stats.rx_dropped++;
+ dev->stats.rx_length_errors++;
rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
continue;
}
@@ -2719,8 +2840,8 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
rtl8169_rx_skb(skb);
dev->last_rx = jiffies;
- tp->stats.rx_bytes += pkt_size;
- tp->stats.rx_packets++;
+ dev->stats.rx_bytes += pkt_size;
+ dev->stats.rx_packets++;
}
/* Workaround for AMD platforms. */
@@ -2881,7 +3002,7 @@ core_down:
rtl8169_asic_down(ioaddr);
/* Update the error counts. */
- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+ dev->stats.rx_missed_errors += RTL_R32(RxMissed);
RTL_W32(RxMissed, 0);
spin_unlock_irq(&tp->lock);
@@ -2984,7 +3105,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
(tp->mac_version == RTL_GIGA_MAC_VER_12) ||
(tp->mac_version == RTL_GIGA_MAC_VER_13) ||
(tp->mac_version == RTL_GIGA_MAC_VER_14) ||
- (tp->mac_version == RTL_GIGA_MAC_VER_15)) {
+ (tp->mac_version == RTL_GIGA_MAC_VER_15) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_16) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_17)) {
mc_filter[0] = 0xffffffff;
mc_filter[1] = 0xffffffff;
}
@@ -3011,12 +3134,12 @@ static struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
if (netif_running(dev)) {
spin_lock_irqsave(&tp->lock, flags);
- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+ dev->stats.rx_missed_errors += RTL_R32(RxMissed);
RTL_W32(RxMissed, 0);
spin_unlock_irqrestore(&tp->lock, flags);
}
- return &tp->stats;
+ return &dev->stats;
}
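
The tp->stats to dev->stats substitutions running through this patch
retire the driver-private copy of the counters in favor of the ones
embedded in struct net_device, so get_stats() reduces to returning
&dev->stats. A toy sketch of the resulting shape (the structs here
are miniatures, not the kernel's definitions):

    #include <stdio.h>

    struct net_device_stats {
        unsigned long tx_packets, tx_dropped;
    };

    struct net_device {
        struct net_device_stats stats;    /* owned by the core */
    };

    /* No private counter copy left: just hand back the core's. */
    static struct net_device_stats *get_stats(struct net_device *dev)
    {
        return &dev->stats;
    }

    int main(void)
    {
        struct net_device dev = { { 0, 0 } };

        dev.stats.tx_packets++;    /* hot paths bump dev->stats */
        printf("tx_packets=%lu\n", get_stats(&dev)->tx_packets);
        return 0;
    }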
#ifdef CONFIG_PM
@@ -3037,14 +3160,15 @@ static int rtl8169_suspend(struct pci_dev *pdev, pm_message_t state)
rtl8169_asic_down(ioaddr);
- tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+ dev->stats.rx_missed_errors += RTL_R32(RxMissed);
RTL_W32(RxMissed, 0);
spin_unlock_irq(&tp->lock);
out_pci_suspend:
pci_save_state(pdev);
- pci_enable_wake(pdev, pci_choose_state(pdev, state), tp->wol_enabled);
+ pci_enable_wake(pdev, pci_choose_state(pdev, state),
+ (tp->features & RTL_FEATURE_WOL) ? 1 : 0);
pci_set_power_state(pdev, pci_choose_state(pdev, state));
return 0;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 014dc2cfe4d..09440d783e6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@
#define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "3.84"
-#define DRV_MODULE_RELDATE "October 12, 2007"
+#define DRV_MODULE_VERSION "3.85"
+#define DRV_MODULE_RELDATE "October 18, 2007"
#define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0
@@ -200,6 +200,7 @@ static struct pci_device_id tg3_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906M)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5784)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5764)},
+ {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5723)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)},
{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)},
@@ -5028,10 +5029,7 @@ static int tg3_poll_fw(struct tg3 *tp)
/* Save PCI command register before chip reset */
static void tg3_save_pci_state(struct tg3 *tp)
{
- u32 val;
-
- pci_read_config_dword(tp->pdev, TG3PCI_COMMAND, &val);
- tp->pci_cmd = val;
+ pci_read_config_word(tp->pdev, PCI_COMMAND, &tp->pci_cmd);
}
/* Restore PCI state after chip reset */
@@ -5054,7 +5052,7 @@ static void tg3_restore_pci_state(struct tg3 *tp)
PCISTATE_ALLOW_APE_SHMEM_WR;
pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val);
- pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd);
+ pci_write_config_word(tp->pdev, PCI_COMMAND, tp->pci_cmd);
if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE,
@@ -10820,9 +10818,24 @@ out_not_found:
strcpy(tp->board_part_number, "none");
}
+static int __devinit tg3_fw_img_is_valid(struct tg3 *tp, u32 offset)
+{
+ u32 val;
+
+ if (tg3_nvram_read_swab(tp, offset, &val) ||
+ (val & 0xfc000000) != 0x0c000000 ||
+ tg3_nvram_read_swab(tp, offset + 4, &val) ||
+ val != 0)
+ return 0;
+
+ return 1;
+}
+
static void __devinit tg3_read_fw_ver(struct tg3 *tp)
{
u32 val, offset, start;
+ u32 ver_offset;
+ int i, bcnt;
if (tg3_nvram_read_swab(tp, 0, &val))
return;
@@ -10835,29 +10848,71 @@ static void __devinit tg3_read_fw_ver(struct tg3 *tp)
return;
offset = tg3_nvram_logical_addr(tp, offset);
- if (tg3_nvram_read_swab(tp, offset, &val))
+
+ if (!tg3_fw_img_is_valid(tp, offset) ||
+ tg3_nvram_read_swab(tp, offset + 8, &ver_offset))
return;
- if ((val & 0xfc000000) == 0x0c000000) {
- u32 ver_offset, addr;
- int i;
+ offset = offset + ver_offset - start;
+ for (i = 0; i < 16; i += 4) {
+ if (tg3_nvram_read(tp, offset + i, &val))
+ return;
- if (tg3_nvram_read_swab(tp, offset + 4, &val) ||
- tg3_nvram_read_swab(tp, offset + 8, &ver_offset))
+ val = le32_to_cpu(val);
+ memcpy(tp->fw_ver + i, &val, 4);
+ }
+
+ if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF) ||
+ (tp->tg3_flags & TG3_FLG3_ENABLE_APE))
+ return;
+
+ for (offset = TG3_NVM_DIR_START;
+ offset < TG3_NVM_DIR_END;
+ offset += TG3_NVM_DIRENT_SIZE) {
+ if (tg3_nvram_read_swab(tp, offset, &val))
return;
- if (val != 0)
+ if ((val >> TG3_NVM_DIRTYPE_SHIFT) == TG3_NVM_DIRTYPE_ASFINI)
+ break;
+ }
+
+ if (offset == TG3_NVM_DIR_END)
+ return;
+
+ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
+ start = 0x08000000;
+ else if (tg3_nvram_read_swab(tp, offset - 4, &start))
+ return;
+
+ if (tg3_nvram_read_swab(tp, offset + 4, &offset) ||
+ !tg3_fw_img_is_valid(tp, offset) ||
+ tg3_nvram_read_swab(tp, offset + 8, &val))
+ return;
+
+ offset += val - start;
+
+ bcnt = strlen(tp->fw_ver);
+
+ tp->fw_ver[bcnt++] = ',';
+ tp->fw_ver[bcnt++] = ' ';
+
+ for (i = 0; i < 4; i++) {
+ if (tg3_nvram_read(tp, offset, &val))
return;
- addr = offset + ver_offset - start;
- for (i = 0; i < 16; i += 4) {
- if (tg3_nvram_read(tp, addr + i, &val))
- return;
+ val = le32_to_cpu(val);
+ offset += sizeof(val);
- val = cpu_to_le32(val);
- memcpy(tp->fw_ver + i, &val, 4);
+ if (bcnt > TG3_VER_SIZE - sizeof(val)) {
+ memcpy(&tp->fw_ver[bcnt], &val, TG3_VER_SIZE - bcnt);
+ break;
}
+
+ memcpy(&tp->fw_ver[bcnt], &val, sizeof(val));
+ bcnt += sizeof(val);
}
+
+ tp->fw_ver[TG3_VER_SIZE - 1] = 0;
}
static struct pci_dev * __devinit tg3_find_peer(struct tg3 *);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 6dbdad2b8f8..1d5b2a3dd29 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1540,6 +1540,12 @@
#define TG3_EEPROM_MAGIC_HW 0xabcd
#define TG3_EEPROM_MAGIC_HW_MSK 0xffff
+#define TG3_NVM_DIR_START 0x18
+#define TG3_NVM_DIR_END 0x78
+#define TG3_NVM_DIRENT_SIZE 0xc
+#define TG3_NVM_DIRTYPE_SHIFT 24
+#define TG3_NVM_DIRTYPE_ASFINI 1
+
/* 32K Window into NIC internal memory */
#define NIC_SRAM_WIN_BASE 0x00008000
@@ -2415,10 +2421,11 @@ struct tg3 {
#define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */
u32 led_ctrl;
- u32 pci_cmd;
+ u16 pci_cmd;
char board_part_number[24];
- char fw_ver[16];
+#define TG3_VER_SIZE 32
+ char fw_ver[TG3_VER_SIZE];
u32 nic_sram_data_cfg;
u32 pci_clock_ctrl;
struct pci_dev *pdev_peer;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
new file mode 100644
index 00000000000..e396c9d2af8
--- /dev/null
+++ b/drivers/net/virtio_net.c
@@ -0,0 +1,435 @@
+/* A simple network driver using virtio.
+ *
+ * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+//#define DEBUG
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_net.h>
+#include <linux/scatterlist.h>
+
+/* FIXME: MTU in config. */
+#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
+
+struct virtnet_info
+{
+ struct virtio_device *vdev;
+ struct virtqueue *rvq, *svq;
+ struct net_device *dev;
+ struct napi_struct napi;
+
+ /* Number of input buffers, and max we've ever had. */
+ unsigned int num, max;
+
+ /* Receive & send queues. */
+ struct sk_buff_head recv;
+ struct sk_buff_head send;
+};
+
+static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
+{
+ return (struct virtio_net_hdr *)skb->cb;
+}
+
+static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
+{
+ sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
+}
+
+static bool skb_xmit_done(struct virtqueue *rvq)
+{
+ struct virtnet_info *vi = rvq->vdev->priv;
+
+ /* In case we were waiting for output buffers. */
+ netif_wake_queue(vi->dev);
+ return true;
+}
+
+static void receive_skb(struct net_device *dev, struct sk_buff *skb,
+ unsigned len)
+{
+ struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+
+ if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
+ pr_debug("%s: short packet %i\n", dev->name, len);
+ dev->stats.rx_length_errors++;
+ goto drop;
+ }
+ len -= sizeof(struct virtio_net_hdr);
+ BUG_ON(len > MAX_PACKET_LEN);
+
+ skb_trim(skb, len);
+ skb->protocol = eth_type_trans(skb, dev);
+ pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
+ ntohs(skb->protocol), skb->len, skb->pkt_type);
+ dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_packets++;
+
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ pr_debug("Needs csum!\n");
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = hdr->csum_start;
+ skb->csum_offset = hdr->csum_offset;
+ if (skb->csum_start > skb->len - 2
+ || skb->csum_offset > skb->len - 2) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "%s: csum=%u/%u len=%u\n",
+ dev->name, skb->csum_start,
+ skb->csum_offset, skb->len);
+ goto frame_err;
+ }
+ }
+
+ if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ pr_debug("GSO!\n");
+ switch (hdr->gso_type) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV4_ECN:
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCP_ECN;
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_WARNING "%s: bad gso type %u.\n",
+ dev->name, hdr->gso_type);
+ goto frame_err;
+ }
+
+ skb_shinfo(skb)->gso_size = hdr->gso_size;
+ if (skb_shinfo(skb)->gso_size == 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "%s: zero gso size.\n",
+ dev->name);
+ goto frame_err;
+ }
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ }
+
+ netif_receive_skb(skb);
+ return;
+
+frame_err:
+ dev->stats.rx_frame_errors++;
+drop:
+ dev_kfree_skb(skb);
+}
+
+static void try_fill_recv(struct virtnet_info *vi)
+{
+ struct sk_buff *skb;
+ struct scatterlist sg[1+MAX_SKB_FRAGS];
+ int num, err;
+
+ for (;;) {
+ skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
+ if (unlikely(!skb))
+ break;
+
+ skb_put(skb, MAX_PACKET_LEN);
+ vnet_hdr_to_sg(sg, skb);
+ num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ skb_queue_head(&vi->recv, skb);
+
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
+ if (err) {
+ skb_unlink(skb, &vi->recv);
+ kfree_skb(skb);
+ break;
+ }
+ vi->num++;
+ }
+ if (unlikely(vi->num > vi->max))
+ vi->max = vi->num;
+ vi->rvq->vq_ops->kick(vi->rvq);
+}
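
try_fill_recv() keeps posting receive buffers until either allocation
fails or add_buf() reports the virtqueue is full, and tracks the
buffer count plus its high-water mark so the poll loop can refill
once the ring drops below half. A schematic sketch of the loop, with
a hypothetical fixed-size ring in place of the virtqueue:

    #include <stdio.h>
    #include <stdlib.h>

    #define RING_SIZE 8

    static int posted;

    /* Fails once the ring is full, the way add_buf() does when the
     * virtqueue runs out of free descriptors. */
    static int post(void *buf)
    {
        (void)buf;    /* a real ring would store it */
        if (posted >= RING_SIZE)
            return -1;
        posted++;
        return 0;
    }

    int main(void)
    {
        unsigned num = 0, max = 0;

        for (;;) {
            void *buf = malloc(64);
            if (!buf)
                break;
            if (post(buf)) {    /* ring full: undo and stop */
                free(buf);
                break;
            }
            num++;    /* buffers stay posted; the device hands
                       * them back through the completion path */
        }
        if (num > max)
            max = num;
        /* the kick() notifying the other side would go here */
        printf("posted %u buffers (high-water mark %u)\n", num, max);
        return 0;
    }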
+
+static bool skb_recv_done(struct virtqueue *rvq)
+{
+ struct virtnet_info *vi = rvq->vdev->priv;
+ netif_rx_schedule(vi->dev, &vi->napi);
+ /* Suppress further interrupts. */
+ return false;
+}
+
+static int virtnet_poll(struct napi_struct *napi, int budget)
+{
+ struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
+ struct sk_buff *skb = NULL;
+ unsigned int len, received = 0;
+
+again:
+ while (received < budget &&
+ (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
+ __skb_unlink(skb, &vi->recv);
+ receive_skb(vi->dev, skb, len);
+ vi->num--;
+ received++;
+ }
+
+ /* FIXME: If we oom and completely run out of inbufs, we need
+ * to start a timer trying to fill more. */
+ if (vi->num < vi->max / 2)
+ try_fill_recv(vi);
+
+ /* All done? */
+ if (!skb) {
+ netif_rx_complete(vi->dev, napi);
+ if (unlikely(!vi->rvq->vq_ops->restart(vi->rvq))
+ && netif_rx_reschedule(vi->dev, napi))
+ goto again;
+ }
+
+ return received;
+}
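
virtnet_poll() follows the canonical NAPI shape: reap completed
buffers up to the budget, refill when the ring runs low, and only
once the queue is empty complete polling and re-enable notifications,
re-polling if new work slipped in during that race window. A
schematic sketch of the loop, with hypothetical queue helpers:

    #include <stdbool.h>
    #include <stdio.h>

    static int pending = 5;    /* completed buffers waiting */

    static bool get_buf(void)
    {
        if (!pending)
            return false;
        pending--;
        return true;
    }

    /* Re-enables notifications; returns false if work raced in,
     * which is exactly the case virtnet_poll() re-checks. */
    static bool restart(void)
    {
        return pending == 0;
    }

    static int poll(int budget)
    {
        int received = 0;
        bool got = false;

    again:
        while (received < budget && (got = get_buf()))
            received++;

        if (!got) {    /* queue ran dry: try to complete */
            if (!restart())
                goto again;    /* lost the race: keep polling */
        }
        return received;
    }

    int main(void)
    {
        printf("processed %d buffers\n", poll(16));
        return 0;
    }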
+
+static void free_old_xmit_skbs(struct virtnet_info *vi)
+{
+ struct sk_buff *skb;
+ unsigned int len;
+
+ while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
+ pr_debug("Sent skb %p\n", skb);
+ __skb_unlink(skb, &vi->send);
+ vi->dev->stats.tx_bytes += len;
+ vi->dev->stats.tx_packets++;
+ kfree_skb(skb);
+ }
+}
+
+static int start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int num, err;
+ struct scatterlist sg[1+MAX_SKB_FRAGS];
+ struct virtio_net_hdr *hdr;
+ const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
+ DECLARE_MAC_BUF(mac);
+
+ pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));
+
+ free_old_xmit_skbs(vi);
+
+ /* Encode metadata header at front. */
+ hdr = skb_vnet_hdr(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = skb->csum_start - skb_headroom(skb);
+ hdr->csum_offset = skb->csum_offset;
+ } else {
+ hdr->flags = 0;
+ hdr->csum_offset = hdr->csum_start = 0;
+ }
+
+ if (skb_is_gso(skb)) {
+ hdr->gso_size = skb_shinfo(skb)->gso_size;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN;
+ else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ else
+ BUG();
+ } else {
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ hdr->gso_size = 0;
+ }
+
+ vnet_hdr_to_sg(sg, skb);
+ num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ __skb_queue_head(&vi->send, skb);
+ err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
+ if (err) {
+ pr_debug("%s: virtio not prepared to send\n", dev->name);
+ skb_unlink(skb, &vi->send);
+ netif_stop_queue(dev);
+ return NETDEV_TX_BUSY;
+ }
+ vi->svq->vq_ops->kick(vi->svq);
+
+ return 0;
+}
+
+static int virtnet_open(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ try_fill_recv(vi);
+
+ /* If we didn't even get one input buffer, we're useless. */
+ if (vi->num == 0)
+ return -ENOMEM;
+
+ napi_enable(&vi->napi);
+ return 0;
+}
+
+static int virtnet_close(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct sk_buff *skb;
+
+ napi_disable(&vi->napi);
+
+ /* networking core has neutered skb_xmit_done/skb_recv_done, so don't
+ * worry about races vs. get(). */
+ vi->rvq->vq_ops->shutdown(vi->rvq);
+ while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
+ kfree_skb(skb);
+ vi->num--;
+ }
+ vi->svq->vq_ops->shutdown(vi->svq);
+ while ((skb = __skb_dequeue(&vi->send)) != NULL)
+ kfree_skb(skb);
+
+ BUG_ON(vi->num != 0);
+ return 0;
+}
+
+static int virtnet_probe(struct virtio_device *vdev)
+{
+ int err;
+ unsigned int len;
+ struct net_device *dev;
+ struct virtnet_info *vi;
+ void *token;
+
+ /* Allocate ourselves a network device with room for our info */
+ dev = alloc_etherdev(sizeof(struct virtnet_info));
+ if (!dev)
+ return -ENOMEM;
+
+ /* Set up network device as normal. */
+ ether_setup(dev);
+ dev->open = virtnet_open;
+ dev->stop = virtnet_close;
+ dev->hard_start_xmit = start_xmit;
+ dev->features = NETIF_F_HIGHDMA;
+ SET_NETDEV_DEV(dev, &vdev->dev);
+
+ /* Do we support "hardware" checksums? */
+ token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_F, &len);
+ if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_NO_CSUM)) {
+ /* This opens up the world of extra features. */
+ dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
+ if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4))
+ dev->features |= NETIF_F_TSO;
+ if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_UFO))
+ dev->features |= NETIF_F_UFO;
+ if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4_ECN))
+ dev->features |= NETIF_F_TSO_ECN;
+ if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO6))
+ dev->features |= NETIF_F_TSO6;
+ }
+
+ /* Configuration may specify what MAC to use. Otherwise random. */
+ token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_MAC_F, &len);
+ if (token) {
+ dev->addr_len = len;
+ vdev->config->get(vdev, token, dev->dev_addr, len);
+ } else
+ random_ether_addr(dev->dev_addr);
+
+ /* Set up our device-specific information */
+ vi = netdev_priv(dev);
+ netif_napi_add(dev, &vi->napi, virtnet_poll, 16);
+ vi->dev = dev;
+ vi->vdev = vdev;
+
+ /* We expect two virtqueues, receive then send. */
+ vi->rvq = vdev->config->find_vq(vdev, skb_recv_done);
+ if (IS_ERR(vi->rvq)) {
+ err = PTR_ERR(vi->rvq);
+ goto free;
+ }
+
+ vi->svq = vdev->config->find_vq(vdev, skb_xmit_done);
+ if (IS_ERR(vi->svq)) {
+ err = PTR_ERR(vi->svq);
+ goto free_recv;
+ }
+
+ /* Initialize our empty receive and send queues. */
+ skb_queue_head_init(&vi->recv);
+ skb_queue_head_init(&vi->send);
+
+ err = register_netdev(dev);
+ if (err) {
+ pr_debug("virtio_net: registering device failed\n");
+ goto free_send;
+ }
+ pr_debug("virtnet: registered device %s\n", dev->name);
+ vdev->priv = vi;
+ return 0;
+
+free_send:
+ vdev->config->del_vq(vi->svq);
+free_recv:
+ vdev->config->del_vq(vi->rvq);
+free:
+ free_netdev(dev);
+ return err;
+}
+
+static void virtnet_remove(struct virtio_device *vdev)
+{
+ unregister_netdev(vdev->priv);
+ free_netdev(vdev->priv);
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct virtio_driver virtio_net = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtnet_probe,
+ .remove = __devexit_p(virtnet_remove),
+};
+
+static int __init init(void)
+{
+ return register_virtio_driver(&virtio_net);
+}
+
+static void __exit fini(void)
+{
+ unregister_virtio_driver(&virtio_net);
+}
+module_init(init);
+module_exit(fini);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio network driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b3c4dbff26b..7c60cbd85dc 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -42,6 +42,7 @@
#include <linux/reboot.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
#include <asm/byteorder.h>
#include <asm/cache.h> /* for L1_CACHE_BYTES */
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index 5b86ee5c1ee..5eace9e66e1 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -557,44 +557,6 @@ lba_bios_init(void)
#ifdef CONFIG_64BIT
/*
-** Determine if a device is already configured.
-** If so, reserve it resources.
-**
-** Read PCI cfg command register and see if I/O or MMIO is enabled.
-** PAT has to enable the devices it's using.
-**
-** Note: resources are fixed up before we try to claim them.
-*/
-static void
-lba_claim_dev_resources(struct pci_dev *dev)
-{
- u16 cmd;
- int i, srch_flags;
-
- (void) pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
- srch_flags = (cmd & PCI_COMMAND_IO) ? IORESOURCE_IO : 0;
- if (cmd & PCI_COMMAND_MEMORY)
- srch_flags |= IORESOURCE_MEM;
-
- if (!srch_flags)
- return;
-
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- if (dev->resource[i].flags & srch_flags) {
- pci_claim_resource(dev, i);
- DBG(" claimed %s %d [%lx,%lx]/%lx\n",
- pci_name(dev), i,
- dev->resource[i].start,
- dev->resource[i].end,
- dev->resource[i].flags
- );
- }
- }
-}
-
-
-/*
* truncate_pat_collision: Deal with overlaps or outright collisions
* between PAT PDC reported ranges.
*
@@ -653,7 +615,6 @@ truncate_pat_collision(struct resource *root, struct resource *new)
}
#else
-#define lba_claim_dev_resources(dev) do { } while (0)
#define truncate_pat_collision(r,n) (0)
#endif
@@ -684,8 +645,12 @@ lba_fixup_bus(struct pci_bus *bus)
** pci_alloc_primary_bus() mangles this.
*/
if (bus->self) {
+ int i;
/* PCI-PCI Bridge */
pci_read_bridge_bases(bus);
+ for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+ pci_claim_resource(bus->self, i);
+ }
} else {
/* Host-PCI Bridge */
int err, i;
@@ -803,6 +768,9 @@ lba_fixup_bus(struct pci_bus *bus)
DBG("lba_fixup_bus() WTF? 0x%lx [%lx/%lx] XXX",
res->flags, res->start, res->end);
}
+ if ((i != PCI_ROM_RESOURCE) ||
+ (res->flags & IORESOURCE_ROM_ENABLE))
+ pci_claim_resource(dev, i);
}
#ifdef FBB_SUPPORT
@@ -814,11 +782,6 @@ lba_fixup_bus(struct pci_bus *bus)
bus->bridge_ctl &= ~(status & PCI_STATUS_FAST_BACK);
#endif
- if (is_pdc_pat()) {
- /* Claim resources for PDC's devices */
- lba_claim_dev_resources(dev);
- }
-
/*
** P2PB's have no IRQs. ignore them.
*/
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index fc4bde259dc..ebb09e98d21 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -282,6 +282,7 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun
unsigned short i;
char in[count+1], *temp;
struct device *dev;
+ int ret;
if (!entry || !buf || !count)
return -EINVAL;
@@ -333,7 +334,9 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun
/* Update the symlink to the real device */
sysfs_remove_link(&entry->kobj, "device");
- sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
+ ret = sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
+ WARN_ON(ret);
+
write_unlock(&entry->rw_lock);
printk(KERN_INFO PDCS_PREFIX ": changed \"%s\" path to \"%s\"\n",
@@ -1003,8 +1006,10 @@ pdcs_register_pathentries(void)
entry->ready = 2;
/* Add a nice symlink to the real device */
- if (entry->dev)
- sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
+ if (entry->dev) {
+ err = sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
+ WARN_ON(err);
+ }
write_unlock(&entry->rw_lock);
}
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index d044c48323e..e527a0e1d6c 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -28,6 +28,7 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
+#include <linux/scatterlist.h>
#include <asm/byteorder.h>
#include <asm/io.h>
@@ -1909,8 +1910,8 @@ sba_driver_callback(struct parisc_device *dev)
global_ioc_cnt *= 2;
}
- printk(KERN_INFO "%s found %s at 0x%lx\n",
- MODULE_NAME, version, dev->hpa.start);
+ printk(KERN_INFO "%s found %s at 0x%llx\n",
+ MODULE_NAME, version, (unsigned long long)dev->hpa.start);
sba_dev = kzalloc(sizeof(struct sba_device), GFP_KERNEL);
if (!sba_dev) {
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 38cdf9fa36a..1e8d2d17f04 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -155,6 +155,7 @@ superio_init(struct pci_dev *pcidev)
struct superio_device *sio = &sio_dev;
struct pci_dev *pdev = sio->lio_pdev;
u16 word;
+ int ret;
if (sio->suckyio_irq_enabled)
return;
@@ -200,7 +201,8 @@ superio_init(struct pci_dev *pcidev)
pci_write_config_word (pdev, PCI_COMMAND, word);
pci_set_master (pdev);
- pci_enable_device(pdev);
+ ret = pci_enable_device(pdev);
+ BUG_ON(ret < 0); /* not too much we can do about this... */
/*
* Next project is programming the onboard interrupt controllers.
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 006054a4099..55505565073 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o
# Build the Hypertransport interrupt support
obj-$(CONFIG_HT_IRQ) += htirq.o
+# Build Intel IOMMU support
+obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
+
#
# Some architectures use the generic PCI setup functions
#
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
new file mode 100644
index 00000000000..5dfdfdac92e
--- /dev/null
+++ b/drivers/pci/dmar.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@intel.com>
+ * Copyright (C) Shaohua Li <shaohua.li@intel.com>
+ * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ * This file implements early detection/parsing of DMA Remapping Devices
+ * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
+ * tables.
+ */
+
+#include <linux/pci.h>
+#include <linux/dmar.h>
+
+#undef PREFIX
+#define PREFIX "DMAR:"
+
+/* No locks are needed as the DMA remapping hardware unit
+ * list is constructed at boot time and hotplug of
+ * these units is not supported by the architecture.
+ */
+LIST_HEAD(dmar_drhd_units);
+LIST_HEAD(dmar_rmrr_units);
+
+static struct acpi_table_header * __initdata dmar_tbl;
+
+static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
+{
+ /*
+ * add INCLUDE_ALL at the tail, so a scan of the list will find it
+ * at the very end.
+ */
+ if (drhd->include_all)
+ list_add_tail(&drhd->list, &dmar_drhd_units);
+ else
+ list_add(&drhd->list, &dmar_drhd_units);
+}
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+ list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
+ struct pci_dev **dev, u16 segment)
+{
+ struct pci_bus *bus;
+ struct pci_dev *pdev = NULL;
+ struct acpi_dmar_pci_path *path;
+ int count;
+
+ bus = pci_find_bus(segment, scope->bus);
+ path = (struct acpi_dmar_pci_path *)(scope + 1);
+ count = (scope->length - sizeof(struct acpi_dmar_device_scope))
+ / sizeof(struct acpi_dmar_pci_path);
+
+ while (count) {
+ if (pdev)
+ pci_dev_put(pdev);
+ /*
+ * Some BIOSes list nonexistent devices in the DMAR table;
+ * just ignore them
+ */
+ if (!bus) {
+ printk(KERN_WARNING
+ PREFIX "Device scope bus [%d] not found\n",
+ scope->bus);
+ break;
+ }
+ pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
+ if (!pdev) {
+ printk(KERN_WARNING PREFIX
+ "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ segment, bus->number, path->dev, path->fn);
+ break;
+ }
+ path++;
+ count--;
+ bus = pdev->subordinate;
+ }
+ if (!pdev) {
+ printk(KERN_WARNING PREFIX
+ "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ segment, scope->bus, path->dev, path->fn);
+ *dev = NULL;
+ return 0;
+ }
+ if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
+ pdev->subordinate) || (scope->entry_type ==
+ ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
+ pci_dev_put(pdev);
+ printk(KERN_WARNING PREFIX
+ "Device scope type does not match for %s\n",
+ pci_name(pdev));
+ return -EINVAL;
+ }
+ *dev = pdev;
+ return 0;
+}
+
+static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
+ struct pci_dev ***devices, u16 segment)
+{
+ struct acpi_dmar_device_scope *scope;
+ void * tmp = start;
+ int index;
+ int ret;
+
+ *cnt = 0;
+ while (start < end) {
+ scope = start;
+ if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
+ scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
+ (*cnt)++;
+ else
+ printk(KERN_WARNING PREFIX
+ "Unsupported device scope\n");
+ start += scope->length;
+ }
+ if (*cnt == 0)
+ return 0;
+
+ *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
+ if (!*devices)
+ return -ENOMEM;
+
+ start = tmp;
+ index = 0;
+ while (start < end) {
+ scope = start;
+ if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
+ scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
+ ret = dmar_parse_one_dev_scope(scope,
+ &(*devices)[index], segment);
+ if (ret) {
+ kfree(*devices);
+ return ret;
+ }
+ index++;
+ }
+ start += scope->length;
+ }
+
+ return 0;
+}
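
dmar_parse_dev_scope() makes two passes over a variable-length ACPI
table: the first pass only counts the entry types it understands,
sizing a single kcalloc() exactly, and the second pass fills the
array. A generic userspace sketch of the idiom over length-prefixed
records (the record layout here is invented):

    #include <stdio.h>
    #include <stdlib.h>

    struct rec {
        unsigned char type;
        unsigned char length;    /* total bytes, payload included */
    };

    #define WANTED_TYPE 1

    int main(void)
    {
        /* three fake records: type 1, type 2 (1 payload byte), type 1 */
        unsigned char table[] = { 1, 2, 2, 3, 0, 1, 2 };
        unsigned char *end = table + sizeof(table);
        unsigned char *p;
        struct rec **recs;
        int cnt = 0, index = 0;

        /* Pass 1: count the records we care about. */
        for (p = table; p < end; p += ((struct rec *)p)->length)
            if (((struct rec *)p)->type == WANTED_TYPE)
                cnt++;

        recs = calloc(cnt, sizeof(*recs));
        if (!recs)
            return 1;

        /* Pass 2: store pointers into the exactly-sized array. */
        for (p = table; p < end; p += ((struct rec *)p)->length)
            if (((struct rec *)p)->type == WANTED_TYPE)
                recs[index++] = (struct rec *)p;

        printf("matched %d records\n", index);
        free(recs);
        return 0;
    }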
+
+/**
+ * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
+ * structure, which uniquely represents one DMA remapping hardware unit
+ * present in the platform
+ */
+static int __init
+dmar_parse_one_drhd(struct acpi_dmar_header *header)
+{
+ struct acpi_dmar_hardware_unit *drhd;
+ struct dmar_drhd_unit *dmaru;
+ int ret = 0;
+ static int include_all;
+
+ dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
+ if (!dmaru)
+ return -ENOMEM;
+
+ drhd = (struct acpi_dmar_hardware_unit *)header;
+ dmaru->reg_base_addr = drhd->address;
+ dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
+
+ if (!dmaru->include_all)
+ ret = dmar_parse_dev_scope((void *)(drhd + 1),
+ ((void *)drhd) + header->length,
+ &dmaru->devices_cnt, &dmaru->devices,
+ drhd->segment);
+ else {
+ /* Only allow one INCLUDE_ALL */
+ if (include_all) {
+ printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
+ "device scope is allowed\n");
+ ret = -EINVAL;
+ }
+ include_all = 1;
+ }
+
+ if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
+ kfree(dmaru);
+ else
+ dmar_register_drhd_unit(dmaru);
+ return ret;
+}
+
+static int __init
+dmar_parse_one_rmrr(struct acpi_dmar_header *header)
+{
+ struct acpi_dmar_reserved_memory *rmrr;
+ struct dmar_rmrr_unit *rmrru;
+ int ret = 0;
+
+ rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
+ if (!rmrru)
+ return -ENOMEM;
+
+ rmrr = (struct acpi_dmar_reserved_memory *)header;
+ rmrru->base_address = rmrr->base_address;
+ rmrru->end_address = rmrr->end_address;
+ ret = dmar_parse_dev_scope((void *)(rmrr + 1),
+ ((void *)rmrr) + header->length,
+ &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
+
+ if (ret || (rmrru->devices_cnt == 0))
+ kfree(rmrru);
+ else
+ dmar_register_rmrr_unit(rmrru);
+ return ret;
+}
+
+static void __init
+dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
+{
+ struct acpi_dmar_hardware_unit *drhd;
+ struct acpi_dmar_reserved_memory *rmrr;
+
+ switch (header->type) {
+ case ACPI_DMAR_TYPE_HARDWARE_UNIT:
+ drhd = (struct acpi_dmar_hardware_unit *)header;
+ printk (KERN_INFO PREFIX
+ "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
+ drhd->flags, drhd->address);
+ break;
+ case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+ rmrr = (struct acpi_dmar_reserved_memory *)header;
+
+ printk (KERN_INFO PREFIX
+ "RMRR base: 0x%016Lx end: 0x%016Lx\n",
+ rmrr->base_address, rmrr->end_address);
+ break;
+ }
+}
+
+/**
+ * parse_dmar_table - parses the DMA reporting table
+ */
+static int __init
+parse_dmar_table(void)
+{
+ struct acpi_table_dmar *dmar;
+ struct acpi_dmar_header *entry_header;
+ int ret = 0;
+
+ dmar = (struct acpi_table_dmar *)dmar_tbl;
+ if (!dmar)
+ return -ENODEV;
+
+ if (!dmar->width) {
+ printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n");
+ return -EINVAL;
+ }
+
+ printk (KERN_INFO PREFIX "Host address width %d\n",
+ dmar->width + 1);
+
+ entry_header = (struct acpi_dmar_header *)(dmar + 1);
+ while (((unsigned long)entry_header) <
+ (((unsigned long)dmar) + dmar_tbl->length)) {
+ dmar_table_print_dmar_entry(entry_header);
+
+ switch (entry_header->type) {
+ case ACPI_DMAR_TYPE_HARDWARE_UNIT:
+ ret = dmar_parse_one_drhd(entry_header);
+ break;
+ case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+ ret = dmar_parse_one_rmrr(entry_header);
+ break;
+ default:
+ printk(KERN_WARNING PREFIX
+ "Unknown DMAR structure type\n");
+ ret = 0; /* for forward compatibility */
+ break;
+ }
+ if (ret)
+ break;
+
+ entry_header = ((void *)entry_header + entry_header->length);
+ }
+ return ret;
+}
+
+
+int __init dmar_table_init(void)
+{
+ parse_dmar_table();
+ if (list_empty(&dmar_drhd_units)) {
+ printk(KERN_INFO PREFIX "No DMAR devices found\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+/**
+ * early_dmar_detect - checks to see if the platform supports DMAR devices
+ */
+int __init early_dmar_detect(void)
+{
+ acpi_status status = AE_OK;
+
+ /* if we can find the DMAR table, then there are DMAR devices */
+ status = acpi_get_table(ACPI_SIG_DMAR, 0,
+ (struct acpi_table_header **)&dmar_tbl);
+
+ if (ACPI_SUCCESS(status) && !dmar_tbl) {
+ printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+ status = AE_NOT_FOUND;
+ }
+
+ return (ACPI_SUCCESS(status) ? 1 : 0);
+}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
new file mode 100644
index 00000000000..0c4ab3b0727
--- /dev/null
+++ b/drivers/pci/intel-iommu.c
@@ -0,0 +1,2271 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@intel.com>
+ * Copyright (C) Shaohua Li <shaohua.li@intel.com>
+ * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/dma-mapping.h>
+#include <linux/mempool.h>
+#include "iova.h"
+#include "intel-iommu.h"
+#include <asm/proto.h> /* force_iommu in this header on x86-64 */
+#include <asm/cacheflush.h>
+#include <asm/iommu.h>
+#include "pci.h"
+
+#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+
+#define IOAPIC_RANGE_START (0xfee00000)
+#define IOAPIC_RANGE_END (0xfeefffff)
+#define IOVA_START_ADDR (0x1000)
+
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+
+#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
+
+#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+
+static void domain_remove_dev_info(struct dmar_domain *domain);
+
+static int dmar_disabled;
+static int __initdata dmar_map_gfx = 1;
+static int dmar_forcedac;
+
+#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
+static DEFINE_SPINLOCK(device_domain_lock);
+static LIST_HEAD(device_domain_list);
+
+static int __init intel_iommu_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+ while (*str) {
+ if (!strncmp(str, "off", 3)) {
+ dmar_disabled = 1;
+ printk(KERN_INFO"Intel-IOMMU: disabled\n");
+ } else if (!strncmp(str, "igfx_off", 8)) {
+ dmar_map_gfx = 0;
+ printk(KERN_INFO
+ "Intel-IOMMU: disable GFX device mapping\n");
+ } else if (!strncmp(str, "forcedac", 8)) {
+ printk (KERN_INFO
+ "Intel-IOMMU: Forcing DAC for PCI devices\n");
+ dmar_forcedac = 1;
+ }
+
+ str += strcspn(str, ",");
+ while (*str == ',')
+ str++;
+ }
+ return 0;
+}
+__setup("intel_iommu=", intel_iommu_setup);
+
+static struct kmem_cache *iommu_domain_cache;
+static struct kmem_cache *iommu_devinfo_cache;
+static struct kmem_cache *iommu_iova_cache;
+
+static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
+{
+ unsigned int flags;
+ void *vaddr;
+
+ /* trying to avoid low memory issues */
+ flags = current->flags & PF_MEMALLOC;
+ current->flags |= PF_MEMALLOC;
+ vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
+ current->flags &= (~PF_MEMALLOC | flags);
+ return vaddr;
+}
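
iommu_kmem_cache_alloc() sets PF_MEMALLOC for the duration of the
allocation and restores the caller's original bit with a single mask:
since flags holds only the saved bit, cur &= ~PF_MEMALLOC | flags
clears the bit exactly when it was clear on entry. A worked check of
that bit trick (the flag value here is illustrative):

    #include <stdio.h>

    #define PF_MEMALLOC 0x800    /* illustrative bit value */

    static unsigned set_and_restore(unsigned cur)
    {
        unsigned saved = cur & PF_MEMALLOC;    /* caller's bit only */

        cur |= PF_MEMALLOC;    /* ... the allocation runs here ... */

        /* saved set: mask is all-ones, no change;
         * saved clear: mask is ~PF_MEMALLOC, bit cleared. */
        cur &= ~PF_MEMALLOC | saved;
        return cur;
    }

    int main(void)
    {
        printf("was clear -> %#x\n", set_and_restore(0x4));
        printf("was set   -> %#x\n", set_and_restore(0x4 | PF_MEMALLOC));
        return 0;
    }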
+
+
+static inline void *alloc_pgtable_page(void)
+{
+ unsigned int flags;
+ void *vaddr;
+
+ /* trying to avoid low memory issues */
+ flags = current->flags & PF_MEMALLOC;
+ current->flags |= PF_MEMALLOC;
+ vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
+ current->flags &= (~PF_MEMALLOC | flags);
+ return vaddr;
+}
+
+static inline void free_pgtable_page(void *vaddr)
+{
+ free_page((unsigned long)vaddr);
+}
+
+static inline void *alloc_domain_mem(void)
+{
+ return iommu_kmem_cache_alloc(iommu_domain_cache);
+}
+
+static inline void free_domain_mem(void *vaddr)
+{
+ kmem_cache_free(iommu_domain_cache, vaddr);
+}
+
+static inline void * alloc_devinfo_mem(void)
+{
+ return iommu_kmem_cache_alloc(iommu_devinfo_cache);
+}
+
+static inline void free_devinfo_mem(void *vaddr)
+{
+ kmem_cache_free(iommu_devinfo_cache, vaddr);
+}
+
+struct iova *alloc_iova_mem(void)
+{
+ return iommu_kmem_cache_alloc(iommu_iova_cache);
+}
+
+void free_iova_mem(struct iova *iova)
+{
+ kmem_cache_free(iommu_iova_cache, iova);
+}
+
+static inline void __iommu_flush_cache(
+ struct intel_iommu *iommu, void *addr, int size)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+}
+
+/* Gets context entry for a given bus and devfn */
+static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
+ u8 bus, u8 devfn)
+{
+ struct root_entry *root;
+ struct context_entry *context;
+ unsigned long phy_addr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ root = &iommu->root_entry[bus];
+ context = get_context_addr_from_root(root);
+ if (!context) {
+ context = (struct context_entry *)alloc_pgtable_page();
+ if (!context) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return NULL;
+ }
+ __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
+ phy_addr = virt_to_phys((void *)context);
+ set_root_value(root, phy_addr);
+ set_root_present(root);
+ __iommu_flush_cache(iommu, root, sizeof(*root));
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return &context[devfn];
+}
+
+static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ struct root_entry *root;
+ struct context_entry *context;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ root = &iommu->root_entry[bus];
+ context = get_context_addr_from_root(root);
+ if (!context) {
+ ret = 0;
+ goto out;
+ }
+ ret = context_present(context[devfn]);
+out:
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return ret;
+}
+
+static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ struct root_entry *root;
+ struct context_entry *context;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ root = &iommu->root_entry[bus];
+ context = get_context_addr_from_root(root);
+ if (context) {
+ context_clear_entry(context[devfn]);
+ __iommu_flush_cache(iommu, &context[devfn],
+ sizeof(*context));
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void free_context_table(struct intel_iommu *iommu)
+{
+ struct root_entry *root;
+ int i;
+ unsigned long flags;
+ struct context_entry *context;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->root_entry) {
+ goto out;
+ }
+ for (i = 0; i < ROOT_ENTRY_NR; i++) {
+ root = &iommu->root_entry[i];
+ context = get_context_addr_from_root(root);
+ if (context)
+ free_pgtable_page(context);
+ }
+ free_pgtable_page(iommu->root_entry);
+ iommu->root_entry = NULL;
+out:
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+/* page table handling */
+#define LEVEL_STRIDE (9)
+#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
+
+static inline int agaw_to_level(int agaw)
+{
+ return agaw + 2;
+}
+
+static inline int agaw_to_width(int agaw)
+{
+ return 30 + agaw * LEVEL_STRIDE;
+}
+
+static inline int width_to_agaw(int width)
+{
+ return (width - 30) / LEVEL_STRIDE;
+}
+
+static inline unsigned int level_to_offset_bits(int level)
+{
+ return (12 + (level - 1) * LEVEL_STRIDE);
+}
+
+static inline int address_level_offset(u64 addr, int level)
+{
+ return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
+}
+
+static inline u64 level_mask(int level)
+{
+ return ((u64)-1 << level_to_offset_bits(level));
+}
+
+static inline u64 level_size(int level)
+{
+ return ((u64)1 << level_to_offset_bits(level));
+}
+
+static inline u64 align_to_level(u64 addr, int level)
+{
+ return ((addr + level_size(level) - 1) & level_mask(level));
+}
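
These helpers describe a radix page table with 9 bits per level over
4K pages: level 1 indexes address bits 12..20, level 2 bits 21..29,
and so on, while an AGAW of n gives a 30 + 9n bit address width
handled by n + 2 levels (so the default 48-bit width uses AGAW 2 and
4 levels). A quick userspace check of the arithmetic:

    #include <stdio.h>

    #define LEVEL_STRIDE 9
    #define LEVEL_MASK (((unsigned long long)1 << LEVEL_STRIDE) - 1)

    static int level_to_offset_bits(int level)
    {
        return 12 + (level - 1) * LEVEL_STRIDE;
    }

    static unsigned long long level_size(int level)
    {
        return 1ULL << level_to_offset_bits(level);
    }

    static int address_level_offset(unsigned long long addr, int level)
    {
        return (addr >> level_to_offset_bits(level)) & LEVEL_MASK;
    }

    int main(void)
    {
        unsigned long long addr = 0x123456789000ULL;
        int level;

        printf("agaw 2 -> width %d bits, %d levels\n",
               30 + 2 * LEVEL_STRIDE, 2 + 2);
        for (level = 1; level <= 4; level++)
            printf("level %d: entry covers %#llx bytes, index %d\n",
                   level, level_size(level),
                   address_level_offset(addr, level));
        return 0;
    }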
+
+static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
+{
+ int addr_width = agaw_to_width(domain->agaw);
+ struct dma_pte *parent, *pte = NULL;
+ int level = agaw_to_level(domain->agaw);
+ int offset;
+ unsigned long flags;
+
+ BUG_ON(!domain->pgd);
+
+ addr &= (((u64)1) << addr_width) - 1;
+ parent = domain->pgd;
+
+ spin_lock_irqsave(&domain->mapping_lock, flags);
+ while (level > 0) {
+ void *tmp_page;
+
+ offset = address_level_offset(addr, level);
+ pte = &parent[offset];
+ if (level == 1)
+ break;
+
+ if (!dma_pte_present(*pte)) {
+ tmp_page = alloc_pgtable_page();
+
+ if (!tmp_page) {
+ spin_unlock_irqrestore(&domain->mapping_lock,
+ flags);
+ return NULL;
+ }
+ __iommu_flush_cache(domain->iommu, tmp_page,
+ PAGE_SIZE_4K);
+ dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
+ /*
+ * high level table always sets r/w, last level page
+ * table control read/write
+ */
+ dma_set_pte_readable(*pte);
+ dma_set_pte_writable(*pte);
+ __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ }
+ parent = phys_to_virt(dma_pte_addr(*pte));
+ level--;
+ }
+
+ spin_unlock_irqrestore(&domain->mapping_lock, flags);
+ return pte;
+}
+
+/* return the address's pte at a specific level */
+static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
+ int level)
+{
+ struct dma_pte *parent, *pte = NULL;
+ int total = agaw_to_level(domain->agaw);
+ int offset;
+
+ parent = domain->pgd;
+ while (level <= total) {
+ offset = address_level_offset(addr, total);
+ pte = &parent[offset];
+ if (level == total)
+ return pte;
+
+ if (!dma_pte_present(*pte))
+ break;
+ parent = phys_to_virt(dma_pte_addr(*pte));
+ total--;
+ }
+ return NULL;
+}
+
+/* clear one page's page table */
+static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
+{
+ struct dma_pte *pte = NULL;
+
+ /* get last level pte */
+ pte = dma_addr_level_pte(domain, addr, 1);
+
+ if (pte) {
+ dma_clear_pte(*pte);
+ __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ }
+}
+
+/* clear the last level pte; a tlb flush should follow */
+static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
+{
+ int addr_width = agaw_to_width(domain->agaw);
+
+ start &= (((u64)1) << addr_width) - 1;
+ end &= (((u64)1) << addr_width) - 1;
+ /* in case it's partial page */
+ start = PAGE_ALIGN_4K(start);
+ end &= PAGE_MASK_4K;
+
+ /* we don't need lock here, nobody else touches the iova range */
+ while (start < end) {
+ dma_pte_clear_one(domain, start);
+ start += PAGE_SIZE_4K;
+ }
+}
+
+/* free page table pages. last level pte should already be cleared */
+static void dma_pte_free_pagetable(struct dmar_domain *domain,
+ u64 start, u64 end)
+{
+ int addr_width = agaw_to_width(domain->agaw);
+ struct dma_pte *pte;
+ int total = agaw_to_level(domain->agaw);
+ int level;
+ u64 tmp;
+
+ start &= (((u64)1) << addr_width) - 1;
+ end &= (((u64)1) << addr_width) - 1;
+
+ /* we don't need lock here, nobody else touches the iova range */
+ level = 2;
+ while (level <= total) {
+ tmp = align_to_level(start, level);
+ if (tmp >= end || (tmp + level_size(level) > end))
+ return;
+
+ while (tmp < end) {
+ pte = dma_addr_level_pte(domain, tmp, level);
+ if (pte) {
+ free_pgtable_page(
+ phys_to_virt(dma_pte_addr(*pte)));
+ dma_clear_pte(*pte);
+ __iommu_flush_cache(domain->iommu,
+ pte, sizeof(*pte));
+ }
+ tmp += level_size(level);
+ }
+ level++;
+ }
+ /* free pgd */
+ if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
+ free_pgtable_page(domain->pgd);
+ domain->pgd = NULL;
+ }
+}
+
+/* iommu handling */
+static int iommu_alloc_root_entry(struct intel_iommu *iommu)
+{
+ struct root_entry *root;
+ unsigned long flags;
+
+ root = (struct root_entry *)alloc_pgtable_page();
+ if (!root)
+ return -ENOMEM;
+
+ __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ iommu->root_entry = root;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return 0;
+}
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+ unsigned long start_time = jiffies;\
+ while (1) {\
+ sts = op (iommu->reg + offset);\
+ if (cond)\
+ break;\
+ if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
+ panic("DMAR hardware is malfunctioning\n");\
+ cpu_relax();\
+ }\
+}
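
IOMMU_WAIT_OP() is a busy-wait with a deadline: re-read a status
register until the condition holds, and give up loudly once a
wall-clock budget expires. A userspace sketch of the same shape using
time(2) in place of jiffies (names and timeout are placeholders):

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define OP_TIMEOUT 2    /* seconds; the patch allows 60 * HZ jiffies */

    /* Hypothetical status source: becomes ready on the third read. */
    static unsigned read_status(void)
    {
        static int reads;
        return ++reads >= 3 ? 0x1 : 0x0;
    }

    #define WAIT_OP(op, cond, sts)                                  \
    do {                                                            \
        time_t start = time(NULL);                                  \
        for (;;) {                                                  \
            sts = op();                                             \
            if (cond)                                               \
                break;                                              \
            if (time(NULL) > start + OP_TIMEOUT) {                  \
                fprintf(stderr, "hardware is malfunctioning\n");    \
                exit(1);                                            \
            }                                                       \
        }                                                           \
    } while (0)

    int main(void)
    {
        unsigned sts;

        WAIT_OP(read_status, (sts & 0x1), sts);
        printf("ready, sts=%#x\n", sts);
        return 0;
    }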
+
+static void iommu_set_root_entry(struct intel_iommu *iommu)
+{
+ void *addr;
+ u32 cmd, sts;
+ unsigned long flag;
+
+ addr = iommu->root_entry;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
+
+ cmd = iommu->gcmd | DMA_GCMD_SRTP;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure the hardware completes it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (sts & DMA_GSTS_RTPS), sts);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+{
+ u32 val;
+ unsigned long flag;
+
+ if (!cap_rwbf(iommu->cap))
+ return;
+ val = iommu->gcmd | DMA_GCMD_WBF;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(val, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure the hardware completes it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (!(val & DMA_GSTS_WBFS)), val);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+/* return value determines whether we need a write buffer flush */
+static int __iommu_flush_context(struct intel_iommu *iommu,
+ u16 did, u16 source_id, u8 function_mask, u64 type,
+ int non_present_entry_flush)
+{
+ u64 val = 0;
+ unsigned long flag;
+
+ /*
+ * In the non-present entry flush case, if the hardware doesn't cache
+ * non-present entries we do nothing; if it does cache them, we flush
+ * the entries of domain 0 (the domain id used to cache any
+ * non-present entries)
+ */
+ if (non_present_entry_flush) {
+ if (!cap_caching_mode(iommu->cap))
+ return 1;
+ else
+ did = 0;
+ }
+
+ switch (type) {
+ case DMA_CCMD_GLOBAL_INVL:
+ val = DMA_CCMD_GLOBAL_INVL;
+ break;
+ case DMA_CCMD_DOMAIN_INVL:
+ val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
+ break;
+ case DMA_CCMD_DEVICE_INVL:
+ val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
+ | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
+ break;
+ default:
+ BUG();
+ }
+ val |= DMA_CCMD_ICC;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
+
+ /* Make sure the hardware completes it */
+ IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
+ dmar_readq, (!(val & DMA_CCMD_ICC)), val);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ /* flushing the context entry will implicitly flush the write buffer */
+ return 0;
+}
+
+static inline int iommu_flush_context_global(struct intel_iommu *iommu,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
+ non_present_entry_flush);
+}
+
+static inline int iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
+ non_present_entry_flush);
+}
+
+static inline int iommu_flush_context_device(struct intel_iommu *iommu,
+ u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, did, source_id, function_mask,
+ DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
+}
+
+/* return value determines whether we need a write buffer flush */
+static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
+ u64 addr, unsigned int size_order, u64 type,
+ int non_present_entry_flush)
+{
+ int tlb_offset = ecap_iotlb_offset(iommu->ecap);
+ u64 val = 0, val_iva = 0;
+ unsigned long flag;
+
+ /*
+ * In the non-present entry flush case, if the hardware doesn't cache
+ * non-present entries we do nothing; if it does cache them, we flush
+ * the entries of domain 0 (the domain id used to cache any
+ * non-present entries)
+ */
+ if (non_present_entry_flush) {
+ if (!cap_caching_mode(iommu->cap))
+ return 1;
+ else
+ did = 0;
+ }
+
+ switch (type) {
+ case DMA_TLB_GLOBAL_FLUSH:
+ /* a global flush doesn't need to set IVA_REG */
+ val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
+ break;
+ case DMA_TLB_DSI_FLUSH:
+ val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
+ break;
+ case DMA_TLB_PSI_FLUSH:
+ val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
+ /* Note: always flush non-leaf currently */
+ val_iva = size_order | addr;
+ break;
+ default:
+ BUG();
+ }
+ /* Note: set drain read/write */
+#if 0
+ /*
+ * This is probably meant to be extra safe; it looks like we can
+ * ignore it without any impact.
+ */
+ if (cap_read_drain(iommu->cap))
+ val |= DMA_TLB_READ_DRAIN;
+#endif
+ if (cap_write_drain(iommu->cap))
+ val |= DMA_TLB_WRITE_DRAIN;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ /* Note: Only uses first TLB reg currently */
+ if (val_iva)
+ dmar_writeq(iommu->reg + tlb_offset, val_iva);
+ dmar_writeq(iommu->reg + tlb_offset + 8, val);
+
+ /* Make sure the hardware completes it */
+ IOMMU_WAIT_OP(iommu, tlb_offset + 8,
+ dmar_readq, (!(val & DMA_TLB_IVT)), val);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ /* check IOTLB invalidation granularity */
+ if (DMA_TLB_IAIG(val) == 0)
+ printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
+ if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
+ pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
+ DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
+ /* flushing the context entry will implicitly flush the write buffer */
+ return 0;
+}
+
+static inline int iommu_flush_iotlb_global(struct intel_iommu *iommu,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
+ non_present_entry_flush);
+}
+
+static inline int iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
+ non_present_entry_flush);
+}
+
+static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
+ u64 addr, unsigned int pages, int non_present_entry_flush)
+{
+ unsigned int mask;
+
+ BUG_ON(addr & (~PAGE_MASK_4K));
+ BUG_ON(pages == 0);
+
+ /* Fallback to domain selective flush if no PSI support */
+ if (!cap_pgsel_inv(iommu->cap))
+ return iommu_flush_iotlb_dsi(iommu, did,
+ non_present_entry_flush);
+
+ /*
+ * PSI requires the region size to be a power of two (2^x pages),
+ * with the base address naturally aligned to that size
+ */
+ mask = ilog2(__roundup_pow_of_two(pages));
+ /* Fallback to domain selective flush if size is too big */
+ if (mask > cap_max_amask_val(iommu->cap))
+ return iommu_flush_iotlb_dsi(iommu, did,
+ non_present_entry_flush);
+
+ return __iommu_flush_iotlb(iommu, did, addr, mask,
+ DMA_TLB_PSI_FLUSH, non_present_entry_flush);
+}
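
The PSI mask arithmetic above is easy to sanity-check in isolation: __roundup_pow_of_two() rounds the page count up, and ilog2() turns it into the address-mask order the hardware expects. A minimal standalone C sketch, not part of the patch (roundup_pow2() stands in for the kernel helper):

#include <assert.h>
#include <stdio.h>

static unsigned long roundup_pow2(unsigned long n)
{
	unsigned long v = 1;

	while (v < n)
		v <<= 1;
	return v;
}

static unsigned int psi_mask(unsigned int pages)
{
	unsigned long v = roundup_pow2(pages);
	unsigned int mask = 0;

	while (v >>= 1)
		mask++;		/* ilog2() of the rounded-up count */
	return mask;
}

int main(void)
{
	/* 5 pages round up to 8, so the mask order is 3: the hardware
	 * invalidates one 32KB-aligned region of eight 4KB pages. */
	assert(psi_mask(5) == 3);
	assert(psi_mask(1) == 0);
	printf("mask for 5 pages: %u\n", psi_mask(5));
	return 0;
}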
+
+static int iommu_enable_translation(struct intel_iommu *iommu)
+{
+ u32 sts;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure hardware completes it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (sts & DMA_GSTS_TES), sts);
+
+ iommu->gcmd |= DMA_GCMD_TE;
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ return 0;
+}
+
+static int iommu_disable_translation(struct intel_iommu *iommu)
+{
+ u32 sts;
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ iommu->gcmd &= ~DMA_GCMD_TE;
+ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure hardware completes it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (!(sts & DMA_GSTS_TES)), sts);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ return 0;
+}
+
+/* iommu interrupt handling. Most of it is MSI-like. */
+
+static char *fault_reason_strings[] =
+{
+ "Software",
+ "Present bit in root entry is clear",
+ "Present bit in context entry is clear",
+ "Invalid context entry",
+ "Access beyond MGAW",
+ "PTE Write access is not set",
+ "PTE Read access is not set",
+ "Next page table ptr is invalid",
+ "Root table address invalid",
+ "Context table ptr is invalid",
+ "non-zero reserved fields in RTP",
+ "non-zero reserved fields in CTP",
+ "non-zero reserved fields in PTE",
+ "Unknown"
+};
+#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
+
+char *dmar_get_fault_reason(u8 fault_reason)
+{
+ if (fault_reason > MAX_FAULT_REASON_IDX)
+ return fault_reason_strings[MAX_FAULT_REASON_IDX];
+ else
+ return fault_reason_strings[fault_reason];
+}
+
+void dmar_msi_unmask(unsigned int irq)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ /* unmask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(0, iommu->reg + DMAR_FECTL_REG);
+ /* Read a reg to force flush the post write */
+ readl(iommu->reg + DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_mask(unsigned int irq)
+{
+ unsigned long flag;
+ struct intel_iommu *iommu = get_irq_data(irq);
+
+ /* mask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
+ /* Read a reg to force flush the post write */
+ readl(iommu->reg + DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
+ writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
+ writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_read(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
+ msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
+ msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
+ u8 fault_reason, u16 source_id, u64 addr)
+{
+ char *reason;
+
+ reason = dmar_get_fault_reason(fault_reason);
+
+ printk(KERN_ERR
+ "DMAR:[%s] Request device [%02x:%02x.%d] "
+ "fault addr %llx \n"
+ "DMAR:[fault reason %02d] %s\n",
+ (type ? "DMA Read" : "DMA Write"),
+ (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+ return 0;
+}
+
+#define PRIMARY_FAULT_REG_LEN (16)
+static irqreturn_t iommu_page_fault(int irq, void *dev_id)
+{
+ struct intel_iommu *iommu = dev_id;
+ int reg, fault_index;
+ u32 fault_status;
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+
+ /* TBD: ignore advanced fault log currently */
+ if (!(fault_status & DMA_FSTS_PPF))
+ goto clear_overflow;
+
+ fault_index = dma_fsts_fault_record_index(fault_status);
+ reg = cap_fault_reg_offset(iommu->cap);
+ while (1) {
+ u8 fault_reason;
+ u16 source_id;
+ u64 guest_addr;
+ int type;
+ u32 data;
+
+ /* highest 32 bits */
+ data = readl(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+ if (!(data & DMA_FRCD_F))
+ break;
+
+ fault_reason = dma_frcd_fault_reason(data);
+ type = dma_frcd_type(data);
+
+ data = readl(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 8);
+ source_id = dma_frcd_source_id(data);
+
+ guest_addr = dmar_readq(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN);
+ guest_addr = dma_frcd_page_addr(guest_addr);
+ /* clear the fault */
+ writel(DMA_FRCD_F, iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ iommu_page_fault_do_one(iommu, type, fault_reason,
+ source_id, guest_addr);
+
+ fault_index++;
+ if (fault_index >= cap_num_fault_regs(iommu->cap))
+ fault_index = 0;
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ }
+clear_overflow:
+ /* clear primary fault overflow */
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+ if (fault_status & DMA_FSTS_PFO)
+ writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ return IRQ_HANDLED;
+}
+
+int dmar_set_interrupt(struct intel_iommu *iommu)
+{
+ int irq, ret;
+
+ irq = create_irq();
+ if (!irq) {
+ printk(KERN_ERR "IOMMU: no free vectors\n");
+ return -EINVAL;
+ }
+
+ set_irq_data(irq, iommu);
+ iommu->irq = irq;
+
+ ret = arch_setup_dmar_msi(irq);
+ if (ret) {
+ set_irq_data(irq, NULL);
+ iommu->irq = 0;
+ destroy_irq(irq);
+ return ret;
+ }
+
+ /* Force the fault register to be cleared */
+ iommu_page_fault(irq, iommu);
+
+ ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
+ if (ret)
+ printk(KERN_ERR "IOMMU: can't request irq\n");
+ return ret;
+}
+
+static int iommu_init_domains(struct intel_iommu *iommu)
+{
+ unsigned long ndomains;
+ unsigned long nlongs;
+
+ ndomains = cap_ndoms(iommu->cap);
+ pr_debug("Number of Domains supportd <%ld>\n", ndomains);
+ nlongs = BITS_TO_LONGS(ndomains);
+
+ /* TBD: there might be 64K domains,
+ * consider other allocation for future chips
+ */
+ iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
+ if (!iommu->domain_ids) {
+ printk(KERN_ERR "Allocating domain id array failed\n");
+ return -ENOMEM;
+ }
+ iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
+ GFP_KERNEL);
+ if (!iommu->domains) {
+ printk(KERN_ERR "Allocating domain array failed\n");
+ kfree(iommu->domain_ids);
+ return -ENOMEM;
+ }
+
+ /*
+ * if Caching mode is set, then invalid translations are tagged
+ * with domainid 0. Hence we need to pre-allocate it.
+ */
+ if (cap_caching_mode(iommu->cap))
+ set_bit(0, iommu->domain_ids);
+ return 0;
+}
+
+static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+{
+ struct intel_iommu *iommu;
+ int ret;
+ int map_size;
+ u32 ver;
+
+ iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+ if (!iommu)
+ return NULL;
+ iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+ /* the registers might be more than one page */
+ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+ cap_max_fault_reg_offset(iommu->cap));
+ map_size = PAGE_ALIGN_4K(map_size);
+ if (map_size > PAGE_SIZE_4K) {
+ iounmap(iommu->reg);
+ iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ }
+
+ ver = readl(iommu->reg + DMAR_VER_REG);
+ pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+ drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+ iommu->cap, iommu->ecap);
+ ret = iommu_init_domains(iommu);
+ if (ret)
+ goto error_unmap;
+ spin_lock_init(&iommu->lock);
+ spin_lock_init(&iommu->register_lock);
+
+ drhd->iommu = iommu;
+ return iommu;
+error_unmap:
+ iounmap(iommu->reg);
+ iommu->reg = NULL;
+error:
+ kfree(iommu);
+ return NULL;
+}
+
+static void domain_exit(struct dmar_domain *domain);
+static void free_iommu(struct intel_iommu *iommu)
+{
+ struct dmar_domain *domain;
+ int i;
+
+ if (!iommu)
+ return;
+
+ i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
+ for (; i < cap_ndoms(iommu->cap); ) {
+ domain = iommu->domains[i];
+ clear_bit(i, iommu->domain_ids);
+ domain_exit(domain);
+ i = find_next_bit(iommu->domain_ids,
+ cap_ndoms(iommu->cap), i+1);
+ }
+
+ if (iommu->gcmd & DMA_GCMD_TE)
+ iommu_disable_translation(iommu);
+
+ if (iommu->irq) {
+ set_irq_data(iommu->irq, NULL);
+ /* This will mask the irq */
+ free_irq(iommu->irq, iommu);
+ destroy_irq(iommu->irq);
+ }
+
+ kfree(iommu->domains);
+ kfree(iommu->domain_ids);
+
+ /* free context mapping */
+ free_context_table(iommu);
+
+ if (iommu->reg)
+ iounmap(iommu->reg);
+ kfree(iommu);
+}
+
+static struct dmar_domain *iommu_alloc_domain(struct intel_iommu *iommu)
+{
+ unsigned long num;
+ unsigned long ndomains;
+ struct dmar_domain *domain;
+ unsigned long flags;
+
+ domain = alloc_domain_mem();
+ if (!domain)
+ return NULL;
+
+ ndomains = cap_ndoms(iommu->cap);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ free_domain_mem(domain);
+ printk(KERN_ERR "IOMMU: no free domain ids\n");
+ return NULL;
+ }
+
+ set_bit(num, iommu->domain_ids);
+ domain->id = num;
+ domain->iommu = iommu;
+ iommu->domains[num] = domain;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return domain;
+}
+
+static void iommu_free_domain(struct dmar_domain *domain)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->iommu->lock, flags);
+ clear_bit(domain->id, domain->iommu->domain_ids);
+ spin_unlock_irqrestore(&domain->iommu->lock, flags);
+}
+
+static struct iova_domain reserved_iova_list;
+
+static void dmar_init_reserved_ranges(void)
+{
+ struct pci_dev *pdev = NULL;
+ struct iova *iova;
+ int i;
+ u64 addr, size;
+
+ init_iova_domain(&reserved_iova_list);
+
+ /* IOAPIC ranges shouldn't be accessed by DMA */
+ iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
+ IOVA_PFN(IOAPIC_RANGE_END));
+ if (!iova)
+ printk(KERN_ERR "Reserve IOAPIC range failed\n");
+
+ /* Reserve all PCI MMIO to avoid peer-to-peer access */
+ for_each_pci_dev(pdev) {
+ struct resource *r;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ r = &pdev->resource[i];
+ if (!r->flags || !(r->flags & IORESOURCE_MEM))
+ continue;
+ addr = r->start;
+ addr &= PAGE_MASK_4K;
+ size = r->end - addr;
+ size = PAGE_ALIGN_4K(size);
+ iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
+ IOVA_PFN(size + addr) - 1);
+ if (!iova)
+ printk(KERN_ERR "Reserve iova failed\n");
+ }
+ }
+}
+
+static void domain_reserve_special_ranges(struct dmar_domain *domain)
+{
+ copy_reserved_iova(&reserved_iova_list, &domain->iovad);
+}
+
+static inline int guestwidth_to_adjustwidth(int gaw)
+{
+ int agaw;
+ int r = (gaw - 12) % 9;
+
+ if (r == 0)
+ agaw = gaw;
+ else
+ agaw = gaw + 9 - r;
+ if (agaw > 64)
+ agaw = 64;
+ return agaw;
+}
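
The rule here: each page-table level resolves 9 bits above the 12-bit page offset, so the adjusted width rounds (gaw - 12) up to a multiple of 9 and caps at 64. A standalone sketch of the same arithmetic, not part of the patch:

#include <assert.h>

static int adjust_width(int gaw)
{
	int r = (gaw - 12) % 9;
	int agaw = (r == 0) ? gaw : gaw + 9 - r;

	return agaw > 64 ? 64 : agaw;
}

int main(void)
{
	assert(adjust_width(48) == 48);	/* exactly 4 nine-bit levels */
	assert(adjust_width(36) == 39);	/* rounds up to 3 levels */
	return 0;
}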
+
+static int domain_init(struct dmar_domain *domain, int guest_width)
+{
+ struct intel_iommu *iommu;
+ int adjust_width, agaw;
+ unsigned long sagaw;
+
+ init_iova_domain(&domain->iovad);
+ spin_lock_init(&domain->mapping_lock);
+
+ domain_reserve_special_ranges(domain);
+
+ /* calculate AGAW */
+ iommu = domain->iommu;
+ if (guest_width > cap_mgaw(iommu->cap))
+ guest_width = cap_mgaw(iommu->cap);
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ agaw = width_to_agaw(adjust_width);
+ sagaw = cap_sagaw(iommu->cap);
+ if (!test_bit(agaw, &sagaw)) {
+ /* hardware doesn't support it, choose a bigger one */
+ pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
+ agaw = find_next_bit(&sagaw, 5, agaw);
+ if (agaw >= 5)
+ return -ENODEV;
+ }
+ domain->agaw = agaw;
+ INIT_LIST_HEAD(&domain->devices);
+
+ /* always allocate the top pgd */
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ if (!domain->pgd)
+ return -ENOMEM;
+ __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
+ return 0;
+}
+
+static void domain_exit(struct dmar_domain *domain)
+{
+ u64 end;
+
+ /* Domain 0 is reserved, so don't process it */
+ if (!domain)
+ return;
+
+ domain_remove_dev_info(domain);
+ /* destroy iovas */
+ put_iova_domain(&domain->iovad);
+ end = DOMAIN_MAX_ADDR(domain->gaw);
+ end = end & (~PAGE_MASK_4K);
+
+ /* clear ptes */
+ dma_pte_clear_range(domain, 0, end);
+
+ /* free page tables */
+ dma_pte_free_pagetable(domain, 0, end);
+
+ iommu_free_domain(domain);
+ free_domain_mem(domain);
+}
+
+static int domain_context_mapping_one(struct dmar_domain *domain,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+ struct intel_iommu *iommu = domain->iommu;
+ unsigned long flags;
+
+ pr_debug("Set context mapping for %02x:%02x.%d\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ BUG_ON(!domain->pgd);
+ context = device_to_context_entry(iommu, bus, devfn);
+ if (!context)
+ return -ENOMEM;
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (context_present(*context)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return 0;
+ }
+
+ context_set_domain_id(*context, domain->id);
+ context_set_address_width(*context, domain->agaw);
+ context_set_address_root(*context, virt_to_phys(domain->pgd));
+ context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+ context_set_fault_enable(*context);
+ context_set_present(*context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+
+ /* it's a non-present to present mapping */
+ if (iommu_flush_context_device(iommu, domain->id,
+ (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
+ iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, 0, 0);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return 0;
+}
+
+static int
+domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+{
+ int ret;
+ struct pci_dev *tmp, *parent;
+
+ ret = domain_context_mapping_one(domain, pdev->bus->number,
+ pdev->devfn);
+ if (ret)
+ return ret;
+
+ /* dependent device mapping */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ if (!tmp)
+ return 0;
+ /* Secondary interface's bus number and devfn 0 */
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ ret = domain_context_mapping_one(domain, parent->bus->number,
+ parent->devfn);
+ if (ret)
+ return ret;
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ return domain_context_mapping_one(domain,
+ tmp->subordinate->number, 0);
+ else /* this is a legacy PCI bridge */
+ return domain_context_mapping_one(domain,
+ tmp->bus->number, tmp->devfn);
+}
+
+static int domain_context_mapped(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ int ret;
+ struct pci_dev *tmp, *parent;
+
+ ret = device_context_mapped(domain->iommu,
+ pdev->bus->number, pdev->devfn);
+ if (!ret)
+ return ret;
+ /* dependent device mapping */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ if (!tmp)
+ return ret;
+ /* Secondary interface's bus number and devfn 0 */
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ ret = device_context_mapped(domain->iommu, parent->bus->number,
+ parent->devfn);
+ if (!ret)
+ return ret;
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie)
+ return device_context_mapped(domain->iommu,
+ tmp->subordinate->number, 0);
+ else
+ return device_context_mapped(domain->iommu,
+ tmp->bus->number, tmp->devfn);
+}
+
+static int
+domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot)
+{
+ u64 start_pfn, end_pfn;
+ struct dma_pte *pte;
+ int index;
+
+ if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
+ return -EINVAL;
+ iova &= PAGE_MASK_4K;
+ start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
+ end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
+ index = 0;
+ while (start_pfn < end_pfn) {
+ pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
+ if (!pte)
+ return -ENOMEM;
+ /* We don't need a lock here; nobody else
+ * touches this iova range
+ */
+ BUG_ON(dma_pte_addr(*pte));
+ dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+ dma_set_pte_prot(*pte, prot);
+ __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ start_pfn++;
+ index++;
+ }
+ return 0;
+}
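
The pfn arithmetic above widens an unaligned hpa/size pair to whole 4K pages. A standalone sketch, not part of the patch, assuming the PAGE_*_4K macros expand as shown:

#include <assert.h>

#define PAGE_SHIFT_4K	12
#define PAGE_SIZE_4K	(1ULL << PAGE_SHIFT_4K)
#define PAGE_ALIGN_4K(a)	(((a) + PAGE_SIZE_4K - 1) & ~(PAGE_SIZE_4K - 1))

int main(void)
{
	unsigned long long hpa = 0x1234, size = 0x2000;
	unsigned long long start_pfn = hpa >> PAGE_SHIFT_4K;
	unsigned long long end_pfn = PAGE_ALIGN_4K(hpa + size) >> PAGE_SHIFT_4K;

	/* 0x1234..0x3233 touches pages 1, 2 and 3: three PTEs are set */
	assert(start_pfn == 1 && end_pfn == 4);
	return 0;
}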
+
+static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+ clear_context_table(domain->iommu, bus, devfn);
+ iommu_flush_context_global(domain->iommu, 0);
+ iommu_flush_iotlb_global(domain->iommu, 0);
+}
+
+static void domain_remove_dev_info(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ while (!list_empty(&domain->devices)) {
+ info = list_entry(domain->devices.next,
+ struct device_domain_info, link);
+ list_del(&info->link);
+ list_del(&info->global);
+ if (info->dev)
+ info->dev->dev.archdata.iommu = NULL;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ detach_domain_for_dev(info->domain, info->bus, info->devfn);
+ free_devinfo_mem(info);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+/*
+ * find_domain
+ * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
+ */
+struct dmar_domain *
+find_domain(struct pci_dev *pdev)
+{
+ struct device_domain_info *info;
+
+ /* No lock here, assumes no domain exit in normal case */
+ info = pdev->dev.archdata.iommu;
+ if (info)
+ return info->domain;
+ return NULL;
+}
+
+static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+ struct pci_dev *dev)
+{
+ int index;
+
+ while (dev) {
+ for (index = 0; index < cnt; index++)
+ if (dev == devices[index])
+ return 1;
+
+ /* Check our parent */
+ dev = dev->bus->self;
+ }
+
+ return 0;
+}
+
+static struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
+{
+ struct dmar_drhd_unit *drhd = NULL;
+
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+ drhd->devices_cnt, dev))
+ return drhd;
+ }
+
+ return NULL;
+}
+
+/* domain is initialized */
+static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
+{
+ struct dmar_domain *domain, *found = NULL;
+ struct intel_iommu *iommu;
+ struct dmar_drhd_unit *drhd;
+ struct device_domain_info *info, *tmp;
+ struct pci_dev *dev_tmp;
+ unsigned long flags;
+ int bus = 0, devfn = 0;
+
+ domain = find_domain(pdev);
+ if (domain)
+ return domain;
+
+ dev_tmp = pci_find_upstream_pcie_bridge(pdev);
+ if (dev_tmp) {
+ if (dev_tmp->is_pcie) {
+ bus = dev_tmp->subordinate->number;
+ devfn = 0;
+ } else {
+ bus = dev_tmp->bus->number;
+ devfn = dev_tmp->devfn;
+ }
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_for_each_entry(info, &device_domain_list, global) {
+ if (info->bus == bus && info->devfn == devfn) {
+ found = info->domain;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ /* the pcie-pci bridge already has a domain, use it */
+ if (found) {
+ domain = found;
+ goto found_domain;
+ }
+ }
+
+ /* Allocate new domain for the device */
+ drhd = dmar_find_matched_drhd_unit(pdev);
+ if (!drhd) {
+ printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
+ pci_name(pdev));
+ return NULL;
+ }
+ iommu = drhd->iommu;
+
+ domain = iommu_alloc_domain(iommu);
+ if (!domain)
+ goto error;
+
+ if (domain_init(domain, gaw)) {
+ domain_exit(domain);
+ goto error;
+ }
+
+ /* register pcie-to-pci device */
+ if (dev_tmp) {
+ info = alloc_devinfo_mem();
+ if (!info) {
+ domain_exit(domain);
+ goto error;
+ }
+ info->bus = bus;
+ info->devfn = devfn;
+ info->dev = NULL;
+ info->domain = domain;
+ /* This domain is shared by devices under p2p bridge */
+ domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+
+ /* the pcie-to-pci bridge already has a domain, use it */
+ found = NULL;
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_for_each_entry(tmp, &device_domain_list, global) {
+ if (tmp->bus == bus && tmp->devfn == devfn) {
+ found = tmp->domain;
+ break;
+ }
+ }
+ if (found) {
+ free_devinfo_mem(info);
+ domain_exit(domain);
+ domain = found;
+ } else {
+ list_add(&info->link, &domain->devices);
+ list_add(&info->global, &device_domain_list);
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ }
+
+found_domain:
+ info = alloc_devinfo_mem();
+ if (!info)
+ goto error;
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->dev = pdev;
+ info->domain = domain;
+ spin_lock_irqsave(&device_domain_lock, flags);
+ /* somebody else raced us and set it up first */
+ found = find_domain(pdev);
+ if (found != NULL) {
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ if (found != domain) {
+ domain_exit(domain);
+ domain = found;
+ }
+ free_devinfo_mem(info);
+ return domain;
+ }
+ list_add(&info->link, &domain->devices);
+ list_add(&info->global, &device_domain_list);
+ pdev->dev.archdata.iommu = info;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ return domain;
+error:
+ /* recheck it here, maybe others set it */
+ return find_domain(pdev);
+}
+
+static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
+{
+ struct dmar_domain *domain;
+ unsigned long size;
+ u64 base;
+ int ret;
+
+ printk(KERN_INFO
+ "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
+ pci_name(pdev), start, end);
+ /* page table init */
+ domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ if (!domain)
+ return -ENOMEM;
+
+ /* The address might not be aligned */
+ base = start & PAGE_MASK_4K;
+ size = end - base;
+ size = PAGE_ALIGN_4K(size);
+ if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
+ IOVA_PFN(base + size) - 1)) {
+ printk(KERN_ERR "IOMMU: reserve iova failed\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ pr_debug("Mapping reserved region %lx@%llx for %s\n",
+ size, base, pci_name(pdev));
+ /*
+ * the RMRR range might overlap the physical memory range,
+ * clear it first
+ */
+ dma_pte_clear_range(domain, base, base + size);
+
+ ret = domain_page_mapping(domain, base, base, size,
+ DMA_PTE_READ|DMA_PTE_WRITE);
+ if (ret)
+ goto error;
+
+ /* context entry init */
+ ret = domain_context_mapping(domain, pdev);
+ if (!ret)
+ return 0;
+error:
+ domain_exit(domain);
+ return ret;
+}
+
+static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
+ struct pci_dev *pdev)
+{
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+ return 0;
+ return iommu_prepare_identity_map(pdev, rmrr->base_address,
+ rmrr->end_address + 1);
+}
+
+#ifdef CONFIG_DMAR_GFX_WA
+extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
+static void __init iommu_prepare_gfx_mapping(void)
+{
+ struct pci_dev *pdev = NULL;
+ u64 base, size;
+ int slot;
+ int ret;
+
+ for_each_pci_dev(pdev) {
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
+ !IS_GFX_DEVICE(pdev))
+ continue;
+ printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
+ pci_name(pdev));
+ slot = arch_get_ram_range(0, &base, &size);
+ while (slot >= 0) {
+ ret = iommu_prepare_identity_map(pdev,
+ base, base + size);
+ if (ret)
+ goto error;
+ slot = arch_get_ram_range(slot, &base, &size);
+ }
+ continue;
+error:
+ printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
+ }
+}
+#endif
+
+#ifdef CONFIG_DMAR_FLOPPY_WA
+static inline void iommu_prepare_isa(void)
+{
+ struct pci_dev *pdev;
+ int ret;
+
+ pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+ if (!pdev)
+ return;
+
+ printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
+ ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+
+ if (ret)
+ printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
+ "floppy might not work\n");
+}
+#else
+static inline void iommu_prepare_isa(void)
+{
+ return;
+}
+#endif /* !CONFIG_DMAR_FLOPPY_WA */
+
+int __init init_dmars(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct dmar_rmrr_unit *rmrr;
+ struct pci_dev *pdev;
+ struct intel_iommu *iommu;
+ int ret, unit = 0;
+
+ /*
+ * for each drhd
+ * allocate root
+ * initialize and program root entry to not present
+ * endfor
+ */
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = alloc_iommu(drhd);
+ if (!iommu) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /*
+ * TBD:
+ * we could share the same root & context tables
+ * among all IOMMUs. Need to split it later.
+ */
+ ret = iommu_alloc_root_entry(iommu);
+ if (ret) {
+ printk(KERN_ERR "IOMMU: allocate root entry failed\n");
+ goto error;
+ }
+ }
+
+ /*
+ * For each rmrr
+ * for each dev attached to rmrr
+ * do
+ * locate drhd for dev, alloc domain for dev
+ * allocate free domain
+ * allocate page table entries for rmrr
+ * if context not allocated for bus
+ * allocate and init context
+ * set present in root table for this bus
+ * init context with domain, translation etc
+ * endfor
+ * endfor
+ */
+ for_each_rmrr_units(rmrr) {
+ int i;
+ for (i = 0; i < rmrr->devices_cnt; i++) {
+ pdev = rmrr->devices[i];
+ /* some BIOSes list nonexistent devices in the DMAR table */
+ if (!pdev)
+ continue;
+ ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+ if (ret)
+ printk(KERN_ERR
+ "IOMMU: mapping reserved region failed\n");
+ }
+ }
+
+ iommu_prepare_gfx_mapping();
+
+ iommu_prepare_isa();
+
+ /*
+ * for each drhd
+ * enable fault log
+ * global invalidate context cache
+ * global invalidate iotlb
+ * enable translation
+ */
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+ sprintf(iommu->name, "dmar%d", unit++);
+
+ iommu_flush_write_buffer(iommu);
+
+ ret = dmar_set_interrupt(iommu);
+ if (ret)
+ goto error;
+
+ iommu_set_root_entry(iommu);
+
+ iommu_flush_context_global(iommu, 0);
+ iommu_flush_iotlb_global(iommu, 0);
+
+ ret = iommu_enable_translation(iommu);
+ if (ret)
+ goto error;
+ }
+
+ return 0;
+error:
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+ free_iommu(iommu);
+ }
+ return ret;
+}
+
+static inline u64 aligned_size(u64 host_addr, size_t size)
+{
+ u64 addr;
+ addr = (host_addr & (~PAGE_MASK_4K)) + size;
+ return PAGE_ALIGN_4K(addr);
+}
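
aligned_size() adds the intra-page offset before rounding, so a small buffer that straddles a page boundary costs two pages of IOVA space. A standalone sketch, not part of the patch, with the 4K constants assumed:

#include <assert.h>

#define PAGE_SIZE_4K	4096ULL
#define PAGE_MASK_4K	(~(PAGE_SIZE_4K - 1))
#define PAGE_ALIGN_4K(a)	(((a) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)

static unsigned long long aligned_size(unsigned long long host_addr,
				       unsigned long long size)
{
	return PAGE_ALIGN_4K((host_addr & ~PAGE_MASK_4K) + size);
}

int main(void)
{
	assert(aligned_size(0x1f80, 0x100) == 2 * PAGE_SIZE_4K); /* straddles */
	assert(aligned_size(0x2000, 0x100) == PAGE_SIZE_4K);     /* fits */
	return 0;
}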
+
+struct iova *
+iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
+{
+ struct iova *piova;
+
+ /* Make sure it's in range */
+ end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
+ if (!size || (IOVA_START_ADDR + size > end))
+ return NULL;
+
+ piova = alloc_iova(&domain->iovad,
+ size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
+ return piova;
+}
+
+static struct iova *
+__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
+ size_t size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct iova *iova = NULL;
+
+ if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
+ iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
+ } else {
+ /*
+ * First try to allocate an io virtual address in
+ * DMA_32BIT_MASK and if that fails then try allocating
+ * from the higher range
+ */
+ iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
+ if (!iova)
+ iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
+ }
+
+ if (!iova) {
+ printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
+ return NULL;
+ }
+
+ return iova;
+}
+
+static struct dmar_domain *
+get_valid_domain_for_dev(struct pci_dev *pdev)
+{
+ struct dmar_domain *domain;
+ int ret;
+
+ domain = get_domain_for_dev(pdev,
+ DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ if (!domain) {
+ printk(KERN_ERR
+ "Allocating domain for %s failed", pci_name(pdev));
+ return 0;
+ }
+
+ /* make sure context mapping is ok */
+ if (unlikely(!domain_context_mapped(domain, pdev))) {
+ ret = domain_context_mapping(domain, pdev);
+ if (ret) {
+ printk(KERN_ERR
+ "Domain context map for %s failed",
+ pci_name(pdev));
+ return 0;
+ }
+ }
+
+ return domain;
+}
+
+static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
+ size_t size, int dir)
+{
+ struct pci_dev *pdev = to_pci_dev(hwdev);
+ int ret;
+ struct dmar_domain *domain;
+ unsigned long start_addr;
+ struct iova *iova;
+ int prot = 0;
+
+ BUG_ON(dir == DMA_NONE);
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+ return virt_to_bus(addr);
+
+ domain = get_valid_domain_for_dev(pdev);
+ if (!domain)
+ return 0;
+
+ addr = (void *)virt_to_phys(addr);
+ size = aligned_size((u64)addr, size);
+
+ iova = __intel_alloc_iova(hwdev, domain, size);
+ if (!iova)
+ goto error;
+
+ start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+
+ /*
+ * Check if DMAR supports zero-length reads on write-only
+ * mappings.
+ */
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
+ !cap_zlr(domain->iommu->cap))
+ prot |= DMA_PTE_READ;
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+ prot |= DMA_PTE_WRITE;
+ /*
+ * addr..(addr + size) might cover a partial page, so we map the whole
+ * page. Note: if two parts of one page are mapped separately, we
+ * might have two guest addresses mapping to the same host addr, but
+ * this is not a big problem
+ */
+ ret = domain_page_mapping(domain, start_addr,
+ ((u64)addr) & PAGE_MASK_4K, size, prot);
+ if (ret)
+ goto error;
+
+ pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
+ pci_name(pdev), size, (u64)addr,
+ size, (u64)start_addr, dir);
+
+ /* it's a non-present to present mapping */
+ ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ start_addr, size >> PAGE_SHIFT_4K, 1);
+ if (ret)
+ iommu_flush_write_buffer(domain->iommu);
+
+ return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
+
+error:
+ if (iova)
+ __free_iova(&domain->iovad, iova);
+ printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
+ pci_name(pdev), size, (u64)addr, dir);
+ return 0;
+}
+
+static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
+ size_t size, int dir)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct dmar_domain *domain;
+ unsigned long start_addr;
+ struct iova *iova;
+
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+ return;
+ domain = find_domain(pdev);
+ BUG_ON(!domain);
+
+ iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
+ if (!iova)
+ return;
+
+ start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ size = aligned_size((u64)dev_addr, size);
+
+ pr_debug("Device %s unmapping: %lx@%llx\n",
+ pci_name(pdev), size, (u64)start_addr);
+
+ /* clear the whole range */
+ dma_pte_clear_range(domain, start_addr, start_addr + size);
+ /* free page tables */
+ dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+
+ if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ size >> PAGE_SHIFT_4K, 0))
+ iommu_flush_write_buffer(domain->iommu);
+
+ /* free iova */
+ __free_iova(&domain->iovad, iova);
+}
+
+static void *intel_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags)
+{
+ void *vaddr;
+ int order;
+
+ size = PAGE_ALIGN_4K(size);
+ order = get_order(size);
+ flags &= ~(GFP_DMA | GFP_DMA32);
+
+ vaddr = (void *)__get_free_pages(flags, order);
+ if (!vaddr)
+ return NULL;
+ memset(vaddr, 0, size);
+
+ *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
+ if (*dma_handle)
+ return vaddr;
+ free_pages((unsigned long)vaddr, order);
+ return NULL;
+}
+
+static void intel_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ int order;
+
+ size = PAGE_ALIGN_4K(size);
+ order = get_order(size);
+
+ intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vaddr, order);
+}
+
+#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
+static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ int i;
+ struct pci_dev *pdev = to_pci_dev(hwdev);
+ struct dmar_domain *domain;
+ unsigned long start_addr;
+ struct iova *iova;
+ size_t size = 0;
+ void *addr;
+ struct scatterlist *sg;
+
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+ return;
+
+ domain = find_domain(pdev);
+
+ iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
+ if (!iova)
+ return;
+ for_each_sg(sglist, sg, nelems, i) {
+ addr = SG_ENT_VIRT_ADDRESS(sg);
+ size += aligned_size((u64)addr, sg->length);
+ }
+
+ start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+
+ /* clear the whole range */
+ dma_pte_clear_range(domain, start_addr, start_addr + size);
+ /* free page tables */
+ dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+
+ if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ size >> PAGE_SHIFT_4K, 0))
+ iommu_flush_write_buffer(domain->iommu);
+
+ /* free iova */
+ __free_iova(&domain->iovad, iova);
+}
+
+static int intel_nontranslate_map_sg(struct device *hwdev,
+ struct scatterlist *sglist, int nelems, int dir)
+{
+ int i;
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, nelems, i) {
+ BUG_ON(!sg_page(sg));
+ sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
+ sg->dma_length = sg->length;
+ }
+ return nelems;
+}
+
+static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ void *addr;
+ int i;
+ struct pci_dev *pdev = to_pci_dev(hwdev);
+ struct dmar_domain *domain;
+ size_t size = 0;
+ int prot = 0;
+ size_t offset = 0;
+ struct iova *iova = NULL;
+ int ret;
+ struct scatterlist *sg;
+ unsigned long start_addr;
+
+ BUG_ON(dir == DMA_NONE);
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
+ return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
+
+ domain = get_valid_domain_for_dev(pdev);
+ if (!domain)
+ return 0;
+
+ for_each_sg(sglist, sg, nelems, i) {
+ addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = (void *)virt_to_phys(addr);
+ size += aligned_size((u64)addr, sg->length);
+ }
+
+ iova = __intel_alloc_iova(hwdev, domain, size);
+ if (!iova) {
+ sglist->dma_length = 0;
+ return 0;
+ }
+
+ /*
+ * Check if DMAR supports zero-length reads on write-only
+ * mappings.
+ */
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
+ !cap_zlr(domain->iommu->cap))
+ prot |= DMA_PTE_READ;
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+ prot |= DMA_PTE_WRITE;
+
+ start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ offset = 0;
+ for_each_sg(sglist, sg, nelems, i) {
+ addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = (void *)virt_to_phys(addr);
+ size = aligned_size((u64)addr, sg->length);
+ ret = domain_page_mapping(domain, start_addr + offset,
+ ((u64)addr) & PAGE_MASK_4K,
+ size, prot);
+ if (ret) {
+ /* clear the pages mapped so far */
+ dma_pte_clear_range(domain, start_addr,
+ start_addr + offset);
+ /* free page tables */
+ dma_pte_free_pagetable(domain, start_addr,
+ start_addr + offset);
+ /* free iova */
+ __free_iova(&domain->iovad, iova);
+ return 0;
+ }
+ sg->dma_address = start_addr + offset +
+ ((u64)addr & (~PAGE_MASK_4K));
+ sg->dma_length = sg->length;
+ offset += size;
+ }
+
+ /* it's a non-present to present mapping */
+ if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ start_addr, offset >> PAGE_SHIFT_4K, 1))
+ iommu_flush_write_buffer(domain->iommu);
+ return nelems;
+}
+
+static struct dma_mapping_ops intel_dma_ops = {
+ .alloc_coherent = intel_alloc_coherent,
+ .free_coherent = intel_free_coherent,
+ .map_single = intel_map_single,
+ .unmap_single = intel_unmap_single,
+ .map_sg = intel_map_sg,
+ .unmap_sg = intel_unmap_sg,
+};
+
+static inline int iommu_domain_cache_init(void)
+{
+ int ret = 0;
+
+ iommu_domain_cache = kmem_cache_create("iommu_domain",
+ sizeof(struct dmar_domain),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu_domain_cache) {
+ printk(KERN_ERR "Couldn't create iommu_domain cache\n");
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+
+static inline int iommu_devinfo_cache_init(void)
+{
+ int ret = 0;
+
+ iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
+ sizeof(struct device_domain_info),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu_devinfo_cache) {
+ printk(KERN_ERR "Couldn't create devinfo cache\n");
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+
+static inline int iommu_iova_cache_init(void)
+{
+ int ret = 0;
+
+ iommu_iova_cache = kmem_cache_create("iommu_iova",
+ sizeof(struct iova),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu_iova_cache) {
+ printk(KERN_ERR "Couldn't create iova cache\n");
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+
+static int __init iommu_init_mempool(void)
+{
+ int ret;
+ ret = iommu_iova_cache_init();
+ if (ret)
+ return ret;
+
+ ret = iommu_domain_cache_init();
+ if (ret)
+ goto domain_error;
+
+ ret = iommu_devinfo_cache_init();
+ if (!ret)
+ return ret;
+
+ kmem_cache_destroy(iommu_domain_cache);
+domain_error:
+ kmem_cache_destroy(iommu_iova_cache);
+
+ return -ENOMEM;
+}
+
+static void __init iommu_exit_mempool(void)
+{
+ kmem_cache_destroy(iommu_devinfo_cache);
+ kmem_cache_destroy(iommu_domain_cache);
+ kmem_cache_destroy(iommu_iova_cache);
+}
+
+void __init detect_intel_iommu(void)
+{
+ if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
+ return;
+ if (early_dmar_detect())
+ iommu_detected = 1;
+}
+
+static void __init init_no_remapping_devices(void)
+{
+ struct dmar_drhd_unit *drhd;
+
+ for_each_drhd_unit(drhd) {
+ if (!drhd->include_all) {
+ int i;
+ for (i = 0; i < drhd->devices_cnt; i++)
+ if (drhd->devices[i] != NULL)
+ break;
+ /* ignore DMAR unit if no pci devices exist */
+ if (i == drhd->devices_cnt)
+ drhd->ignored = 1;
+ }
+ }
+
+ if (dmar_map_gfx)
+ return;
+
+ for_each_drhd_unit(drhd) {
+ int i;
+ if (drhd->ignored || drhd->include_all)
+ continue;
+
+ for (i = 0; i < drhd->devices_cnt; i++)
+ if (drhd->devices[i] &&
+ !IS_GFX_DEVICE(drhd->devices[i]))
+ break;
+
+ if (i < drhd->devices_cnt)
+ continue;
+
+ /* bypass IOMMU if it is just for gfx devices */
+ drhd->ignored = 1;
+ for (i = 0; i < drhd->devices_cnt; i++) {
+ if (!drhd->devices[i])
+ continue;
+ drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ }
+ }
+}
+
+int __init intel_iommu_init(void)
+{
+ int ret = 0;
+
+ if (no_iommu || swiotlb || dmar_disabled)
+ return -ENODEV;
+
+ if (dmar_table_init())
+ return -ENODEV;
+
+ iommu_init_mempool();
+ dmar_init_reserved_ranges();
+
+ init_no_remapping_devices();
+
+ ret = init_dmars();
+ if (ret) {
+ printk(KERN_ERR "IOMMU: dmar init failed\n");
+ put_iova_domain(&reserved_iova_list);
+ iommu_exit_mempool();
+ return ret;
+ }
+ printk(KERN_INFO
+ "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
+
+ force_iommu = 1;
+ dma_ops = &intel_dma_ops;
+ return 0;
+}
+
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
new file mode 100644
index 00000000000..ee88dd2400c
--- /dev/null
+++ b/drivers/pci/intel-iommu.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@intel.com>
+ * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/msi.h>
+#include "iova.h"
+#include <linux/io.h>
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+
+#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
+#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG 0x18 /* Global command register */
+#define DMAR_GSTS_REG 0x1c /* Global status register */
+#define DMAR_RTADDR_REG 0x20 /* Root entry table */
+#define DMAR_CCMD_REG 0x28 /* Context command reg */
+#define DMAR_FSTS_REG 0x34 /* Fault Status register */
+#define DMAR_FECTL_REG 0x38 /* Fault control register */
+#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
+#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+
+#define OFFSET_STRIDE (9)
+/*
+#define dmar_readl(dmar, reg) readl(dmar + reg)
+#define dmar_readq(dmar, reg) ({ \
+ u32 lo, hi; \
+ lo = readl(dmar + reg); \
+ hi = readl(dmar + reg + 4); \
+ (((u64) hi) << 32) + lo; })
+*/
+static inline u64 dmar_readq(void __iomem *addr)
+{
+ u32 lo, hi;
+ lo = readl(addr);
+ hi = readl(addr + 4);
+ return (((u64) hi) << 32) + lo;
+}
+
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+ writel((u32)val, addr);
+ writel((u32)(val >> 32), addr + 4);
+}
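
These helpers split each 64-bit register access into two 32-bit ones, low word first. A standalone sketch, not part of the patch, with plain memory standing in for the MMIO window and made-up sim_* names:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void sim_writeq(uint8_t *addr, uint64_t val)
{
	uint32_t lo = (uint32_t)val, hi = (uint32_t)(val >> 32);

	memcpy(addr, &lo, 4);		/* writel((u32)val, addr) */
	memcpy(addr + 4, &hi, 4);	/* writel(val >> 32, addr + 4) */
}

static uint64_t sim_readq(const uint8_t *addr)
{
	uint32_t lo, hi;

	memcpy(&lo, addr, 4);
	memcpy(&hi, addr + 4, 4);
	return ((uint64_t)hi << 32) + lo;
}

int main(void)
{
	uint8_t reg[8];

	sim_writeq(reg, 0x1122334455667788ULL);
	assert(sim_readq(reg) == 0x1122334455667788ULL);
	return 0;
}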
+
+#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v) ((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_read_drain(c) (((c) >> 55) & 1)
+#define cap_write_drain(c) (((c) >> 54) & 1)
+#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c) (((c) >> 39) & 1)
+
+#define cap_super_page_val(c) (((c) >> 34) & 0xf)
+#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
+ * OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+ (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c) (((c) >> 22) & 1)
+#define cap_isoch(c) (((c) >> 23) & 1)
+#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c) (((c) >> 8) & 0x1f)
+#define cap_caching_mode(c) (((c) >> 7) & 1)
+#define cap_phmr(c) (((c) >> 6) & 1)
+#define cap_plmr(c) (((c) >> 5) & 1)
+#define cap_rwbf(c) (((c) >> 4) & 1)
+#define cap_afl(c) (((c) >> 3) & 1)
+#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
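
A worked decode of two of these fields; the capability value below is made up for illustration, not read from real hardware:

#include <assert.h>
typedef unsigned long long u64;

#define cap_mgaw(c)	((((c) >> 16) & 0x3f) + 1)
#define cap_ndoms(c)	(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))

int main(void)
{
	/* ND field = 2 -> 2^(4+4) = 256 domain ids; MGAW field = 47 ->
	 * 48-bit maximum guest address width */
	u64 cap = (47ULL << 16) | 2;

	assert(cap_ndoms(cap) == 256);
	assert(cap_mgaw(cap) == 48);
	return 0;
}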
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1)
+#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) \
+ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_coherent(e) ((e) & 0x1)
+
+
+/* IOTLB_REG */
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) (((type) >> 60) & 7)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PPF ((u32)2)
+#define DMA_FSTS_PFO ((u32)1)
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) (((d) >> 30) & 1)
+#define dma_frcd_fault_reason(c) ((c) & 0xff)
+#define dma_frcd_source_id(c) ((c) & 0xffff)
+#define dma_frcd_page_addr(d) ((d) & (((u64)-1) << 12)) /* low 64 bits */
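
A worked decode of a made-up FRCD upper dword (the word iommu_page_fault() reads at offset 12) using these macros:

#include <assert.h>
typedef unsigned int u32;

#define DMA_FRCD_F			(((u32)1) << 31)
#define dma_frcd_type(d)		(((d) >> 30) & 1)
#define dma_frcd_fault_reason(c)	((c) & 0xff)

int main(void)
{
	/* fault present, DMA read, reason 6 ("PTE Read access is not set") */
	u32 frcd_hi = DMA_FRCD_F | (1u << 30) | 6;

	assert(frcd_hi & DMA_FRCD_F);
	assert(dma_frcd_type(frcd_hi) == 1);
	assert(dma_frcd_fault_reason(frcd_hi) == 6);
	return 0;
}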
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 val;
+ u64 rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+ return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+ root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+ root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+ return (struct context_entry *)
+ (root_present(root) ?
+ phys_to_virt(root->val & PAGE_MASK_4K) : NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: available
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi & 7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+ do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+ do { \
+ (c).lo &= (((u64)-1) << 4) | 3; \
+ (c).lo |= ((val) & 3) << 2; \
+ } while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+ do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
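
A standalone sketch, not part of the patch, of composing a context entry with the accessors above, mirroring what domain_context_mapping_one() does; the page-table root and the AW value are made-up illustrations:

#include <assert.h>
typedef unsigned long long u64;

#define PAGE_MASK_4K	(~((u64)0xfff))
struct context_entry { u64 lo; u64 hi; };
#define context_present(c)	((c).lo & 1)
#define context_domain_id(c)	(((c).hi >> 8) & ((1 << 16) - 1))
#define context_set_present(c)	do {(c).lo |= 1;} while (0)
#define context_set_address_root(c, val) \
	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)

int main(void)
{
	struct context_entry ce = { 0, 0 };

	context_set_domain_id(ce, 5);
	context_set_address_width(ce, 2);	/* an assumed AGAW encoding */
	context_set_address_root(ce, 0x12345000ULL);
	context_set_present(ce);

	assert(context_present(ce));
	assert(context_domain_id(ce) == 5);
	assert((ce.lo & PAGE_MASK_4K) == 0x12345000ULL);
	return 0;
}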
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+ u64 val;
+};
+#define dma_clear_pte(p) do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+ (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+ int id; /* domain id */
+ struct intel_iommu *iommu; /* back pointer to owning iommu */
+
+ struct list_head devices; /* all devices' list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+
+ struct dma_pte *pgd; /* virtual address */
+ spinlock_t mapping_lock; /* page table lock */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+ int flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ struct list_head global; /* link to global list */
+ u8 bus; /* PCI bus number */
+ u8 devfn; /* PCI devfn number */
+ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+
+struct intel_iommu {
+ void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 cap;
+ u64 ecap;
+ unsigned long *domain_ids; /* bitmap of domains */
+ struct dmar_domain **domains; /* ptr to domains */
+ int seg;
+ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+ spinlock_t lock; /* protect context, domain ids */
+ spinlock_t register_lock; /* protect register handling */
+ struct root_entry *root_entry; /* virtual address */
+
+ unsigned int irq;
+ unsigned char name[7]; /* Device Name */
+ struct msi_msg saved_msg;
+ struct sys_device sysdev;
+};
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+ return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
new file mode 100644
index 00000000000..a84571c2936
--- /dev/null
+++ b/drivers/pci/iova.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#include "iova.h"
+
+void
+init_iova_domain(struct iova_domain *iovad)
+{
+ spin_lock_init(&iovad->iova_alloc_lock);
+ spin_lock_init(&iovad->iova_rbtree_lock);
+ iovad->rbroot = RB_ROOT;
+ iovad->cached32_node = NULL;
+}
+
+static struct rb_node *
+__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+{
+ if ((*limit_pfn != DMA_32BIT_PFN) ||
+ (iovad->cached32_node == NULL))
+ return rb_last(&iovad->rbroot);
+ else {
+ struct rb_node *prev_node = rb_prev(iovad->cached32_node);
+ struct iova *curr_iova =
+ container_of(iovad->cached32_node, struct iova, node);
+ *limit_pfn = curr_iova->pfn_lo - 1;
+ return prev_node;
+ }
+}
+
+static void
+__cached_rbnode_insert_update(struct iova_domain *iovad,
+ unsigned long limit_pfn, struct iova *new)
+{
+ if (limit_pfn != DMA_32BIT_PFN)
+ return;
+ iovad->cached32_node = &new->node;
+}
+
+static void
+__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
+{
+ struct iova *cached_iova;
+ struct rb_node *curr;
+
+ if (!iovad->cached32_node)
+ return;
+ curr = iovad->cached32_node;
+ cached_iova = container_of(curr, struct iova, node);
+
+ if (free->pfn_lo >= cached_iova->pfn_lo)
+ iovad->cached32_node = rb_next(&free->node);
+}
+
+/* Computes the padding size required to make the
+ * start address naturally aligned on its size
+ */
+static int
+iova_get_pad_size(int size, unsigned int limit_pfn)
+{
+ unsigned int pad_size = 0;
+ unsigned int order = ilog2(size);
+
+ if (order)
+ pad_size = (limit_pfn + 1) % (1 << order);
+
+ return pad_size;
+}
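
With a power-of-two size, this pad makes the top-down allocation in __alloc_iova_range() land on a naturally aligned pfn_lo. A standalone worked example, not part of the patch:

#include <assert.h>

static unsigned int pad_size(unsigned int size, unsigned int limit_pfn)
{
	unsigned int order = 0, pad = 0, s = size;

	while (s >>= 1)
		order++;	/* ilog2(size) */
	if (order)
		pad = (limit_pfn + 1) % (1 << order);
	return pad;
}

int main(void)
{
	unsigned int limit_pfn = 0x105, size = 8;	/* order 3 */
	unsigned int pad = pad_size(size, limit_pfn);
	unsigned int pfn_lo = limit_pfn - (size + pad) + 1;

	assert(pad == 6);
	assert(pfn_lo == 0xf8 && pfn_lo % 8 == 0);	/* size-aligned */
	return 0;
}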
+
+static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
+ unsigned long limit_pfn, struct iova *new, bool size_aligned)
+{
+ struct rb_node *curr = NULL;
+ unsigned long flags;
+ unsigned long saved_pfn;
+ unsigned int pad_size = 0;
+
+ /* Walk the tree backwards */
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ saved_pfn = limit_pfn;
+ curr = __get_cached_rbnode(iovad, &limit_pfn);
+ while (curr) {
+ struct iova *curr_iova = container_of(curr, struct iova, node);
+ if (limit_pfn < curr_iova->pfn_lo)
+ goto move_left;
+ else if (limit_pfn < curr_iova->pfn_hi)
+ goto adjust_limit_pfn;
+ else {
+ if (size_aligned)
+ pad_size = iova_get_pad_size(size, limit_pfn);
+ if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
+ break; /* found a free slot */
+ }
+adjust_limit_pfn:
+ limit_pfn = curr_iova->pfn_lo - 1;
+move_left:
+ curr = rb_prev(curr);
+ }
+
+ if (!curr) {
+ if (size_aligned)
+ pad_size = iova_get_pad_size(size, limit_pfn);
+ if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return -ENOMEM;
+ }
+ }
+
+ /* pfn_lo will point to size aligned address if size_aligned is set */
+ new->pfn_lo = limit_pfn - (size + pad_size) + 1;
+ new->pfn_hi = new->pfn_lo + size - 1;
+
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return 0;
+}
+
+static void
+iova_insert_rbtree(struct rb_root *root, struct iova *iova)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ /* Figure out where to put new node */
+ while (*new) {
+ struct iova *this = container_of(*new, struct iova, node);
+ parent = *new;
+
+ if (iova->pfn_lo < this->pfn_lo)
+ new = &((*new)->rb_left);
+ else if (iova->pfn_lo > this->pfn_lo)
+ new = &((*new)->rb_right);
+ else
+ BUG(); /* this should not happen */
+ }
+ /* Add new node and rebalance tree. */
+ rb_link_node(&iova->node, parent, new);
+ rb_insert_color(&iova->node, root);
+}
+
+/**
+ * alloc_iova - allocates an iova
+ * @iovad: iova domain in question
+ * @size: size of page frames to allocate
+ * @limit_pfn: max limit address
+ * @size_aligned: set if a size-aligned address range is required
+ * This function allocates an iova in the range IOVA_START_PFN to limit_pfn,
+ * searching down from limit_pfn rather than up from IOVA_START_PFN. If the
+ * size_aligned flag is set then the allocated address iova->pfn_lo will be
+ * naturally aligned on roundup_pow_of_two(size).
+ */
+struct iova *
+alloc_iova(struct iova_domain *iovad, unsigned long size,
+ unsigned long limit_pfn,
+ bool size_aligned)
+{
+ unsigned long flags;
+ struct iova *new_iova;
+ int ret;
+
+ new_iova = alloc_iova_mem();
+ if (!new_iova)
+ return NULL;
+
+ /* If size_aligned is set then round the size up
+ * to the next power of two.
+ */
+ if (size_aligned)
+ size = __roundup_pow_of_two(size);
+
+ spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
+ ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
+ size_aligned);
+
+ if (ret) {
+ spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+ free_iova_mem(new_iova);
+ return NULL;
+ }
+
+ /* Insert the new_iova into domain rbtree by holding writer lock */
+ spin_lock(&iovad->iova_rbtree_lock);
+ iova_insert_rbtree(&iovad->rbroot, new_iova);
+ __cached_rbnode_insert_update(iovad, limit_pfn, new_iova);
+ spin_unlock(&iovad->iova_rbtree_lock);
+
+ spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+
+ return new_iova;
+}
+
+/**
+ * find_iova - finds an iova for a given pfn
+ * @iovad: iova domain in question.
+ * @pfn: page frame number
+ * This function finds and returns an iova belonging to the
+ * given domain which matches the given pfn.
+ */
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
+{
+ unsigned long flags;
+ struct rb_node *node;
+
+ /* Take the lock so that no other thread is manipulating the rbtree */
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ node = iovad->rbroot.rb_node;
+ while (node) {
+ struct iova *iova = container_of(node, struct iova, node);
+
+ /* If pfn falls within iova's range, return iova */
+ if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ /* We are not holding the lock while this iova
+ * is referenced by the caller, as the same thread
+ * that called this function also calls __free_iova()
+ * and it is by design that only one thread can possibly
+ * reference a particular iova and hence there is no conflict.
+ */
+ return iova;
+ }
+
+ if (pfn < iova->pfn_lo)
+ node = node->rb_left;
+ else if (pfn > iova->pfn_lo)
+ node = node->rb_right;
+ }
+
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return NULL;
+}
+
+/**
+ * __free_iova - frees the given iova
+ * @iovad: iova domain in question.
+ * @iova: iova in question.
+ * Frees the given iova belonging to the given domain
+ */
+void
+__free_iova(struct iova_domain *iovad, struct iova *iova)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ __cached_rbnode_delete_update(iovad, iova);
+ rb_erase(&iova->node, &iovad->rbroot);
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ free_iova_mem(iova);
+}
+
+/**
+ * free_iova - finds and frees the iova for a given pfn
+ * @iovad: iova domain in question.
+ * @pfn: pfn that was allocated previously
+ * This function finds an iova for a given pfn and then
+ * frees the iova from that domain.
+ */
+void
+free_iova(struct iova_domain *iovad, unsigned long pfn)
+{
+ struct iova *iova = find_iova(iovad, pfn);
+ if (iova)
+ __free_iova(iovad, iova);
+}
+
+/**
+ * put_iova_domain - destroys the iova domain
+ * @iovad: iova domain in question.
+ * All the iovas in that domain are destroyed.
+ */
+void put_iova_domain(struct iova_domain *iovad)
+{
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ node = rb_first(&iovad->rbroot);
+ while (node) {
+ struct iova *iova = container_of(node, struct iova, node);
+ rb_erase(node, &iovad->rbroot);
+ free_iova_mem(iova);
+ node = rb_first(&iovad->rbroot);
+ }
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+}
+
+static int
+__is_range_overlap(struct rb_node *node,
+ unsigned long pfn_lo, unsigned long pfn_hi)
+{
+ struct iova *iova = container_of(node, struct iova, node);
+
+ if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
+ return 1;
+ return 0;
+}
+
+static struct iova *
+__insert_new_range(struct iova_domain *iovad,
+ unsigned long pfn_lo, unsigned long pfn_hi)
+{
+ struct iova *iova;
+
+ iova = alloc_iova_mem();
+ if (!iova)
+ return iova;
+
+ iova->pfn_hi = pfn_hi;
+ iova->pfn_lo = pfn_lo;
+ iova_insert_rbtree(&iovad->rbroot, iova);
+ return iova;
+}
+
+static void
+__adjust_overlap_range(struct iova *iova,
+ unsigned long *pfn_lo, unsigned long *pfn_hi)
+{
+ if (*pfn_lo < iova->pfn_lo)
+ iova->pfn_lo = *pfn_lo;
+ if (*pfn_hi > iova->pfn_hi)
+ *pfn_lo = iova->pfn_hi + 1;
+}
+
+/**
+ * reserve_iova - reserves an iova in the given range
+ * @iovad: iova domain pointer
+ * @pfn_lo: lower page frame address
+ * @pfn_hi: higher page frame address
+ * This function reserves the address range from pfn_lo to pfn_hi so
+ * that this range is not handed out by alloc_iova.
+ */
+struct iova *
+reserve_iova(struct iova_domain *iovad,
+ unsigned long pfn_lo, unsigned long pfn_hi)
+{
+ struct rb_node *node;
+ unsigned long flags;
+ struct iova *iova;
+ unsigned int overlap = 0;
+
+ spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
+ spin_lock(&iovad->iova_rbtree_lock);
+ for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
+ if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
+ iova = container_of(node, struct iova, node);
+ __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
+ if ((pfn_lo >= iova->pfn_lo) &&
+ (pfn_hi <= iova->pfn_hi))
+ goto finish;
+ overlap = 1;
+
+ } else if (overlap)
+ break;
+ }
+
+ /* We are here either because this is the first reserved node
+ * or because we need to insert the remaining non-overlapping
+ * address range
+ */
+ iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
+finish:
+
+ spin_unlock(&iovad->iova_rbtree_lock);
+ spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+ return iova;
+}
+
+/**
+ * copy_reserved_iova - copies the reserved iovas between domains
+ * @from: source domain from where to copy
+ * @to: destination domain where to copy
+ * This function copies reserved iovas from one domain to
+ * the other.
+ */
+void
+copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
+{
+ unsigned long flags;
+ struct rb_node *node;
+
+ spin_lock_irqsave(&from->iova_alloc_lock, flags);
+ spin_lock(&from->iova_rbtree_lock);
+ for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
+ struct iova *iova = container_of(node, struct iova, node);
+ struct iova *new_iova;
+ new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
+ if (!new_iova)
+ printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
+ iova->pfn_lo, iova->pfn_hi);
+ }
+ spin_unlock(&from->iova_rbtree_lock);
+ spin_unlock_irqrestore(&from->iova_alloc_lock, flags);
+}
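Taken together, the iova allocator above is used in alloc/map/free order. A minimal usage sketch, assuming a caller that already holds an initialized iova_domain (iovad is a hypothetical pointer) and wants a naturally aligned block of 16 pages below the 32-bit boundary; error handling is abbreviated:

    struct iova *iova;

    /* carve out 16 pages below 4GB; pfn_lo will be 16-page aligned */
    iova = alloc_iova(iovad, 16, DMA_32BIT_PFN, 1);
    if (!iova)
            return -ENOMEM;

    /* ... program iova->pfn_lo .. iova->pfn_hi into the IOMMU ... */

    /* later: look the range up by any pfn inside it and release it */
    free_iova(iovad, iova->pfn_lo);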
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
new file mode 100644
index 00000000000..ae3028d5a94
--- /dev/null
+++ b/drivers/pci/iova.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K (12)
+#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN (1)
+
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+
+/* iova structure */
+struct iova {
+ struct rb_node node;
+ unsigned long pfn_hi; /* Highest allocated pfn */
+ unsigned long pfn_lo; /* Lowest allocated pfn */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+ spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */
+ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
+ struct rb_root rbroot; /* iova domain rbtree root */
+ struct rb_node *cached32_node; /* Save last allocated node */
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+ unsigned long limit_pfn,
+ bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+ unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
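For reference, the fixed-4K helpers in this header are independent of the host PAGE_SIZE; a few worked values (assuming DMA_32BIT_MASK is 0xffffffff):

    PAGE_ALIGN_4K(0x1234) == 0x2000   /* rounded up to the next 4K page   */
    IOVA_PFN(0x12345678)  == 0x12345  /* byte address >> PAGE_SHIFT_4K    */
    DMA_32BIT_PFN         == 0xfffff  /* highest pfn reachable in 32 bits */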
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6fda33de84e..fc87e14b50d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -90,3 +90,4 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
return NULL;
}
+struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5db6b6690b5..463a5a9d583 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -837,6 +837,19 @@ static void pci_release_dev(struct device *dev)
kfree(pci_dev);
}
+static void set_pcie_port_type(struct pci_dev *pdev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return;
+ pdev->is_pcie = 1;
+ pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+ pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
/**
* pci_cfg_space_size - get the configuration space size of the PCI device.
* @dev: PCI device
@@ -951,6 +964,7 @@ pci_scan_device(struct pci_bus *bus, int devfn)
dev->device = (l >> 16) & 0xffff;
dev->cfg_size = pci_cfg_space_size(dev);
dev->error_state = pci_channel_io_normal;
+ set_pcie_port_type(dev);
/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
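set_pcie_port_type() above is a straight field decode of the PCI Express Flags register. A worked example, assuming a root port whose flags register reads 0x0041 (capability version 1, device/port type 4):

    reg16 = 0x0041;
    pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;  /* == 0x4, root port */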
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index c6e79d01ce3..b001b5922e3 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -14,6 +14,40 @@
#include "pci.h"
DECLARE_RWSEM(pci_bus_sem);
+/*
+ * Find the upstream PCIe-to-PCI bridge of a PCI device.
+ * If the device is PCIe, return NULL.
+ * If the device is connected only to legacy PCI bridges (that is, its parent
+ * is a legacy PCI bridge and the topmost bridge sits directly on bus 0),
+ * return that topmost bridge.
+ */
+struct pci_dev *
+pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
+{
+ struct pci_dev *tmp = NULL;
+
+ if (pdev->is_pcie)
+ return NULL;
+ while (1) {
+ if (!pdev->bus->self)
+ break;
+ pdev = pdev->bus->self;
+ /* a p2p bridge */
+ if (!pdev->is_pcie) {
+ tmp = pdev;
+ continue;
+ }
+ /* a legacy PCI device should be reached through a PCIe-to-PCI bridge */
+ if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) {
+ /* Busted hardware? */
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+ return pdev;
+ }
+
+ return tmp;
+}
static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr)
{
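A minimal sketch of how a caller such as an IOMMU driver might use the helper above; pdev and dev are hypothetical locals, and the point is that DMA from a legacy PCI device must be attributed to the PCIe requester the IOMMU actually sees:

    struct pci_dev *dev, *bridge;

    bridge = pci_find_upstream_pcie_bridge(pdev);
    if (bridge)
            dev = bridge;   /* program the PCIe-to-PCI bridge's context */
    else
            dev = pdev;     /* native PCIe (or pure legacy) device */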
diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c
index 39a90a6f0f8..bbf3ee10da0 100644
--- a/drivers/power/apm_power.c
+++ b/drivers/power/apm_power.c
@@ -26,65 +26,124 @@ static struct power_supply *main_battery;
static void find_main_battery(void)
{
struct device *dev;
- struct power_supply *bat, *batm;
+ struct power_supply *bat = NULL;
+ struct power_supply *max_charge_bat = NULL;
+ struct power_supply *max_energy_bat = NULL;
union power_supply_propval full;
int max_charge = 0;
+ int max_energy = 0;
main_battery = NULL;
- batm = NULL;
+
list_for_each_entry(dev, &power_supply_class->devices, node) {
bat = dev_get_drvdata(dev);
- /* If none of battery devices cantains 'use_for_apm' flag,
- choice one with maximum design charge */
- if (!PSY_PROP(bat, CHARGE_FULL_DESIGN, &full)) {
+
+ if (bat->use_for_apm) {
+ /* nice, we explicitly asked to report this battery. */
+ main_battery = bat;
+ return;
+ }
+
+ if (!PSY_PROP(bat, CHARGE_FULL_DESIGN, &full) ||
+ !PSY_PROP(bat, CHARGE_FULL, &full)) {
if (full.intval > max_charge) {
- batm = bat;
+ max_charge_bat = bat;
max_charge = full.intval;
}
+ } else if (!PSY_PROP(bat, ENERGY_FULL_DESIGN, &full) ||
+ !PSY_PROP(bat, ENERGY_FULL, &full)) {
+ if (full.intval > max_energy) {
+ max_energy_bat = bat;
+ max_energy = full.intval;
+ }
}
+ }
- if (bat->use_for_apm)
- main_battery = bat;
+ if ((max_energy_bat && max_charge_bat) &&
+ (max_energy_bat != max_charge_bat)) {
+ /* try to guess which battery has more capacity */
+ if (!PSY_PROP(max_charge_bat, VOLTAGE_MAX_DESIGN, &full)) {
+ if (max_energy > max_charge * full.intval)
+ main_battery = max_energy_bat;
+ else
+ main_battery = max_charge_bat;
+ } else if (!PSY_PROP(max_energy_bat, VOLTAGE_MAX_DESIGN,
+ &full)) {
+ if (max_charge > max_energy / full.intval)
+ main_battery = max_charge_bat;
+ else
+ main_battery = max_energy_bat;
+ } else {
+ /* give up, choose either */
+ main_battery = max_energy_bat;
+ }
+ } else if (max_charge_bat) {
+ main_battery = max_charge_bat;
+ } else if (max_energy_bat) {
+ main_battery = max_energy_bat;
+ } else {
+ /* give up, fall back to the last battery seen, if any */
+ main_battery = bat;
}
- if (!main_battery)
- main_battery = batm;
}
-static int calculate_time(int status)
+static int calculate_time(int status, int using_charge)
{
- union power_supply_propval charge_full, charge_empty;
- union power_supply_propval charge, I;
+ union power_supply_propval full;
+ union power_supply_propval empty;
+ union power_supply_propval cur;
+ union power_supply_propval I;
+ enum power_supply_property full_prop;
+ enum power_supply_property full_design_prop;
+ enum power_supply_property empty_prop;
+ enum power_supply_property empty_design_prop;
+ enum power_supply_property cur_avg_prop;
+ enum power_supply_property cur_now_prop;
- if (MPSY_PROP(CHARGE_FULL, &charge_full)) {
- /* if battery can't report this property, use design value */
- if (MPSY_PROP(CHARGE_FULL_DESIGN, &charge_full))
+ if (MPSY_PROP(CURRENT_AVG, &I)) {
+ /* if battery can't report average value, use momentary */
+ if (MPSY_PROP(CURRENT_NOW, &I))
return -1;
}
- if (MPSY_PROP(CHARGE_EMPTY, &charge_empty)) {
- /* if battery can't report this property, use design value */
- if (MPSY_PROP(CHARGE_EMPTY_DESIGN, &charge_empty))
- charge_empty.intval = 0;
+ if (using_charge) {
+ full_prop = POWER_SUPPLY_PROP_CHARGE_FULL;
+ full_design_prop = POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN;
+ empty_prop = POWER_SUPPLY_PROP_CHARGE_EMPTY;
+ empty_design_prop = POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN;
+ cur_avg_prop = POWER_SUPPLY_PROP_CHARGE_AVG;
+ cur_now_prop = POWER_SUPPLY_PROP_CHARGE_NOW;
+ } else {
+ full_prop = POWER_SUPPLY_PROP_ENERGY_FULL;
+ full_design_prop = POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN;
+ empty_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY;
+ empty_design_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN;
+ cur_avg_prop = POWER_SUPPLY_PROP_ENERGY_AVG;
+ cur_now_prop = POWER_SUPPLY_PROP_ENERGY_NOW;
}
- if (MPSY_PROP(CHARGE_AVG, &charge)) {
- /* if battery can't report average value, use momentary */
- if (MPSY_PROP(CHARGE_NOW, &charge))
+ if (_MPSY_PROP(full_prop, &full)) {
+ /* if battery can't report this property, use design value */
+ if (_MPSY_PROP(full_design_prop, &full))
return -1;
}
- if (MPSY_PROP(CURRENT_AVG, &I)) {
+ if (_MPSY_PROP(empty_prop, &empty)) {
+ /* if battery can't report this property, use design value */
+ if (_MPSY_PROP(empty_design_prop, &empty))
+ empty.intval = 0;
+ }
+
+ if (_MPSY_PROP(cur_avg_prop, &cur)) {
/* if battery can't report average value, use momentary */
- if (MPSY_PROP(CURRENT_NOW, &I))
+ if (_MPSY_PROP(cur_now_prop, &cur))
return -1;
}
if (status == POWER_SUPPLY_STATUS_CHARGING)
- return ((charge.intval - charge_full.intval) * 60L) /
- I.intval;
+ return ((cur.intval - full.intval) * 60L) / I.intval;
else
- return -((charge.intval - charge_empty.intval) * 60L) /
- I.intval;
+ return -((cur.intval - empty.intval) * 60L) / I.intval;
}
static int calculate_capacity(int using_charge)
@@ -200,18 +259,22 @@ static void apm_battery_apm_get_power_status(struct apm_power_info *info)
info->units = APM_UNITS_MINS;
if (status.intval == POWER_SUPPLY_STATUS_CHARGING) {
- if (MPSY_PROP(TIME_TO_FULL_AVG, &time_to_full)) {
- if (MPSY_PROP(TIME_TO_FULL_NOW, &time_to_full))
- info->time = calculate_time(status.intval);
- else
- info->time = time_to_full.intval / 60;
+ if (!MPSY_PROP(TIME_TO_FULL_AVG, &time_to_full) ||
+ !MPSY_PROP(TIME_TO_FULL_NOW, &time_to_full)) {
+ info->time = time_to_full.intval / 60;
+ } else {
+ info->time = calculate_time(status.intval, 0);
+ if (info->time == -1)
+ info->time = calculate_time(status.intval, 1);
}
} else {
- if (MPSY_PROP(TIME_TO_EMPTY_AVG, &time_to_empty)) {
- if (MPSY_PROP(TIME_TO_EMPTY_NOW, &time_to_empty))
- info->time = calculate_time(status.intval);
- else
- info->time = time_to_empty.intval / 60;
+ if (!MPSY_PROP(TIME_TO_EMPTY_AVG, &time_to_empty) ||
+ !MPSY_PROP(TIME_TO_EMPTY_NOW, &time_to_empty)) {
+ info->time = time_to_empty.intval / 60;
+ } else {
+ info->time = calculate_time(status.intval, 0);
+ if (info->time == -1)
+ info->time = calculate_time(status.intval, 1);
}
}
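The time estimate in calculate_time() is plain unit arithmetic: the charge properties are reported in µAh and the current in µA, so (cur - empty) / I is hours and the factor of 60 converts to minutes. A worked discharging example, assuming the sign convention that current is negative while discharging:

    cur   =  2700000    /* µAh remaining  */
    empty =        0
    I     = -1800000    /* µA being drawn */
    time  = -((cur - empty) * 60L) / I  ==  90 minutes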
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 2edd5fb6d3d..8d1c64a24de 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -48,8 +48,8 @@ struct raw3270 {
struct timer_list timer; /* Device timer. */
unsigned char *ascebc; /* ascii -> ebcdic table */
- struct class_device *clttydev; /* 3270-class tty device ptr */
- struct class_device *cltubdev; /* 3270-class tub device ptr */
+ struct device *clttydev; /* 3270-class tty device ptr */
+ struct device *cltubdev; /* 3270-class tub device ptr */
struct raw3270_request init_request;
unsigned char init_data[256];
@@ -1107,11 +1107,9 @@ raw3270_delete_device(struct raw3270 *rp)
/* Remove from device chain. */
mutex_lock(&raw3270_mutex);
if (rp->clttydev && !IS_ERR(rp->clttydev))
- class_device_destroy(class3270,
- MKDEV(IBM_TTY3270_MAJOR, rp->minor));
+ device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor));
if (rp->cltubdev && !IS_ERR(rp->cltubdev))
- class_device_destroy(class3270,
- MKDEV(IBM_FS3270_MAJOR, rp->minor));
+ device_destroy(class3270, MKDEV(IBM_FS3270_MAJOR, rp->minor));
list_del_init(&rp->list);
mutex_unlock(&raw3270_mutex);
@@ -1181,24 +1179,22 @@ static int raw3270_create_attributes(struct raw3270 *rp)
if (rc)
goto out;
- rp->clttydev = class_device_create(class3270, NULL,
- MKDEV(IBM_TTY3270_MAJOR, rp->minor),
- &rp->cdev->dev, "tty%s",
- rp->cdev->dev.bus_id);
+ rp->clttydev = device_create(class3270, &rp->cdev->dev,
+ MKDEV(IBM_TTY3270_MAJOR, rp->minor),
+ "tty%s", rp->cdev->dev.bus_id);
if (IS_ERR(rp->clttydev)) {
rc = PTR_ERR(rp->clttydev);
goto out_ttydev;
}
- rp->cltubdev = class_device_create(class3270, NULL,
- MKDEV(IBM_FS3270_MAJOR, rp->minor),
- &rp->cdev->dev, "tub%s",
- rp->cdev->dev.bus_id);
+ rp->cltubdev = device_create(class3270, &rp->cdev->dev,
+ MKDEV(IBM_FS3270_MAJOR, rp->minor),
+ "tub%s", rp->cdev->dev.bus_id);
if (!IS_ERR(rp->cltubdev))
goto out;
rc = PTR_ERR(rp->cltubdev);
- class_device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor));
+ device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor));
out_ttydev:
sysfs_remove_group(&rp->cdev->dev.kobj, &raw3270_attr_group);
diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c
index 2e0d29730b6..aa7f166f403 100644
--- a/drivers/s390/char/tape_class.c
+++ b/drivers/s390/char/tape_class.c
@@ -69,12 +69,9 @@ struct tape_class_device *register_tape_dev(
if (rc)
goto fail_with_cdev;
- tcd->class_device = class_device_create(
- tape_class,
- NULL,
- tcd->char_device->dev,
- device,
- "%s", tcd->device_name
+ tcd->class_device = device_create(tape_class, device,
+ tcd->char_device->dev,
+ "%s", tcd->device_name
);
rc = IS_ERR(tcd->class_device) ? PTR_ERR(tcd->class_device) : 0;
if (rc)
@@ -90,7 +87,7 @@ struct tape_class_device *register_tape_dev(
return tcd;
fail_with_class_device:
- class_device_destroy(tape_class, tcd->char_device->dev);
+ device_destroy(tape_class, tcd->char_device->dev);
fail_with_cdev:
cdev_del(tcd->char_device);
@@ -105,11 +102,9 @@ EXPORT_SYMBOL(register_tape_dev);
void unregister_tape_dev(struct tape_class_device *tcd)
{
if (tcd != NULL && !IS_ERR(tcd)) {
- sysfs_remove_link(
- &tcd->class_device->dev->kobj,
- tcd->mode_name
- );
- class_device_destroy(tape_class, tcd->char_device->dev);
+ sysfs_remove_link(&tcd->class_device->kobj,
+ tcd->mode_name);
+ device_destroy(tape_class, tcd->char_device->dev);
cdev_del(tcd->char_device);
kfree(tcd);
}
diff --git a/drivers/s390/char/tape_class.h b/drivers/s390/char/tape_class.h
index a8bd9b47fad..e2b5ac918ac 100644
--- a/drivers/s390/char/tape_class.h
+++ b/drivers/s390/char/tape_class.h
@@ -24,8 +24,8 @@
#define TAPECLASS_NAME_LEN 32
struct tape_class_device {
- struct cdev * char_device;
- struct class_device * class_device;
+ struct cdev *char_device;
+ struct device *class_device;
char device_name[TAPECLASS_NAME_LEN];
char mode_name[TAPECLASS_NAME_LEN];
};
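The class_device to struct device conversions in the s390 files above and below all follow one shape; a before/after sketch, assuming a class pointer cls, a parent struct device *parent and a dev_t devt (device_create() folds the old, always-NULL class_device parent into its own parent argument):

    /* old */
    cd = class_device_create(cls, NULL, devt, parent, "name%d", 0);
    class_device_destroy(cls, devt);

    /* new */
    dev = device_create(cls, parent, devt, "name%d", 0);
    device_destroy(cls, devt);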
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 12f7a4ce82c..e0c4c508e12 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -74,7 +74,7 @@ struct vmlogrdr_priv_t {
int dev_in_use; /* 1: already opened, 0: not opened*/
spinlock_t priv_lock;
struct device *device;
- struct class_device *class_device;
+ struct device *class_device;
int autorecording;
int autopurge;
};
@@ -762,12 +762,10 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv)
device_unregister(dev);
return ret;
}
- priv->class_device = class_device_create(
- vmlogrdr_class,
- NULL,
- MKDEV(vmlogrdr_major, priv->minor_num),
- dev,
- "%s", dev->bus_id );
+ priv->class_device = device_create(vmlogrdr_class, dev,
+ MKDEV(vmlogrdr_major,
+ priv->minor_num),
+ "%s", dev->bus_id);
if (IS_ERR(priv->class_device)) {
ret = PTR_ERR(priv->class_device);
priv->class_device=NULL;
@@ -783,8 +781,7 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv)
static int vmlogrdr_unregister_device(struct vmlogrdr_priv_t *priv)
{
- class_device_destroy(vmlogrdr_class,
- MKDEV(vmlogrdr_major, priv->minor_num));
+ device_destroy(vmlogrdr_class, MKDEV(vmlogrdr_major, priv->minor_num));
if (priv->device != NULL) {
sysfs_remove_group(&priv->device->kobj, &vmlogrdr_attr_group);
device_unregister(priv->device);
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 42c1f4659ad..297cdceb0ca 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -246,7 +246,7 @@ int chp_add_cmg_attr(struct channel_path *chp)
static ssize_t chp_status_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct channel_path *chp = container_of(dev, struct channel_path, dev);
+ struct channel_path *chp = to_channelpath(dev);
if (!chp)
return 0;
@@ -258,7 +258,7 @@ static ssize_t chp_status_write(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- struct channel_path *cp = container_of(dev, struct channel_path, dev);
+ struct channel_path *cp = to_channelpath(dev);
char cmd[10];
int num_args;
int error;
@@ -286,7 +286,7 @@ static ssize_t chp_configure_show(struct device *dev,
struct channel_path *cp;
int status;
- cp = container_of(dev, struct channel_path, dev);
+ cp = to_channelpath(dev);
status = chp_info_get_status(cp->chpid);
if (status < 0)
return status;
@@ -308,7 +308,7 @@ static ssize_t chp_configure_write(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- cp = container_of(dev, struct channel_path, dev);
+ cp = to_channelpath(dev);
chp_cfg_schedule(cp->chpid, val);
cfg_wait_idle();
@@ -320,7 +320,7 @@ static DEVICE_ATTR(configure, 0644, chp_configure_show, chp_configure_write);
static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct channel_path *chp = container_of(dev, struct channel_path, dev);
+ struct channel_path *chp = to_channelpath(dev);
if (!chp)
return 0;
@@ -374,7 +374,7 @@ static void chp_release(struct device *dev)
{
struct channel_path *cp;
- cp = container_of(dev, struct channel_path, dev);
+ cp = to_channelpath(dev);
kfree(cp);
}
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 5d83dd47146..838f7ac0dc3 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -182,6 +182,15 @@ static int css_register_subchannel(struct subchannel *sch)
sch->dev.bus = &css_bus_type;
sch->dev.release = &css_subchannel_release;
sch->dev.groups = subch_attr_groups;
+ /*
+ * We don't want to generate uevents for I/O subchannels that don't
+ * have a working ccw device behind them since they will be
+ * unregistered before they can be used anyway, so we delay the add
+ * uevent until after device recognition was successful.
+ */
+ if (!cio_is_console(sch->schid))
+ /* Console is special, no need to suppress. */
+ sch->dev.uevent_suppress = 1;
css_update_ssd_info(sch);
/* make it known to the system */
ret = css_sch_device_register(sch);
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 7507067351b..fd5d0c1570d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -559,6 +559,7 @@ zfcp_sg_list_alloc(struct zfcp_sg_list *sg_list, size_t size)
retval = -ENOMEM;
goto out;
}
+ sg_init_table(sg_list->sg, sg_list->count);
for (i = 0, sg = sg_list->sg; i < sg_list->count; i++, sg++) {
sg->length = min(size, PAGE_SIZE);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 57cac7008e0..326e7ee232c 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -63,7 +63,7 @@
static inline void *
zfcp_sg_to_address(struct scatterlist *list)
{
- return (void *) (page_address(list->page) + list->offset);
+ return sg_virt(list);
}
/**
@@ -74,7 +74,7 @@ zfcp_sg_to_address(struct scatterlist *list)
static inline void
zfcp_address_to_sg(void *address, struct scatterlist *list)
{
- list->page = virt_to_page(address);
+ sg_set_page(list, virt_to_page(address));
list->offset = ((unsigned long) address) & (PAGE_SIZE - 1);
}
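sg_virt(), used throughout the remainder of this series, is simply the accessor form of the open-coded expression it replaces; in the 2.6.24-era scatterlist API it is equivalent to:

    static inline void *sg_virt(struct scatterlist *sg)
    {
            return page_address(sg_page(sg)) + sg->offset;
    }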
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index a6475a2bb8a..9438d0b2879 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -308,13 +308,15 @@ zfcp_erp_adisc(struct zfcp_port *port)
if (send_els == NULL)
goto nomem;
- send_els->req = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC);
+ send_els->req = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
if (send_els->req == NULL)
goto nomem;
+ sg_init_table(send_els->req, 1);
- send_els->resp = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC);
+ send_els->resp = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
if (send_els->resp == NULL)
goto nomem;
+ sg_init_table(send_els->resp, 1);
address = (void *) get_zeroed_page(GFP_ATOMIC);
if (address == NULL)
@@ -363,7 +365,7 @@ zfcp_erp_adisc(struct zfcp_port *port)
retval = -ENOMEM;
freemem:
if (address != NULL)
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
if (send_els != NULL) {
kfree(send_els->req);
kfree(send_els->resp);
@@ -437,7 +439,7 @@ zfcp_erp_adisc_handler(unsigned long data)
out:
zfcp_port_put(port);
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
kfree(send_els->req);
kfree(send_els->resp);
kfree(send_els);
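The kzalloc-to-kmalloc switch above works only because sg_init_table() now does the initialization (zeroing the entries and setting the termination marker). A minimal sketch of the pattern, assuming a single-entry list and a page already allocated by the caller:

    struct scatterlist *sg;

    sg = kmalloc(sizeof(*sg), GFP_ATOMIC);
    if (!sg)
            return -ENOMEM;
    sg_init_table(sg, 1);      /* zero the entry, mark it as the end */
    sg_set_page(sg, page);     /* 2.6.24-era two-argument form */
    sg->offset = 0;
    sg->length = PAGE_SIZE;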
diff --git a/drivers/sbus/char/vfc_dev.c b/drivers/sbus/char/vfc_dev.c
index e7a1642b2aa..d4f8fcded51 100644
--- a/drivers/sbus/char/vfc_dev.c
+++ b/drivers/sbus/char/vfc_dev.c
@@ -134,7 +134,7 @@ int init_vfc_hw(struct vfc_dev *dev)
int init_vfc_devstruct(struct vfc_dev *dev, int instance)
{
dev->instance=instance;
- init_MUTEX(&dev->device_lock_sem);
+ mutex_init(&dev->device_lock_mtx);
dev->control_reg=0;
dev->busy=0;
return 0;
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index fb14014ee16..afb262b4be1 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1840,7 +1840,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
(scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) {
if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) {
struct scatterlist *sg = scsi_sglist(srb);
- char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ char *buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
}
@@ -1919,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re
char *buf;
unsigned long flags = 0;
local_irq_save(flags);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
local_irq_restore(flags);
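The kmap_atomic() conversions in the SCSI drivers here all share one idiom worth spelling out: the mapping returns the page base, the sg offset is added for the copy, and the unmap must be handed the page base again, hence buf - sg->offset. A minimal sketch, assuming a caller where the KM_IRQ0 slot is legitimate (cmd, data and flags are hypothetical locals):

    struct scatterlist *sg = scsi_sglist(cmd);
    char *buf;

    local_irq_save(flags);
    buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
    memcpy(buf, data, sg->length);
    kunmap_atomic(buf - sg->offset, KM_IRQ0);  /* back to the page base */
    local_irq_restore(flags);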
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index a64153b9603..59716ebeb10 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1469,7 +1469,7 @@ static void tw_transfer_internal(TW_Device_Extension *tw_dev, int request_id,
struct scatterlist *sg = scsi_sglist(cmd);
local_irq_save(flags);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
transfer_len = min(sg->length, len);
memcpy(buf, data, transfer_len);
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 988f0bc5eda..2597209183d 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -298,8 +298,7 @@ static __inline__ void initialize_SCp(Scsi_Cmnd * cmd)
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
} else {
cmd->SCp.buffer = NULL;
@@ -2143,8 +2142,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual));
}
/*
diff --git a/drivers/scsi/NCR53C9x.c b/drivers/scsi/NCR53C9x.c
index 96e8e29aa05..5b0efc90391 100644
--- a/drivers/scsi/NCR53C9x.c
+++ b/drivers/scsi/NCR53C9x.c
@@ -927,7 +927,7 @@ static void esp_get_dmabufs(struct NCR_ESP *esp, Scsi_Cmnd *sp)
esp->dma_mmu_get_scsi_sgl(esp, sp);
else
sp->SCp.ptr =
- (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset));
+ (char *) virt_to_phys(sg_virt(sp->SCp.buffer));
}
}
@@ -1748,7 +1748,7 @@ static inline void advance_sg(struct NCR_ESP *esp, Scsi_Cmnd *sp)
if (esp->dma_advance_sg)
esp->dma_advance_sg (sp);
else
- sp->SCp.ptr = (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset));
+ sp->SCp.ptr = (char *) virt_to_phys(sg_virt(sp->SCp.buffer));
}
diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c
index 3168a179484..137d065db3d 100644
--- a/drivers/scsi/NCR53c406a.c
+++ b/drivers/scsi/NCR53c406a.c
@@ -875,8 +875,7 @@ static void NCR53c406a_intr(void *dev_id)
outb(TRANSFER_INFO | DMA_OP, CMD_REG);
#if USE_PIO
scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
- NCR53c406a_pio_write(page_address(sg->page) + sg->offset,
- sg->length);
+ NCR53c406a_pio_write(sg_virt(sg), sg->length);
}
REG0;
#endif /* USE_PIO */
@@ -897,8 +896,7 @@ static void NCR53c406a_intr(void *dev_id)
outb(TRANSFER_INFO | DMA_OP, CMD_REG);
#if USE_PIO
scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
- NCR53c406a_pio_read(page_address(sg->page) + sg->offset,
- sg->length);
+ NCR53c406a_pio_read(sg_virt(sg), sg->length);
}
REG0;
#endif /* USE_PIO */
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 80e448d0f3d..a77ab8d693d 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -356,7 +356,7 @@ static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigne
int transfer_len;
struct scatterlist *sg = scsi_sglist(scsicmd);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
transfer_len = min(sg->length, len + offset);
transfer_len -= offset;
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index a58c265dc8a..ea8c6994764 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -613,7 +613,7 @@ struct aha152x_scdata {
#define SCNEXT(SCpnt) SCDATA(SCpnt)->next
#define SCSEM(SCpnt) SCDATA(SCpnt)->done
-#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset))
+#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))
/* state handling */
static void seldi_run(struct Scsi_Host *shpnt);
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 961a1882cb7..bbcc2c52d79 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -49,7 +49,7 @@
#include "aha1542.h"
#define SCSI_BUF_PA(address) isa_virt_to_bus(address)
-#define SCSI_SG_PA(sgent) (isa_page_to_bus((sgent)->page) + (sgent)->offset)
+#define SCSI_SG_PA(sgent) (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset)
static void BAD_DMA(void *address, unsigned int length)
{
@@ -66,8 +66,7 @@ static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
int badseg)
{
printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
- badseg, nseg,
- page_address(sgp->page) + sgp->offset,
+ badseg, nseg, sg_virt(sgp),
(unsigned long long)SCSI_SG_PA(sgp),
sgp->length);
@@ -712,8 +711,7 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i);
scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) {
printk(KERN_CRIT "%d: %p %d\n", i,
- (page_address(sg->page) +
- sg->offset), sg->length);
+ sg_virt(sg), sg->length);
};
printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr);
ptr = (unsigned char *) &cptr[i];
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index f81777586b8..f7a252885a5 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -1343,7 +1343,7 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
/* 4 bytes: Areca io control code */
sg = scsi_sglist(cmd);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
if (scsi_sg_count(cmd) > 1) {
retvalue = ARCMSR_MESSAGE_FAIL;
goto message_out;
@@ -1593,7 +1593,7 @@ static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb,
strncpy(&inqdata[32], "R001", 4); /* Product Revision */
sg = scsi_sglist(cmd);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memcpy(buffer, inqdata, sizeof(inqdata));
sg = scsi_sglist(cmd);
diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
index 52d0b87e9aa..d1780980fb2 100644
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -515,8 +515,7 @@ static inline void initialize_SCp(Scsi_Cmnd *cmd)
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *)cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = (char *)page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
/* ++roman: Try to merge some scatter-buffers if they are at
* contiguous physical addresses.
@@ -2054,8 +2053,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
/* ++roman: Try to merge some scatter-buffers if
* they are at contiguous physical addresses.
*/
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 96180bb47e4..982c5092be1 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -172,7 +172,7 @@ static void IncStat(struct scsi_pointer *SCp, unsigned int Increment)
SCp->Status = 0;
else {
SCp->buffer++;
- SCp->ptr = page_address(SCp->buffer->page) + SCp->buffer->offset;
+ SCp->ptr = sg_virt(SCp->buffer);
SCp->this_residual = SCp->buffer->length;
}
}
@@ -410,7 +410,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
} else {
cmd->SCp.buffer = cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
}
cmd->SCp.Status = (cmd->SCp.this_residual != 0); /* TRUE as long as bytes
diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c
index 668569e8856..8335b608e57 100644
--- a/drivers/scsi/fd_mcs.c
+++ b/drivers/scsi/fd_mcs.c
@@ -973,7 +973,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
if (current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
} else
break;
@@ -1006,7 +1006,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
if (!current_SC->SCp.this_residual && current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
}
}
@@ -1109,7 +1109,7 @@ static int fd_mcs_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
if (current_SC->use_sg) {
current_SC->SCp.buffer = (struct scatterlist *) current_SC->request_buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
current_SC->SCp.buffers_residual = current_SC->use_sg - 1;
} else {
diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c
index 5d282e6a6ae..2cd6b4959eb 100644
--- a/drivers/scsi/fdomain.c
+++ b/drivers/scsi/fdomain.c
@@ -1321,7 +1321,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
if (current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
} else
break;
@@ -1354,7 +1354,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
&& current_SC->SCp.buffers_residual) {
--current_SC->SCp.buffers_residual;
++current_SC->SCp.buffer;
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
}
}
@@ -1439,8 +1439,7 @@ static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt,
if (scsi_sg_count(current_SC)) {
current_SC->SCp.buffer = scsi_sglist(current_SC);
- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page)
- + current_SC->SCp.buffer->offset;
+ current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
} else {
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 3ac080ee6e2..5ab3ce76248 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -2374,18 +2374,18 @@ static void gdth_copy_internal_data(gdth_ha_str *ha, Scsi_Cmnd *scp,
if (cpsum+cpnow > cpcount)
cpnow = cpcount - cpsum;
cpsum += cpnow;
- if (!sl->page) {
+ if (!sg_page(sl)) {
printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n",
ha->hanum);
return;
}
local_irq_save(flags);
- address = kmap_atomic(sl->page, KM_BIO_SRC_IRQ) + sl->offset;
+ address = kmap_atomic(sg_page(sl), KM_BIO_SRC_IRQ) + sl->offset;
if (to_buffer)
memcpy(buffer, address, cpnow);
else
memcpy(address, buffer, cpnow);
- flush_dcache_page(sl->page);
+ flush_dcache_page(sg_page(sl));
kunmap_atomic(address, KM_BIO_SRC_IRQ);
local_irq_restore(flags);
if (cpsum == cpcount)
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 714e6273a70..db004a45073 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -1828,7 +1828,7 @@ static int ibmmca_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
BUG_ON(scsi_sg_count(cmd) > 16);
scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
- ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset);
+ ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg_page(sg)) + sg->offset);
ld(shpnt)[ldn].sge[i].byte_length = sg->length;
}
scb->enable |= IM_POINTER_TO_LIST;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 252d1806467..8d0244c2e7d 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -175,18 +175,18 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
while (bcount) {
count = min(pc->sg->length - pc->b_count, bcount);
- if (PageHighMem(pc->sg->page)) {
+ if (PageHighMem(sg_page(pc->sg))) {
unsigned long flags;
local_irq_save(flags);
- buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+ buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
pc->sg->offset;
drive->hwif->atapi_input_bytes(drive,
buf + pc->b_count, count);
kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
local_irq_restore(flags);
} else {
- buf = page_address(pc->sg->page) + pc->sg->offset;
+ buf = sg_virt(pc->sg);
drive->hwif->atapi_input_bytes(drive,
buf + pc->b_count, count);
}
@@ -212,18 +212,18 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
while (bcount) {
count = min(pc->sg->length - pc->b_count, bcount);
- if (PageHighMem(pc->sg->page)) {
+ if (PageHighMem(sg_page(pc->sg))) {
unsigned long flags;
local_irq_save(flags);
- buf = kmap_atomic(pc->sg->page, KM_IRQ0) +
+ buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
pc->sg->offset;
drive->hwif->atapi_output_bytes(drive,
buf + pc->b_count, count);
kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
local_irq_restore(flags);
} else {
- buf = page_address(pc->sg->page) + pc->sg->offset;
+ buf = sg_virt(pc->sg);
drive->hwif->atapi_output_bytes(drive,
buf + pc->b_count, count);
}
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 74cdc1f0a78..a3d0c6b1495 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -705,9 +705,7 @@ static int imm_completion(struct scsi_cmnd *cmd)
cmd->SCp.buffer++;
cmd->SCp.this_residual =
cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
/*
* Make sure that we transfer even number of bytes
@@ -844,9 +842,7 @@ static int imm_engine(imm_struct *dev, struct scsi_cmnd *cmd)
cmd->SCp.buffer =
(struct scatterlist *) cmd->request_buffer;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
} else {
/* else fill the only available buffer */
cmd->SCp.buffer = NULL;
diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c
index ab7cbf3449c..c8b452f2878 100644
--- a/drivers/scsi/in2000.c
+++ b/drivers/scsi/in2000.c
@@ -372,7 +372,7 @@ static int in2000_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = (char *) page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
} else {
cmd->SCp.buffer = NULL;
@@ -764,7 +764,7 @@ static void transfer_bytes(Scsi_Cmnd * cmd, int data_in_dir)
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
}
/* Set up hardware registers */
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index c316a0bcae6..439b97a6a26 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2872,6 +2872,7 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)
}
scatterlist = sglist->scatterlist;
+ sg_init_table(scatterlist, num_elem);
sglist->order = order;
sglist->num_sg = num_elem;
@@ -2884,12 +2885,12 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)
/* Free up what we already allocated */
for (j = i - 1; j >= 0; j--)
- __free_pages(scatterlist[j].page, order);
+ __free_pages(sg_page(&scatterlist[j]), order);
kfree(sglist);
return NULL;
}
- scatterlist[i].page = page;
+ sg_set_page(&scatterlist[i], page);
}
return sglist;
@@ -2910,7 +2911,7 @@ static void ipr_free_ucode_buffer(struct ipr_sglist *sglist)
int i;
for (i = 0; i < sglist->num_sg; i++)
- __free_pages(sglist->scatterlist[i].page, sglist->order);
+ __free_pages(sg_page(&sglist->scatterlist[i]), sglist->order);
kfree(sglist);
}
@@ -2940,9 +2941,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
scatterlist = sglist->scatterlist;
for (i = 0; i < (len / bsize_elem); i++, buffer += bsize_elem) {
- kaddr = kmap(scatterlist[i].page);
+ struct page *page = sg_page(&scatterlist[i]);
+
+ kaddr = kmap(page);
memcpy(kaddr, buffer, bsize_elem);
- kunmap(scatterlist[i].page);
+ kunmap(page);
scatterlist[i].length = bsize_elem;
@@ -2953,9 +2956,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
}
if (len % bsize_elem) {
- kaddr = kmap(scatterlist[i].page);
+ struct page *page = sg_page(&scatterlist[i]);
+
+ kaddr = kmap(page);
memcpy(kaddr, buffer, len % bsize_elem);
- kunmap(scatterlist[i].page);
+ kunmap(page);
scatterlist[i].length = len % bsize_elem;
}
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index edaac2714c5..5c5a9b2628f 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -1515,7 +1515,7 @@ static int ips_is_passthru(struct scsi_cmnd *SC)
/* kmap_atomic() ensures addressability of the user buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
if (buffer && buffer[0] == 'C' && buffer[1] == 'O' &&
buffer[2] == 'P' && buffer[3] == 'P') {
kunmap_atomic(buffer - sg->offset, KM_IRQ0);
@@ -3523,7 +3523,7 @@ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count)
/* kmap_atomic() ensures addressability of the data buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset;
+ buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
memcpy(buffer, &cdata[xfer_cnt], min_cnt);
kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
local_irq_restore(flags);
@@ -3556,7 +3556,7 @@ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count)
/* kmap_atomic() ensures addressability of the data buffer.*/
/* local_irq_save() protects the KM_IRQ0 address slot. */
local_irq_save(flags);
- buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset;
+ buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
memcpy(&cdata[xfer_cnt], buffer, min_cnt);
kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
local_irq_restore(flags);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a21455d0274..6ce4109efdf 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -70,9 +70,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
static inline void
iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
{
- ibuf->sg.page = virt_to_page(vbuf);
- ibuf->sg.offset = offset_in_page(vbuf);
- ibuf->sg.length = size;
+ sg_init_one(&ibuf->sg, vbuf, size);
ibuf->sent = 0;
ibuf->use_sendmsg = 1;
}
@@ -80,13 +78,14 @@ iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
static inline void
iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg)
{
- ibuf->sg.page = sg->page;
+ sg_init_table(&ibuf->sg, 1);
+ sg_set_page(&ibuf->sg, sg_page(sg));
ibuf->sg.offset = sg->offset;
ibuf->sg.length = sg->length;
/*
* Fastpath: sg element fits into single page
*/
- if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg->page))
+ if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg_page(sg)))
ibuf->use_sendmsg = 0;
else
ibuf->use_sendmsg = 1;
@@ -716,7 +715,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) {
char *dest;
- dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
+ dest = kmap_atomic(sg_page(&sg[i]), KM_SOFTIRQ0);
rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset,
sg[i].length, offset);
kunmap_atomic(dest, KM_SOFTIRQ0);
@@ -1103,9 +1102,9 @@ iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
* slab case.
*/
if (buf->use_sendmsg)
- res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags);
+ res = sock_no_sendpage(sk, sg_page(&buf->sg), offset, size, flags);
else
- res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags);
+ res = tcp_conn->sendpage(sk, sg_page(&buf->sg), offset, size, flags);
if (res >= 0) {
conn->txdata_octets += res;
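sg_init_one(), used in iscsi_buf_init_iov() above, bundles the open-coded assignments it replaces; conceptually it amounts to:

    sg_init_table(sg, 1);
    sg_set_page(sg, virt_to_page(vbuf));
    sg->offset = offset_in_page(vbuf);
    sg->length = size;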
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 10d1aff9938..66c65203573 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -658,7 +658,7 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int *busy)
struct scatterlist *sg;
sg = scsi_sglist(cmd);
- buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
memset(buf, 0, cmd->cmnd[4]);
kunmap_atomic(buf - sg->offset, KM_IRQ0);
@@ -1542,10 +1542,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
if( cmd->cmnd[0] == INQUIRY && !islogical ) {
sgl = scsi_sglist(cmd);
- if( sgl->page ) {
- c = *(unsigned char *)
- page_address((&sgl[0])->page) +
- (&sgl[0])->offset;
+ if( sg_page(sgl) ) {
+ c = *(unsigned char *) sg_virt(&sgl[0]);
} else {
printk(KERN_WARNING
"megaraid: invalid sg.\n");
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 78779209ac8..c8923108183 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -1584,10 +1584,8 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy)
caddr_t vaddr;
sgl = scsi_sglist(scp);
- if (sgl->page) {
- vaddr = (caddr_t)
- (page_address((&sgl[0])->page)
- + (&sgl[0])->offset);
+ if (sg_page(sgl)) {
+ vaddr = (caddr_t) sg_virt(&sgl[0]);
memset(vaddr, 0, scp->cmnd[4]);
}
@@ -2328,10 +2326,8 @@ megaraid_mbox_dpc(unsigned long devp)
&& IS_RAID_CH(raid_dev, scb->dev_channel)) {
sgl = scsi_sglist(scp);
- if (sgl->page) {
- c = *(unsigned char *)
- (page_address((&sgl[0])->page) +
- (&sgl[0])->offset);
+ if (sg_page(sgl)) {
+ c = *(unsigned char *) sg_virt(&sgl[0]);
} else {
con_log(CL_ANN, (KERN_WARNING
"megaraid mailbox: invalid sg:%d\n",
diff --git a/drivers/scsi/oktagon_esp.c b/drivers/scsi/oktagon_esp.c
index 26a6d55faf3..8e5eadbd5c5 100644
--- a/drivers/scsi/oktagon_esp.c
+++ b/drivers/scsi/oktagon_esp.c
@@ -550,8 +550,7 @@ void dma_mmu_get_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp)
void dma_mmu_get_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp)
{
- sp->SCp.ptr = page_address(sp->SCp.buffer->page)+
- sp->SCp.buffer->offset;
+ sp->SCp.ptr = sg_virt(sp->SCp.buffer);
}
void dma_mmu_release_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp)
@@ -564,8 +563,7 @@ void dma_mmu_release_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp)
void dma_advance_sg(Scsi_Cmnd *sp)
{
- sp->SCp.ptr = page_address(sp->SCp.buffer->page)+
- sp->SCp.buffer->offset;
+ sp->SCp.ptr = sg_virt(sp->SCp.buffer);
}
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 331b789937c..1c5c4b68f20 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -542,7 +542,7 @@ static int osst_verify_frame(struct osst_tape * STp, int frame_seq_number, int q
if (STp->raw) {
if (STp->buffer->syscall_result) {
for (i=0; i < STp->buffer->sg_segs; i++)
- memset(page_address(STp->buffer->sg[i].page),
+ memset(page_address(sg_page(&STp->buffer->sg[i])),
0, STp->buffer->sg[i].length);
strcpy(STp->buffer->b_data, "READ ERROR ON FRAME");
} else
@@ -4437,7 +4437,7 @@ static int os_scsi_tape_open(struct inode * inode, struct file * filp)
for (i = 0, b_size = 0;
(i < STp->buffer->sg_segs) && ((b_size + STp->buffer->sg[i].length) <= OS_DATA_SIZE);
b_size += STp->buffer->sg[i++].length);
- STp->buffer->aux = (os_aux_t *) (page_address(STp->buffer->sg[i].page) + OS_DATA_SIZE - b_size);
+ STp->buffer->aux = (os_aux_t *) (page_address(sg_page(&STp->buffer->sg[i])) + OS_DATA_SIZE - b_size);
#if DEBUG
printk(OSST_DEB_MSG "%s:D: b_data points to %p in segment 0 at %p\n", name,
STp->buffer->b_data, page_address(STp->buffer->sg[0].page));
@@ -5252,25 +5252,26 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
/* Try to allocate the first segment up to OS_DATA_SIZE and the others
big enough to reach the goal (code assumes no segments in place) */
for (b_size = OS_DATA_SIZE, order = OSST_FIRST_ORDER; b_size >= PAGE_SIZE; order--, b_size /= 2) {
- STbuffer->sg[0].page = alloc_pages(priority, order);
+ struct page *page = alloc_pages(priority, order);
+
STbuffer->sg[0].offset = 0;
- if (STbuffer->sg[0].page != NULL) {
+ if (page != NULL) {
+ sg_set_page(&STbuffer->sg[0], page);
STbuffer->sg[0].length = b_size;
- STbuffer->b_data = page_address(STbuffer->sg[0].page);
+ STbuffer->b_data = page_address(page);
break;
}
}
- if (STbuffer->sg[0].page == NULL) {
+ if (sg_page(&STbuffer->sg[0]) == NULL) {
printk(KERN_NOTICE "osst :I: Can't allocate tape buffer main segment.\n");
return 0;
}
/* Got initial segment of 'bsize,order', continue with same size if possible, except for AUX */
for (segs=STbuffer->sg_segs=1, got=b_size;
segs < max_segs && got < OS_FRAME_SIZE; ) {
- STbuffer->sg[segs].page =
- alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
+ struct page *page = alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
STbuffer->sg[segs].offset = 0;
- if (STbuffer->sg[segs].page == NULL) {
+ if (page == NULL) {
if (OS_FRAME_SIZE - got <= (max_segs - segs) * b_size / 2 && order) {
b_size /= 2; /* Large enough for the rest of the buffers */
order--;
@@ -5284,6 +5285,7 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
normalize_buffer(STbuffer);
return 0;
}
+ sg_set_page(&STbuffer->sg[segs], page);
STbuffer->sg[segs].length = (OS_FRAME_SIZE - got <= PAGE_SIZE / 2) ? (OS_FRAME_SIZE - got) : b_size;
got += STbuffer->sg[segs].length;
STbuffer->buffer_size = got;
@@ -5316,7 +5318,7 @@ static void normalize_buffer(struct osst_buffer *STbuffer)
b_size < STbuffer->sg[i].length;
b_size *= 2, order++);
- __free_pages(STbuffer->sg[i].page, order);
+ __free_pages(sg_page(&STbuffer->sg[i]), order);
STbuffer->buffer_size -= STbuffer->sg[i].length;
}
#if DEBUG
@@ -5344,7 +5346,7 @@ static int append_to_buffer(const char __user *ubp, struct osst_buffer *st_bp, i
for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count;
- res = copy_from_user(page_address(st_bp->sg[i].page) + offset, ubp, cnt);
+ res = copy_from_user(page_address(sg_page(&st_bp->sg[i])) + offset, ubp, cnt);
if (res)
return (-EFAULT);
do_count -= cnt;
@@ -5377,7 +5379,7 @@ static int from_buffer(struct osst_buffer *st_bp, char __user *ubp, int do_count
for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count;
- res = copy_to_user(ubp, page_address(st_bp->sg[i].page) + offset, cnt);
+ res = copy_to_user(ubp, page_address(sg_page(&st_bp->sg[i])) + offset, cnt);
if (res)
return (-EFAULT);
do_count -= cnt;
@@ -5410,7 +5412,7 @@ static int osst_zero_buffer_tail(struct osst_buffer *st_bp)
i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length - offset < do_count ?
st_bp->sg[i].length - offset : do_count ;
- memset(page_address(st_bp->sg[i].page) + offset, 0, cnt);
+ memset(page_address(sg_page(&st_bp->sg[i])) + offset, 0, cnt);
do_count -= cnt;
offset = 0;
}
@@ -5430,7 +5432,7 @@ static int osst_copy_to_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length < do_count ?
st_bp->sg[i].length : do_count ;
- memcpy(page_address(st_bp->sg[i].page), ptr, cnt);
+ memcpy(page_address(sg_page(&st_bp->sg[i])), ptr, cnt);
do_count -= cnt;
ptr += cnt;
}
@@ -5451,7 +5453,7 @@ static int osst_copy_from_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
cnt = st_bp->sg[i].length < do_count ?
st_bp->sg[i].length : do_count ;
- memcpy(ptr, page_address(st_bp->sg[i].page), cnt);
+ memcpy(ptr, page_address(sg_page(&st_bp->sg[i])), cnt);
do_count -= cnt;
ptr += cnt;
}
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index 98397559c53..7db28cd4944 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -393,7 +393,7 @@ enum _burst_mode {
#define MSG_EXT_SDTR 0x01
/* scatter-gather table */
-# define BUFFER_ADDR ((char *)((unsigned int)(SCpnt->SCp.buffer->page) + SCpnt->SCp.buffer->offset))
+# define BUFFER_ADDR ((char *)((sg_virt(SCpnt->SCp.buffer))))
#endif /*__nsp_cs__*/
/* end */
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 190e2a7d706..969b9387a0c 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -443,8 +443,7 @@ SYM53C500_intr(int irq, void *dev_id)
scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
SYM53C500_pio_write(fast_pio, port_base,
- page_address(sg->page) + sg->offset,
- sg->length);
+ sg_virt(sg), sg->length);
}
REG0(port_base);
}
@@ -463,8 +462,7 @@ SYM53C500_intr(int irq, void *dev_id)
scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
SYM53C500_pio_read(fast_pio, port_base,
- page_address(sg->page) + sg->offset,
- sg->length);
+ sg_virt(sg), sg->length);
}
REG0(port_base);
}
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 67b6d76a6c8..67ee51a3d7e 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -608,9 +608,7 @@ static int ppa_completion(struct scsi_cmnd *cmd)
cmd->SCp.buffer++;
cmd->SCp.this_residual =
cmd->SCp.buffer->length;
- cmd->SCp.ptr =
- page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
}
}
/* Now check to see if the drive is ready to comunicate */
@@ -756,8 +754,7 @@ static int ppa_engine(ppa_struct *dev, struct scsi_cmnd *cmd)
/* if many buffers are available, start filling the first */
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
} else {
/* else fill the only available buffer */
cmd->SCp.buffer = NULL;
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 0f43d1d046d..17b4a7c4618 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -111,14 +111,14 @@ static int fill_from_dev_buffer(struct scsi_cmnd *cmd, const void *buf)
req_len = act_len = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
if (active) {
- kaddr = kmap_atomic(sgpnt->page, KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
active = 0;
len = buflen - req_len;
}
memcpy(kaddr + sgpnt->offset, buf + req_len, len);
- flush_kernel_dcache_page(sgpnt->page);
+ flush_kernel_dcache_page(sg_page(sgpnt));
kunmap_atomic(kaddr, KM_IRQ0);
act_len += len;
}
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)
req_len = fin = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
- kaddr = kmap_atomic(sgpnt->page, KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
len = buflen - req_len;
diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c
index 2bfbf26c00e..de7b3bc2cbc 100644
--- a/drivers/scsi/qlogicfas408.c
+++ b/drivers/scsi/qlogicfas408.c
@@ -317,7 +317,7 @@ static unsigned int ql_pcmd(struct scsi_cmnd *cmd)
return ((priv->qabort == 1 ?
DID_ABORT : DID_RESET) << 16);
}
- buf = page_address(sg->page) + sg->offset;
+ buf = sg_virt(sg);
if (ql_pdma(priv, phase, buf, sg->length))
break;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 72ee4c9cfb1..46cae5a212d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -625,7 +625,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
scsi_for_each_sg(scp, sg, scp->use_sg, k) {
if (active) {
kaddr = (unsigned char *)
- kmap_atomic(sg->page, KM_USER0);
+ kmap_atomic(sg_page(sg), KM_USER0);
if (NULL == kaddr)
return (DID_ERROR << 16);
kaddr_off = (unsigned char *)kaddr + sg->offset;
@@ -672,7 +672,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
sg = scsi_sglist(scp);
req_len = fin = 0;
for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) {
- kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0);
+ kaddr = (unsigned char *)kmap_atomic(sg_page(sg), KM_USER0);
if (NULL == kaddr)
return -1;
kaddr_off = (unsigned char *)kaddr + sg->offset;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aac8a02cbe8..61fdaf02f25 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -295,7 +295,7 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
int i, err, nr_vecs = 0;
for_each_sg(sgl, sg, nsegs, i) {
- page = sg->page;
+ page = sg_page(sg);
off = sg->offset;
len = sg->length;
data_len += len;
@@ -764,7 +764,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
if (unlikely(!sgl))
goto enomem;
- memset(sgl, 0, sizeof(*sgl) * sgp->size);
+ sg_init_table(sgl, sgp->size);
/*
* first loop through, set initial index and return value
@@ -781,6 +781,13 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
/*
+ * if we have nothing left, mark the last segment as
+ * end-of-list
+ */
+ if (!left)
+ sg_mark_end(sgl, this);
+
+ /*
* don't allow subsequent mempool allocs to sleep, it would
* violate the mempool principle.
*/
@@ -2353,7 +2360,7 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
*offset = *offset - len_complete + sg->offset;
/* Assumption: contiguous pages can be accessed as "page + i" */
- page = nth_page(sg->page, (*offset >> PAGE_SHIFT));
+ page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
*offset &= ~PAGE_MASK;
/* Bytes in this sg-entry from *offset to the end of the page */
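The sg_init_table()/sg_mark_end() pair added to scsi_alloc_sgtable() is what keeps a chained table walkable: sg_init_table() clears the entries (and, when scatterlist debugging is enabled, stamps them with a magic value), sg_chain() links one allocation to the next, and sg_mark_end() flags the final entry so iterators stop there instead of running off into a chain pointer. A hedged sketch of filling a small table with the same two-argument helpers this patch uses (page0/page1 stand in for the caller's pages):

	struct scatterlist sgl[2];

	sg_init_table(sgl, 2);		/* zero entries, set debug magic */
	sg_set_page(&sgl[0], page0);
	sgl[0].offset = 0;
	sgl[0].length = PAGE_SIZE;
	sg_set_page(&sgl[1], page1);
	sgl[1].offset = 0;
	sgl[1].length = PAGE_SIZE;
	sg_mark_end(sgl, 2);		/* entry 2 is the last one */
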
diff --git a/drivers/scsi/seagate.c b/drivers/scsi/seagate.c
index ce80fa9ad81..b11324479b5 100644
--- a/drivers/scsi/seagate.c
+++ b/drivers/scsi/seagate.c
@@ -999,14 +999,14 @@ connect_loop:
for (i = 0; i < nobuffs; ++i)
printk("scsi%d : buffer %d address = %p length = %d\n",
hostno, i,
- page_address(buffer[i].page) + buffer[i].offset,
+ sg_virt(&buffer[i]),
buffer[i].length);
}
#endif
buffer = (struct scatterlist *) SCint->request_buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
} else {
DPRINTK (DEBUG_SG, "scsi%d : scatter gather not requested.\n", hostno);
buffer = NULL;
@@ -1239,7 +1239,7 @@ connect_loop:
--nobuffs;
++buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
DPRINTK (DEBUG_SG,
"scsi%d : next scatter-gather buffer len = %d address = %08x\n",
hostno, len, data);
@@ -1396,7 +1396,7 @@ connect_loop:
--nobuffs;
++buffer;
len = buffer->length;
- data = page_address(buffer->page) + buffer->offset;
+ data = sg_virt(buffer);
DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data);
}
break;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7238b2dfc49..cc197100284 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1169,7 +1169,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
len = vma->vm_end - sa;
len = (len < sg->length) ? len : sg->length;
if (offset < len) {
- page = virt_to_page(page_address(sg->page) + offset);
+ page = virt_to_page(page_address(sg_page(sg)) + offset);
get_page(page); /* increment page count */
break;
}
@@ -1717,13 +1717,13 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
goto out_unlock; */
}
- sgl[0].page = pages[0];
+ sg_set_page(sgl, pages[0]);
sgl[0].offset = uaddr & ~PAGE_MASK;
if (nr_pages > 1) {
sgl[0].length = PAGE_SIZE - sgl[0].offset;
count -= sgl[0].length;
for (i=1; i < nr_pages ; i++) {
- sgl[i].page = pages[i];
+ sg_set_page(&sgl[i], pages[i]);
sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
count -= PAGE_SIZE;
}
@@ -1754,7 +1754,7 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
int i;
for (i=0; i < nr_pages; i++) {
- struct page *page = sgl[i].page;
+ struct page *page = sg_page(&sgl[i]);
if (dirtied)
SetPageDirty(page);
@@ -1854,7 +1854,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
scatter_elem_sz_prev = ret_sz;
}
}
- sg->page = p;
+ sg_set_page(sg, p);
sg->length = (ret_sz > num) ? num : ret_sz;
SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
@@ -1907,14 +1907,14 @@ sg_write_xfer(Sg_request * srp)
onum = 1;
ksglen = sg->length;
- p = page_address(sg->page);
+ p = page_address(sg_page(sg));
for (j = 0, k = 0; j < onum; ++j) {
res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
if (res)
return res;
for (; p; sg = sg_next(sg), ksglen = sg->length,
- p = page_address(sg->page)) {
+ p = page_address(sg_page(sg))) {
if (usglen <= 0)
break;
if (ksglen > usglen) {
@@ -1991,12 +1991,12 @@ sg_remove_scat(Sg_scatter_hold * schp)
} else {
int k;
- for (k = 0; (k < schp->k_use_sg) && sg->page;
+ for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
++k, sg = sg_next(sg)) {
SCSI_LOG_TIMEOUT(5, printk(
"sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
- k, sg->page, sg->length));
- sg_page_free(sg->page, sg->length);
+ k, sg_page(sg), sg->length));
+ sg_page_free(sg_page(sg), sg->length);
}
}
kfree(schp->buffer);
@@ -2038,7 +2038,7 @@ sg_read_xfer(Sg_request * srp)
} else
onum = 1;
- p = page_address(sg->page);
+ p = page_address(sg_page(sg));
ksglen = sg->length;
for (j = 0, k = 0; j < onum; ++j) {
res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
@@ -2046,7 +2046,7 @@ sg_read_xfer(Sg_request * srp)
return res;
for (; p; sg = sg_next(sg), ksglen = sg->length,
- p = page_address(sg->page)) {
+ p = page_address(sg_page(sg))) {
if (usglen <= 0)
break;
if (ksglen > usglen) {
@@ -2092,15 +2092,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
if ((!outp) || (num_read_xfer <= 0))
return 0;
- for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) {
+ for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
num = sg->length;
if (num > num_read_xfer) {
- if (__copy_to_user(outp, page_address(sg->page),
+ if (__copy_to_user(outp, page_address(sg_page(sg)),
num_read_xfer))
return -EFAULT;
break;
} else {
- if (__copy_to_user(outp, page_address(sg->page),
+ if (__copy_to_user(outp, page_address(sg_page(sg)),
num))
return -EFAULT;
num_read_xfer -= num;
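The sg_next() conversions in sg.c matter for the same reason: a chained table is not one contiguous array, since the final slot of a segment may hold a link to the next segment rather than data, so bare sg++ pointer arithmetic is no longer safe. Iteration goes through the helpers instead; a minimal sketch (sgl/nents are illustrative):

	struct scatterlist *sg;
	unsigned int total = 0;
	int i;

	for_each_sg(sgl, sg, nents, i)	/* follows chain links via sg_next() */
		total += sg->length;
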
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 73c44cbdea4..ce69b9efc10 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3797,7 +3797,7 @@ static void buf_to_sg(struct st_buffer *STbp, unsigned int length)
sg = &(STbp->sg[0]);
frp = STbp->frp;
for (i=count=0; count < length; i++) {
- sg[i].page = frp[i].page;
+ sg_set_page(&sg[i], frp[i].page);
if (length - count > frp[i].length)
sg[i].length = frp[i].length;
else
@@ -4446,14 +4446,14 @@ static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pa
}
/* Populate the scatter/gather list */
- sgl[0].page = pages[0];
+ sg_set_page(&sgl[0], pages[0]);
sgl[0].offset = uaddr & ~PAGE_MASK;
if (nr_pages > 1) {
sgl[0].length = PAGE_SIZE - sgl[0].offset;
count -= sgl[0].length;
for (i=1; i < nr_pages ; i++) {
+			sg_set_page(&sgl[i], pages[i]);
sgl[i].offset = 0;
- sgl[i].page = pages[i];
sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
count -= PAGE_SIZE;
}
@@ -4483,7 +4483,7 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
int i;
for (i=0; i < nr_pages; i++) {
- struct page *page = sgl[i].page;
+ struct page *page = sg_page(&sgl[i]);
if (dirtied)
SetPageDirty(page);
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 4aafe89b557..2dcde373b20 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -272,8 +272,7 @@ static struct scsi_host_template *the_template = NULL;
#define HOSTNO instance->host_no
#define H_NO(cmd) (cmd)->device->host->host_no
-#define SGADDR(buffer) (void *)(((unsigned long)page_address((buffer)->page)) + \
- (buffer)->offset)
+#define SGADDR(buffer)	(void *)sg_virt(buffer)
#ifdef SUPPORT_TAGS
diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c
index 8befab7e983..90cee94d952 100644
--- a/drivers/scsi/sym53c416.c
+++ b/drivers/scsi/sym53c416.c
@@ -196,7 +196,7 @@ static unsigned int sym53c416_base_3[2] = {0,0};
#define MAXHOSTS 4
-#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset))
+#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))
enum phases
{
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index 5c72ca31a47..44193049c4a 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -430,10 +430,7 @@ static __inline__ void dc390_Going_remove (struct dc390_dcb* pDCB, struct dc390_
static struct scatterlist* dc390_sg_build_single(struct scatterlist *sg, void *addr, unsigned int length)
{
- memset(sg, 0, sizeof(struct scatterlist));
- sg->page = virt_to_page(addr);
- sg->length = length;
- sg->offset = (unsigned long)addr & ~PAGE_MASK;
+ sg_init_one(sg, addr, length);
return sg;
}
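sg_init_one() bundles exactly the three statements deleted above: clear the entry, point it at the page backing a kernel-virtual address, and record the in-page offset and length; with chained scatterlists it also terminates the one-entry list. Roughly, assuming a lowmem address as dc390_sg_build_single() does:

	memset(sg, 0, sizeof(*sg));
	sg_set_page(sg, virt_to_page(addr));
	sg->offset = offset_in_page(addr);
	sg->length = length;
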
diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index ea72bbeb8f9..6d1f0edd798 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -681,7 +681,7 @@ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
max = scsi_sg_count(SCpnt);
scsi_for_each_sg(SCpnt, sg, max, i) {
- mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset;
+ mscp->sglist[i].address = isa_page_to_bus(sg_page(sg)) + sg->offset;
mscp->sglist[i].num_bytes = sg->length;
transfer_length += sg->length;
}
diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c
index 0e8e642fd3b..fdbb92d1f72 100644
--- a/drivers/scsi/wd33c93.c
+++ b/drivers/scsi/wd33c93.c
@@ -410,8 +410,7 @@ wd33c93_queuecommand(struct scsi_cmnd *cmd,
if (cmd->use_sg) {
cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
cmd->SCp.buffers_residual = cmd->use_sg - 1;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
} else {
cmd->SCp.buffer = NULL;
@@ -745,8 +744,7 @@ transfer_bytes(const wd33c93_regs regs, struct scsi_cmnd *cmd,
++cmd->SCp.buffer;
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) +
- cmd->SCp.buffer->offset;
+ cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
}
if (!cmd->SCp.this_residual) /* avoid bogus setups */
return;
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index 255c611e78b..03cd44f231d 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -1123,7 +1123,7 @@ static int wd7000_queuecommand(struct scsi_cmnd *SCpnt,
any2scsi(scb->maxlen, nseg * sizeof(Sgb));
scsi_for_each_sg(SCpnt, sg, nseg, i) {
- any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset);
+ any2scsi(sgb[i].ptr, isa_page_to_bus(sg_page(sg)) + sg->offset);
any2scsi(sgb[i].len, sg->length);
}
} else {
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 87665d7df6f..ed438bc7e98 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -624,7 +624,7 @@ choice
config SERIAL_BFIN_DMA
bool "DMA mode"
- depends on DMA_UNCACHED_1M && !KGDB_UART
+ depends on !DMA_UNCACHED_NONE && !KGDB_UART
help
This driver works under DMA mode. If this option is selected, the
blackfin simple dma driver is also enabled.
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
new file mode 100644
index 00000000000..a7d4360ea7d
--- /dev/null
+++ b/drivers/serial/mcf.c
@@ -0,0 +1,653 @@
+/****************************************************************************/
+
+/*
+ * mcf.c -- Freescale ColdFire UART driver
+ *
+ * (C) Copyright 2003-2007, Greg Ungerer <gerg@snapgear.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/****************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/io.h>
+#include <asm/coldfire.h>
+#include <asm/mcfsim.h>
+#include <asm/mcfuart.h>
+#include <asm/nettel.h>
+
+/****************************************************************************/
+
+/*
+ * Some boards implement the DTR/DCD lines using GPIO lines, most
+ * don't. Dummy out the access macros for those that don't. Those
+ * that do should define these macros somewhere in their board-
+ * specific include files.
+ */
+#if !defined(mcf_getppdcd)
+#define mcf_getppdcd(p) (1)
+#endif
+#if !defined(mcf_getppdtr)
+#define mcf_getppdtr(p) (1)
+#endif
+#if !defined(mcf_setppdtr)
+#define mcf_setppdtr(p, v) do { } while (0)
+#endif
+
+/****************************************************************************/
+
+/*
+ * Local per-uart structure.
+ */
+struct mcf_uart {
+ struct uart_port port;
+ unsigned int sigs; /* Local copy of line sigs */
+ unsigned char imr; /* Local IMR mirror */
+};
+
+/****************************************************************************/
+
+static unsigned int mcf_tx_empty(struct uart_port *port)
+{
+ return (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXEMPTY) ?
+ TIOCSER_TEMT : 0;
+}
+
+/****************************************************************************/
+
+static unsigned int mcf_get_mctrl(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+ unsigned int sigs;
+
+ spin_lock_irqsave(&port->lock, flags);
+ sigs = (readb(port->membase + MCFUART_UIPR) & MCFUART_UIPR_CTS) ?
+ 0 : TIOCM_CTS;
+ sigs |= (pp->sigs & TIOCM_RTS);
+ sigs |= (mcf_getppdcd(port->line) ? TIOCM_CD : 0);
+ sigs |= (mcf_getppdtr(port->line) ? TIOCM_DTR : 0);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return sigs;
+}
+
+/****************************************************************************/
+
+static void mcf_set_mctrl(struct uart_port *port, unsigned int sigs)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pp->sigs = sigs;
+ mcf_setppdtr(port->line, (sigs & TIOCM_DTR));
+ if (sigs & TIOCM_RTS)
+ writeb(MCFUART_UOP_RTS, port->membase + MCFUART_UOP1);
+ else
+ writeb(MCFUART_UOP_RTS, port->membase + MCFUART_UOP0);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_start_tx(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pp->imr |= MCFUART_UIR_TXREADY;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_stop_tx(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pp->imr &= ~MCFUART_UIR_TXREADY;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_stop_rx(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pp->imr &= ~MCFUART_UIR_RXREADY;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_break_ctl(struct uart_port *port, int break_state)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ if (break_state == -1)
+ writeb(MCFUART_UCR_CMDBREAKSTART, port->membase + MCFUART_UCR);
+ else
+ writeb(MCFUART_UCR_CMDBREAKSTOP, port->membase + MCFUART_UCR);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_enable_ms(struct uart_port *port)
+{
+}
+
+/****************************************************************************/
+
+static int mcf_startup(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ /* Reset UART, get it into known state... */
+ writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
+ writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
+
+ /* Enable the UART transmitter and receiver */
+ writeb(MCFUART_UCR_RXENABLE | MCFUART_UCR_TXENABLE,
+ port->membase + MCFUART_UCR);
+
+ /* Enable RX interrupts now */
+ pp->imr = MCFUART_UIR_RXREADY;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return 0;
+}
+
+/****************************************************************************/
+
+static void mcf_shutdown(struct uart_port *port)
+{
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ /* Disable all interrupts now */
+ pp->imr = 0;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+
+ /* Disable UART transmitter and receiver */
+ writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
+ writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_set_termios(struct uart_port *port, struct ktermios *termios,
+ struct ktermios *old)
+{
+ unsigned long flags;
+ unsigned int baud, baudclk;
+ unsigned char mr1, mr2;
+
+ baud = uart_get_baud_rate(port, termios, old, 0, 230400);
+ baudclk = ((MCF_BUSCLK / baud) + 16) / 32;
+
+ mr1 = MCFUART_MR1_RXIRQRDY | MCFUART_MR1_RXERRCHAR;
+ mr2 = 0;
+
+ switch (termios->c_cflag & CSIZE) {
+ case CS5: mr1 |= MCFUART_MR1_CS5; break;
+ case CS6: mr1 |= MCFUART_MR1_CS6; break;
+ case CS7: mr1 |= MCFUART_MR1_CS7; break;
+ case CS8:
+ default: mr1 |= MCFUART_MR1_CS8; break;
+ }
+
+ if (termios->c_cflag & PARENB) {
+ if (termios->c_cflag & CMSPAR) {
+ if (termios->c_cflag & PARODD)
+ mr1 |= MCFUART_MR1_PARITYMARK;
+ else
+ mr1 |= MCFUART_MR1_PARITYSPACE;
+ } else {
+ if (termios->c_cflag & PARODD)
+ mr1 |= MCFUART_MR1_PARITYODD;
+ else
+ mr1 |= MCFUART_MR1_PARITYEVEN;
+ }
+ } else {
+ mr1 |= MCFUART_MR1_PARITYNONE;
+ }
+
+ if (termios->c_cflag & CSTOPB)
+ mr2 |= MCFUART_MR2_STOP2;
+ else
+ mr2 |= MCFUART_MR2_STOP1;
+
+ if (termios->c_cflag & CRTSCTS) {
+ mr1 |= MCFUART_MR1_RXRTS;
+ mr2 |= MCFUART_MR2_TXCTS;
+ }
+
+ spin_lock_irqsave(&port->lock, flags);
+ writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
+ writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
+ writeb(MCFUART_UCR_CMDRESETMRPTR, port->membase + MCFUART_UCR);
+ writeb(mr1, port->membase + MCFUART_UMR);
+ writeb(mr2, port->membase + MCFUART_UMR);
+ writeb((baudclk & 0xff00) >> 8, port->membase + MCFUART_UBG1);
+ writeb((baudclk & 0xff), port->membase + MCFUART_UBG2);
+ writeb(MCFUART_UCSR_RXCLKTIMER | MCFUART_UCSR_TXCLKTIMER,
+ port->membase + MCFUART_UCSR);
+ writeb(MCFUART_UCR_RXENABLE | MCFUART_UCR_TXENABLE,
+ port->membase + MCFUART_UCR);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/****************************************************************************/
+
+static void mcf_rx_chars(struct mcf_uart *pp)
+{
+ struct uart_port *port = (struct uart_port *) pp;
+ unsigned char status, ch, flag;
+
+ while ((status = readb(port->membase + MCFUART_USR)) & MCFUART_USR_RXREADY) {
+ ch = readb(port->membase + MCFUART_URB);
+ flag = TTY_NORMAL;
+ port->icount.rx++;
+
+ if (status & MCFUART_USR_RXERR) {
+ writeb(MCFUART_UCR_CMDRESETERR,
+ port->membase + MCFUART_UCR);
+
+ if (status & MCFUART_USR_RXBREAK) {
+ port->icount.brk++;
+ if (uart_handle_break(port))
+ continue;
+ } else if (status & MCFUART_USR_RXPARITY) {
+ port->icount.parity++;
+ } else if (status & MCFUART_USR_RXOVERRUN) {
+ port->icount.overrun++;
+ } else if (status & MCFUART_USR_RXFRAMING) {
+ port->icount.frame++;
+ }
+
+ status &= port->read_status_mask;
+
+ if (status & MCFUART_USR_RXBREAK)
+ flag = TTY_BREAK;
+ else if (status & MCFUART_USR_RXPARITY)
+ flag = TTY_PARITY;
+ else if (status & MCFUART_USR_RXFRAMING)
+ flag = TTY_FRAME;
+ }
+
+ if (uart_handle_sysrq_char(port, ch))
+ continue;
+ uart_insert_char(port, status, MCFUART_USR_RXOVERRUN, ch, flag);
+ }
+
+ tty_flip_buffer_push(port->info->tty);
+}
+
+/****************************************************************************/
+
+static void mcf_tx_chars(struct mcf_uart *pp)
+{
+ struct uart_port *port = (struct uart_port *) pp;
+ struct circ_buf *xmit = &port->info->xmit;
+
+ if (port->x_char) {
+ /* Send special char - probably flow control */
+ writeb(port->x_char, port->membase + MCFUART_UTB);
+ port->x_char = 0;
+ port->icount.tx++;
+ return;
+ }
+
+ while (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY) {
+ if (xmit->head == xmit->tail)
+ break;
+ writeb(xmit->buf[xmit->tail], port->membase + MCFUART_UTB);
+ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE -1);
+ port->icount.tx++;
+ }
+
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+
+ if (xmit->head == xmit->tail) {
+ pp->imr &= ~MCFUART_UIR_TXREADY;
+ writeb(pp->imr, port->membase + MCFUART_UIMR);
+ }
+}
+
+/****************************************************************************/
+
+static irqreturn_t mcf_interrupt(int irq, void *data)
+{
+ struct uart_port *port = data;
+ struct mcf_uart *pp = (struct mcf_uart *) port;
+ unsigned int isr;
+
+ isr = readb(port->membase + MCFUART_UISR) & pp->imr;
+ if (isr & MCFUART_UIR_RXREADY)
+ mcf_rx_chars(pp);
+ if (isr & MCFUART_UIR_TXREADY)
+ mcf_tx_chars(pp);
+ return IRQ_HANDLED;
+}
+
+/****************************************************************************/
+
+static void mcf_config_port(struct uart_port *port, int flags)
+{
+ port->type = PORT_MCF;
+
+ /* Clear mask, so no surprise interrupts. */
+ writeb(0, port->membase + MCFUART_UIMR);
+
+ if (request_irq(port->irq, mcf_interrupt, IRQF_DISABLED, "UART", port))
+ printk(KERN_ERR "MCF: unable to attach ColdFire UART %d "
+ "interrupt vector=%d\n", port->line, port->irq);
+}
+
+/****************************************************************************/
+
+static const char *mcf_type(struct uart_port *port)
+{
+ return (port->type == PORT_MCF) ? "ColdFire UART" : NULL;
+}
+
+/****************************************************************************/
+
+static int mcf_request_port(struct uart_port *port)
+{
+ /* UARTs always present */
+ return 0;
+}
+
+/****************************************************************************/
+
+static void mcf_release_port(struct uart_port *port)
+{
+ /* Nothing to release... */
+}
+
+/****************************************************************************/
+
+static int mcf_verify_port(struct uart_port *port, struct serial_struct *ser)
+{
+ if ((ser->type != PORT_UNKNOWN) && (ser->type != PORT_MCF))
+ return -EINVAL;
+ return 0;
+}
+
+/****************************************************************************/
+
+/*
+ * Define the basic serial functions we support.
+ */
+static struct uart_ops mcf_uart_ops = {
+ .tx_empty = mcf_tx_empty,
+ .get_mctrl = mcf_get_mctrl,
+ .set_mctrl = mcf_set_mctrl,
+ .start_tx = mcf_start_tx,
+ .stop_tx = mcf_stop_tx,
+ .stop_rx = mcf_stop_rx,
+ .enable_ms = mcf_enable_ms,
+ .break_ctl = mcf_break_ctl,
+ .startup = mcf_startup,
+ .shutdown = mcf_shutdown,
+ .set_termios = mcf_set_termios,
+ .type = mcf_type,
+ .request_port = mcf_request_port,
+ .release_port = mcf_release_port,
+ .config_port = mcf_config_port,
+ .verify_port = mcf_verify_port,
+};
+
+static struct mcf_uart mcf_ports[3];
+
+#define MCF_MAXPORTS	ARRAY_SIZE(mcf_ports)
+
+/****************************************************************************/
+#if defined(CONFIG_SERIAL_MCF_CONSOLE)
+/****************************************************************************/
+
+int __init early_mcf_setup(struct mcf_platform_uart *platp)
+{
+ struct uart_port *port;
+ int i;
+
+ for (i = 0; ((i < MCF_MAXPORTS) && (platp[i].mapbase)); i++) {
+ port = &mcf_ports[i].port;
+
+ port->line = i;
+ port->type = PORT_MCF;
+ port->mapbase = platp[i].mapbase;
+ port->membase = (platp[i].membase) ? platp[i].membase :
+ (unsigned char __iomem *) port->mapbase;
+ port->iotype = SERIAL_IO_MEM;
+ port->irq = platp[i].irq;
+ port->uartclk = MCF_BUSCLK;
+ port->flags = ASYNC_BOOT_AUTOCONF;
+ port->ops = &mcf_uart_ops;
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+
+static void mcf_console_putc(struct console *co, const char c)
+{
+	struct uart_port *port = &mcf_ports[co->index].port;
+ int i;
+
+ for (i = 0; (i < 0x10000); i++) {
+ if (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY)
+ break;
+ }
+ writeb(c, port->membase + MCFUART_UTB);
+ for (i = 0; (i < 0x10000); i++) {
+ if (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY)
+ break;
+ }
+}
+
+/****************************************************************************/
+
+static void mcf_console_write(struct console *co, const char *s, unsigned int count)
+{
+ for (; (count); count--, s++) {
+ mcf_console_putc(co, *s);
+ if (*s == '\n')
+ mcf_console_putc(co, '\r');
+ }
+}
+
+/****************************************************************************/
+
+static int __init mcf_console_setup(struct console *co, char *options)
+{
+ struct uart_port *port;
+ int baud = CONFIG_SERIAL_MCF_BAUDRATE;
+ int bits = 8;
+ int parity = 'n';
+ int flow = 'n';
+
+	if ((co->index < 0) || (co->index >= MCF_MAXPORTS))
+ co->index = 0;
+ port = &mcf_ports[co->index].port;
+ if (port->membase == 0)
+ return -ENODEV;
+
+ if (options)
+ uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+ return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+/****************************************************************************/
+
+static struct uart_driver mcf_driver;
+
+static struct console mcf_console = {
+ .name = "ttyS",
+ .write = mcf_console_write,
+ .device = uart_console_device,
+ .setup = mcf_console_setup,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+ .data = &mcf_driver,
+};
+
+static int __init mcf_console_init(void)
+{
+ register_console(&mcf_console);
+ return 0;
+}
+
+console_initcall(mcf_console_init);
+
+#define MCF_CONSOLE &mcf_console
+
+/****************************************************************************/
+#else
+/****************************************************************************/
+
+#define MCF_CONSOLE NULL
+
+/****************************************************************************/
+#endif /* CONFIG_SERIAL_MCF_CONSOLE */
+/****************************************************************************/
+
+/*
+ * Define the mcf UART driver structure.
+ */
+static struct uart_driver mcf_driver = {
+ .owner = THIS_MODULE,
+ .driver_name = "mcf",
+ .dev_name = "ttyS",
+ .major = TTY_MAJOR,
+ .minor = 64,
+ .nr = MCF_MAXPORTS,
+ .cons = MCF_CONSOLE,
+};
+
+/****************************************************************************/
+
+static int __devinit mcf_probe(struct platform_device *pdev)
+{
+ struct mcf_platform_uart *platp = pdev->dev.platform_data;
+ struct uart_port *port;
+ int i;
+
+ for (i = 0; ((i < MCF_MAXPORTS) && (platp[i].mapbase)); i++) {
+ port = &mcf_ports[i].port;
+
+ port->line = i;
+ port->type = PORT_MCF;
+ port->mapbase = platp[i].mapbase;
+ port->membase = (platp[i].membase) ? platp[i].membase :
+ (unsigned char __iomem *) platp[i].mapbase;
+ port->iotype = SERIAL_IO_MEM;
+ port->irq = platp[i].irq;
+ port->uartclk = MCF_BUSCLK;
+ port->ops = &mcf_uart_ops;
+ port->flags = ASYNC_BOOT_AUTOCONF;
+
+ uart_add_one_port(&mcf_driver, port);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+
+static int __devexit mcf_remove(struct platform_device *pdev)
+{
+ struct uart_port *port;
+ int i;
+
+ for (i = 0; (i < MCF_MAXPORTS); i++) {
+ port = &mcf_ports[i].port;
+ if (port)
+ uart_remove_one_port(&mcf_driver, port);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+
+static struct platform_driver mcf_platform_driver = {
+ .probe = mcf_probe,
+ .remove = __devexit_p(mcf_remove),
+ .driver = {
+ .name = "mcfuart",
+ .owner = THIS_MODULE,
+ },
+};
+
+/****************************************************************************/
+
+static int __init mcf_init(void)
+{
+ int rc;
+
+	printk(KERN_INFO "ColdFire internal UART serial driver\n");
+
+ rc = uart_register_driver(&mcf_driver);
+ if (rc)
+ return rc;
+ rc = platform_driver_register(&mcf_platform_driver);
+ if (rc)
+ return rc;
+ return 0;
+}
+
+/****************************************************************************/
+
+static void __exit mcf_exit(void)
+{
+ platform_driver_unregister(&mcf_platform_driver);
+ uart_unregister_driver(&mcf_driver);
+}
+
+/****************************************************************************/
+
+module_init(mcf_init);
+module_exit(mcf_exit);
+
+MODULE_AUTHOR("Greg Ungerer <gerg@snapgear.com>");
+MODULE_DESCRIPTION("Freescale ColdFire UART driver");
+MODULE_LICENSE("GPL");
+
+/****************************************************************************/
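A note on the divisor math in mcf_set_termios(): in prescaled-timer mode the ColdFire UART generates baud = busclk / (32 * divisor), so the driver wants divisor = busclk / (32 * baud), and the +16 term rounds that division to the nearest divisor instead of truncating. Worked through for an assumed 60 MHz bus clock at 115200 baud (the clock value is illustrative; MCF_BUSCLK is board-specific):

	baudclk = ((60000000 / 115200) + 16) / 32 = (520 + 16) / 32 = 16
	actual  = 60000000 / (32 * 16) = 117187 baud	(about 1.7% fast)
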
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 8dd5a6afd51..8bdaa157ffe 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -11,9 +11,9 @@
#include <linux/timer.h>
#include <linux/ctype.h>
#include <linux/device.h>
+#include <linux/scatterlist.h>
#include <linux/usb/quirks.h>
#include <asm/byteorder.h>
-#include <asm/scatterlist.h>
#include "hcd.h" /* for usbcore internals */
#include "usb.h"
@@ -437,13 +437,11 @@ int usb_sg_init (
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_IOMMU)
io->urbs[i]->transfer_buffer = NULL;
#else
- io->urbs[i]->transfer_buffer =
- page_address(sg[i].page) + sg[i].offset;
+ io->urbs[i]->transfer_buffer = sg_virt(&sg[i]);
#endif
} else {
/* hc may use _only_ transfer_buffer */
- io->urbs [i]->transfer_buffer =
- page_address (sg [i].page) + sg [i].offset;
+ io->urbs [i]->transfer_buffer = sg_virt(&sg[i]);
len = sg [i].length;
}
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index e7d982a7154..91e999c9f68 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -519,8 +519,7 @@ static void mts_do_sg (struct urb* transfer)
context->fragment++;
mts_int_submit_urb(transfer,
context->data_pipe,
- page_address(sg[context->fragment].page) +
- sg[context->fragment].offset,
+ sg_virt(&sg[context->fragment]),
sg[context->fragment].length,
context->fragment + 1 == scsi_sg_count(context->srb) ?
mts_data_done : mts_do_sg);
@@ -557,7 +556,7 @@ mts_build_transfer_context(struct scsi_cmnd *srb, struct mts_desc* desc)
return;
} else {
sg = scsi_sglist(srb);
- desc->context.data = page_address(sg[0].page) + sg[0].offset;
+ desc->context.data = sg_virt(&sg[0]);
desc->context.data_length = sg[0].length;
}
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index e901d31e051..ea316214648 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -360,9 +360,9 @@ static void free_sglist (struct scatterlist *sg, int nents)
if (!sg)
return;
for (i = 0; i < nents; i++) {
- if (!sg [i].page)
+ if (!sg_page(&sg[i]))
continue;
- kfree (page_address (sg [i].page) + sg [i].offset);
+ kfree (sg_virt(&sg[i]));
}
kfree (sg);
}
diff --git a/drivers/usb/storage/protocol.c b/drivers/usb/storage/protocol.c
index cc8f7c52c72..889622baac2 100644
--- a/drivers/usb/storage/protocol.c
+++ b/drivers/usb/storage/protocol.c
@@ -195,7 +195,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
* the *offset and *index values for the next loop. */
cnt = 0;
while (cnt < buflen) {
- struct page *page = sg->page +
+ struct page *page = sg_page(sg) +
((sg->offset + *offset) >> PAGE_SHIFT);
unsigned int poff =
(sg->offset + *offset) & (PAGE_SIZE-1);
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
new file mode 100644
index 00000000000..9e33fc4da87
--- /dev/null
+++ b/drivers/virtio/Kconfig
@@ -0,0 +1,8 @@
+# Virtio always gets selected by whoever wants it.
+config VIRTIO
+ bool
+
+# Similarly the virtio ring implementation.
+config VIRTIO_RING
+ bool
+ depends on VIRTIO
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
new file mode 100644
index 00000000000..f70e40971dd
--- /dev/null
+++ b/drivers/virtio/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_VIRTIO) += virtio.o
+obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c
new file mode 100644
index 00000000000..983d482fba4
--- /dev/null
+++ b/drivers/virtio/config.c
@@ -0,0 +1,13 @@
+/* Configuration space parsing helpers for virtio.
+ *
+ * The configuration is [type][len][... len bytes ...] fields.
+ *
+ * Copyright 2007 Rusty Russell, IBM Corporation.
+ * GPL v2 or later.
+ */
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/bug.h>
+#include <asm/system.h>
+
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
new file mode 100644
index 00000000000..15d7787dea8
--- /dev/null
+++ b/drivers/virtio/virtio.c
@@ -0,0 +1,189 @@
+#include <linux/virtio.h>
+#include <linux/spinlock.h>
+#include <linux/virtio_config.h>
+
+static ssize_t device_show(struct device *_d,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	return sprintf(buf, "%hu\n", dev->id.device);
+}
+static ssize_t vendor_show(struct device *_d,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	return sprintf(buf, "%hu\n", dev->id.vendor);
+}
+static ssize_t status_show(struct device *_d,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
+}
+static ssize_t modalias_show(struct device *_d,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+
+ return sprintf(buf, "virtio:d%08Xv%08X\n",
+ dev->id.device, dev->id.vendor);
+}
+static struct device_attribute virtio_dev_attrs[] = {
+ __ATTR_RO(device),
+ __ATTR_RO(vendor),
+ __ATTR_RO(status),
+ __ATTR_RO(modalias),
+ __ATTR_NULL
+};
+
+static inline int virtio_id_match(const struct virtio_device *dev,
+ const struct virtio_device_id *id)
+{
+ if (id->device != dev->id.device)
+ return 0;
+
+	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/* This looks through all the IDs a driver claims to support. If any of them
+ * match, we return 1 and the kernel will call virtio_dev_probe(). */
+static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
+{
+ unsigned int i;
+ struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+ const struct virtio_device_id *ids;
+
+ ids = container_of(_dr, struct virtio_driver, driver)->id_table;
+ for (i = 0; ids[i].device; i++)
+ if (virtio_id_match(dev, &ids[i]))
+ return 1;
+ return 0;
+}
+
+static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
+{
+ struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+
+ return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
+ dev->id.device, dev->id.vendor);
+}
+
+static struct bus_type virtio_bus = {
+ .name = "virtio",
+ .match = virtio_dev_match,
+ .dev_attrs = virtio_dev_attrs,
+ .uevent = virtio_uevent,
+};
+
+static void add_status(struct virtio_device *dev, unsigned status)
+{
+ dev->config->set_status(dev, dev->config->get_status(dev) | status);
+}
+
+static int virtio_dev_probe(struct device *_d)
+{
+ int err;
+ struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+ struct virtio_driver *drv = container_of(dev->dev.driver,
+ struct virtio_driver, driver);
+
+ add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+ err = drv->probe(dev);
+ if (err)
+ add_status(dev, VIRTIO_CONFIG_S_FAILED);
+ else
+ add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+ return err;
+}
+
+int register_virtio_driver(struct virtio_driver *driver)
+{
+ driver->driver.bus = &virtio_bus;
+ driver->driver.probe = virtio_dev_probe;
+ return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(register_virtio_driver);
+
+void unregister_virtio_driver(struct virtio_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(unregister_virtio_driver);
+
+int register_virtio_device(struct virtio_device *dev)
+{
+ int err;
+
+ dev->dev.bus = &virtio_bus;
+ sprintf(dev->dev.bus_id, "%u", dev->index);
+
+ /* Acknowledge that we've seen the device. */
+ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+
+ /* device_register() causes the bus infrastructure to look for a
+ * matching driver. */
+ err = device_register(&dev->dev);
+ if (err)
+ add_status(dev, VIRTIO_CONFIG_S_FAILED);
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_virtio_device);
+
+void unregister_virtio_device(struct virtio_device *dev)
+{
+ device_unregister(&dev->dev);
+}
+EXPORT_SYMBOL_GPL(unregister_virtio_device);
+
+int __virtio_config_val(struct virtio_device *vdev,
+ u8 type, void *val, size_t size)
+{
+ void *token;
+ unsigned int len;
+
+ token = vdev->config->find(vdev, type, &len);
+ if (!token)
+ return -ENOENT;
+
+ if (len != size)
+ return -EIO;
+
+ vdev->config->get(vdev, token, val, size);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__virtio_config_val);
+
+int virtio_use_bit(struct virtio_device *vdev,
+ void *token, unsigned int len, unsigned int bitnum)
+{
+ unsigned long bits[16];
+
+ /* This makes it convenient to pass-through find() results. */
+ if (!token)
+ return 0;
+
+	/* bit not in the feature half of this bitfield? */
+	if (bitnum >= len * 8 / 2)
+		return 0;
+
+ /* Giant feature bitfields are silly. */
+ BUG_ON(len > sizeof(bits));
+ vdev->config->get(vdev, token, bits, len);
+
+ if (!test_bit(bitnum, bits))
+ return 0;
+
+ /* Set acknowledge bit, and write it back. */
+ set_bit(bitnum + len * 8 / 2, bits);
+ vdev->config->set(vdev, token, bits, len);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(virtio_use_bit);
+
+static int virtio_init(void)
+{
+ if (bus_register(&virtio_bus) != 0)
+ panic("virtio bus registration failed");
+ return 0;
+}
+core_initcall(virtio_init);
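For the shape of a client of this bus, a hedged sketch of driver registration (the device ID value and callbacks are hypothetical; in-tree users such as virtio_blk follow the same pattern):

	static struct virtio_device_id id_table[] = {
		{ 2, VIRTIO_DEV_ANY_ID },	/* hypothetical device ID */
		{ 0 },
	};

	static int example_probe(struct virtio_device *vdev)
	{
		/* find virtqueues, read config via __virtio_config_val(), ... */
		return 0;
	}

	static struct virtio_driver example_driver = {
		.driver.name	= "example-virtio",
		.driver.owner	= THIS_MODULE,
		.id_table	= id_table,
		.probe		= example_probe,
	};

	static int __init example_init(void)
	{
		return register_virtio_driver(&example_driver);
	}
	module_init(example_init);
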
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
new file mode 100644
index 00000000000..0e4baca21b8
--- /dev/null
+++ b/drivers/virtio/virtio_ring.c
@@ -0,0 +1,313 @@
+/* Virtio ring implementation.
+ *
+ * Copyright 2007 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/device.h>
+
+#ifdef DEBUG
+/* For development, we want to crash whenever the ring is screwed. */
+#define BAD_RING(vq, fmt...) \
+ do { dev_err(&vq->vq.vdev->dev, fmt); BUG(); } while(0)
+#define START_USE(vq) \
+ do { if ((vq)->in_use) panic("in_use = %i\n", (vq)->in_use); (vq)->in_use = __LINE__; mb(); } while(0)
+#define END_USE(vq) \
+ do { BUG_ON(!(vq)->in_use); (vq)->in_use = 0; mb(); } while(0)
+#else
+#define BAD_RING(vq, fmt...) \
+ do { dev_err(&vq->vq.vdev->dev, fmt); (vq)->broken = true; } while(0)
+#define START_USE(vq)
+#define END_USE(vq)
+#endif
+
+struct vring_virtqueue
+{
+ struct virtqueue vq;
+
+ /* Actual memory layout for this queue */
+ struct vring vring;
+
+ /* Other side has made a mess, don't try any more. */
+ bool broken;
+
+ /* Number of free buffers */
+ unsigned int num_free;
+ /* Head of free buffer list. */
+ unsigned int free_head;
+ /* Number we've added since last sync. */
+ unsigned int num_added;
+
+ /* Last used index we've seen. */
+ unsigned int last_used_idx;
+
+ /* How to notify other side. FIXME: commonalize hcalls! */
+ void (*notify)(struct virtqueue *vq);
+
+#ifdef DEBUG
+ /* They're supposed to lock for us. */
+ unsigned int in_use;
+#endif
+
+ /* Tokens for callbacks. */
+ void *data[];
+};
+
+#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+
+static int vring_add_buf(struct virtqueue *_vq,
+ struct scatterlist sg[],
+ unsigned int out,
+ unsigned int in,
+ void *data)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ unsigned int i, avail, head, uninitialized_var(prev);
+
+ BUG_ON(data == NULL);
+ BUG_ON(out + in > vq->vring.num);
+ BUG_ON(out + in == 0);
+
+ START_USE(vq);
+
+ if (vq->num_free < out + in) {
+ pr_debug("Can't add buf len %i - avail = %i\n",
+ out + in, vq->num_free);
+ END_USE(vq);
+ return -ENOSPC;
+ }
+
+ /* We're about to use some buffers from the free list. */
+ vq->num_free -= out + in;
+
+ head = vq->free_head;
+ for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
+ vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
+ vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT)
+ + sg->offset;
+ vq->vring.desc[i].len = sg->length;
+ prev = i;
+ sg++;
+ }
+ for (; in; i = vq->vring.desc[i].next, in--) {
+ vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+ vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT)
+ + sg->offset;
+ vq->vring.desc[i].len = sg->length;
+ prev = i;
+ sg++;
+ }
+ /* Last one doesn't continue. */
+ vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
+
+ /* Update free pointer */
+ vq->free_head = i;
+
+ /* Set token. */
+ vq->data[head] = data;
+
+ /* Put entry in available array (but don't update avail->idx until they
+ * do sync). FIXME: avoid modulus here? */
+ avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
+ vq->vring.avail->ring[avail] = head;
+
+ pr_debug("Added buffer head %i to %p\n", head, vq);
+ END_USE(vq);
+ return 0;
+}
+
+static void vring_kick(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ START_USE(vq);
+ /* Descriptors and available array need to be set before we expose the
+ * new available array entries. */
+ wmb();
+
+ vq->vring.avail->idx += vq->num_added;
+ vq->num_added = 0;
+
+ /* Need to update avail index before checking if we should notify */
+ mb();
+
+ if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
+ /* Prod other side to tell it about changes. */
+ vq->notify(&vq->vq);
+
+ END_USE(vq);
+}
+
+static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
+{
+ unsigned int i;
+
+ /* Clear data ptr. */
+ vq->data[head] = NULL;
+
+ /* Put back on free list: find end */
+ i = head;
+ while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
+ i = vq->vring.desc[i].next;
+ vq->num_free++;
+ }
+
+ vq->vring.desc[i].next = vq->free_head;
+ vq->free_head = head;
+ /* Plus final descriptor */
+ vq->num_free++;
+}
+
+/* FIXME: We need to tell other side about removal, to synchronize. */
+static void vring_shutdown(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ unsigned int i;
+
+ for (i = 0; i < vq->vring.num; i++)
+ detach_buf(vq, i);
+}
+
+static inline bool more_used(const struct vring_virtqueue *vq)
+{
+ return vq->last_used_idx != vq->vring.used->idx;
+}
+
+static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ void *ret;
+ unsigned int i;
+
+ START_USE(vq);
+
+ if (!more_used(vq)) {
+ pr_debug("No more buffers in queue\n");
+ END_USE(vq);
+ return NULL;
+ }
+
+ i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
+ *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
+
+ if (unlikely(i >= vq->vring.num)) {
+ BAD_RING(vq, "id %u out of range\n", i);
+ return NULL;
+ }
+ if (unlikely(!vq->data[i])) {
+ BAD_RING(vq, "id %u is not a head!\n", i);
+ return NULL;
+ }
+
+ /* detach_buf clears data, so grab it now. */
+ ret = vq->data[i];
+ detach_buf(vq, i);
+ vq->last_used_idx++;
+ END_USE(vq);
+ return ret;
+}
+
+static bool vring_restart(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ START_USE(vq);
+ BUG_ON(!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT));
+
+ /* We optimistically turn back on interrupts, then check if there was
+ * more to do. */
+ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ mb();
+ if (unlikely(more_used(vq))) {
+ vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+ END_USE(vq);
+ return false;
+ }
+
+ END_USE(vq);
+ return true;
+}
+
+irqreturn_t vring_interrupt(int irq, void *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (!more_used(vq)) {
+ pr_debug("virtqueue interrupt with no work for %p\n", vq);
+ return IRQ_NONE;
+ }
+
+ if (unlikely(vq->broken))
+ return IRQ_HANDLED;
+
+ pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
+ if (vq->vq.callback && !vq->vq.callback(&vq->vq))
+ vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+
+ return IRQ_HANDLED;
+}
+
+static struct virtqueue_ops vring_vq_ops = {
+ .add_buf = vring_add_buf,
+ .get_buf = vring_get_buf,
+ .kick = vring_kick,
+ .restart = vring_restart,
+ .shutdown = vring_shutdown,
+};
+
+struct virtqueue *vring_new_virtqueue(unsigned int num,
+ struct virtio_device *vdev,
+ void *pages,
+ void (*notify)(struct virtqueue *),
+ bool (*callback)(struct virtqueue *))
+{
+ struct vring_virtqueue *vq;
+ unsigned int i;
+
+ vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+ if (!vq)
+ return NULL;
+
+ vring_init(&vq->vring, num, pages);
+ vq->vq.callback = callback;
+ vq->vq.vdev = vdev;
+ vq->vq.vq_ops = &vring_vq_ops;
+ vq->notify = notify;
+ vq->broken = false;
+ vq->last_used_idx = 0;
+ vq->num_added = 0;
+#ifdef DEBUG
+ vq->in_use = false;
+#endif
+
+ /* No callback? Tell other side not to bother us. */
+ if (!callback)
+ vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+
+ /* Put everything in free lists. */
+ vq->num_free = num;
+ vq->free_head = 0;
+ for (i = 0; i < num-1; i++)
+ vq->vring.desc[i].next = i+1;
+
+ return &vq->vq;
+}
+
+void vring_del_virtqueue(struct virtqueue *vq)
+{
+ kfree(to_vvq(vq));
+}
+
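And the producer/consumer flow against the ring, sketched through the vq_ops table (buf/len are illustrative; vq comes from vring_new_virtqueue()):

	struct scatterlist sg;
	unsigned int used_len;
	void *token;

	sg_init_one(&sg, buf, len);		/* one buffer, host-readable */
	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, buf) == 0)
		vq->vq_ops->kick(vq);		/* publish avail idx, notify */

	/* later, typically from the virtqueue callback: */
	while ((token = vq->vq_ops->get_buf(vq, &used_len)) != NULL) {
		/* token is the cookie given to add_buf(); used_len is the
		 * byte count the other side wrote (0 for out-only buffers) */
	}
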
diff --git a/drivers/watchdog/mpc5200_wdt.c b/drivers/watchdog/mpc5200_wdt.c
index 9cfb9757662..11f6a111e75 100644
--- a/drivers/watchdog/mpc5200_wdt.c
+++ b/drivers/watchdog/mpc5200_wdt.c
@@ -176,6 +176,8 @@ static int mpc5200_wdt_probe(struct of_device *op, const struct of_device_id *ma
has_wdt = of_get_property(op->node, "has-wdt", NULL);
if (!has_wdt)
+ has_wdt = of_get_property(op->node, "fsl,has-wdt", NULL);
+ if (!has_wdt)
return -ENODEV;
wdt = kzalloc(sizeof(*wdt), GFP_KERNEL);
@@ -254,6 +256,7 @@ static int mpc5200_wdt_shutdown(struct of_device *op)
static struct of_device_id mpc5200_wdt_match[] = {
{ .compatible = "mpc5200-gpt", },
+ { .compatible = "fsl,mpc5200-gpt", },
{},
};
static struct of_platform_driver mpc5200_wdt_driver = {