From e53e97ce3c7119199d2788d8fd1618efa9c2d1eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2007 22:22:44 -0700 Subject: [SPARC64]: Add LDOM virtual channel driver and VIO device layer. Virtual devices on Sun Logical Domains are built on top of a virtual channel framework. This, with help of hypervisor interfaces, provides a link layer protocol with basic handshaking over which virtual device clients and servers communicate. Built on top of this is a VIO device protocol which has it's own handshaking and message types. At this layer attributes are exchanged (disk size, network device addresses, etc.) descriptor rings are registered, and data transfers are triggers and replied to. Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 136 ++++++++++++++++ include/asm-sparc64/vio.h | 402 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 538 insertions(+) create mode 100644 include/asm-sparc64/ldc.h create mode 100644 include/asm-sparc64/vio.h (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h new file mode 100644 index 00000000000..24fd2367d33 --- /dev/null +++ b/include/asm-sparc64/ldc.h @@ -0,0 +1,136 @@ +#ifndef _SPARC64_LDC_H +#define _SPARC64_LDC_H + +#include + +extern int ldom_domaining_enabled; + +/* The event handler will be evoked when link state changes + * or data becomes available on the receive side. + * + * For non-RAW links, if the LDC_EVENT_RESET event arrives the + * driver should reset all of it's internal state and reinvoke + * ldc_connect() to try and bring the link up again. + * + * For RAW links, ldc_connect() is not used. Instead the driver + * just waits for the LDC_EVENT_UP event. + */ +struct ldc_channel_config { + void (*event)(void *arg, int event); + + u32 mtu; + unsigned int rx_irq; + unsigned int tx_irq; + u8 mode; +#define LDC_MODE_RAW 0x00 +#define LDC_MODE_UNRELIABLE 0x01 +#define LDC_MODE_RESERVED 0x02 +#define LDC_MODE_RELIABLE 0x03 +#define LDC_MODE_STREAM 0x04 + + u8 debug; +#define LDC_DEBUG_HS 0x01 +#define LDC_DEBUG_STATE 0x02 +#define LDC_DEBUG_RX 0x04 +#define LDC_DEBUG_TX 0x08 +#define LDC_DEBUG_DATA 0x10 +}; + +#define LDC_EVENT_RESET 0x01 +#define LDC_EVENT_UP 0x02 +#define LDC_EVENT_DATA_READY 0x04 + +#define LDC_STATE_INVALID 0x00 +#define LDC_STATE_INIT 0x01 +#define LDC_STATE_BOUND 0x02 +#define LDC_STATE_READY 0x03 +#define LDC_STATE_CONNECTED 0x04 + +struct ldc_channel; + +/* Allocate state for a channel. */ +extern struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, + void *event_arg); + +/* Shut down and free state for a channel. */ +extern void ldc_free(struct ldc_channel *lp); + +/* Register TX and RX queues of the link with the hypervisor. */ +extern int ldc_bind(struct ldc_channel *lp); + +/* For non-RAW protocols we need to complete a handshake before + * communication can proceed. ldc_connect() does that, if the + * handshake completes successfully, an LDC_EVENT_UP event will + * be sent up to the driver. + */ +extern int ldc_connect(struct ldc_channel *lp); +extern int ldc_disconnect(struct ldc_channel *lp); + +extern int ldc_state(struct ldc_channel *lp); + +/* Read and write operations. Only valid when the link is up. */ +extern int ldc_write(struct ldc_channel *lp, const void *buf, + unsigned int size); +extern int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size); + +#define LDC_MAP_SHADOW 0x01 +#define LDC_MAP_DIRECT 0x02 +#define LDC_MAP_IO 0x04 +#define LDC_MAP_R 0x08 +#define LDC_MAP_W 0x10 +#define LDC_MAP_X 0x20 +#define LDC_MAP_RW (LDC_MAP_R | LDC_MAP_W) +#define LDC_MAP_RWX (LDC_MAP_R | LDC_MAP_W | LDC_MAP_X) +#define LDC_MAP_ALL 0x03f + +struct ldc_trans_cookie { + u64 cookie_addr; + u64 cookie_size; +}; + +struct scatterlist; +extern int ldc_map_sg(struct ldc_channel *lp, + struct scatterlist *sg, int num_sg, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm); + +extern int ldc_map_single(struct ldc_channel *lp, + void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm); + +extern void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, + int ncookies); + +extern int ldc_copy(struct ldc_channel *lp, int copy_dir, + void *buf, unsigned int len, unsigned long offset, + struct ldc_trans_cookie *cookies, int ncookies); + +static inline int ldc_get_dring_entry(struct ldc_channel *lp, + void *buf, unsigned int len, + unsigned long offset, + struct ldc_trans_cookie *cookies, + int ncookies) +{ + return ldc_copy(lp, LDC_COPY_IN, buf, len, offset, cookies, ncookies); +} + +static inline int ldc_put_dring_entry(struct ldc_channel *lp, + void *buf, unsigned int len, + unsigned long offset, + struct ldc_trans_cookie *cookies, + int ncookies) +{ + return ldc_copy(lp, LDC_COPY_OUT, buf, len, offset, cookies, ncookies); +} + +extern void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len, + struct ldc_trans_cookie *cookies, + int *ncookies, unsigned int map_perm); + +extern void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, + unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies); + +#endif /* _SPARC64_LDC_H */ diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h new file mode 100644 index 00000000000..47c3da76dcb --- /dev/null +++ b/include/asm-sparc64/vio.h @@ -0,0 +1,402 @@ +#ifndef _SPARC64_VIO_H +#define _SPARC64_VIO_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vio_msg_tag { + u8 type; +#define VIO_TYPE_CTRL 0x01 +#define VIO_TYPE_DATA 0x02 +#define VIO_TYPE_ERR 0x04 + + u8 stype; +#define VIO_SUBTYPE_INFO 0x01 +#define VIO_SUBTYPE_ACK 0x02 +#define VIO_SUBTYPE_NACK 0x04 + + u16 stype_env; +#define VIO_VER_INFO 0x0001 +#define VIO_ATTR_INFO 0x0002 +#define VIO_DRING_REG 0x0003 +#define VIO_DRING_UNREG 0x0004 +#define VIO_RDX 0x0005 +#define VIO_PKT_DATA 0x0040 +#define VIO_DESC_DATA 0x0041 +#define VIO_DRING_DATA 0x0042 +#define VNET_MCAST_INFO 0x0101 + + u32 sid; +}; + +struct vio_rdx { + struct vio_msg_tag tag; + u64 resv[6]; +}; + +struct vio_ver_info { + struct vio_msg_tag tag; + u16 major; + u16 minor; + u8 dev_class; +#define VDEV_NETWORK 0x01 +#define VDEV_NETWORK_SWITCH 0x02 +#define VDEV_DISK 0x03 +#define VDEV_DISK_SERVER 0x04 + + u8 resv1[3]; + u64 resv2[5]; +}; + +struct vio_dring_register { + struct vio_msg_tag tag; + u64 dring_ident; + u32 num_descr; + u32 descr_size; + u16 options; +#define VIO_TX_DRING 0x0001 +#define VIO_RX_DRING 0x0002 + u16 resv; + u32 num_cookies; + struct ldc_trans_cookie cookies[0]; +}; + +struct vio_dring_unregister { + struct vio_msg_tag tag; + u64 dring_ident; + u64 resv[5]; +}; + +/* Data transfer modes */ +#define VIO_PKT_MODE 0x01 /* Packet based transfer */ +#define VIO_DESC_MODE 0x02 /* In-band descriptors */ +#define VIO_DRING_MODE 0x03 /* Descriptor rings */ + +struct vio_dring_data { + struct vio_msg_tag tag; + u64 seq; + u64 dring_ident; + u32 start_idx; + u32 end_idx; + u8 state; +#define VIO_DRING_ACTIVE 0x01 +#define VIO_DRING_STOPPED 0x02 + + u8 __pad1; + u16 __pad2; + u32 __pad3; + u64 __par4[2]; +}; + +struct vio_dring_hdr { + u8 state; +#define VIO_DESC_FREE 0x01 +#define VIO_DESC_READY 0x02 +#define VIO_DESC_ACCEPTED 0x03 +#define VIO_DESC_DONE 0x04 + u8 ack; +#define VIO_ACK_ENABLE 0x01 +#define VIO_ACK_DISABLE 0x00 + + u16 __pad1; + u32 __pad2; +}; + +/* VIO disk specific structures and defines */ +struct vio_disk_attr_info { + struct vio_msg_tag tag; + u8 xfer_mode; + u8 vdisk_type; +#define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ +#define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ + u16 resv1; + u32 vdisk_block_size; + u64 operations; + u64 vdisk_size; + u64 max_xfer_size; + u64 resv2[2]; +}; + +struct vio_disk_desc { + struct vio_dring_hdr hdr; + u64 req_id; + u8 operation; +#define VD_OP_BREAD 0x01 /* Block read */ +#define VD_OP_BWRITE 0x02 /* Block write */ +#define VD_OP_FLUSH 0x03 /* Flush disk contents */ +#define VD_OP_GET_WCE 0x04 /* Get write-cache status */ +#define VD_OP_SET_WCE 0x05 /* Enable/disable write-cache */ +#define VD_OP_GET_VTOC 0x06 /* Get VTOC */ +#define VD_OP_SET_VTOC 0x07 /* Set VTOC */ +#define VD_OP_GET_DISKGEOM 0x08 /* Get disk geometry */ +#define VD_OP_SET_DISKGEOM 0x09 /* Set disk geometry */ +#define VD_OP_SCSICMD 0x0a /* SCSI control command */ +#define VD_OP_GET_DEVID 0x0b /* Get device ID */ +#define VD_OP_GET_EFI 0x0c /* Get EFI */ +#define VD_OP_SET_EFI 0x0d /* Set EFI */ + u8 slice; + u16 resv1; + u32 status; + u64 offset; + u64 size; + u32 ncookies; + u32 resv2; + struct ldc_trans_cookie cookies[0]; +}; + +#define VIO_DISK_VNAME_LEN 8 +#define VIO_DISK_ALABEL_LEN 128 +#define VIO_DISK_NUM_PART 8 + +struct vio_disk_vtoc { + u8 volume_name[VIO_DISK_VNAME_LEN]; + u16 sector_size; + u16 num_partitions; + u8 ascii_label[VIO_DISK_ALABEL_LEN]; + struct { + u16 id; + u16 perm_flags; + u32 resv; + u64 start_block; + u64 num_blocks; + } partitions[VIO_DISK_NUM_PART]; +}; + +struct vio_disk_geom { + u16 num_cyl; /* Num data cylinders */ + u16 alt_cyl; /* Num alternate cylinders */ + u16 beg_cyl; /* Cyl off of fixed head area */ + u16 num_hd; /* Num heads */ + u16 num_sec; /* Num sectors */ + u16 ifact; /* Interleave factor */ + u16 apc; /* Alts per cylinder (SCSI) */ + u16 rpm; /* Revolutions per minute */ + u16 phy_cyl; /* Num physical cylinders */ + u16 wr_skip; /* Num sects to skip, writes */ + u16 rd_skip; /* Num sects to skip, writes */ +}; + +struct vio_disk_devid { + u16 resv; + u16 type; + u32 len; + char id[0]; +}; + +struct vio_disk_efi { + u64 lba; + u64 len; + char data[0]; +}; + +/* VIO net specific structures and defines */ +struct vio_net_attr_info { + struct vio_msg_tag tag; + u8 xfer_mode; + u8 addr_type; +#define VNET_ADDR_ETHERMAC 0x01 + u16 ack_freq; + u32 resv1; + u64 addr; + u64 mtu; + u64 resv2[3]; +}; + +#define VNET_NUM_MCAST 7 + +struct vio_net_mcast_info { + struct vio_msg_tag tag; + u8 set; + u8 count; + u8 mcast_addr[VNET_NUM_MCAST * 6]; + u32 resv; +}; + +struct vio_net_desc { + struct vio_dring_hdr hdr; + u32 size; + u32 ncookies; + struct ldc_trans_cookie cookies[0]; +}; + +#define VIO_MAX_RING_COOKIES 24 + +struct vio_dring_state { + u64 ident; + void *base; + u64 snd_nxt; + u64 rcv_nxt; + u32 entry_size; + u32 num_entries; + u32 prod; + u32 cons; + u32 pending; + int ncookies; + struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES]; +}; + +static inline void *vio_dring_cur(struct vio_dring_state *dr) +{ + return dr->base + (dr->entry_size * dr->prod); +} + +static inline void *vio_dring_entry(struct vio_dring_state *dr, + unsigned int index) +{ + return dr->base + (dr->entry_size * index); +} + +static inline u32 vio_dring_avail(struct vio_dring_state *dr, + unsigned int ring_size) +{ + /* Ensure build-time power-of-2. */ + BUILD_BUG_ON(ring_size & (ring_size - 1)); + + return (dr->pending - + ((dr->prod - dr->cons) & (ring_size - 1))); +} + +struct vio_dev { + struct mdesc_node *mp; + struct device_node *dp; + + const char *type; + const char *compat; + int compat_len; + + struct device dev; +}; + +struct vio_driver { + struct list_head node; + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); + void (*shutdown)(struct vio_dev *dev); + unsigned long driver_data; + struct device_driver driver; +}; + +struct vio_version { + u16 major; + u16 minor; +}; + +struct vio_driver_state; +struct vio_driver_ops { + int (*send_attr)(struct vio_driver_state *vio); + int (*handle_attr)(struct vio_driver_state *vio, void *pkt); + void (*handshake_complete)(struct vio_driver_state *vio); +}; + +struct vio_completion { + struct completion com; + int err; + int waiting_for; +}; + +struct vio_driver_state { + /* Protects VIO handshake and, optionally, driver private state. */ + spinlock_t lock; + + struct ldc_channel *lp; + + u32 _peer_sid; + u32 _local_sid; + struct vio_dring_state drings[2]; +#define VIO_DRIVER_TX_RING 0 +#define VIO_DRIVER_RX_RING 1 + + u8 hs_state; +#define VIO_HS_INVALID 0x00 +#define VIO_HS_GOTVERS 0x01 +#define VIO_HS_GOT_ATTR 0x04 +#define VIO_HS_SENT_DREG 0x08 +#define VIO_HS_SENT_RDX 0x10 +#define VIO_HS_GOT_RDX_ACK 0x20 +#define VIO_HS_GOT_RDX 0x40 +#define VIO_HS_SENT_RDX_ACK 0x80 +#define VIO_HS_COMPLETE (VIO_HS_GOT_RDX_ACK | VIO_HS_SENT_RDX_ACK) + + u8 dev_class; + + u8 dr_state; +#define VIO_DR_STATE_TXREG 0x01 +#define VIO_DR_STATE_RXREG 0x02 +#define VIO_DR_STATE_TXREQ 0x10 +#define VIO_DR_STATE_RXREQ 0x20 + + u8 debug; +#define VIO_DEBUG_HS 0x01 +#define VIO_DEBUG_DATA 0x02 + + void *desc_buf; + unsigned int desc_buf_len; + + struct vio_completion *cmp; + + struct vio_dev *vdev; + + unsigned long channel_id; + unsigned int tx_irq; + unsigned int rx_irq; + + struct timer_list timer; + + struct vio_version ver; + + struct mdesc_node *endpoint; + + struct vio_version *ver_table; + int ver_table_entries; + + char *name; + + struct vio_driver_ops *ops; +}; + +#define viodbg(TYPE, f, a...) \ +do { if (vio->debug & VIO_DEBUG_##TYPE) \ + printk(KERN_INFO "vio: ID[%lu] " f, vio->channel_id, ## a); \ +} while (0) + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +static inline struct vio_driver *to_vio_driver(struct device_driver *drv) +{ + return container_of(drv, struct vio_driver, driver); +} + +static inline struct vio_dev *to_vio_dev(struct device *dev) +{ + return container_of(dev, struct vio_dev, dev); +} + +extern int vio_ldc_send(struct vio_driver_state *vio, void *data, int len); +extern void vio_link_state_change(struct vio_driver_state *vio, int event); +extern void vio_conn_reset(struct vio_driver_state *vio); +extern int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt); +extern int vio_validate_sid(struct vio_driver_state *vio, + struct vio_msg_tag *tp); +extern u32 vio_send_sid(struct vio_driver_state *vio); +extern int vio_ldc_alloc(struct vio_driver_state *vio, + struct ldc_channel_config *base_cfg, void *event_arg); +extern void vio_ldc_free(struct vio_driver_state *vio); +extern int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, + u8 dev_class, struct mdesc_node *channel_endpoint, + struct vio_version *ver_table, int ver_table_size, + struct vio_driver_ops *ops, char *name); + +extern struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev); +extern void vio_port_up(struct vio_driver_state *vio); + +#endif /* _SPARC64_VIO_H */ -- cgit v1.2.3 From cb4812358423e7ea47d2b6471918d65238452cc5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2007 18:14:41 -0700 Subject: [SPARC64]: Assorted LDC bug cures. 1) LDC_MODE_RELIABLE is deprecated an unused by anything, plus it and LDC_MODE_STREAM were mis-numbered. 2) read_stream() should try to read as much as possible into the per-LDC stream buffer area, so do not trim the read_nonraw() length by the caller's size parameter. 3) Send data ACKs when necessary in read_nonraw(). 4) In read_nonraw() when we get a pure ACK, advance the RX head unconditionally past it. 5) Provide the ACKID field in the ldcdgb() packet dump in read_nonraw(). This helps debugging stream mode LDC channel problems. 6) Decrease verbosity of rx_data_wait() so that it is more useful. A debugging message each loop iteration is too much. 7) In process_data_ack() stop the loop checking when we hit lp->tx_tail not lp->tx_head. 8) Set the seqid field properly in send_data_nack(). Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 24fd2367d33..1c13738f13f 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -25,8 +25,7 @@ struct ldc_channel_config { #define LDC_MODE_RAW 0x00 #define LDC_MODE_UNRELIABLE 0x01 #define LDC_MODE_RESERVED 0x02 -#define LDC_MODE_RELIABLE 0x03 -#define LDC_MODE_STREAM 0x04 +#define LDC_MODE_STREAM 0x03 u8 debug; #define LDC_DEBUG_HS 0x01 -- cgit v1.2.3 From 13077d80286205e02eebe1c2786a914a4bbd2588 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2007 18:18:04 -0700 Subject: [SPARC64]: Export powerd facilities for external entities. Besides the existing usage for power-button interrupts, we'll want to make use of this code for domain-services where the LDOM manager can send reboot requests to the guest node. Signed-off-by: David S. Miller --- include/asm-sparc64/power.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/asm-sparc64/power.h (limited to 'include') diff --git a/include/asm-sparc64/power.h b/include/asm-sparc64/power.h new file mode 100644 index 00000000000..94495c1ac4f --- /dev/null +++ b/include/asm-sparc64/power.h @@ -0,0 +1,7 @@ +#ifndef _SPARC64_POWER_H +#define _SPARC64_POWER_H + +extern void wake_up_powerd(void); +extern int start_powerd(void); + +#endif /* !(_SPARC64_POWER_H) */ -- cgit v1.2.3 From 133f09a169f3022be3de671b29658b7ecb375022 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2007 23:22:55 -0700 Subject: [SPARC64]: Use more mearningful names for IRQ registry. All of the interrupts say "LDX RX" and "LDX TX" currently which is next to useless. Put a device specific prefix before "RX" and "TX" instead which makes it much more useful. Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 1c13738f13f..3c91f269f9d 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -56,7 +56,7 @@ extern struct ldc_channel *ldc_alloc(unsigned long id, extern void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -extern int ldc_bind(struct ldc_channel *lp); +extern int ldc_bind(struct ldc_channel *lp, const char *name); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. ldc_connect() does that, if the -- cgit v1.2.3 From 43fdf27470b216ebdef47e09ff83bed2f2894b13 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 13:47:50 -0700 Subject: [SPARC64]: Abstract out mdesc accesses for better MD update handling. Since we have to be able to handle MD updates, having an in-tree set of data structures representing the MD objects actually makes things more painful. The MD itself is easy to parse, and we can implement the existing interfaces using direct parsing of the MD binary image. The MD is now reference counted, so accesses have to now take the form: handle = mdesc_grab(); ... operations on MD ... mdesc_release(handle); The only remaining issue are cases where code holds on to references to MD property values. mdesc_get_property() returns a direct pointer to the property value, most cases just pull in the information they need and discard the pointer, but there are few that use the pointer directly over a long lifetime. Those will be fixed up in a subsequent changeset. A preliminary handler for MD update events from domain services is there, it is rudimentry but it works and handles all of the reference counting. It does not check the generation number of the MDs, and it does not generate a "add/delete" list for notification to interesting parties about MD changes but that will be forthcoming. Signed-off-by: David S. Miller --- include/asm-sparc64/mdesc.h | 67 +++++++++++++++++++++++++-------------------- include/asm-sparc64/vio.h | 23 ++++++++-------- 2 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index c6383982b53..bbb0c0bed48 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -4,36 +4,43 @@ #include #include -struct mdesc_node; -struct mdesc_arc { - const char *name; - struct mdesc_node *arc; -}; - -struct mdesc_node { - const char *name; - u64 node; - unsigned int unique_id; - unsigned int num_arcs; - unsigned int irqs[2]; - struct property *properties; - struct mdesc_node *hash_next; - struct mdesc_node *allnodes_next; - struct mdesc_arc arcs[0]; -}; - -extern struct mdesc_node *md_find_node_by_name(struct mdesc_node *from, - const char *name); -#define md_for_each_node_by_name(__mn, __name) \ - for (__mn = md_find_node_by_name(NULL, __name); __mn; \ - __mn = md_find_node_by_name(__mn, __name)) - -extern struct property *md_find_property(const struct mdesc_node *mp, - const char *name, - int *lenp); -extern const void *md_get_property(const struct mdesc_node *mp, - const char *name, - int *lenp); +struct mdesc_handle; + +/* Machine description operations are to be surrounded by grab and + * release calls. The mdesc_handle returned from the grab is + * the first argument to all of the operational calls that work + * on mdescs. + */ +extern struct mdesc_handle *mdesc_grab(void); +extern void mdesc_release(struct mdesc_handle *); + +#define MDESC_NODE_NULL (~(u64)0) + +extern u64 mdesc_node_by_name(struct mdesc_handle *handle, + u64 from_node, const char *name); +#define mdesc_for_each_node_by_name(__hdl, __node, __name) \ + for (__node = mdesc_node_by_name(__hdl, MDESC_NODE_NULL, __name); \ + (__node) != MDESC_NODE_NULL; \ + __node = mdesc_node_by_name(__hdl, __node, __name)) + +extern const void *mdesc_get_property(struct mdesc_handle *handle, + u64 node, const char *name, int *lenp); + +#define MDESC_ARC_TYPE_FWD "fwd" +#define MDESC_ARC_TYPE_BACK "back" + +extern u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from, + const char *arc_type); +#define mdesc_for_each_arc(__arc, __hdl, __node, __type) \ + for (__arc = mdesc_next_arc(__hdl, __node, __type); \ + (__arc) != MDESC_NODE_NULL; \ + __arc = mdesc_next_arc(__hdl, __arc, __type)) + +extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); + +extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); + +extern void mdesc_update(void); extern void sun4v_mdesc_init(void); diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h index 47c3da76dcb..a8a53e6fc25 100644 --- a/include/asm-sparc64/vio.h +++ b/include/asm-sparc64/vio.h @@ -265,13 +265,18 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, } struct vio_dev { - struct mdesc_node *mp; + u64 mp; struct device_node *dp; const char *type; const char *compat; int compat_len; + unsigned long channel_id; + + unsigned int tx_irq; + unsigned int rx_irq; + struct device dev; }; @@ -345,16 +350,10 @@ struct vio_driver_state { struct vio_dev *vdev; - unsigned long channel_id; - unsigned int tx_irq; - unsigned int rx_irq; - struct timer_list timer; struct vio_version ver; - struct mdesc_node *endpoint; - struct vio_version *ver_table; int ver_table_entries; @@ -365,7 +364,8 @@ struct vio_driver_state { #define viodbg(TYPE, f, a...) \ do { if (vio->debug & VIO_DEBUG_##TYPE) \ - printk(KERN_INFO "vio: ID[%lu] " f, vio->channel_id, ## a); \ + printk(KERN_INFO "vio: ID[%lu] " f, \ + vio->vdev->channel_id, ## a); \ } while (0) extern int vio_register_driver(struct vio_driver *drv); @@ -392,11 +392,10 @@ extern int vio_ldc_alloc(struct vio_driver_state *vio, struct ldc_channel_config *base_cfg, void *event_arg); extern void vio_ldc_free(struct vio_driver_state *vio); extern int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, - u8 dev_class, struct mdesc_node *channel_endpoint, - struct vio_version *ver_table, int ver_table_size, - struct vio_driver_ops *ops, char *name); + u8 dev_class, struct vio_version *ver_table, + int ver_table_size, struct vio_driver_ops *ops, + char *name); -extern struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev); extern void vio_port_up(struct vio_driver_state *vio); #endif /* _SPARC64_VIO_H */ -- cgit v1.2.3 From 83292e0a9c3f1c326b28fbf8cb70a8ce81a98163 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 14:16:22 -0700 Subject: [SPARC64]: Fix MD property lifetime bugs. Property values cannot be referenced outside of mdesc_grab()/mdesc_release() pairs. The only major offender was the VIO bus layer, easily fixed. Add some commentary to mdesc.h describing these rules. Signed-off-by: David S. Miller --- include/asm-sparc64/mdesc.h | 22 ++++++++++++++++++++-- include/asm-sparc64/vio.h | 7 +++++-- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index bbb0c0bed48..dc372df23fb 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -23,8 +23,28 @@ extern u64 mdesc_node_by_name(struct mdesc_handle *handle, (__node) != MDESC_NODE_NULL; \ __node = mdesc_node_by_name(__hdl, __node, __name)) +/* Access to property values returned from mdesc_get_property() are + * only valid inside of a mdesc_grab()/mdesc_release() sequence. + * Once mdesc_release() is called, the memory backed up by these + * pointers may reference freed up memory. + * + * Therefore callers must make copies of any property values + * they need. + * + * These same rules apply to mdesc_node_name(). + */ extern const void *mdesc_get_property(struct mdesc_handle *handle, u64 node, const char *name, int *lenp); +extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); + +/* MD arc iteration, the standard sequence is: + * + * unsigned long arc; + * mdesc_for_each_arc(arc, handle, node, MDESC_ARC_TYPE_{FWD,BACK}) { + * unsigned long target = mdesc_arc_target(handle, arc); + * ... + * } + */ #define MDESC_ARC_TYPE_FWD "fwd" #define MDESC_ARC_TYPE_BACK "back" @@ -38,8 +58,6 @@ extern u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from, extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); -extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); - extern void mdesc_update(void); extern void sun4v_mdesc_init(void); diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h index a8a53e6fc25..83c96422e9d 100644 --- a/include/asm-sparc64/vio.h +++ b/include/asm-sparc64/vio.h @@ -264,12 +264,15 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, ((dr->prod - dr->cons) & (ring_size - 1))); } +#define VIO_MAX_TYPE_LEN 64 +#define VIO_MAX_COMPAT_LEN 64 + struct vio_dev { u64 mp; struct device_node *dp; - const char *type; - const char *compat; + char type[VIO_MAX_TYPE_LEN]; + char compat[VIO_MAX_COMPAT_LEN]; int compat_len; unsigned long channel_id; -- cgit v1.2.3 From b3e13fbeb9ac1eb8e7b0791bf56e1775c692972b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 15:55:55 -0700 Subject: [SPARC64]: Fix setting of variables in LDOM guest. There is a special domain services capability for setting variables in the OBP options node. Guests don't have permanent store for the OBP variables like a normal system, so they are instead maintained in the LDOM control node or in the SC. Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 3c91f269f9d..a21996c6b15 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -4,6 +4,8 @@ #include extern int ldom_domaining_enabled; +extern void ldom_set_var(const char *var, const char *value); +extern void ldom_reboot(const char *boot_command); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. -- cgit v1.2.3 From 4f0234f4f9da485ecb9729af1b88567700fd4767 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2007 16:03:42 -0700 Subject: [SPARC64]: Initial LDOM cpu hotplug support. Only adding cpus is supports at the moment, removal will come next. When new cpus are configured, the machine description is updated. When we get the configure request we pass in a cpu mask of to-be-added cpus to the mdesc CPU node parser so it only fetches information for those cpus. That code also proceeds to update the SMT/multi-core scheduling bitmaps. cpu_up() does all the work and we return the status back over the DS channel. CPUs via dr-cpu need to be booted straight out of the hypervisor, and this requires: 1) A new trampoline mechanism. CPUs are booted straight out of the hypervisor with MMU disabled and running in physical addresses with no mappings installed in the TLB. The new hvtramp.S code sets up the critical cpu state, installs the locked TLB mappings for the kernel, and turns the MMU on. It then proceeds to follow the logic of the existing trampoline.S SMP cpu bringup code. 2) All calls into OBP have to be disallowed when domaining is enabled. Since cpus boot straight into the kernel from the hypervisor, OBP has no state about that cpu and therefore cannot handle being invoked on that cpu. Luckily it's only a handful of interfaces which can be called after the OBP device tree is obtained. For example, rebooting, halting, powering-off, and setting options node variables. CPU removal support will require some infrastructure changes here. Namely we'll have to process the requests via a true kernel thread instead of in a workqueue. workqueues run on a per-cpu thread, but when unconfiguring we might need to force the thread to execute on another cpu if the current cpu is the one being removed. Removal of a cpu also causes the kernel to destroy that cpu's workqueue running thread. Another issue on removal is that we may have interrupts still pointing to the cpu-to-be-removed. So new code will be needed to walk the active INO list and retarget those cpus as-needed. Signed-off-by: David S. Miller --- include/asm-sparc64/cpudata.h | 3 ++- include/asm-sparc64/hvtramp.h | 37 +++++++++++++++++++++++++++++++++++++ include/asm-sparc64/hypervisor.h | 2 +- include/asm-sparc64/ldc.h | 2 ++ include/asm-sparc64/mdesc.h | 3 +++ include/asm-sparc64/smp.h | 8 +++++--- 6 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 include/asm-sparc64/hvtramp.h (limited to 'include') diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 445026fbec3..0016d8b4531 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -80,7 +80,8 @@ struct trap_per_cpu { unsigned int dev_mondo_qmask; unsigned int resum_qmask; unsigned int nonresum_qmask; - unsigned int __pad2[3]; + unsigned int __pad2[1]; + void *hdesc; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(struct thread_info *); diff --git a/include/asm-sparc64/hvtramp.h b/include/asm-sparc64/hvtramp.h new file mode 100644 index 00000000000..c7dd6ad056d --- /dev/null +++ b/include/asm-sparc64/hvtramp.h @@ -0,0 +1,37 @@ +#ifndef _SPARC64_HVTRAP_H +#define _SPARC64_HVTRAP_H + +#ifndef __ASSEMBLY__ + +#include + +struct hvtramp_mapping { + __u64 vaddr; + __u64 tte; +}; + +struct hvtramp_descr { + __u32 cpu; + __u32 num_mappings; + __u64 fault_info_va; + __u64 fault_info_pa; + __u64 thread_reg; + struct hvtramp_mapping maps[2]; +}; + +extern void hv_cpu_startup(unsigned long hvdescr_pa); + +#endif + +#define HVTRAMP_DESCR_CPU 0x00 +#define HVTRAMP_DESCR_NUM_MAPPINGS 0x04 +#define HVTRAMP_DESCR_FAULT_INFO_VA 0x08 +#define HVTRAMP_DESCR_FAULT_INFO_PA 0x10 +#define HVTRAMP_DESCR_THREAD_REG 0x18 +#define HVTRAMP_DESCR_MAPS 0x20 + +#define HVTRAMP_MAPPING_VADDR 0x00 +#define HVTRAMP_MAPPING_TTE 0x08 +#define HVTRAMP_MAPPING_SIZE 0x10 + +#endif /* _SPARC64_HVTRAP_H */ diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h index db2130a95d6..524d49835df 100644 --- a/include/asm-sparc64/hypervisor.h +++ b/include/asm-sparc64/hypervisor.h @@ -98,7 +98,7 @@ #define HV_FAST_MACH_EXIT 0x00 #ifndef __ASSEMBLY__ -extern void sun4v_mach_exit(unsigned long exit_core); +extern void sun4v_mach_exit(unsigned long exit_code); #endif /* Domain services. */ diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index a21996c6b15..8d17bd6bd5d 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -6,6 +6,8 @@ extern int ldom_domaining_enabled; extern void ldom_set_var(const char *var, const char *value); extern void ldom_reboot(const char *boot_command); +extern void ldom_power_off(void); +extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index dc372df23fb..e97c4313375 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -2,6 +2,7 @@ #define _SPARC64_MDESC_H #include +#include #include struct mdesc_handle; @@ -60,6 +61,8 @@ extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); extern void mdesc_update(void); +extern void mdesc_fill_in_cpu_data(cpumask_t mask); + extern void sun4v_mdesc_init(void); #endif diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 4fb8c4bfb84..c42c5a035c7 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -29,9 +29,6 @@ #include #include -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map - extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; extern int sparc64_multi_core; @@ -46,6 +43,11 @@ extern int hard_smp_processor_id(void); extern void smp_fill_in_sib_core_maps(void); extern unsigned char boot_cpu_id; +#ifdef CONFIG_HOTPLUG_CPU +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); +#endif + #endif /* !(__ASSEMBLY__) */ #else -- cgit v1.2.3 From b14f5c100ce4c63e4c5a71ab47e71cf4a1caa9e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 14 Jul 2007 00:45:16 -0700 Subject: [SPARC64]: Fix build regressions added by dr-cpu changes. Do not select HOTPLUG_CPU from SUN_LDOMS, that causes HOTPLUG_CPU to be selected even on non-SMP which is illegal. Only build hvtramp.o when SMP, just like trampoline.o Protect dr-cpu code in ds.c with HOTPLUG_CPU. Likewise move ldom_startcpu_cpuid() to smp.c and protect it and the call site with SUN_LDOMS && HOTPLUG_CPU. Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 8d17bd6bd5d..bdb524a7b81 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -7,7 +7,6 @@ extern int ldom_domaining_enabled; extern void ldom_set_var(const char *var, const char *value); extern void ldom_reboot(const char *boot_command); extern void ldom_power_off(void); -extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. -- cgit v1.2.3 From 8b99cfb8cc51adae7f5294c8962a026c63100959 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 14 Jul 2007 02:23:37 -0700 Subject: [SPARC64]: More sensible udelay implementation. Take a page from the powerpc folks and just calculate the delay factor directly. Since frequency scaling chips use a system-tick register, the value is going to be the same system-wide. Signed-off-by: David S. Miller --- include/asm-sparc64/bugs.h | 5 ----- include/asm-sparc64/cpudata.h | 2 +- include/asm-sparc64/delay.h | 32 ++++++-------------------------- 3 files changed, 7 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/bugs.h b/include/asm-sparc64/bugs.h index bf39d86c0c9..11ade684197 100644 --- a/include/asm-sparc64/bugs.h +++ b/include/asm-sparc64/bugs.h @@ -4,12 +4,7 @@ */ #include -extern unsigned long loops_per_jiffy; - static void __init check_bugs(void) { -#ifndef CONFIG_SMP - cpu_data(0).udelay_val = loops_per_jiffy; -#endif sstate_running(); } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 0016d8b4531..98a6e609163 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -19,7 +19,7 @@ typedef struct { unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ unsigned int __pad0; unsigned long clock_tick; /* %tick's per second */ - unsigned long udelay_val; + unsigned long __pad; unsigned int __pad1; unsigned int __pad2; diff --git a/include/asm-sparc64/delay.h b/include/asm-sparc64/delay.h index a4aae6f8062..a77aa622d76 100644 --- a/include/asm-sparc64/delay.h +++ b/include/asm-sparc64/delay.h @@ -1,37 +1,17 @@ /* delay.h: Linux delay routines on sparc64. * - * Copyright (C) 1996, 2004 David S. Miller (davem@davemloft.net). - * - * Based heavily upon x86 variant which is: - * Copyright (C) 1993 Linus Torvalds - * - * Delay routines calling functions in arch/sparc64/lib/delay.c + * Copyright (C) 1996, 2004, 2007 David S. Miller (davem@davemloft.net). */ -#ifndef __SPARC64_DELAY_H -#define __SPARC64_DELAY_H - -#include -#include +#ifndef _SPARC64_DELAY_H +#define _SPARC64_DELAY_H #ifndef __ASSEMBLY__ -extern void __bad_udelay(void); -extern void __bad_ndelay(void); - -extern void __udelay(unsigned long usecs); -extern void __ndelay(unsigned long nsecs); -extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); - -#define udelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ - __udelay(n)) - -#define ndelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ - __ndelay(n)) +extern void udelay(unsigned long usecs); +#define mdelay(n) udelay((n) * 1000) #endif /* !__ASSEMBLY__ */ -#endif /* defined(__SPARC64_DELAY_H) */ +#endif /* _SPARC64_DELAY_H */ -- cgit v1.2.3 From e0204409df29fe1b7d18f81dfc3ae6f9d90e7a63 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2007 03:49:40 -0700 Subject: [SPARC64]: dr-cpu unconfigure support. Signed-off-by: David S. Miller --- include/asm-sparc64/irq.h | 2 ++ include/asm-sparc64/mmu_context.h | 3 +++ include/asm-sparc64/smp.h | 3 +-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 90781e34a95..e6c436ef935 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -53,6 +53,8 @@ extern unsigned int sun4v_build_msi(u32 devhandle, unsigned int *virt_irq_p, extern void sun4v_destroy_msi(unsigned int virt_irq); extern unsigned int sbus_build_irq(void *sbus, unsigned int ino); +extern void fixup_irqs(void); + static __inline__ void set_softint(unsigned long bits) { __asm__ __volatile__("wr %0, 0x0, %%set_softint" diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 8d129032013..9fc225ed550 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -76,6 +76,9 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str unsigned long ctx_valid, flags; int cpu; + if (unlikely(mm == &init_mm)) + return; + spin_lock_irqsave(&mm->context.lock, flags); ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index c42c5a035c7..e8a96a31761 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -41,7 +41,7 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) extern void smp_fill_in_sib_core_maps(void); -extern unsigned char boot_cpu_id; +extern void cpu_play_dead(void); #ifdef CONFIG_HOTPLUG_CPU extern int __cpu_disable(void); @@ -54,7 +54,6 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() 0 #define smp_fill_in_sib_core_maps() do { } while (0) -#define boot_cpu_id (0) #endif /* !(CONFIG_SMP) */ -- cgit v1.2.3