diff options
Diffstat (limited to 'drivers/misc/sgi-xp/xpc_main.c')
-rw-r--r-- | drivers/misc/sgi-xp/xpc_main.c | 977 |
1 files changed, 526 insertions, 451 deletions
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 579b01ff82d..46325fc8481 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -25,37 +25,31 @@ * * Caveats: * - * . We currently have no way to determine which nasid an IPI came - * from. Thus, xpc_IPI_send() does a remote AMO write followed by - * an IPI. The AMO indicates where data is to be pulled from, so - * after the IPI arrives, the remote partition checks the AMO word. - * The IPI can actually arrive before the AMO however, so other code - * must periodically check for this case. Also, remote AMO operations - * do not reliably time out. Thus we do a remote PIO read solely to - * know whether the remote partition is down and whether we should - * stop sending IPIs to it. This remote PIO read operation is set up - * in a special nofault region so SAL knows to ignore (and cleanup) - * any errors due to the remote AMO write, PIO read, and/or PIO - * write operations. + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. * * If/when new hardware solves this IPI problem, we should abandon * the current approach. * */ -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/cache.h> -#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/reboot.h> -#include <linux/completion.h> #include <linux/kdebug.h> #include <linux/kthread.h> -#include <linux/uaccess.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> #include "xpc.h" /* define two XPC debug device structures to be used with dev_dbg() et al */ @@ -89,9 +83,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; static int xpc_hb_check_min_interval = 10; static int xpc_hb_check_max_interval = 120; -int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; -static int xpc_disengage_request_min_timelimit; /* = 0 */ -static int xpc_disengage_request_max_timelimit = 120; +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; static ctl_table xpc_sys_xpc_hb_dir[] = { { @@ -124,14 +118,14 @@ static ctl_table xpc_sys_xpc_dir[] = { .child = xpc_sys_xpc_hb_dir}, { .ctl_name = CTL_UNNUMBERED, - .procname = "disengage_request_timelimit", - .data = &xpc_disengage_request_timelimit, + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &xpc_disengage_request_min_timelimit, - .extra2 = &xpc_disengage_request_max_timelimit}, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, {} }; static ctl_table xpc_sys_dir[] = { @@ -144,16 +138,19 @@ static ctl_table xpc_sys_dir[] = { }; static struct ctl_table_header *xpc_sysctl; -/* non-zero if any remote partition disengage request was timed out */ -int xpc_disengage_request_timedout; +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; -/* #of IRQs received */ -static atomic_t xpc_act_IRQ_rcvd; +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); /* IRQ handler notifies this wait queue on receipt of an IRQ */ -static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; +void *xpc_heartbeating_to_mask; /* notification that the xpc_hb_checker thread has exited */ static DECLARE_COMPLETION(xpc_hb_checker_exited); @@ -161,8 +158,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited); /* notification that the xpc_discovery thread has exited */ static DECLARE_COMPLETION(xpc_discovery_exited); -static struct timer_list xpc_hb_timer; - static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); @@ -175,31 +170,76 @@ static struct notifier_block xpc_die_notifier = { .notifier_call = xpc_system_die, }; +int (*xpc_setup_partitions_sn) (void); +enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, + unsigned long *rp_pa, + size_t *len); +int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp); +void (*xpc_heartbeat_init) (void); +void (*xpc_heartbeat_exit) (void); +void (*xpc_increment_heartbeat) (void); +void (*xpc_offline_heartbeat) (void); +void (*xpc_online_heartbeat) (void); +enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part); + +enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part); +void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch); +u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part); +enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch); +void (*xpc_teardown_msg_structures) (struct xpc_channel *ch); +void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number); +int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch); +void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch); + +void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, + int nasid); +void (*xpc_request_partition_reactivation) (struct xpc_partition *part); +void (*xpc_request_partition_deactivation) (struct xpc_partition *part); +void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part); + +void (*xpc_process_activate_IRQ_rcvd) (void); +enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part); +void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part); + +void (*xpc_indicate_partition_engaged) (struct xpc_partition *part); +int (*xpc_partition_engaged) (short partid); +int (*xpc_any_partition_engaged) (void); +void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part); +void (*xpc_assume_partition_disengaged) (short partid); + +void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_closereply) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, + unsigned long *irq_flags); + +void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, + unsigned long msgqueue_pa); + +enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, + void *payload, u16 payload_size, + u8 notify_type, xpc_notify_func func, + void *key); +void (*xpc_received_payload) (struct xpc_channel *ch, void *payload); + /* - * Timer function to enforce the timelimit on the partition disengage request. + * Timer function to enforce the timelimit on the partition disengage. */ static void -xpc_timeout_partition_disengage_request(unsigned long data) +xpc_timeout_partition_disengage(unsigned long data) { struct xpc_partition *part = (struct xpc_partition *)data; - DBUG_ON(time_before(jiffies, part->disengage_request_timeout)); + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); (void)xpc_partition_disengaged(part); - DBUG_ON(part->disengage_request_timeout != 0); - DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0); -} - -/* - * Notify the heartbeat check thread that an IRQ has been received. - */ -static irqreturn_t -xpc_act_IRQ_handler(int irq, void *dev_id) -{ - atomic_inc(&xpc_act_IRQ_rcvd); - wake_up_interruptible(&xpc_act_IRQ_wq); - return IRQ_HANDLED; + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); } /* @@ -210,15 +250,63 @@ xpc_act_IRQ_handler(int irq, void *dev_id) static void xpc_hb_beater(unsigned long dummy) { - xpc_vars->heartbeat++; + xpc_increment_heartbeat(); - if (time_after_eq(jiffies, xpc_hb_check_timeout)) - wake_up_interruptible(&xpc_act_IRQ_wq); + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); add_timer(&xpc_hb_timer); } +static void +xpc_start_hb_beater(void) +{ + xpc_heartbeat_init(); + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + /* * This thread is responsible for nearly all of the partition * activation/deactivation. @@ -226,67 +314,57 @@ xpc_hb_beater(unsigned long dummy) static int xpc_hb_checker(void *ignore) { - int last_IRQ_count = 0; - int new_IRQ_count; int force_IRQ = 0; - cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU); /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed_ptr(current, cpumask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); - xpc_hb_beater(0); + xpc_start_hb_beater(); while (!xpc_exiting) { dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " "been received\n", (int)(xpc_hb_check_timeout - jiffies), - atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); + xpc_activate_IRQ_rcvd); /* checking of remote heartbeats is skewed by IRQ handling */ - if (time_after_eq(jiffies, xpc_hb_check_timeout)) { + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + dev_dbg(xpc_part, "checking remote heartbeats\n"); xpc_check_remote_hb(); /* - * We need to periodically recheck to ensure no - * IPI/AMO pairs have been missed. That check - * must always reset xpc_hb_check_timeout. + * On sn2 we need to periodically recheck to ensure no + * IRQ/amo pairs have been missed. */ - force_IRQ = 1; + if (is_shub()) + force_IRQ = 1; } /* check for outstanding IRQs */ - new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); - if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { force_IRQ = 0; - - dev_dbg(xpc_part, "found an IRQ to process; will be " - "resetting xpc_hb_check_timeout\n"); - - last_IRQ_count += xpc_identify_act_IRQ_sender(); - if (last_IRQ_count < new_IRQ_count) { - /* retry once to help avoid missing AMO */ - (void)xpc_identify_act_IRQ_sender(); - } - last_IRQ_count = new_IRQ_count; - - xpc_hb_check_timeout = jiffies + - (xpc_hb_check_interval * HZ); + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_process_activate_IRQ_rcvd(); } /* wait for IRQ or timeout */ - (void)wait_event_interruptible(xpc_act_IRQ_wq, - (last_IRQ_count < - atomic_read(&xpc_act_IRQ_rcvd) - || time_after_eq(jiffies, - xpc_hb_check_timeout) || + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || xpc_exiting)); } + xpc_stop_hb_beater(); + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); /* mark this thread as having exited */ @@ -312,37 +390,8 @@ xpc_initiate_discovery(void *ignore) } /* - * Establish first contact with the remote partititon. This involves pulling - * the XPC per partition variables from the remote partition and waiting for - * the remote partition to pull ours. - */ -static enum xp_retval -xpc_make_first_contact(struct xpc_partition *part) -{ - enum xp_retval ret; - - while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) { - if (ret != xpRetry) { - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - dev_dbg(xpc_chan, "waiting to make first contact with " - "partition %d\n", XPC_PARTID(part)); - - /* wait a 1/4 of a second or so */ - (void)msleep_interruptible(250); - - if (part->act_state == XPC_P_DEACTIVATING) - return part->reason; - } - - return xpc_mark_partition_active(part); -} - -/* * The first kthread assigned to a newly activated partition is the one - * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to * that kthread until the partition is brought down, at which time that kthread * returns back to XPC HB. (The return of that kthread will signify to XPC HB * that XPC has dismantled all communication infrastructure for the associated @@ -355,11 +404,11 @@ xpc_make_first_contact(struct xpc_partition *part) static void xpc_channel_mgr(struct xpc_partition *part) { - while (part->act_state != XPC_P_DEACTIVATING || + while (part->act_state != XPC_P_AS_DEACTIVATING || atomic_read(&part->nchannels_active) > 0 || !xpc_partition_disengaged(part)) { - xpc_process_channel_activity(part); + xpc_process_sent_chctl_flags(part); /* * Wait until we've been requested to activate kthreads or @@ -377,8 +426,8 @@ xpc_channel_mgr(struct xpc_partition *part) atomic_dec(&part->channel_mgr_requests); (void)wait_event_interruptible(part->channel_mgr_wq, (atomic_read(&part->channel_mgr_requests) > 0 || - part->local_IPI_amo != 0 || - (part->act_state == XPC_P_DEACTIVATING && + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && atomic_read(&part->nchannels_active) == 0 && xpc_partition_disengaged(part)))); atomic_set(&part->channel_mgr_requests, 1); @@ -386,47 +435,163 @@ xpc_channel_mgr(struct xpc_partition *part) } /* - * When XPC HB determines that a partition has come up, it will create a new - * kthread and that kthread will call this function to attempt to set up the - * basic infrastructure used for Cross Partition Communication with the newly - * upped partition. - * - * The kthread that was created by XPC HB and which setup the XPC - * infrastructure will remain assigned to the partition until the partition - * goes down. At which time the kthread will teardown the XPC infrastructure - * and then exit. - * - * XPC HB will put the remote partition's XPC per partition specific variables - * physical address into xpc_partitions[partid].remote_vars_part_pa prior to - * calling xpc_partition_up(). + * Guarantee that the kzalloc'd memory is cacheline aligned. */ -static void -xpc_partition_up(struct xpc_partition *part) +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) { + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ DBUG_ON(part->channels != NULL); + part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } - dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); + /* allocate the remote open and close args */ - if (xpc_setup_infrastructure(part) != xpSuccess) - return; + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_setup_ch_structures_sn(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. + */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); /* - * The kthread that XPC HB called us with will become the - * channel manager for this partition. It will not return - * back to XPC HB until the partition's XPC infrastructure - * has been dismantled. + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; - (void)xpc_part_ref(part); /* this will always succeed */ + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); - if (xpc_make_first_contact(part) == xpSuccess) - xpc_channel_mgr(part); + /* now we can begin tearing down the infrastructure */ - xpc_part_deref(part); + xpc_teardown_ch_structures_sn(part); - xpc_teardown_infrastructure(part); + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; } +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. + */ static int xpc_activating(void *__partid) { @@ -434,64 +599,47 @@ xpc_activating(void *__partid) struct xpc_partition *part = &xpc_partitions[partid]; unsigned long irq_flags; - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_DEACTIVATING) { - part->act_state = XPC_P_INACTIVE; + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; spin_unlock_irqrestore(&part->act_lock, irq_flags); part->remote_rp_pa = 0; return 0; } /* indicate the thread is activating */ - DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); - part->act_state = XPC_P_ACTIVATING; + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; XPC_SET_REASON(part, 0, 0); spin_unlock_irqrestore(&part->act_lock, irq_flags); - dev_dbg(xpc_part, "bringing partition %d up\n", partid); + dev_dbg(xpc_part, "activating partition %d\n", partid); - /* - * Register the remote partition's AMOs with SAL so it can handle - * and cleanup errors within that address range should the remote - * partition go down. We don't unregister this range because it is - * difficult to tell when outstanding writes to the remote partition - * are finished and thus when it is safe to unregister. This should - * not result in wasted space in the SAL xp_addr_region table because - * we should get the same page for remote_amos_page_pa after module - * reloads and system reboots. - */ - if (sn_register_xp_addr_region(part->remote_amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " - "xp_addr region\n", partid); + xpc_allow_hb(partid); - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ - xpc_allow_hb(partid, xpc_vars); - xpc_IPI_send_activated(part); + if (xpc_make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } - /* - * xpc_partition_up() holds this thread and marks this partition as - * XPC_P_ACTIVE by calling xpc_hb_mark_active(). - */ - (void)xpc_partition_up(part); + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } - xpc_disallow_hb(partid, xpc_vars); + xpc_disallow_hb(partid); xpc_mark_partition_inactive(part); if (part->reason == xpReactivating) { /* interrupting ourselves results in activating partition */ - xpc_IPI_send_reactivate(part); + xpc_request_partition_reactivation(part); } return 0; @@ -506,9 +654,9 @@ xpc_activate_partition(struct xpc_partition *part) spin_lock_irqsave(&part->act_lock, irq_flags); - DBUG_ON(part->act_state != XPC_P_INACTIVE); + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); - part->act_state = XPC_P_ACTIVATION_REQ; + part->act_state = XPC_P_AS_ACTIVATION_REQ; XPC_SET_REASON(part, xpCloneKThread, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); @@ -517,62 +665,12 @@ xpc_activate_partition(struct xpc_partition *part) partid); if (IS_ERR(kthread)) { spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; + part->act_state = XPC_P_AS_INACTIVE; XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); } } -/* - * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified - * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more - * than one partition, we use an AMO_t structure per partition to indicate - * whether a partition has sent an IPI or not. If it has, then wake up the - * associated kthread to handle it. - * - * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC - * running on other partitions. - * - * Noteworthy Arguments: - * - * irq - Interrupt ReQuest number. NOT USED. - * - * dev_id - partid of IPI's potential sender. - */ -irqreturn_t -xpc_notify_IRQ_handler(int irq, void *dev_id) -{ - short partid = (short)(u64)dev_id; - struct xpc_partition *part = &xpc_partitions[partid]; - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - xpc_part_deref(part); - } - return IRQ_HANDLED; -} - -/* - * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor - * because the write to their associated IPI amo completed after the IRQ/IPI - * was received. - */ -void -xpc_dropped_IPI_check(struct xpc_partition *part) -{ - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - part->dropped_IPI_timer.expires = jiffies + - XPC_P_DROPPED_IPI_WAIT; - add_timer(&part->dropped_IPI_timer); - xpc_part_deref(part); - } -} - void xpc_activate_kthreads(struct xpc_channel *ch, int needed) { @@ -617,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) do { /* deliver messages to their intended recipients */ - while (ch->w_local_GP.get < ch->w_remote_GP.put && + while (xpc_n_of_deliverable_payloads(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING)) { - xpc_deliver_msg(ch); + xpc_deliver_payload(ch); } if (atomic_inc_return(&ch->kthreads_idle) > @@ -633,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) "wait_event_interruptible_exclusive()\n"); (void)wait_event_interruptible_exclusive(ch->idle_wq, - (ch->w_local_GP.get < ch->w_remote_GP.put || + (xpc_n_of_deliverable_payloads(ch) > 0 || (ch->flags & XPC_C_DISCONNECTING))); atomic_dec(&ch->kthreads_idle); @@ -678,7 +776,7 @@ xpc_kthread_start(void *args) * additional kthreads to help deliver them. We only * need one less than total #of messages to deliver. */ - n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; + n_needed = xpc_n_of_deliverable_payloads(ch) - 1; if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) xpc_activate_kthreads(ch, n_needed); @@ -704,11 +802,9 @@ xpc_kthread_start(void *args) } spin_unlock_irqrestore(&ch->lock, irq_flags); - if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - if (atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); @@ -759,9 +855,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, } else if (ch->flags & XPC_C_DISCONNECTING) { break; - } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { - if (atomic_inc_return(&part->nchannels_engaged) == 1) - xpc_mark_partition_engaged(part); + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_indicate_partition_engaged(part); } (void)xpc_part_ref(part); xpc_msgqueue_ref(ch); @@ -783,8 +879,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); xpc_part_deref(part); @@ -816,7 +911,7 @@ xpc_disconnect_wait(int ch_number) int wakeup_channel_mgr; /* now wait for all callouts to the caller's function to cease */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (!xpc_part_ref(part)) @@ -835,16 +930,15 @@ xpc_disconnect_wait(int ch_number) DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); wakeup_channel_mgr = 0; - if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch->number, - ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); wakeup_channel_mgr = 1; } - ch->delayed_IPI_flags = 0; + ch->delayed_chctl_flags = 0; } ch->flags &= ~XPC_C_WDISCONNECT; @@ -857,13 +951,63 @@ xpc_disconnect_wait(int ch_number) } } +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kzalloc(sizeof(struct xpc_partition) * + xp_max_npartitions, GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + init_timer(&part->disengage_timer); + part->disengage_timer.function = + xpc_timeout_partition_disengage; + part->disengage_timer.data = (unsigned long)part; + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_setup_partitions_sn(); +} + +static void +xpc_teardown_partitions(void) +{ + kfree(xpc_partitions); +} + static void xpc_do_exit(enum xp_retval reason) { short partid; int active_part_count, printed_waiting_msg = 0; struct xpc_partition *part; - unsigned long printmsg_time, disengage_request_timeout = 0; + unsigned long printmsg_time, disengage_timeout = 0; /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ DBUG_ON(xpc_exiting == 1); @@ -874,10 +1018,7 @@ xpc_do_exit(enum xp_retval reason) * the heartbeat checker thread in case it's sleeping. */ xpc_exiting = 1; - wake_up_interruptible(&xpc_act_IRQ_wq); - - /* ignore all incoming interrupts */ - free_irq(SGI_XPC_ACTIVATE, NULL); + wake_up_interruptible(&xpc_activate_IRQ_wq); /* wait for the discovery thread to exit */ wait_for_completion(&xpc_discovery_exited); @@ -890,17 +1031,17 @@ xpc_do_exit(enum xp_retval reason) /* wait for all partitions to become inactive */ - printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - xpc_disengage_request_timedout = 0; + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; do { active_part_count = 0; - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { + part->act_state == XPC_P_AS_INACTIVE) { continue; } @@ -908,36 +1049,32 @@ xpc_do_exit(enum xp_retval reason) XPC_DEACTIVATE_PARTITION(part, reason); - if (part->disengage_request_timeout > - disengage_request_timeout) { - disengage_request_timeout = - part->disengage_request_timeout; - } + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; } - if (xpc_partition_engaged(-1UL)) { - if (time_after(jiffies, printmsg_time)) { + if (xpc_any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { dev_info(xpc_part, "waiting for remote " - "partitions to disengage, timeout in " - "%ld seconds\n", - (disengage_request_timeout - jiffies) - / HZ); + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); printmsg_time = jiffies + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); printed_waiting_msg = 1; } } else if (active_part_count > 0) { if (printed_waiting_msg) { dev_info(xpc_part, "waiting for local partition" - " to disengage\n"); + " to deactivate\n"); printed_waiting_msg = 0; } } else { - if (!xpc_disengage_request_timedout) { + if (!xpc_disengage_timedout) { dev_info(xpc_part, "all partitions have " - "disengaged\n"); + "deactivated\n"); } break; } @@ -947,33 +1084,28 @@ xpc_do_exit(enum xp_retval reason) } while (1); - DBUG_ON(xpc_partition_engaged(-1UL)); + DBUG_ON(xpc_any_partition_engaged()); + DBUG_ON(xpc_any_hbs_allowed() != 0); - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* now it's time to eliminate our heartbeat */ - del_timer_sync(&xpc_hb_timer); - DBUG_ON(xpc_vars->heartbeating_to_mask != 0); + xpc_teardown_rsvd_page(); if (reason == xpUnloading) { - /* take ourselves off of the reboot_notifier_list */ - (void)unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); } - /* close down protections for IPI operations */ - xpc_restrict_IPI_ops(); - /* clear the interface to XPC's functions */ xpc_clear_interface(); if (xpc_sysctl) unregister_sysctl_table(xpc_sysctl); - kfree(xpc_remote_copy_buffer_base); + xpc_teardown_partitions(); + + if (is_shub()) + xpc_exit_sn2(); + else + xpc_exit_uv(); } /* @@ -1003,60 +1135,57 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) } /* - * Notify other partitions to disengage from all references to our memory. + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. */ static void -xpc_die_disengage(void) +xpc_die_deactivate(void) { struct xpc_partition *part; short partid; - unsigned long engaged; - long time, printmsg_time, disengage_request_timeout; + int any_engaged; + long keep_waiting; + long wait_to_print; /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; - xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ + xpc_disallow_all_hbs(); /*indicate we're deactivated */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; - if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)) { - - /* just in case it was left set by an earlier XPC */ - xpc_clear_partition_engaged(1UL << partid); - continue; - } - - if (xpc_partition_engaged(1UL << partid) || - part->act_state != XPC_P_INACTIVE) { - xpc_request_partition_disengage(part); - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); + if (xpc_partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_request_partition_deactivation(part); + xpc_indicate_partition_disengaged(part); } } - time = rtc_time(); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); - disengage_request_timeout = time + - (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); - - /* wait for all other partitions to disengage from us */ + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. + */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; while (1) { - engaged = xpc_partition_engaged(-1UL); - if (!engaged) { - dev_info(xpc_part, "all partitions have disengaged\n"); + any_engaged = xpc_any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); break; } - time = rtc_time(); - if (time >= disengage_request_timeout) { - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - if (engaged & (1UL << partid)) { - dev_info(xpc_part, "disengage from " + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " "remote partition %d timed " "out\n", partid); } @@ -1064,15 +1193,15 @@ xpc_die_disengage(void) break; } - if (time >= printmsg_time) { + if (!wait_to_print--) { dev_info(xpc_part, "waiting for remote partitions to " - "disengage, timeout in %ld seconds\n", - (disengage_request_timeout - time) / - sn_rtc_cycles_per_second); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * - sn_rtc_cycles_per_second); + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; } + + udelay(200); } } @@ -1087,10 +1216,11 @@ xpc_die_disengage(void) static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) { +#ifdef CONFIG_IA64 /* !!! temporary kludge */ switch (event) { case DIE_MACHINE_RESTART: case DIE_MACHINE_HALT: - xpc_die_disengage(); + xpc_die_deactivate(); break; case DIE_KDEBUG_ENTER: @@ -1101,8 +1231,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 1; + xpc_offline_heartbeat(); break; case DIE_KDEBUG_LEAVE: @@ -1113,10 +1242,12 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 0; + xpc_online_heartbeat(); break; } +#else + xpc_die_deactivate(); +#endif return NOTIFY_DONE; } @@ -1125,105 +1256,52 @@ int __init xpc_init(void) { int ret; - short partid; - struct xpc_partition *part; struct task_struct *kthread; - size_t buf_size; - - if (!ia64_platform_is("sn2")) - return -ENODEV; - - buf_size = max(XPC_RP_VARS_SIZE, - XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES); - xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size, - GFP_KERNEL, - &xpc_remote_copy_buffer_base); - if (xpc_remote_copy_buffer == NULL) - return -ENOMEM; snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); - xpc_sysctl = register_sysctl_table(xpc_sys_dir); - - /* - * The first few fields of each entry of xpc_partitions[] need to - * be initialized now so that calls to xpc_connect() and - * xpc_disconnect() can be made prior to the activation of any remote - * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE - * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING - * PARTITION HAS BEEN ACTIVATED. - */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); - - part->act_IRQ_rcvd = 0; - spin_lock_init(&part->act_lock); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, 0, 0); + if (is_shub()) { + /* + * The ia64-sn2 architecture supports at most 64 partitions. + * And the inability to unregister remote amos restricts us + * further to only support exactly 64 partitions on this + * architecture, no less. + */ + if (xp_max_npartitions != 64) { + dev_err(xpc_part, "max #of partitions not set to 64\n"); + ret = -EINVAL; + } else { + ret = xpc_init_sn2(); + } - init_timer(&part->disengage_request_timer); - part->disengage_request_timer.function = - xpc_timeout_partition_disengage_request; - part->disengage_request_timer.data = (unsigned long)part; + } else if (is_uv()) { + ret = xpc_init_uv(); - part->setup_state = XPC_P_UNSET; - init_waitqueue_head(&part->teardown_wq); - atomic_set(&part->references, 0); + } else { + ret = -ENODEV; } - /* - * Open up protections for IPI operations (and AMO operations on - * Shub 1.1 systems). - */ - xpc_allow_IPI_ops(); - - /* - * Interrupts being processed will increment this atomic variable and - * awaken the heartbeat thread which will process the interrupts. - */ - atomic_set(&xpc_act_IRQ_rcvd, 0); + if (ret != 0) + return ret; - /* - * This is safe to do before the xpc_hb_checker thread has started - * because the handler releases a wait queue. If an interrupt is - * received before the thread is waiting, it will not go to sleep, - * but rather immediately process the interrupt. - */ - ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, - "xpc hb", NULL); + ret = xpc_setup_partitions(); if (ret != 0) { - dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " - "errno=%d\n", -ret); - - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; } + xpc_sysctl = register_sysctl_table(xpc_sys_dir); + /* * Fill the partition reserved page with the information needed by * other partitions to discover we are alive and establish initial * communications. */ - xpc_rsvd_page = xpc_rsvd_page_init(); - if (xpc_rsvd_page == NULL) { - dev_err(xpc_part, "could not setup our reserved page\n"); - - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; } /* add ourselves to the reboot_notifier_list */ @@ -1236,9 +1314,6 @@ xpc_init(void) if (ret != 0) dev_warn(xpc_part, "can't register die notifier\n"); - init_timer(&xpc_hb_timer); - xpc_hb_timer.function = xpc_hb_beater; - /* * The real work-horse behind xpc. This processes incoming * interrupts and monitors remote heartbeats. @@ -1246,25 +1321,8 @@ xpc_init(void) kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); if (IS_ERR(kthread)) { dev_err(xpc_part, "failed while forking hb check thread\n"); - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* take ourselves off of the reboot_notifier_list */ - (void)unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void)unregister_die_notifier(&xpc_die_notifier); - - del_timer_sync(&xpc_hb_timer); - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + ret = -EBUSY; + goto out_3; } /* @@ -1286,11 +1344,28 @@ xpc_init(void) /* set the interface to point at XPC's functions */ xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, - xpc_initiate_allocate, xpc_initiate_send, - xpc_initiate_send_notify, xpc_initiate_received, - xpc_initiate_partid_to_nasids); + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_shub()) + xpc_exit_sn2(); + else + xpc_exit_uv(); + return ret; } module_init(xpc_init); @@ -1315,9 +1390,9 @@ module_param(xpc_hb_check_interval, int, 0); MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " "heartbeat checks."); -module_param(xpc_disengage_request_timelimit, int, 0); -MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " - "for disengage request to complete."); +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for disengage to complete."); module_param(xpc_kdebug_ignore, int, 0); MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " |