changeset 5076:342323d1ccaa

6292092 callout should not be blocked by interrupts from executing realtime timeouts
6540436 kpreempt() needs a more reliable way to generate level1 intr
author mishra
date Mon, 17 Sep 2007 15:47:19 -0700
parents 199eb2ec2c2c
children 160fd36577f5
files usr/src/uts/common/Makefile.files usr/src/uts/common/conf/param.c usr/src/uts/common/io/avintr.c usr/src/uts/common/os/clock.c usr/src/uts/common/os/cpu.c usr/src/uts/common/os/softint.c usr/src/uts/common/sys/cpuvar.h usr/src/uts/sun4/os/intr.c
diffstat 8 files changed, 498 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/Makefile.files	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/Makefile.files	Mon Sep 17 15:47:19 2007 -0700
@@ -61,6 +61,7 @@
 		rctl_proc.o	\
 		rwlock.o	\
 		seg_kmem.o	\
+		softint.o	\
 		string.o	\
 		thread_intr.o	\
 		vm_page.o	\
@@ -279,7 +280,6 @@
 		sigsuspend.o	\
 		sigtimedwait.o	\
 		sleepq.o	\
-		softint.o	\
 		space.o		\
 		sscanf.o	\
 		ssig.o		\
--- a/usr/src/uts/common/conf/param.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/conf/param.c	Mon Sep 17 15:47:19 2007 -0700
@@ -224,14 +224,20 @@
 };
 
 
+#if defined(__sparc)
+	extern void siron_mp_init();
+#endif
+
 /*
  * Any per cpu resources should be initialized via
  * an entry in mp_init_tbl().
  */
-
 void	(*mp_init_tbl[])(void) = {
 	ftrace_init,
 	cyclic_mp_init,
+#if defined(__sparc)
+	siron_mp_init,
+#endif
 	0
 };
 
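For context, mp_init_tbl[] is a null-terminated table of callbacks through which per-CPU resources are initialized during boot (per the comment above), which is why siron_mp_init() is hooked in here under __sparc rather than called directly. The code that walks the table is not part of this changeset; the following is only a minimal sketch of how such a null-terminated function-pointer table is typically walked:

	extern void (*mp_init_tbl[])(void);

	/* Invoke every registered callback until the terminating 0 entry. */
	static void
	run_mp_init_tbl(void)
	{
		void (**initf)(void);

		for (initf = mp_init_tbl; *initf != NULL; initf++)
			(*initf)();	/* ftrace_init, cyclic_mp_init, siron_mp_init, ... */
	}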
--- a/usr/src/uts/common/io/avintr.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/io/avintr.c	Mon Sep 17 15:47:19 2007 -0700
@@ -572,6 +572,39 @@
 }
 
 /*
+ * The handler which is executed on the target CPU.
+ */
+/*ARGSUSED*/
+static int
+siron_poke_intr(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
+{
+	siron();
+	return (0);
+}
+
+/*
+ * May get called from softcall to poke CPUs.
+ */
+void
+siron_poke_cpu(cpuset_t poke)
+{
+	int cpuid = CPU->cpu_id;
+
+	/*
+	 * If we are poking ourselves then we can simply
+	 * generate the level1 interrupt using siron().
+	 */
+	if (CPU_IN_SET(poke, cpuid)) {
+		siron();
+		CPUSET_DEL(poke, cpuid);
+		if (CPUSET_ISNULL(poke))
+			return;
+	}
+
+	xc_call(0, 0, 0, X_CALL_MEDPRI, poke, (xc_func_t)siron_poke_intr);
+}
+
+/*
  * Walk the autovector table for this vector, invoking each
  * interrupt handler as we go.
  */
--- a/usr/src/uts/common/os/clock.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/os/clock.c	Mon Sep 17 15:47:19 2007 -0700
@@ -451,8 +451,49 @@
 			 * going to perform one_second processing.
 			 */
 			w_io += CPU_STATS(cp, sys.iowait);
+		}
 
+		if (one_sec && (cp->cpu_flags & CPU_EXISTS)) {
+			int i, load, change;
+			hrtime_t intracct, intrused;
+			const hrtime_t maxnsec = 1000000000;
+			const int precision = 100;
+
+			/*
+			 * Estimate interrupt load on this cpu each second.
+			 * Computes cpu_intrload as %utilization (0-99).
+			 */
+
+			/* add up interrupt time from all micro states */
+			for (intracct = 0, i = 0; i < NCMSTATES; i++)
+				intracct += cp->cpu_intracct[i];
+			scalehrtime(&intracct);
+
+			/* compute nsec used in the past second */
+			intrused = intracct - cp->cpu_intrlast;
+			cp->cpu_intrlast = intracct;
+
+			/* limit the value for safety (and the first pass) */
+			if (intrused >= maxnsec)
+				intrused = maxnsec - 1;
+
+			/* calculate %time in interrupt */
+			load = (precision * intrused) / maxnsec;
+			ASSERT(load >= 0 && load < precision);
+			change = cp->cpu_intrload - load;
+
+			/* jump to new max, or decay the old max */
+			if (change < 0)
+				cp->cpu_intrload = load;
+			else if (change > 0)
+				cp->cpu_intrload -= (change + 3) / 4;
+
+			DTRACE_PROBE3(cpu_intrload,
+			    cpu_t *, cp,
+			    hrtime_t, intracct,
+			    hrtime_t, intrused);
 		}
+
 		if (do_lgrp_load &&
 		    (cp->cpu_flags & CPU_EXISTS)) {
 			/*
@@ -884,8 +925,8 @@
 			pgcnt_t avail =
 			    MAX((spgcnt_t)(availrmem - swapfs_minfree), 0);
 
-			maxswap = k_anoninfo.ani_mem_resv
-					+ k_anoninfo.ani_max +avail;
+			maxswap = k_anoninfo.ani_mem_resv +
+			    k_anoninfo.ani_max +avail;
 			free = k_anoninfo.ani_free + avail;
 			resv = k_anoninfo.ani_phys_resv +
 			    k_anoninfo.ani_mem_resv;
@@ -1940,7 +1981,7 @@
 		case TOD_NOFAULT:
 			plat_tod_fault(TOD_NOFAULT);
 			cmn_err(CE_NOTE, "Restarted tracking "
-					"Time of Day clock.");
+			    "Time of Day clock.");
 			tod_faulted = ftype;
 			break;
 		case TOD_REVERSED:
@@ -2104,7 +2145,7 @@
 		 * variation from reference freq in quartiles
 		 */
 		dtick_delta = (dtick_avg - TOD_REF_FREQ) /
-			(TOD_REF_FREQ >> 2);
+		    (TOD_REF_FREQ >> 2);
 
 		/*
 		 * Even with a perfectly functioning TOD device,
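The new clock() code above computes cpu_intrload once per second: the interrupt time is summed across micro states and scaled to nanoseconds, the delta against the previous second becomes a 0-99 percentage, and the stored value jumps straight up to a higher load but decays toward a lower one by a quarter of the difference each second. A self-contained user-level sketch of that arithmetic (illustrative only, not the kernel code):

	#include <assert.h>

	/*
	 * intracct is the cumulative interrupt time in nsec, *lastp the value
	 * recorded one second ago, *loadp the smoothed 0-99 load percentage.
	 */
	static void
	update_intrload(long long intracct, long long *lastp, int *loadp)
	{
		const long long maxnsec = 1000000000LL;	/* nsec per second */
		const int precision = 100;
		long long intrused;
		int load, change;

		intrused = intracct - *lastp;
		*lastp = intracct;

		/* limit the value for safety (and the first pass) */
		if (intrused >= maxnsec)
			intrused = maxnsec - 1;

		load = (int)((precision * intrused) / maxnsec);
		assert(load >= 0 && load < precision);

		change = *loadp - load;
		if (change < 0)			/* jump to the new maximum */
			*loadp = load;
		else if (change > 0)		/* decay the old maximum */
			*loadp -= (change + 3) / 4;
	}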
--- a/usr/src/uts/common/os/cpu.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/os/cpu.c	Mon Sep 17 15:47:19 2007 -0700
@@ -169,6 +169,8 @@
 	kstat_named_t cpu_nsec_idle;
 	kstat_named_t cpu_nsec_user;
 	kstat_named_t cpu_nsec_kernel;
+	kstat_named_t cpu_nsec_intr;
+	kstat_named_t cpu_load_intr;
 	kstat_named_t wait_ticks_io;
 	kstat_named_t bread;
 	kstat_named_t bwrite;
@@ -224,6 +226,8 @@
 	{ "cpu_nsec_idle",	KSTAT_DATA_UINT64 },
 	{ "cpu_nsec_user",	KSTAT_DATA_UINT64 },
 	{ "cpu_nsec_kernel",	KSTAT_DATA_UINT64 },
+	{ "cpu_nsec_intr",	KSTAT_DATA_UINT64 },
+	{ "cpu_load_intr",	KSTAT_DATA_UINT64 },
 	{ "wait_ticks_io", 	KSTAT_DATA_UINT64 },
 	{ "bread", 		KSTAT_DATA_UINT64 },
 	{ "bwrite", 		KSTAT_DATA_UINT64 },
@@ -3012,6 +3016,8 @@
 	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
 	csskd->cpu_ticks_kernel.value.ui64 =
 	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
+	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
+	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
 	csskd->bread.value.ui64 = css->bread;
 	csskd->bwrite.value.ui64 = css->bwrite;
 	csskd->lread.value.ui64 = css->lread;
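The two new fields are exported through each CPU's existing "sys" kstat, alongside cpu_nsec_idle and friends visible in the hunk above, so they can be read from user level with libkstat. A minimal reader sketch, assuming CPU instance 0 is present (compile with -lkstat):

	#include <stdio.h>
	#include <kstat.h>

	int
	main(void)
	{
		kstat_ctl_t *kc;
		kstat_t *ksp;
		kstat_named_t *nsec, *load;

		if ((kc = kstat_open()) == NULL)
			return (1);
		if ((ksp = kstat_lookup(kc, "cpu", 0, "sys")) == NULL ||
		    kstat_read(kc, ksp, NULL) == -1) {
			(void) kstat_close(kc);
			return (1);
		}
		nsec = kstat_data_lookup(ksp, "cpu_nsec_intr");
		load = kstat_data_lookup(ksp, "cpu_load_intr");
		if (nsec != NULL && load != NULL)
			(void) printf("cpu0: %llu nsec in interrupt, load %llu%%\n",
			    (unsigned long long)nsec->value.ui64,
			    (unsigned long long)load->value.ui64);
		(void) kstat_close(kc);
		return (0);
	}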
--- a/usr/src/uts/common/os/softint.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/os/softint.c	Mon Sep 17 15:47:19 2007 -0700
@@ -33,6 +33,9 @@
 #include <sys/cmn_err.h>
 #include <sys/debug.h>
 #include <sys/kdi_impl.h>
+#include <sys/cpuvar.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
 
 /*
  * Handle software interrupts through 'softcall' mechanism
@@ -44,25 +47,83 @@
  *
  * softint must take care of executing the entries in the FIFO
  * order. It could be called simultaneously from multiple cpus, however only
- * one instance of softint should process the softcall list, this is
- * ensured by
- * - the state the variable softcall_state will be at time to time.
- *   (IDLE->PEND->DRAIN->IDLE)
+ * one instance of softint should process the softcall list, except when a
+ * CPU is stuck under high interrupt load and cannot execute the callbacks.
+ * The state diagram is as follows :-
+ *
+ *	- Upper half, which is the same as the old state machine
+ *	  (IDLE->PEND->DRAIN->IDLE)
+ *
+ *	- Lower half, which steals the entries from the softcall queue and
+ *        executes them in the context of the softint interrupt handler.
+ *        The interrupt handler is fired on a different CPU via a cross-call.
+ *
+ * Starting state is IDLE.
+ *
+ * 				softint()
+ *
  *
- * These states are needed for softcall mechanism since  Solaris has only
- * one interface(ie. siron ) as of now for
- * - raising a soft interrupt architecture independently(ie not through
+ *				(c)
+ * 	____________________________________________________
+ * 	|                          ^                         ^
+ * 	v            (a)           |           (b)           |
+ * 	IDLE--------------------->PEND--------------------->DRAIN
+ *	^                         |                         |
+ * 	|                         |                         |
+ * 	|                         |                         |
+ * 	|                         |                         |
+ * 	|                         |                         |
+ * 	|                         d                         d
+ * 	|                         |                         |
+ * 	|                         v                         v
+ * 	|                         PEND                      DRAIN
+ * 	|            (e)           &                          &
+ * 	|<-----------------------STEAL                      STEAL
+ * 	^                                                    |
+ * 	|                                                    |
+ * 	|                         (e)                        v
+ * 	|_________________________<__________________________|
+ *
+ *
+ *
+ * Edges (a)->(b)->(c) are the same as in the old state machine, and
+ * the corresponding states are mutually exclusive.
+ *
+ * a - When an entry is enqueued on the softcall queue, the state
+ *     moves from IDLE to PEND.
+ *
+ * b - When the interrupt handler has started processing the softcall queue.
+ *
+ * c - When the interrupt handler has finished processing the softcall queue,
+ *     the state machine goes back to IDLE.
+ *
+ * d - softcall() generates another softlevel1 iff the interrupt handler
+ *     hasn't run recently.
+ *
+ * e - Either PEND|STEAL or DRAIN|STEAL is set. We let the softlevel1
+ *     handler exit because all the entries have been processed.
+ *
+ * When a CPU is pinned by higher level interrupts for more than
+ * softcall_delay clock ticks, SOFT_STEAL is OR'ed in so that the softlevel1
+ * handler on another CPU can drain the queue.
+ *
+ * These states are needed for the softcall mechanism since Solaris has only
+ * one interface (ie. siron) as of now for :
+ *
+ * - raising a soft interrupt architecture independently (ie not through
  *   setsoftint(..) )
  * - to process the softcall queue.
  */
 
 #define	NSOFTCALLS	200
+
 /*
  * Defined states for softcall processing.
  */
 #define	SOFT_IDLE		0x01	/* no processing is needed */
 #define	SOFT_PEND		0x02	/* softcall list needs processing */
-#define	SOFT_DRAIN		0x04	/* the list is being processed */
+#define	SOFT_DRAIN		0x04	/* list is being processed */
+#define	SOFT_STEAL		0x08	/* list is being stolen for draining */
 
 typedef struct softcall {
 	void (*sc_func)(void *);	/* function to call */
@@ -70,8 +131,33 @@
 	struct softcall *sc_next;	/* next in list */
 } softcall_t;
 
-static softcall_t softcalls[NSOFTCALLS], *softhead, *softtail, *softfree;
+/*
+ * softcall list and state variables.
+ */
+static softcall_t *softcalls;
+static softcall_t *softhead, *softtail, *softfree;
 static uint_t	softcall_state;
+static clock_t softcall_tick;
+
+/*
+ * This ensures that softcall entries don't get stuck for long. It is
+ * expressed in units of 10 milliseconds (1 unit = 10 ms). When hires_tick
+ * is set or another clock frequency is used, softcall_init() ensures that
+ * the value is still interpreted as 1 unit = 10 milliseconds.
+ */
+static int softcall_delay = 1;
+
+/*
+ * The last CPU which will drain the softcall queue.
+ */
+static int softcall_latest_cpuid = -1;
+
+/*
+ * CPUSET to hold the CPUs which are currently processing the softcall
+ * queue. More than one CPU can have its bit set, but that happens
+ * only when they are stuck.
+ */
+static cpuset_t *softcall_cpuset = NULL;
 
 /*
  * protects softcall lists and control variable softcall_state.
@@ -79,20 +165,121 @@
 static kmutex_t	softcall_lock;
 
 static void (*kdi_softcall_func)(void);
+extern void siron_poke_cpu(cpuset_t);
 
 extern void siron(void);
-extern void kdi_siron(void);
 
 void
 softcall_init(void)
 {
 	softcall_t *sc;
 
+	softcalls = kmem_zalloc(sizeof (softcall_t) * NSOFTCALLS, KM_SLEEP);
+	softcall_cpuset = kmem_zalloc(sizeof (cpuset_t), KM_SLEEP);
 	for (sc = softcalls; sc < &softcalls[NSOFTCALLS]; sc++) {
 		sc->sc_next = softfree;
 		softfree = sc;
 	}
-	mutex_init(&softcall_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
+	mutex_init(&softcall_lock, NULL, MUTEX_SPIN,
+	    (void *)ipltospl(SPL8));
+	softcall_state = SOFT_IDLE;
+	softcall_tick = lbolt;
+
+	if (softcall_delay < 0)
+		softcall_delay = 1;
+
+	/*
+	 * Since softcall_delay is expressed as 1 = 10 milliseconds.
+	 */
+	softcall_delay = softcall_delay * (hz/100);
+	CPUSET_ZERO(*softcall_cpuset);
+}
+
+/*
+ * Gets called when the softcall queue is not moving forward. We choose
+ * a CPU to poke, excluding the ones which have already been poked.
+ */
+static int
+softcall_choose_cpu()
+{
+	cpu_t *cplist = CPU;
+	cpu_t *cp;
+	int intr_load = INT_MAX;
+	int cpuid = -1;
+	cpuset_t poke;
+	int s;
+
+	ASSERT(getpil() >= DISP_LEVEL);
+	ASSERT(ncpus > 1);
+	ASSERT(MUTEX_HELD(&softcall_lock));
+
+	CPUSET_ZERO(poke);
+
+	/*
+	 * The hint is to start from current CPU.
+	 */
+	cp = cplist;
+	do {
+		if (CPU_IN_SET(*softcall_cpuset, cp->cpu_id) ||
+		    (cp->cpu_flags & CPU_ENABLE) == 0)
+			continue;
+
+		/* if CPU is not busy */
+		if (cp->cpu_intrload == 0) {
+			cpuid = cp->cpu_id;
+			break;
+		}
+
+		if (cp->cpu_intrload < intr_load) {
+			cpuid = cp->cpu_id;
+			intr_load = cp->cpu_intrload;
+		} else if (cp->cpu_intrload == intr_load) {
+			/*
+			 * We want to poke CPUs having similar
+			 * load because we don't know which CPU
+			 * can acknowledge the level1 interrupt. The
+			 * list of such CPUs should not be large.
+			 */
+			if (cpuid != -1) {
+				/*
+				 * Also add the CPU chosen earlier,
+				 * since it has the same interrupt load.
+				 */
+				CPUSET_ADD(poke, cpuid);
+				cpuid = -1;
+			}
+
+			CPUSET_ADD(poke, cp->cpu_id);
+		}
+	} while ((cp = cp->cpu_next_onln) != cplist);
+
+	/* if we found a single best CPU to poke */
+	if (cpuid != -1) {
+		CPUSET_ZERO(poke);
+		CPUSET_ADD(poke, cpuid);
+	}
+
+	if (CPUSET_ISNULL(poke)) {
+		mutex_exit(&softcall_lock);
+		return (0);
+	}
+
+	/*
+	 * We first set the bit in cpuset and then poke.
+	 */
+	CPUSET_XOR(*softcall_cpuset, poke);
+	mutex_exit(&softcall_lock);
+
+	/*
+	 * If softcall() was called at a low pil then we may
+	 * get preempted before we raise the PIL. That is okay
+	 * because we are only about to poke CPUs; at worst another
+	 * thread may start choosing CPUs in this routine as well.
+	 */
+	s = splhigh();
+	siron_poke_cpu(poke);
+	splx(s);
+	return (1);
 }
 
 /*
@@ -103,6 +290,7 @@
 softcall(void (*func)(void *), void *arg)
 {
 	softcall_t *sc;
+	clock_t w;
 
 	/*
 	 * protect against cross-calls
@@ -111,13 +299,13 @@
 	/* coalesce identical softcalls */
 	for (sc = softhead; sc != 0; sc = sc->sc_next) {
 		if (sc->sc_func == func && sc->sc_arg == arg) {
-			mutex_exit(&softcall_lock);
-			return;
+			goto intr;
 		}
 	}
 
 	if ((sc = softfree) == 0)
 		panic("too many softcalls");
+
 	softfree = sc->sc_next;
 	sc->sc_func = func;
 	sc->sc_arg = arg;
@@ -126,23 +314,34 @@
 	if (softhead) {
 		softtail->sc_next = sc;
 		softtail = sc;
+	} else
+		softhead = softtail = sc;
+
+intr:
+	if (softcall_state & SOFT_IDLE) {
+		softcall_state = SOFT_PEND;
+		softcall_tick = lbolt;
 		mutex_exit(&softcall_lock);
-	} else {
-		softhead = softtail = sc;
-		if (softcall_state == SOFT_DRAIN)
+		siron();
+	} else if (softcall_state & (SOFT_DRAIN|SOFT_PEND)) {
+		w = lbolt - softcall_tick;
+		if (w <= softcall_delay || ncpus == 1) {
+			mutex_exit(&softcall_lock);
+			return;
+		}
+
+		if (!(softcall_state & SOFT_STEAL)) {
+			softcall_state |= SOFT_STEAL;
+
 			/*
-			 * softint is already running; no need to
-			 * raise a siron. Due to lock protection of
-			 * softhead / softcall state, we know
-			 * that softint() will see the new addition to
-			 * the softhead queue.
+			 * We want to give it some more time before
+			 * fishing around again.
 			 */
-			mutex_exit(&softcall_lock);
-		else {
-			softcall_state = SOFT_PEND;
-			mutex_exit(&softcall_lock);
-			siron();
+			softcall_tick = lbolt;
 		}
+
+		/* softcall_lock will be released by this routine */
+		(void) softcall_choose_cpu();
 	}
 }
 
@@ -152,39 +351,65 @@
 	kdi_softcall_func = func;
 
 	if (softhead == NULL)
-		kdi_siron();
+		siron();
 }
 
 /*
  * Called to process software interrupts take one off queue, call it,
  * repeat.
  *
- * Note queue may change during call; softcall_lock and state variables
- * softcall_state ensures that
- * -we don't have multiple cpus pulling from the list (thus causing
- *  a violation of FIFO order).
- * -we don't miss a new entry having been added to the head.
- * -we don't miss a wakeup.
+ * Note the queue may change during the call; softcall_lock and the state
+ * variables softcall_state and softcall_latest_cpuid ensure that -
+ * - we don't have multiple cpus pulling from the list (thus causing
+ *   a violation of FIFO order, except when we are stuck).
+ * - we don't miss a new entry having been added to the head.
+ * - we don't miss a wakeup.
  */
 
 void
 softint(void)
 {
-	softcall_t *sc;
+	softcall_t *sc = NULL;
 	void (*func)();
 	caddr_t arg;
+	int cpu_id = CPU->cpu_id;
 
-	/*
-	 * Check if we are asked to process the softcall list.
-	 */
 	mutex_enter(&softcall_lock);
-	if (softcall_state != SOFT_PEND) {
+
+	if (softcall_state & (SOFT_STEAL|SOFT_PEND)) {
+		softcall_state = SOFT_DRAIN;
+	} else  {
+		/*
+		 * The check for softcall_cpuset being
+		 * NULL is required because softint() may
+		 * get called very early during boot.
+		 */
+		if (softcall_cpuset != NULL &&
+		    CPU_IN_SET(*softcall_cpuset, cpu_id))
+			CPUSET_DEL(*softcall_cpuset, cpu_id);
 		mutex_exit(&softcall_lock);
 		goto out;
 	}
-	softcall_state = SOFT_DRAIN;
+
+	/*
+	 * Setting softcall_latest_cpuid to the current CPU ensures
+	 * that there is only one active softlevel1 handler
+	 * processing the softcall queue.
+	 *
+	 * Since softcall_lock is dropped before calling
+	 * func (the callback), we need softcall_latest_cpuid
+	 * to prevent two softlevel1 handlers from working on the
+	 * queue when the first softlevel1 handler gets
+	 * stuck due to high interrupt load.
+	 */
+	softcall_latest_cpuid = cpu_id;
+
+	/* add ourselves to the cpuset */
+	if (!CPU_IN_SET(*softcall_cpuset, cpu_id))
+		CPUSET_ADD(*softcall_cpuset, cpu_id);
 
 	for (;;) {
+		softcall_tick = lbolt;
 		if ((sc = softhead) != NULL) {
 			func = sc->sc_func;
 			arg = sc->sc_arg;
@@ -192,15 +417,40 @@
 			sc->sc_next = softfree;
 			softfree = sc;
 		}
+
 		if (sc == NULL) {
+			if (CPU_IN_SET(*softcall_cpuset, cpu_id))
+				CPUSET_DEL(*softcall_cpuset, cpu_id);
+
 			softcall_state = SOFT_IDLE;
+			ASSERT(softcall_latest_cpuid == cpu_id);
+			softcall_latest_cpuid = -1;
+
 			mutex_exit(&softcall_lock);
 			break;
 		}
+
 		mutex_exit(&softcall_lock);
 		func(arg);
 		mutex_enter(&softcall_lock);
+
+		/*
+		 * Softcall processing is no longer needed from the
+		 * current interrupt handler because either
+		 *  (a) softcall is in the SOFT_IDLE state, or
+		 *  (b) another CPU is already draining the softcall
+		 *	queue and the current softlevel1 is no
+		 *	longer required.
+		 */
+		if (softcall_latest_cpuid != cpu_id) {
+			if (CPU_IN_SET(*softcall_cpuset, cpu_id))
+				CPUSET_DEL(*softcall_cpuset, cpu_id);
+
+			mutex_exit(&softcall_lock);
+			break;
+		}
 	}
+
 out:
 	if ((func = kdi_softcall_func) != NULL) {
 		kdi_softcall_func = NULL;
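Two details of the new softint.c logic are worth making concrete. softcall_delay is kept in units of 10 milliseconds and softcall_init() converts it to clock ticks using hz, so the stealing threshold corresponds to the same wall-clock interval whether hz is 100 or, with hires_tick, 1000. And softcall() only attempts to steal when the queue has not advanced for more than that many ticks and another CPU exists to take over. An illustrative stand-alone restatement of both checks (mirrors the patch, not a drop-in replacement for the kernel code):

	/* Convert softcall_delay (1 unit == 10 ms) to ticks: 1 at hz=100, 10 at hz=1000. */
	static int
	softcall_delay_ticks(int delay_units, int hz)
	{
		return (delay_units * (hz / 100));
	}

	/*
	 * softcall() tries to steal only when the drainer has made no progress
	 * for more than the delay and there is another CPU to poke.
	 */
	static int
	softcall_should_steal(long lbolt, long softcall_tick, int delay_ticks, int ncpus)
	{
		return ((lbolt - softcall_tick) > delay_ticks && ncpus > 1);
	}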
--- a/usr/src/uts/common/sys/cpuvar.h	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/common/sys/cpuvar.h	Mon Sep 17 15:47:19 2007 -0700
@@ -214,6 +214,12 @@
 	char		*cpu_supp_freqs;	/* supported freqs in Hz */
 
 	/*
+	 * Interrupt load factor used by dispatcher & softcall
+	 */
+	hrtime_t	cpu_intrlast;   /* total interrupt time (nsec) */
+	int		cpu_intrload;   /* interrupt load factor (0-99%) */
+
+	/*
 	 * New members must be added /before/ this member, as the CTF tools
 	 * rely on this being the last field before cpu_m, so they can
 	 * correctly calculate the offset when synthetically adding the cpu_m
@@ -438,6 +444,12 @@
 		(set1).cpub[_i] |= (set2).cpub[_i];	\
 }
 
+#define	CPUSET_XOR(set1, set2)		{		\
+	int _i;						\
+	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
+		(set1).cpub[_i] ^= (set2).cpub[_i];	\
+}
+
 #define	CPUSET_AND(set1, set2)		{		\
 	int _i;						\
 	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
@@ -465,6 +477,7 @@
 #define	CPUSET_ISNULL(set)		((set) == 0)
 #define	CPUSET_ISEQUAL(set1, set2)	((set1) == (set2))
 #define	CPUSET_OR(set1, set2)		((void)((set1) |= (set2)))
+#define	CPUSET_XOR(set1, set2)		((void)((set1) ^= (set2)))
 #define	CPUSET_AND(set1, set2)		((void)((set1) &= (set2)))
 #define	CPUSET_ZERO(set)		((void)((set) = 0))
 
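CPUSET_XOR is added above in both the multi-word form and the single-word form used when the cpuset fits in one machine word. softcall_choose_cpu() only places CPUs in 'poke' whose bits are known to be clear in *softcall_cpuset, so the XOR there acts as a set operation; in general it toggles membership. A small single-word illustration using a hypothetical 64-bit cpuset type (not the kernel's):

	#include <assert.h>

	typedef unsigned long long cpuset64_t;	/* hypothetical one-word cpuset */

	#define	CPUSET64_ADD(set, cpu)	((set) |= (1ULL << (cpu)))
	#define	CPUSET64_XOR(s1, s2)	((s1) ^= (s2))
	#define	CPUSET64_IN(set, cpu)	(((set) >> (cpu)) & 1ULL)

	int
	main(void)
	{
		cpuset64_t softcall_set = 0, poke = 0;

		CPUSET64_ADD(poke, 3);
		CPUSET64_ADD(poke, 7);

		/* bits 3 and 7 are clear in softcall_set, so XOR sets them */
		CPUSET64_XOR(softcall_set, poke);
		assert(CPUSET64_IN(softcall_set, 3) && CPUSET64_IN(softcall_set, 7));

		/* a second XOR with the same set toggles them back off */
		CPUSET64_XOR(softcall_set, poke);
		assert(softcall_set == 0);
		return (0);
	}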
--- a/usr/src/uts/sun4/os/intr.c	Mon Sep 17 15:08:19 2007 -0700
+++ b/usr/src/uts/sun4/os/intr.c	Mon Sep 17 15:47:19 2007 -0700
@@ -56,8 +56,14 @@
 static struct intr_dist *intr_dist_whead = NULL;
 
 uint64_t siron_inum;
+uint64_t *siron_cpu_inum = NULL;
+uint64_t siron_poke_cpu_inum;
+static int siron_cpu_setup(cpu_setup_t, int, void *);
+extern uint_t softlevel1();
+
 uint64_t poke_cpu_inum;
 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
+uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
 
 /*
  * Note:-
@@ -99,8 +105,15 @@
 	init_ivintr();
 	REGISTER_BBUS_INTR();
 
+	/*
+	 * We just allocate memory for the per-cpu siron right now. The rest
+	 * of the work is done when the CPU is configured.
+	 */
+	siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
 	siron_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
 	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
+	siron_poke_cpu_inum = add_softintr(PIL_13,
+	    siron_poke_cpu_intr, 0, SOFTINT_MT);
 	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
 
 	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -156,16 +169,104 @@
 	kdi_setsoftint(inum);
 }
 
+/*
+ * Generates a softlevel1 interrupt on the current CPU if one
+ * is not pending already.
+ */
 void
 siron(void)
 {
-	if (siron_inum != 0)
-		setsoftint(siron_inum);
-	else
+	uint64_t inum;
+
+	if (siron_inum != 0) {
+		if (siron_cpu_inum[CPU->cpu_id] != 0)
+			inum = siron_cpu_inum[CPU->cpu_id];
+		else
+			inum = siron_inum;
+
+		setsoftint(inum);
+	} else
 		siron_pending = 1;
 }
 
 /*
+ * This routine creates a per-CPU siron inum for CPUs which are
+ * configured during boot.
+ */
+void
+siron_mp_init()
+{
+	cpu_t *c;
+
+	mutex_enter(&cpu_lock);
+	c = cpu_list;
+	do {
+		(void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
+	} while ((c = c->cpu_next) != cpu_list);
+
+	register_cpu_setup_func(siron_cpu_setup, NULL);
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * siron_poke_cpu_intr - cross-call handler.
+ */
+/* ARGSUSED */
+uint_t
+siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
+{
+	/* generate level1 softint */
+	siron();
+	return (1);
+}
+
+/*
+ * This routine generates a cross-call on target CPU(s).
+ */
+void
+siron_poke_cpu(cpuset_t poke)
+{
+	int cpuid = CPU->cpu_id;
+
+	if (CPU_IN_SET(poke, cpuid)) {
+		siron();
+		CPUSET_DEL(poke, cpuid);
+		if (CPUSET_ISNULL(poke))
+			return;
+	}
+
+	xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
+}
+
+/*
+ * This callback function allows us to create per-CPU siron inum.
+ */
+/* ARGSUSED */
+static int
+siron_cpu_setup(cpu_setup_t what, int id, void *arg)
+{
+	cpu_t *cp = cpu[id];
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+	ASSERT(cp != NULL);
+
+	switch (what) {
+	case CPU_CONFIG:
+		siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
+		    (softintrfunc)softlevel1, 0, SOFTINT_ST);
+		break;
+	case CPU_UNCONFIG:
+		(void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
+		siron_cpu_inum[cp->cpu_id] = 0;
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
  * no_ivintr()
  * 	called by setvecint_tl1() through sys_trap()
  *	vector interrupt received but not valid or not