# HG changeset patch # User pm145316 # Date 1175119283 25200 # Node ID 9f2fcd00d0606f63b2dfcb48715265c126a61c86 # Parent 661924bac5685d1604b76a8c168071888bd738ec 6486343 poor mutex performance on large OPL machines diff -r 661924bac568 -r 9f2fcd00d060 usr/src/uts/common/os/mutex.c --- a/usr/src/uts/common/os/mutex.c Wed Mar 28 13:15:12 2007 -0700 +++ b/usr/src/uts/common/os/mutex.c Wed Mar 28 15:01:23 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -212,8 +212,14 @@ * throughput was observed with the given values. For cases where * more than 20 threads were waiting on the same lock, lock throughput * increased by a factor of 5 or more using the backoff algorithm. + * + * Some platforms may provide their own platform specific delay code, + * using plat_lock_delay(backoff). If it is available, plat_lock_delay + * is executed instead of the default delay code. */ +#pragma weak plat_lock_delay + #include #include #include @@ -307,7 +313,11 @@ CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1); - backoff = BACKOFF_BASE; + if (&plat_lock_delay) { + backoff = 0; + } else { + backoff = BACKOFF_BASE; + } for (;;) { spin: @@ -318,16 +328,20 @@ * the spin_count test and call to nulldev are to prevent * the compiler optimizer from eliminating the delay loop. 
*/ - for (backctr = backoff; backctr; backctr--) { - if (!spin_count) (void) nulldev(); - }; /* delay */ - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; + if (&plat_lock_delay) { + plat_lock_delay(&backoff); + } else { + for (backctr = backoff; backctr; backctr--) { + if (!spin_count) (void) nulldev(); + }; /* delay */ + backoff = backoff << 1; /* double it */ + if (backoff > BACKOFF_CAP) { + backoff = BACKOFF_CAP; + } + + SMT_PAUSE(); } - SMT_PAUSE(); - if (panicstr) return; @@ -579,7 +593,12 @@ if (ncpus == 1) panic("lock_set: %p lock held and only one CPU", lp); - backoff = BACKOFF_BASE; + if (&plat_lock_delay) { + backoff = 0; + } else { + backoff = BACKOFF_BASE; + } + while (LOCK_HELD(lp) || !lock_spin_try(lp)) { if (panicstr) return; @@ -590,15 +609,20 @@ * the spin_count test and call to nulldev are to prevent * the compiler optimizer from eliminating the delay loop. */ - for (backctr = backoff; backctr; backctr--) { /* delay */ - if (!spin_count) (void) nulldev(); - } + if (&plat_lock_delay) { + plat_lock_delay(&backoff); + } else { + /* delay */ + for (backctr = backoff; backctr; backctr--) { + if (!spin_count) (void) nulldev(); + } - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; + backoff = backoff << 1; /* double it */ + if (backoff > BACKOFF_CAP) { + backoff = BACKOFF_CAP; + } + SMT_PAUSE(); } - SMT_PAUSE(); } if (spin_count) { @@ -623,7 +647,11 @@ ASSERT(new_pil > LOCK_LEVEL); - backoff = BACKOFF_BASE; + if (&plat_lock_delay) { + backoff = 0; + } else { + backoff = BACKOFF_BASE; + } do { splx(old_pil); while (LOCK_HELD(lp)) { @@ -638,15 +666,19 @@ * spin_count test and call to nulldev are to prevent * compiler optimizer from eliminating the delay loop. 
*/ - for (backctr = backoff; backctr; backctr--) { - if (!spin_count) (void) nulldev(); + if (&plat_lock_delay) { + plat_lock_delay(&backoff); + } else { + for (backctr = backoff; backctr; backctr--) { + if (!spin_count) (void) nulldev(); + } + backoff = backoff << 1; /* double it */ + if (backoff > BACKOFF_CAP) { + backoff = BACKOFF_CAP; + } + + SMT_PAUSE(); } - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; - } - - SMT_PAUSE(); } old_pil = splr(new_pil); } while (!lock_spin_try(lp)); diff -r 661924bac568 -r 9f2fcd00d060 usr/src/uts/common/sys/mutex.h --- a/usr/src/uts/common/sys/mutex.h Wed Mar 28 13:15:12 2007 -0700 +++ b/usr/src/uts/common/sys/mutex.h Wed Mar 28 15:01:23 2007 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 1991-1998 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ #ifndef _SYS_MUTEX_H @@ -84,6 +83,7 @@ extern void mutex_exit(kmutex_t *); extern int mutex_owned(kmutex_t *); extern struct _kthread *mutex_owner(kmutex_t *); +extern void plat_lock_delay(int *); #endif /* _KERNEL */ diff -r 661924bac568 -r 9f2fcd00d060 usr/src/uts/sun4u/opl/os/opl.c --- a/usr/src/uts/sun4u/opl/os/opl.c Wed Mar 28 13:15:12 2007 -0700 +++ b/usr/src/uts/sun4u/opl/os/opl.c Wed Mar 28 15:01:23 2007 -0700 @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *); @@ -88,6 +90,24 @@ static struct memlist *opl_memlist_per_board(struct memlist *ml); +/* + * Note: the FF/DC out-of-order instruction engine takes only a + * single cycle to execute each spin loop; + * for comparison, Panther takes 6 cycles for the same loop. + * The OPL sleep instruction takes approximately 1500 nsec. + * If spin count = OPL_BOFF_SLEEP*OPL_BOFF_SPIN then + * spin time should be equal to OPL_BOFF_TM nsecs. + * Listed values tuned for 2.15GHz to 2.4GHz systems. + * Values may change for future systems. + */ +#define OPL_BOFF_SPIN 720 +#define OPL_BOFF_BASE 1 +#define OPL_BOFF_SLEEP 5 +#define OPL_BOFF_CAP1 20 +#define OPL_BOFF_CAP2 60 +#define OPL_BOFF_MAX (40 * OPL_BOFF_SLEEP) +#define OPL_BOFF_TM 1500 + int set_platform_max_ncpus(void) { @@ -998,3 +1018,96 @@ } return (opl_get_mem_addr(unum, sid, offset, addrp)); } + +void +plat_lock_delay(int *backoff) +{ + int i; + int cnt; + int flag; + int ctr; + hrtime_t delay_start; + /* + * Platform specific lock delay code for OPL + * + * Using staged linear increases in the delay. + * The sleep instruction is the preferred method of delay, + * but its granularity is too coarse for the initial backoff. 
+ */ + + if (*backoff == 0) *backoff = OPL_BOFF_BASE; + + flag = !*backoff; + + if (*backoff < OPL_BOFF_CAP1) { + /* + * If desired backoff is long enough, + * use sleep for most of it + */ + for (cnt = *backoff; + cnt >= OPL_BOFF_SLEEP; + cnt -= OPL_BOFF_SLEEP) { + cpu_smt_pause(); + } + /* + * spin for small remainder of backoff + * + * fake call to nulldev included to prevent + * compiler from optimizing out the spin loop + */ + for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) { + if (flag) (void) nulldev(); + } + } else { + /* backoff is very large. Fill it by sleeping */ + delay_start = gethrtime(); + cnt = *backoff/OPL_BOFF_SLEEP; + /* + * use sleep instructions for delay + */ + for (i = 0; i < cnt; i++) { + cpu_smt_pause(); + } + + /* + * Note: if the other strand executes a sleep instruction, + * then the sleep ends immediately with a minimum time of + * 42 clocks. We check gethrtime to ensure we have + * waited long enough. And we include both a short + * spin loop and a sleep for any final delay time. + */ + + while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) { + cpu_smt_pause(); + for (ctr = OPL_BOFF_SPIN; ctr; ctr--) { + if (flag) (void) nulldev(); + } + } + } + + /* + * We adjust the backoff in three linear stages. + * The initial stage has small increases as this phase + * usually handles locks with light contention. We don't want + * to have a long backoff on a lock that is available. + * + * In the second stage, we are in transition, unsure whether + * the lock is under heavy contention. As the failures to + * obtain the lock increase, we back off further. + * + * For the final stage, we are in a heavily contended or + * long held lock, so we want to reduce the number of tries. + */ + if (*backoff < OPL_BOFF_CAP1) { + *backoff += 1; + } else { + if (*backoff < OPL_BOFF_CAP2) { + *backoff += OPL_BOFF_SLEEP; + } else { + *backoff += 2 * OPL_BOFF_SLEEP; + } + if (*backoff > OPL_BOFF_MAX) { + *backoff = OPL_BOFF_MAX; + } + } +}