changeset 6901:307e592cef33

6510471 svm overlap chain book keeping does not scale well
author jkennedy
date Wed, 18 Jun 2008 08:22:31 -0700
parents 50f0e694522d
children 5b004da8de91
files usr/src/common/lvm/md_convert.c usr/src/uts/common/io/lvm/mirror/mirror.c usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c usr/src/uts/common/io/lvm/mirror/mirror_resync.c usr/src/uts/common/sys/lvm/md_mirror.h
diffstat 5 files changed, 444 insertions(+), 515 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/common/lvm/md_convert.c	Wed Jun 18 00:57:00 2008 -0700
+++ b/usr/src/common/lvm/md_convert.c	Wed Jun 18 08:22:31 2008 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -136,7 +135,7 @@
 
 	/* Compute the offset of the first component */
 	first_comp = sizeof (ms_unit_t) +
-			sizeof (struct ms_row) * (un->un_nrows - 1);
+	    sizeof (struct ms_row) * (un->un_nrows - 1);
 	first_comp = roundup(first_comp, sizeof (long long));
 	if (first_comp_only == FIRST_COMP_OFFSET)
 		return (first_comp);
@@ -169,7 +168,7 @@
 
 	/* Compute the size of the new small ms_unit */
 	first_comp = sizeof (ms_unit32_od_t) +
-			sizeof (struct ms_row32_od) * (un->un_nrows - 1);
+	    sizeof (struct ms_row32_od) * (un->un_nrows - 1);
 	first_comp = roundup(first_comp, sizeof (long long));
 	if (first_comp_only == FIRST_COMP_OFFSET)
 		return (first_comp);
@@ -222,9 +221,9 @@
 		small_un->un_hsp_id = big_un->un_hsp_id;
 		small_un->un_nrows  = big_un->un_nrows;
 		small_un->c.un_size =
-			get_small_stripe_req_size(big_un, COMPLETE_STRUCTURE);
+		    get_small_stripe_req_size(big_un, COMPLETE_STRUCTURE);
 		small_un->un_ocomp  =
-			get_small_stripe_req_size(big_un, FIRST_COMP_OFFSET);
+		    get_small_stripe_req_size(big_un, FIRST_COMP_OFFSET);
 
 		/* walk through all rows */
 		big_mdr   = &big_un->un_row[0];
@@ -236,10 +235,10 @@
 		}
 
 		/* Now copy the components */
-		big_mdcomp   = (ms_comp_t *)(void *)&((char *)big_un)
-				[big_un->un_ocomp];
+		big_mdcomp = (ms_comp_t *)(void *)&((char *)big_un)
+		    [big_un->un_ocomp];
 		small_mdcomp = (ms_comp32_od_t *)(void *)&((char *)small_un)
-				[small_un->un_ocomp];
+		    [small_un->un_ocomp];
 		for (comp = 0; (comp < ncomps); ++comp) {
 			ms_comp_t	*big_mdcp   = &big_mdcomp[comp];
 			ms_comp32_od_t	*small_mdcp = &small_mdcomp[comp];
@@ -255,9 +254,9 @@
 		big_un->un_hsp_id = small_un->un_hsp_id;
 		big_un->un_nrows  = small_un->un_nrows;
 		big_un->c.un_size =
-			get_big_stripe_req_size(small_un, COMPLETE_STRUCTURE);
+		    get_big_stripe_req_size(small_un, COMPLETE_STRUCTURE);
 		big_un->un_ocomp  =
-			get_big_stripe_req_size(small_un, FIRST_COMP_OFFSET);
+		    get_big_stripe_req_size(small_un, FIRST_COMP_OFFSET);
 
 
 		/* walk through all rows */
@@ -270,9 +269,9 @@
 		}
 		/* Now copy the components */
 		big_mdcomp = (ms_comp_t *)(void *)&((char *)big_un)
-				[big_un->un_ocomp];
+		    [big_un->un_ocomp];
 		small_mdcomp = (ms_comp32_od_t *)(void *)&((char *)small_un)
-				[small_un->un_ocomp];
+		    [small_un->un_ocomp];
 		for (comp = 0; (comp < ncomps); ++comp) {
 			ms_comp_t *big_mdcp = &big_mdcomp[comp];
 			ms_comp32_od_t *small_mdcp = &small_mdcomp[comp];
@@ -320,7 +319,7 @@
 			MMSM_BIG2SMALL((&(big_un->un_sm[i])),
 			    (&(small_un->un_sm[i])));
 		}
-		small_un->un_ovrlap_chn_flg = big_un->un_ovrlap_chn_flg;
+		small_un->un_overlap_tree_flag = big_un->un_overlap_tree_flag;
 		small_un->un_read_option = big_un->un_read_option;
 		small_un->un_write_option = big_un->un_write_option;
 		small_un->un_pass_num = big_un->un_pass_num;
@@ -353,7 +352,7 @@
 
 
 		/* Now back to the simple things again */
-		big_un->un_ovrlap_chn_flg = small_un->un_ovrlap_chn_flg;
+		big_un->un_overlap_tree_flag = small_un->un_overlap_tree_flag;
 		big_un->un_read_option = small_un->un_read_option;
 		big_un->un_write_option = small_un->un_write_option;
 		big_un->un_pass_num = small_un->un_pass_num;
--- a/usr/src/uts/common/io/lvm/mirror/mirror.c	Wed Jun 18 00:57:00 2008 -0700
+++ b/usr/src/uts/common/io/lvm/mirror/mirror.c	Wed Jun 18 08:22:31 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -58,6 +58,7 @@
 #include <sys/sysevent/eventdefs.h>
 #include <sys/sysevent/svm.h>
 #include <sys/lvm/mdmn_commd.h>
+#include <sys/avl.h>
 
 md_ops_t		mirror_md_ops;
 #ifndef	lint
@@ -337,11 +338,11 @@
 			 * flag. They are both exclusive tests.
 			 */
 			open_comp = (frm_probe) ?
-					(shared->ms_flags & MDM_S_PROBEOPEN):
-					(shared->ms_flags & MDM_S_ISOPEN);
+			    (shared->ms_flags & MDM_S_PROBEOPEN):
+			    (shared->ms_flags & MDM_S_ISOPEN);
 			if ((shared->ms_flags & MDM_S_IOERR || !open_comp) &&
-				((shared->ms_state == CS_OKAY) ||
-				(shared->ms_state == CS_RESYNC))) {
+			    ((shared->ms_state == CS_OKAY) ||
+			    (shared->ms_state == CS_RESYNC))) {
 				if (clr_error) {
 					shared->ms_flags &= ~MDM_S_IOERR;
 				}
@@ -418,7 +419,7 @@
 	sm = &un->un_sm[smi];
 	smic = &un->un_smic[smi];
 	shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
-		(sm->sm_dev, sm, ci);
+	    (sm->sm_dev, sm, ci);
 
 	if (shared->ms_state != CS_ERRED)
 		return (0);
@@ -447,12 +448,12 @@
 			rw_exit(&mirror_md_ops.md_link_rw.lock);
 #ifdef DEBUG
 		if (mirror_debug_flag)
-		    printf("send alloc hotspare, flags=0x%x %x, %x, %x, %x\n",
-			flags,
-			allochspmsg.msg_allochsp_mnum,
-			allochspmsg.msg_allochsp_sm,
-			allochspmsg.msg_allochsp_comp,
-			allochspmsg.msg_allochsp_hs_id);
+			printf("send alloc hotspare, flags="
+			    "0x%x %x, %x, %x, %x\n", flags,
+			    allochspmsg.msg_allochsp_mnum,
+			    allochspmsg.msg_allochsp_sm,
+			    allochspmsg.msg_allochsp_comp,
+			    allochspmsg.msg_allochsp_hs_id);
 #endif
 		if (flags & MD_HOTSPARE_WMUPDATE) {
 			msgtype  = MD_MN_MSG_ALLOCATE_HOTSPARE2;
@@ -661,16 +662,15 @@
 			md_m_shared_t		*shared;
 
 			shared = (md_m_shared_t *)
-				(*(smic->sm_shared_by_indx))(sm->sm_dev,
-				sm, ci);
+			    (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, ci);
 			/*
 			 * Never called from ioctl context, so pass in
 			 * (IOLOCK *)NULL.  Pass through flags from calling
 			 * routine, also setting XMIT flag.
 			 */
 			if (check_comp_4_hotspares(un, i, ci,
-				(MD_HOTSPARE_XMIT | flags),
-				shared->ms_hs_id, (IOLOCK *)NULL) != 0)
+			    (MD_HOTSPARE_XMIT | flags),
+			    shared->ms_hs_id, (IOLOCK *)NULL) != 0)
 				return (1);
 		}
 	}
@@ -762,8 +762,8 @@
 	if (hotspare_request.dr_pending == 0) {
 		hotspare_request.dr_pending = 1;
 		daemon_request(&md_mhs_daemon,
-		    check_4_hotspares,
-				(daemon_queue_t *)&hotspare_request, REQ_OLD);
+		    check_4_hotspares, (daemon_queue_t *)&hotspare_request,
+		    REQ_OLD);
 	}
 	mutex_exit(&hotspare_request.dr_mx);
 	return (0);
@@ -804,12 +804,11 @@
 	if (get_dev != NULL) {
 		(void) (*get_dev)(tmpdev, smi, ci, &cd);
 		cmn_err(CE_WARN, "md %s: open error on %s",
-			md_shortname(MD_SID(un)),
-			md_devname(MD_UN2SET(un), cd.cd_dev,
-			NULL, 0));
+		    md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un),
+		    cd.cd_dev, NULL, 0));
 	} else {
 		cmn_err(CE_WARN, "md %s: open error",
-			md_shortname(MD_SID(un)));
+		    md_shortname(MD_SID(un)));
 	}
 }
 
@@ -840,62 +839,63 @@
 {
 	mutex_enter(&non_ff_drv_mutex);
 	if (non_ff_drivers == NULL) {
-	    non_ff_drivers = (char **)kmem_alloc(2 * sizeof (char *),
-		KM_NOSLEEP);
-	    if (non_ff_drivers == NULL) {
-		mutex_exit(&non_ff_drv_mutex);
-		return (1);
-	    }
-
-	    non_ff_drivers[0] = (char *)kmem_alloc(strlen(s) + 1, KM_NOSLEEP);
-	    if (non_ff_drivers[0] == NULL) {
-		kmem_free(non_ff_drivers, 2 * sizeof (char *));
-		non_ff_drivers = NULL;
-		mutex_exit(&non_ff_drv_mutex);
-		return (1);
-	    }
-
-	    (void) strcpy(non_ff_drivers[0], s);
-	    non_ff_drivers[1] = NULL;
+		non_ff_drivers = (char **)kmem_alloc(2 * sizeof (char *),
+		    KM_NOSLEEP);
+		if (non_ff_drivers == NULL) {
+			mutex_exit(&non_ff_drv_mutex);
+			return (1);
+		}
+
+		non_ff_drivers[0] = (char *)kmem_alloc(strlen(s) + 1,
+		    KM_NOSLEEP);
+		if (non_ff_drivers[0] == NULL) {
+			kmem_free(non_ff_drivers, 2 * sizeof (char *));
+			non_ff_drivers = NULL;
+			mutex_exit(&non_ff_drv_mutex);
+			return (1);
+		}
+
+		(void) strcpy(non_ff_drivers[0], s);
+		non_ff_drivers[1] = NULL;
 
 	} else {
-	    int i;
-	    char **tnames;
-	    char **tmp;
-
-	    for (i = 0; non_ff_drivers[i] != NULL; i++) {
-		if (strcmp(s, non_ff_drivers[i]) == 0) {
-		    mutex_exit(&non_ff_drv_mutex);
-		    return (0);
+		int i;
+		char **tnames;
+		char **tmp;
+
+		for (i = 0; non_ff_drivers[i] != NULL; i++) {
+			if (strcmp(s, non_ff_drivers[i]) == 0) {
+				mutex_exit(&non_ff_drv_mutex);
+				return (0);
+			}
+		}
+
+		/* allow for new element and null */
+		i += 2;
+		tnames = (char **)kmem_alloc(i * sizeof (char *), KM_NOSLEEP);
+		if (tnames == NULL) {
+			mutex_exit(&non_ff_drv_mutex);
+			return (1);
 		}
-	    }
-
-	    /* allow for new element and null */
-	    i += 2;
-	    tnames = (char **)kmem_alloc(i * sizeof (char *), KM_NOSLEEP);
-	    if (tnames == NULL) {
-		mutex_exit(&non_ff_drv_mutex);
-		return (1);
-	    }
-
-	    for (i = 0; non_ff_drivers[i] != NULL; i++)
-		tnames[i] = non_ff_drivers[i];
-
-	    tnames[i] = (char *)kmem_alloc(strlen(s) + 1, KM_NOSLEEP);
-	    if (tnames[i] == NULL) {
-		/* adjust i so that it is the right count to free */
-		kmem_free(tnames, (i + 2) * sizeof (char *));
-		mutex_exit(&non_ff_drv_mutex);
-		return (1);
-	    }
-
-	    (void) strcpy(tnames[i++], s);
-	    tnames[i] = NULL;
-
-	    tmp = non_ff_drivers;
-	    non_ff_drivers = tnames;
-	    /* i now represents the count we previously alloced */
-	    kmem_free(tmp, i * sizeof (char *));
+
+		for (i = 0; non_ff_drivers[i] != NULL; i++)
+			tnames[i] = non_ff_drivers[i];
+
+		tnames[i] = (char *)kmem_alloc(strlen(s) + 1, KM_NOSLEEP);
+		if (tnames[i] == NULL) {
+			/* adjust i so that it is the right count to free */
+			kmem_free(tnames, (i + 2) * sizeof (char *));
+			mutex_exit(&non_ff_drv_mutex);
+			return (1);
+		}
+
+		(void) strcpy(tnames[i++], s);
+		tnames[i] = NULL;
+
+		tmp = non_ff_drivers;
+		non_ff_drivers = tnames;
+		/* i now represents the count we previously alloced */
+		kmem_free(tmp, i * sizeof (char *));
 	}
 	mutex_exit(&non_ff_drv_mutex);
 
@@ -918,110 +918,126 @@
 	mm_unit_t	*un;
 
 	if (md_ff_disable)
-	    return;
+		return;
 
 	un = MD_UNIT(mnum);
 
 	for (i = 0; i < NMIRROR; i++) {
-	    int			ci;
-	    int			cnt;
-	    int			ff = 1;
-	    mm_submirror_t	*sm;
-	    mm_submirror_ic_t	*smic;
-	    void		(*get_dev)();
-
-	    if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
-		continue;
-
-	    sm = &un->un_sm[i];
-	    smic = &un->un_smic[i];
-
-	    get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
-		"get device", 0);
-
-	    cnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm);
-	    for (ci = 0; ci < cnt; ci++) {
-		int		found = 0;
-		dev_t		ci_dev;
-		major_t		major;
-		dev_info_t	*devi;
-		ms_cd_info_t	cd;
-
-		/* this already returns the hs dev if the device is spared */
-		(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
-
-		ci_dev = md_dev64_to_dev(cd.cd_dev);
-		major = getmajor(ci_dev);
-
-		if (major == md_major) {
-		    /* this component must be a soft partition; get real dev */
-		    minor_t	dev_mnum;
-		    mdi_unit_t	*ui;
-		    mp_unit_t	*un;
-		    set_t	setno;
-		    side_t	side;
-		    md_dev64_t	tmpdev;
-
-		    ui = MDI_UNIT(getminor(ci_dev));
-
-		    /* grab necessary lock */
-		    un = (mp_unit_t *)md_unit_readerlock(ui);
-
-		    dev_mnum = MD_SID(un);
-		    setno = MD_MIN2SET(dev_mnum);
-		    side = mddb_getsidenum(setno);
-
-		    tmpdev = un->un_dev;
-
-		    /* Get dev by device id */
-		    if (md_devid_found(setno, side, un->un_key) == 1) {
-			tmpdev = md_resolve_bydevid(dev_mnum, tmpdev,
-				un->un_key);
-		    }
-
-		    md_unit_readerexit(ui);
-
-		    ci_dev = md_dev64_to_dev(tmpdev);
-		    major = getmajor(ci_dev);
+		int			ci;
+		int			cnt;
+		int			ff = 1;
+		mm_submirror_t		*sm;
+		mm_submirror_ic_t	*smic;
+		void			(*get_dev)();
+
+		if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
+			continue;
+
+		sm = &un->un_sm[i];
+		smic = &un->un_smic[i];
+
+		get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
+		    "get device", 0);
+
+		cnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm);
+		for (ci = 0; ci < cnt; ci++) {
+			int		found = 0;
+			dev_t		ci_dev;
+			major_t		major;
+			dev_info_t	*devi;
+			ms_cd_info_t	cd;
+
+			/*
+			 * this already returns the hs
+			 * dev if the device is spared
+			 */
+			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
+
+			ci_dev = md_dev64_to_dev(cd.cd_dev);
+			major = getmajor(ci_dev);
+
+			if (major == md_major) {
+				/*
+				 * this component must be a soft
+				 * partition; get the real dev
+				 */
+				minor_t	dev_mnum;
+				mdi_unit_t	*ui;
+				mp_unit_t	*un;
+				set_t	setno;
+				side_t	side;
+				md_dev64_t	tmpdev;
+
+				ui = MDI_UNIT(getminor(ci_dev));
+
+				/* grab necessary lock */
+				un = (mp_unit_t *)md_unit_readerlock(ui);
+
+				dev_mnum = MD_SID(un);
+				setno = MD_MIN2SET(dev_mnum);
+				side = mddb_getsidenum(setno);
+
+				tmpdev = un->un_dev;
+
+				/* Get dev by device id */
+				if (md_devid_found(setno, side,
+				    un->un_key) == 1) {
+					tmpdev = md_resolve_bydevid(dev_mnum,
+					    tmpdev, un->un_key);
+				}
+
+				md_unit_readerexit(ui);
+
+				ci_dev = md_dev64_to_dev(tmpdev);
+				major = getmajor(ci_dev);
+			}
+
+			if (ci_dev != NODEV32 &&
+			    (devi = e_ddi_hold_devi_by_dev(ci_dev, 0))
+			    != NULL) {
+				ddi_prop_op_t	prop_op = PROP_LEN_AND_VAL_BUF;
+				int		propvalue = 0;
+				int		proplength = sizeof (int);
+				int		error;
+				struct cb_ops	*cb;
+
+				if ((cb = devopsp[major]->devo_cb_ops) !=
+				    NULL) {
+					error = (*cb->cb_prop_op)
+					    (DDI_DEV_T_ANY, devi, prop_op,
+					    DDI_PROP_NOTPROM|DDI_PROP_DONTPASS,
+					    "ddi-failfast-supported",
+					    (caddr_t)&propvalue, &proplength);
+
+					if (error == DDI_PROP_SUCCESS)
+						found = 1;
+				}
+
+				if (!found && new_non_ff_driver(
+				    ddi_driver_name(devi))) {
+					cmn_err(CE_NOTE, "!md: B_FAILFAST I/O "
+					    "disabled on %s",
+					    ddi_driver_name(devi));
+				}
+
+				ddi_release_devi(devi);
+			}
+
+			/*
+			 * All components must support
+			 * failfast in the submirror.
+			 */
+			if (!found) {
+				ff = 0;
+				break;
+			}
 		}
 
-		if (ci_dev != NODEV32 &&
-		    (devi = e_ddi_hold_devi_by_dev(ci_dev, 0)) != NULL) {
-		    ddi_prop_op_t	prop_op = PROP_LEN_AND_VAL_BUF;
-		    int			propvalue = 0;
-		    int			proplength = sizeof (int);
-		    int			error;
-		    struct cb_ops	*cb;
-
-		    if ((cb = devopsp[major]->devo_cb_ops) != NULL) {
-			error = (*cb->cb_prop_op)(DDI_DEV_T_ANY, devi, prop_op,
-			    DDI_PROP_NOTPROM|DDI_PROP_DONTPASS,
-			    "ddi-failfast-supported",
-			    (caddr_t)&propvalue, &proplength);
-
-			if (error == DDI_PROP_SUCCESS)
-			    found = 1;
-		    }
-
-		    if (!found && new_non_ff_driver(ddi_driver_name(devi)))
-			cmn_err(CE_NOTE, "!md: B_FAILFAST I/O disabled on %s",
-			    ddi_driver_name(devi));
-
-		    ddi_release_devi(devi);
+		if (ff) {
+			sm->sm_flags |= MD_SM_FAILFAST;
+		} else {
+			sm->sm_flags &= ~MD_SM_FAILFAST;
 		}
-
-		/* All components must support failfast in the submirror. */
-		if (!found) {
-		    ff = 0;
-		    break;
-		}
-	    }
-
-	    if (ff) {
-		sm->sm_flags |= MD_SM_FAILFAST;
-	    } else {
-		sm->sm_flags &= ~MD_SM_FAILFAST;
-	    }
 	}
 }
 
@@ -1288,37 +1304,24 @@
 }
 
 void
-mirror_overlap_chain_remove(md_mps_t *ps)
+mirror_overlap_tree_remove(md_mps_t *ps)
 {
 	mm_unit_t	*un;
 
 	if (panicstr)
 		return;
 
-	ASSERT(ps->ps_flags & MD_MPS_ON_OVERLAP);
-
+	VERIFY(ps->ps_flags & MD_MPS_ON_OVERLAP);
 	un = ps->ps_un;
 
-	mutex_enter(&un->un_ovrlap_chn_mx);
-	if (ps->ps_ovrlap_prev != &un->un_ovrlap_chn)
-		ps->ps_ovrlap_prev->ps_ovrlap_next = ps->ps_ovrlap_next;
-	else
-		un->un_ovrlap_chn.ps_ovrlap_next = ps->ps_ovrlap_next;
-	if (ps->ps_ovrlap_next != &un->un_ovrlap_chn)
-		ps->ps_ovrlap_next->ps_ovrlap_prev = ps->ps_ovrlap_prev;
-	else
-		un->un_ovrlap_chn.ps_ovrlap_prev = ps->ps_ovrlap_prev;
-	/* Handle empty overlap chain */
-	if (un->un_ovrlap_chn.ps_ovrlap_prev == &un->un_ovrlap_chn) {
-		un->un_ovrlap_chn.ps_ovrlap_prev =
-		    un->un_ovrlap_chn.ps_ovrlap_next = NULL;
+	mutex_enter(&un->un_overlap_tree_mx);
+	avl_remove(&un->un_overlap_root, ps);
+	ps->ps_flags &= ~MD_MPS_ON_OVERLAP;
+	if (un->un_overlap_tree_flag != 0) {
+		un->un_overlap_tree_flag = 0;
+		cv_broadcast(&un->un_overlap_tree_cv);
 	}
-	if (un->un_ovrlap_chn_flg) {
-		un->un_ovrlap_chn_flg = 0;
-		cv_broadcast(&un->un_ovrlap_chn_cv);
-	}
-	ps->ps_flags &= ~MD_MPS_ON_OVERLAP;
-	mutex_exit(&un->un_ovrlap_chn_mx);
+	mutex_exit(&un->un_overlap_tree_mx);
 }
 
 
@@ -1328,139 +1331,53 @@
  * Check that given i/o request does not cause an overlap with already pending
  * i/o. If it does, block until the overlapped i/o completes.
  *
- * Note: the overlap chain is held as a monotonically increasing doubly-linked
- * list with the sentinel contained in un->un_ovrlap_chn. We avoid a linear
- * search of the list by the following logic:
- *	ps->ps_lastblk < un_ovrlap_chn.ps_ovrlap_next->ps_firstblk => No overlap
- *	ps->ps_firstblk > un_ovrlap_chn.ps_ovrlap_prev->ps_lastblk => No overlap
- * otherwise
- *	scan un_ovrlap_chn.ps_ovrlap_next for location where ps->ps_firstblk
- *	> chain->ps_lastblk. This is the insertion point. As the list is
- *	guaranteed to be ordered there is no need to continue scanning.
- *
  * The flag argument has MD_OVERLAP_ALLOW_REPEAT set if it is ok for the parent
- *	structure to be already on the overlap chain and MD_OVERLAP_NO_REPEAT
- *	if it must not already be on the chain
+ * structure to be already in the overlap tree and MD_OVERLAP_NO_REPEAT if
+ * it must not already be in the tree.
  */
 static void
 wait_for_overlaps(md_mps_t *ps, int flags)
 {
 	mm_unit_t	*un;
-	md_mps_t	*ps1, **head, **tail;
+	avl_index_t	where;
+	md_mps_t	*ps1;
 
 	if (panicstr)
 		return;
 
-
 	un = ps->ps_un;
-
-	mutex_enter(&un->un_ovrlap_chn_mx);
+	mutex_enter(&un->un_overlap_tree_mx);
 	if ((flags & MD_OVERLAP_ALLOW_REPEAT) &&
 	    (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
-		mutex_exit(&un->un_ovrlap_chn_mx);
-		return;
-	}
-
-	ASSERT(!(ps->ps_flags & MD_MPS_ON_OVERLAP));
-	head = &(un->un_ovrlap_chn.ps_ovrlap_next);
-	tail = &(un->un_ovrlap_chn.ps_ovrlap_prev);
-	ps1 = *head;
-	/*
-	 * Check for simple limit cases:
-	 *	*head == NULL
-	 *		insert ps at head of list
-	 *	lastblk < head->firstblk
-	 *		insert at head of list
-	 *	firstblk > tail->lastblk
-	 *		insert at tail of list
-	 */
-	if (ps1 == NULL) {
-		/* Insert at head */
-		ps->ps_ovrlap_next = &un->un_ovrlap_chn;
-		ps->ps_ovrlap_prev = &un->un_ovrlap_chn;
-		*head = ps;
-		*tail = ps;
-		ps->ps_flags |= MD_MPS_ON_OVERLAP;
-		mutex_exit(&un->un_ovrlap_chn_mx);
-		return;
-	} else if (ps->ps_lastblk < (*head)->ps_firstblk) {
-		/* Insert at head */
-		ps->ps_ovrlap_next = (*head);
-		ps->ps_ovrlap_prev = &un->un_ovrlap_chn;
-		(*head)->ps_ovrlap_prev = ps;
-		*head = ps;
-		ps->ps_flags |= MD_MPS_ON_OVERLAP;
-		mutex_exit(&un->un_ovrlap_chn_mx);
-		return;
-	} else if (ps->ps_firstblk > (*tail)->ps_lastblk) {
-		/* Insert at tail */
-		ps->ps_ovrlap_prev = (*tail);
-		ps->ps_ovrlap_next = &un->un_ovrlap_chn;
-		(*tail)->ps_ovrlap_next = ps;
-		*tail = ps;
-		ps->ps_flags |= MD_MPS_ON_OVERLAP;
-		mutex_exit(&un->un_ovrlap_chn_mx);
+		mutex_exit(&un->un_overlap_tree_mx);
 		return;
 	}
-	/* Now we have to scan the list for possible overlaps */
-	while (ps1 != NULL) {
-		/*
-		 * If this region has been put on the chain by another thread
-		 * just exit
-		 */
-		if ((flags & MD_OVERLAP_ALLOW_REPEAT) &&
-		    (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
-			mutex_exit(&un->un_ovrlap_chn_mx);
-			return;
-
+
+	VERIFY(!(ps->ps_flags & MD_MPS_ON_OVERLAP));
+
+	do {
+		ps1 = avl_find(&un->un_overlap_root, ps, &where);
+		if (ps1 == NULL) {
+			/*
+			 * The candidate range does not overlap with any
+			 * range in the tree.  Insert it and be done.
+			 */
+			avl_insert(&un->un_overlap_root, ps, where);
+			ps->ps_flags |= MD_MPS_ON_OVERLAP;
+		} else {
+			/*
+			 * The candidate range would overlap.  Set the flag
+			 * indicating we need to be woken up, and sleep
+			 * until another thread removes a range.  If upon
+			 * waking up we find this mps was put on the tree
+			 * by another thread, the loop terminates.
+			 */
+			un->un_overlap_tree_flag = 1;
+			cv_wait(&un->un_overlap_tree_cv,
+			    &un->un_overlap_tree_mx);
 		}
-		for (ps1 = *head; ps1 && (ps1 != &un->un_ovrlap_chn);
-		    ps1 = ps1->ps_ovrlap_next) {
-			if (ps->ps_firstblk > (*tail)->ps_lastblk) {
-				/* Insert at tail */
-				ps->ps_ovrlap_prev = (*tail);
-				ps->ps_ovrlap_next = &un->un_ovrlap_chn;
-				(*tail)->ps_ovrlap_next = ps;
-				*tail = ps;
-				ps->ps_flags |= MD_MPS_ON_OVERLAP;
-				mutex_exit(&un->un_ovrlap_chn_mx);
-				return;
-			}
-			if (ps->ps_firstblk > ps1->ps_lastblk)
-				continue;
-			if (ps->ps_lastblk < ps1->ps_firstblk) {
-				/* Insert into list at current 'ps1' position */
-				ps->ps_ovrlap_next = ps1;
-				ps->ps_ovrlap_prev = ps1->ps_ovrlap_prev;
-				ps1->ps_ovrlap_prev->ps_ovrlap_next = ps;
-				ps1->ps_ovrlap_prev = ps;
-				ps->ps_flags |= MD_MPS_ON_OVERLAP;
-				mutex_exit(&un->un_ovrlap_chn_mx);
-				return;
-			}
-			break;
-		}
-		if (ps1 != NULL) {
-			un->un_ovrlap_chn_flg = 1;
-			cv_wait(&un->un_ovrlap_chn_cv, &un->un_ovrlap_chn_mx);
-			/*
-			 * Now ps1 refers to the old insertion point and we
-			 * have to check the whole chain to see if we're still
-			 * overlapping any other i/o.
-			 */
-		}
-	}
-
-	/*
-	 * Only get here if we had one overlapping i/o on the list and that
-	 * has now completed. In this case the list is empty so we insert <ps>
-	 * at the head of the chain.
-	 */
-	ASSERT(*head == NULL);
-	*tail = *head = ps;
-	ps->ps_ovrlap_next = ps->ps_ovrlap_prev = &un->un_ovrlap_chn;
-	ps->ps_flags |= MD_MPS_ON_OVERLAP;
-	mutex_exit(&un->un_ovrlap_chn_mx);
+	} while (!(ps->ps_flags & MD_MPS_ON_OVERLAP));
+	mutex_exit(&un->un_overlap_tree_mx);
 }
 
 /*
@@ -1747,7 +1664,7 @@
 	ps->ps_allfrom_sm = SMI2BIT(sm_index);
 
 	if (un->un_sm[sm_index].sm_flags & MD_SM_FAILFAST) {
-	    bp->b_flags |= B_FAILFAST;
+		bp->b_flags |= B_FAILFAST;
 	}
 
 	return (0);
@@ -1794,7 +1711,7 @@
 		return;
 	if (snarfing) {
 		sm->sm_dev = md_getdevnum(setno, mddb_getsidenum(setno),
-						sm->sm_key, MD_NOTRUST_DEVT);
+		    sm->sm_key, MD_NOTRUST_DEVT);
 	} else {
 		if (md_getmajor(sm->sm_dev) == md_major) {
 			su = MD_UNIT(md_getminor(sm->sm_dev));
@@ -1807,12 +1724,10 @@
 	    0, "shared by blk", 0);
 	smic->sm_shared_by_indx = md_get_named_service(sm->sm_dev,
 	    0, "shared by indx", 0);
-	smic->sm_get_component_count =
-	    (int (*)())md_get_named_service(sm->sm_dev, 0,
-		    "get component count", 0);
-	smic->sm_get_bcss =
-	    (int (*)())md_get_named_service(sm->sm_dev, 0,
-		    "get block count skip size", 0);
+	smic->sm_get_component_count = (int (*)())md_get_named_service(
+	    sm->sm_dev, 0, "get component count", 0);
+	smic->sm_get_bcss = (int (*)())md_get_named_service(sm->sm_dev, 0,
+	    "get block count skip size", 0);
 	sm->sm_state &= ~SMS_IGNORE;
 	if (SMS_IS(sm, SMS_OFFLINE))
 		MD_STATUS(un) |= MD_UN_OFFLINE_SM;
@@ -1851,6 +1766,36 @@
 	md_rem_names(sv, nsv);
 }
 
+/*
+ * Comparison function for the avl tree which tracks
+ * outstanding writes on submirrors.
+ *
+ * Returns:
+ *	-1: ps1 < ps2
+ *	 0: ps1 and ps2 overlap
+ *	 1: ps1 > ps2
+ */
+static int
+mirror_overlap_compare(const void *p1, const void *p2)
+{
+	const md_mps_t *ps1 = (md_mps_t *)p1;
+	const md_mps_t *ps2 = (md_mps_t *)p2;
+
+	if (ps1->ps_firstblk < ps2->ps_firstblk) {
+		if (ps1->ps_lastblk >= ps2->ps_firstblk)
+			return (0);
+		return (-1);
+	}
+
+	if (ps1->ps_firstblk > ps2->ps_firstblk) {
+		if (ps1->ps_firstblk <= ps2->ps_lastblk)
+			return (0);
+		return (1);
+	}
+
+	return (0);
+}
+
 /* Return a -1 if optimized record unavailable and set should be released */
 int
 mirror_build_incore(mm_unit_t *un, int snarfing)
@@ -1873,8 +1818,9 @@
 	/* pre-4.1 didn't define CAN_META_CHILD capability */
 	MD_CAPAB(un) = MD_CAN_META_CHILD | MD_CAN_PARENT | MD_CAN_SP;
 
-	un->un_ovrlap_chn_flg = 0;
-	bzero(&un->un_ovrlap_chn, sizeof (un->un_ovrlap_chn));
+	un->un_overlap_tree_flag = 0;
+	avl_create(&un->un_overlap_root, mirror_overlap_compare,
+	    sizeof (md_mps_t), offsetof(md_mps_t, ps_overlap_node));
 
 	for (i = 0; i < NMIRROR; i++)
 		build_submirror(un, i, snarfing);
@@ -1902,8 +1848,8 @@
 			return (1);
 	}
 
-	mutex_init(&un->un_ovrlap_chn_mx, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&un->un_ovrlap_chn_cv, NULL, CV_DEFAULT, NULL);
+	mutex_init(&un->un_overlap_tree_mx, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&un->un_overlap_tree_cv, NULL, CV_DEFAULT, NULL);
 
 	un->un_suspend_wr_flag = 0;
 	mutex_init(&un->un_suspend_wr_mx, NULL, MUTEX_DEFAULT, NULL);
@@ -2001,11 +1947,13 @@
 
 	mirror_commit(un, bits, 0);
 
+	avl_destroy(&un->un_overlap_root);
+
 	/* Destroy all mutexes and condvars before returning. */
 	mutex_destroy(&un->un_suspend_wr_mx);
 	cv_destroy(&un->un_suspend_wr_cv);
-	mutex_destroy(&un->un_ovrlap_chn_mx);
-	cv_destroy(&un->un_ovrlap_chn_cv);
+	mutex_destroy(&un->un_overlap_tree_mx);
+	cv_destroy(&un->un_overlap_tree_cv);
 	mutex_destroy(&un->un_owner_mx);
 	mutex_destroy(&un->un_rs_thread_mx);
 	cv_destroy(&un->un_rs_thread_cv);
@@ -2329,11 +2277,11 @@
 	ui_sm = MDI_UNIT(getminor(md_dev64_to_dev(sm->sm_dev)));
 	if (newstate & (CS_ERRED | CS_RESYNC | CS_LAST_ERRED) &&
 	    ui_sm->ui_tstate & MD_INACCESSIBLE) {
-	    ui_sm->ui_tstate &= ~MD_INACCESSIBLE;
+		ui_sm->ui_tstate &= ~MD_INACCESSIBLE;
 	}
 
-	shared = (md_m_shared_t *)
-		(*(smic->sm_shared_by_indx))(sm->sm_dev, sm, ci);
+	shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
+	    (sm->sm_dev, sm, ci);
 	origstate = shared->ms_state;
 
 	/*
@@ -2345,9 +2293,8 @@
 	if ((! (origstate & (CS_ERRED|CS_LAST_ERRED))) &&
 	    (newstate & (CS_ERRED|CS_LAST_ERRED))) {
 
-		get_dev =
-		    (void (*)())md_get_named_service(sm->sm_dev, 0,
-				"get device", 0);
+		get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
+		    "get device", 0);
 		(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
 
 		err = md_getdevname(setno, mddb_getsidenum(setno), 0,
@@ -2355,7 +2302,7 @@
 
 		if (err == ENOENT) {
 			(void) md_devname(setno, cd.cd_dev, devname,
-				sizeof (devname));
+			    sizeof (devname));
 		}
 
 		cmn_err(CE_WARN, "md: %s: %s needs maintenance",
@@ -2480,12 +2427,8 @@
 		}
 
 		kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
-		rval = mdmn_ksend_message(setno,
-					msgtype,
-					msgflags,
-					(char *)&stchmsg,
-					sizeof (stchmsg),
-					kresult);
+		rval = mdmn_ksend_message(setno, msgtype, msgflags,
+		    (char *)&stchmsg, sizeof (stchmsg), kresult);
 
 		if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
 			mdmn_ksend_show_error(rval, kresult, "STATE UPDATE");
@@ -2562,8 +2505,8 @@
 
 		mcnt = MIN(cnt, lbtodb(1024 * 1024 * 1024));	/* 1 Gig Blks */
 
-		dev = select_read_unit(un, blk, mcnt, &cando, must_be_open, &s,
-			NULL);
+		dev = select_read_unit(un, blk, mcnt, &cando,
+		    must_be_open, &s, NULL);
 		if (dev == (md_dev64_t)0)
 			break;
 
@@ -2617,7 +2560,7 @@
 	 * Make sure this component has other sources
 	 */
 	(void) (*(smic->sm_get_bcss))
-		(dev, sm, ci, &block, &count, &skip, &size);
+	    (dev, sm, ci, &block, &count, &skip, &size);
 
 	if (count == 0)
 		return (1);
@@ -2743,7 +2686,7 @@
 
 			/* Never called from ioctl context, so (IOLOCK *)NULL */
 			set_sm_comp_state(un, smi, ci, CS_LAST_ERRED, 0, flags,
-				(IOLOCK *)NULL);
+			    (IOLOCK *)NULL);
 			/*
 			 * For a MN set, the NOTIFY is done when the state
 			 * change is processed on each node
@@ -2756,7 +2699,7 @@
 		}
 		/* Never called from ioctl context, so (IOLOCK *)NULL */
 		set_sm_comp_state(un, smi, ci, CS_ERRED, 0, flags,
-			(IOLOCK *)NULL);
+		    (IOLOCK *)NULL);
 		/*
 		 * For a MN set, the NOTIFY is done when the state
 		 * change is processed on each node
@@ -2797,8 +2740,8 @@
 
 	/* if we're panicing just let this I/O error out */
 	if (panicstr) {
-	    (void) mirror_done(cb);
-	    return;
+		(void) mirror_done(cb);
+		return;
 	}
 
 	/* reissue the I/O */
@@ -2820,7 +2763,7 @@
 	clear_retry_error(cb);
 
 	cmn_err(CE_NOTE, "!md: %s: Last Erred, retry I/O without B_FAILFAST",
-		md_shortname(getminor(cb->b_edev)));
+	    md_shortname(getminor(cb->b_edev)));
 
 	md_call_strategy(cb, flags, NULL);
 }
@@ -2837,7 +2780,7 @@
 	}
 
 	if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-		mirror_overlap_chain_remove(ps);
+		mirror_overlap_tree_remove(ps);
 
 	smi = 0;
 	ci = 0;
@@ -2937,7 +2880,7 @@
 	 * md_biodone().
 	 */
 	(void) md_mirror_strategy(cb, MD_STR_NOTTOP | MD_STR_WOW
-				    | MD_STR_MAPPED, NULL);
+	    | MD_STR_MAPPED, NULL);
 }
 
 static void
@@ -2977,7 +2920,7 @@
 	 */
 	if (md_mirror_wow_flg & WOW_NOCOPY)
 		(void) md_mirror_strategy(pb, MD_STR_NOTTOP | MD_STR_WOW |
-					    MD_STR_MAPPED | MD_IO_COUNTED, ps);
+		    MD_STR_MAPPED | MD_IO_COUNTED, ps);
 	else
 		md_mirror_copy_write(ps);
 }
@@ -3144,7 +3087,7 @@
 	}
 
 	if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-		mirror_overlap_chain_remove(ps);
+		mirror_overlap_tree_remove(ps);
 
 	/*
 	 * Handle Write-on-Write problem.
@@ -3191,16 +3134,15 @@
 	un = cs->cs_ps->ps_un;
 
 	for (smi = 0; smi < NMIRROR; smi++) {
-	    if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
-		continue;
-
-	    if (cb->b_edev == md_dev64_to_dev(un->un_sm[smi].sm_dev)) {
-		break;
-	    }
+		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
+			continue;
+
+		if (cb->b_edev == md_dev64_to_dev(un->un_sm[smi].sm_dev))
+			break;
 	}
 
 	if (smi >= NMIRROR)
-	    return;
+		return;
 
 	sm = &un->un_sm[smi];
 	smic = &un->un_smic[smi];
@@ -3213,25 +3155,25 @@
 	    cb->b_blkno, &cnt);
 
 	if (shared->ms_flags & MDM_S_IOERR) {
-	    shared->ms_flags &= ~MDM_S_IOERR;
+		shared->ms_flags &= ~MDM_S_IOERR;
 
 	} else {
-	    /* the I/O buf spans components and the first one is not erred */
-	    int	cnt;
-	    int	i;
-
-	    cnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
-	    for (i = 0; i < cnt; i++) {
-		shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
-		    (sm->sm_dev, sm, i);
-
-		if (shared->ms_flags & MDM_S_IOERR &&
-		    shared->ms_state == CS_OKAY) {
-
-		    shared->ms_flags &= ~MDM_S_IOERR;
-		    break;
+		/* the buf spans components and the first one is not erred */
+		int	cnt;
+		int	i;
+
+		cnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
+		for (i = 0; i < cnt; i++) {
+			shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
+			    (sm->sm_dev, sm, i);
+
+			if (shared->ms_flags & MDM_S_IOERR &&
+			    shared->ms_state == CS_OKAY) {
+
+				shared->ms_flags &= ~MDM_S_IOERR;
+				break;
+			}
 		}
-	    }
 	}
 
 	md_unit_writerexit(ui_sm);
@@ -3257,8 +3199,8 @@
 		bp->b_bcount = ldbtob(count);
 		return (0);
 	}
-	bp->b_edev = md_dev64_to_dev(select_read_unit(un, blkno, count, &cando,
-							0, NULL, cs));
+	bp->b_edev = md_dev64_to_dev(select_read_unit(un, blkno,
+	    count, &cando, 0, NULL, cs));
 	bp->b_bcount = ldbtob(cando);
 	if (count != cando)
 		return (cando);
@@ -3634,11 +3576,9 @@
 
 			kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 			rval = mdmn_ksend_message(setno,
-						MD_MN_MSG_REQUIRE_OWNER,
-						msg_flags, /* flags */
-						(char *)msg,
-						sizeof (md_mn_req_owner_t),
-						kres);
+			    MD_MN_MSG_REQUIRE_OWNER,
+			    msg_flags, /* flags */
+			    (char *)msg, sizeof (md_mn_req_owner_t), kres);
 
 			kmem_free(msg, sizeof (md_mn_req_owner_t));
 
@@ -3668,11 +3608,10 @@
 					 * Release the block on the current
 					 * resync region if it is blocked
 					 */
-					ps1 = un->un_rs_prev_ovrlap;
+					ps1 = un->un_rs_prev_overlap;
 					if ((ps1 != NULL) &&
 					    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
-						mirror_overlap_chain_remove(
-						    ps1);
+						mirror_overlap_tree_remove(ps1);
 					mutex_exit(&un->un_owner_mx);
 
 					/*
@@ -3824,14 +3763,14 @@
 
 	/*
 	 * If not MN owner and this is an ABR write, make sure the current
-	 * resync region is on the overlaps chain
+	 * resync region is in the overlaps tree
 	 */
 	mutex_enter(&un->un_owner_mx);
 	if (MD_MNSET_SETNO(setno) && (!(MD_MN_MIRROR_OWNER(un))) &&
 	    ((ui->ui_tstate & MD_ABR_CAP) || (flag & MD_STR_ABR))) {
 		md_mps_t	*ps1;
 		/* Block the current resync region, if not already blocked */
-		ps1 = un->un_rs_prev_ovrlap;
+		ps1 = un->un_rs_prev_overlap;
 
 		if ((ps1 != NULL) && ((ps1->ps_firstblk != 0) ||
 		    (ps1->ps_lastblk != 0))) {
@@ -3845,11 +3784,11 @@
 			/*
 			 * Check to see if we have obtained ownership
 			 * while waiting for overlaps. If we have, remove
-			 * the resync_region entry from the overlap chain
+			 * the resync_region entry from the overlap tree
 			 */
 			if (MD_MN_MIRROR_OWNER(un) &&
 			    (ps1->ps_flags & MD_MPS_ON_OVERLAP)) {
-				mirror_overlap_chain_remove(ps1);
+				mirror_overlap_tree_remove(ps1);
 				rs_on_overlap = 0;
 			}
 		}
@@ -3885,7 +3824,7 @@
 				    MD_SID(un), ps->ps_firstblk);
 #endif
 			if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-				mirror_overlap_chain_remove(ps);
+				mirror_overlap_tree_remove(ps);
 			kmem_cache_free(mirror_parent_cache, ps);
 			md_kstat_waitq_exit(ui);
 			md_unit_readerexit(ui);
@@ -3901,15 +3840,15 @@
 		un = md_unit_readerlock(ui);
 		/*
 		 * For a MN set with an ABR write, if we are now the
-		 * owner and we have a resync region on the overlap
-		 * chain, remove the entry from overlaps and retry the write.
+		 * owner and we have a resync region in the overlap
+		 * tree, remove the entry from overlaps and retry the write.
 		 */
 
 		if (MD_MNSET_SETNO(setno) &&
 		    ((ui->ui_tstate & MD_ABR_CAP) || (flag & MD_STR_ABR))) {
 			mutex_enter(&un->un_owner_mx);
 			if (((MD_MN_MIRROR_OWNER(un))) && rs_on_overlap) {
-				mirror_overlap_chain_remove(ps);
+				mirror_overlap_tree_remove(ps);
 				md_kstat_waitq_exit(ui);
 				mutex_exit(&un->un_owner_mx);
 				md_unit_readerexit(ui);
@@ -3936,7 +3875,7 @@
 	    (flag & MD_STR_ABR)) || (flag & MD_STR_WAR))) {
 		if (!MD_MN_MIRROR_OWNER(un))  {
 			if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-				mirror_overlap_chain_remove(ps);
+				mirror_overlap_tree_remove(ps);
 			md_kstat_waitq_exit(ui);
 			ASSERT(!(flag & MD_STR_WAR));
 			md_unit_readerexit(ui);
@@ -3986,10 +3925,10 @@
 	    (pb->b_flags & B_PHYS) &&
 	    !(ps->ps_flags & MD_MPS_WOW)) {
 		if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-			mirror_overlap_chain_remove(ps);
+			mirror_overlap_tree_remove(ps);
 		md_unit_readerexit(ui);
 		daemon_request(&md_mstr_daemon, handle_wow,
-			(daemon_queue_t *)ps, REQ_OLD);
+		    (daemon_queue_t *)ps, REQ_OLD);
 		return;
 	}
 
@@ -4008,7 +3947,7 @@
 		 */
 		if (more < 0) {
 			if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-				mirror_overlap_chain_remove(ps);
+				mirror_overlap_tree_remove(ps);
 			md_kstat_runq_exit(ui);
 			kmem_cache_free(mirror_child_cache, cs);
 			kmem_cache_free(mirror_parent_cache, ps);
@@ -4205,7 +4144,7 @@
 				 */
 				if (!MD_MN_MIRROR_OWNER(un))  {
 					ps->ps_call = NULL;
-					mirror_overlap_chain_remove(ps);
+					mirror_overlap_tree_remove(ps);
 					md_kstat_waitq_exit(ui);
 					md_unit_readerexit(ui);
 					daemon_request(
@@ -4231,7 +4170,7 @@
 						    MD_SID(un),
 						    ps->ps_firstblk);
 #endif
-					mirror_overlap_chain_remove(ps);
+					mirror_overlap_tree_remove(ps);
 					kmem_cache_free(mirror_parent_cache,
 					    ps);
 					md_kstat_waitq_exit(ui);
@@ -4263,7 +4202,7 @@
 		    current_blkno, mirror_done, cb, KM_NOSLEEP);
 
 		more = mirror_map_read(ps, cs, current_blkno,
-				(u_longlong_t)current_count);
+		    (u_longlong_t)current_count);
 		if (more) {
 			mutex_enter(&ps->ps_mx);
 			ps->ps_frags++;
@@ -4592,12 +4531,12 @@
 		if ((p->rs_type == un->un_rs_type) &&
 		    (p->rs_start < un->un_resync_startbl))
 			break;
-		ps = un->un_rs_prev_ovrlap;
+		ps = un->un_rs_prev_overlap;
 
 		/* Allocate previous overlap reference if needed */
 		if (ps == NULL) {
 			ps = kmem_cache_alloc(mirror_parent_cache,
-				MD_ALLOCFLAGS);
+			    MD_ALLOCFLAGS);
 			ps->ps_un = un;
 			ps->ps_ui = ui;
 			ps->ps_firstblk = 0;
@@ -4605,7 +4544,7 @@
 			ps->ps_flags = 0;
 			md_ioctl_readerexit(lockp);
 			(void) md_ioctl_writerlock(lockp, ui);
-			un->un_rs_prev_ovrlap = ps;
+			un->un_rs_prev_overlap = ps;
 			md_ioctl_writerexit(lockp);
 		} else
 			md_ioctl_readerexit(lockp);
@@ -4642,7 +4581,7 @@
 				    p->rs_size - 1)) {
 					/* Remove previous overlap range */
 					if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-						mirror_overlap_chain_remove(ps);
+						mirror_overlap_tree_remove(ps);
 
 					ps->ps_firstblk = p->rs_start;
 					ps->ps_lastblk = ps->ps_firstblk +
@@ -4660,11 +4599,11 @@
 					 * ownership while waiting for
 					 * overlaps. If we have, remove
 					 * the resync_region entry from the
-					 * overlap chain
+					 * overlap tree
 					 */
 					if (MD_MN_MIRROR_OWNER(un) &&
 					    (ps->ps_flags & MD_MPS_ON_OVERLAP))
-						mirror_overlap_chain_remove(ps);
+						mirror_overlap_tree_remove(ps);
 				}
 			}
 			mutex_exit(&un->un_owner_mx);
@@ -4722,15 +4661,15 @@
 			mutex_exit(&un->un_owner_mx);
 		}
 		(void) md_ioctl_writerlock(lockp, ui);
-		ps = un->un_rs_prev_ovrlap;
+		ps = un->un_rs_prev_overlap;
 		if (ps != NULL) {
 			/* Remove previous overlap range */
 			if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-				mirror_overlap_chain_remove(ps);
+				mirror_overlap_tree_remove(ps);
 			/*
 			 * Release the overlap range reference
 			 */
-			un->un_rs_prev_ovrlap = NULL;
+			un->un_rs_prev_overlap = NULL;
 			kmem_cache_free(mirror_parent_cache,
 			    ps);
 		}
@@ -5023,9 +4962,9 @@
 				    (mm_unit32_od_t *)mddb_getrecaddr(recid);
 				newreqsize = sizeof (mm_unit_t);
 				big_un = (mm_unit_t *)kmem_zalloc(newreqsize,
-					KM_SLEEP);
+				    KM_SLEEP);
 				mirror_convert((caddr_t)small_un,
-					(caddr_t)big_un, SMALL_2_BIG);
+				    (caddr_t)big_un, SMALL_2_BIG);
 				kmem_free(small_un, dep->de_reqsize);
 
 				/*
@@ -5043,7 +4982,7 @@
 				 * record address.
 				 */
 				un = (mm_unit_t *)mddb_getrecaddr_resize(recid,
-					sizeof (*un), 0);
+				    sizeof (*un), 0);
 			}
 			un->c.un_revision &= ~MD_64BIT_META_DEV;
 			break;
@@ -5051,7 +4990,7 @@
 		case MDDB_REV_RB64FN:
 			/* Big device */
 			un = (mm_unit_t *)mddb_getrecaddr_resize(recid,
-				sizeof (*un), 0);
+			    sizeof (*un), 0);
 			un->c.un_revision |= MD_64BIT_META_DEV;
 			un->c.un_flag |= MD_EFILABEL;
 			break;
@@ -5212,7 +5151,7 @@
 mirror_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags)
 {
 	return (mirror_internal_close(getminor(dev), otyp, md_cflags,
-		(IOLOCK *)NULL));
+	    (IOLOCK *)NULL));
 }
 
 
@@ -5301,7 +5240,7 @@
 		sm_cnt++;
 		tmpdev = un->un_sm[i].sm_dev;
 		(void) md_layered_open(mnum, &tmpdev,
-				MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV);
+		    MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV);
 		un->un_sm[i].sm_dev = tmpdev;
 
 		sm_ui = MDI_UNIT(getminor(md_dev64_to_dev(tmpdev)));
@@ -5455,13 +5394,13 @@
 			optrec_id = &(un32->un_rr_dirty_recid);
 
 			for (i = 0; i < un32->un_nsm; i++) {
-			    tmpdev = md_expldev(un32->un_sm[i].sm_dev);
-			    un32->un_sm[i].sm_dev = md_cmpldev
-				(md_makedevice(md_major, MD_MKMIN(setno,
-				MD_MIN2UNIT(md_getminor(tmpdev)))));
-
-			    if (!md_update_minor(setno, mddb_getsidenum
-				(setno), un32->un_sm[i].sm_key))
+				tmpdev = md_expldev(un32->un_sm[i].sm_dev);
+				un32->un_sm[i].sm_dev = md_cmpldev
+				    (md_makedevice(md_major, MD_MKMIN(setno,
+				    MD_MIN2UNIT(md_getminor(tmpdev)))));
+
+				if (!md_update_minor(setno, mddb_getsidenum
+				    (setno), un32->un_sm[i].sm_key))
 				goto out;
 			}
 			break;
@@ -5474,13 +5413,13 @@
 			optrec_id = &(un64->un_rr_dirty_recid);
 
 			for (i = 0; i < un64->un_nsm; i++) {
-			    tmpdev = un64->un_sm[i].sm_dev;
-			    un64->un_sm[i].sm_dev = md_makedevice
-				(md_major, MD_MKMIN(setno, MD_MIN2UNIT
-				(md_getminor(tmpdev))));
-
-			    if (!md_update_minor(setno, mddb_getsidenum
-				(setno), un64->un_sm[i].sm_key))
+				tmpdev = un64->un_sm[i].sm_dev;
+				un64->un_sm[i].sm_dev = md_makedevice
+				    (md_major, MD_MKMIN(setno, MD_MIN2UNIT
+				    (md_getminor(tmpdev))));
+
+				if (!md_update_minor(setno, mddb_getsidenum
+				    (setno), un64->un_sm[i].sm_key))
 				goto out;
 			}
 			break;
--- a/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c	Wed Jun 18 00:57:00 2008 -0700
+++ b/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c	Wed Jun 18 08:22:31 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1668,10 +1668,10 @@
 			 * Release the block on the current resync region if it
 			 * is blocked
 			 */
-			ps1 = un->un_rs_prev_ovrlap;
+			ps1 = un->un_rs_prev_overlap;
 			if ((ps1 != NULL) &&
 			    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
-				mirror_overlap_chain_remove(ps1);
+				mirror_overlap_tree_remove(ps1);
 		}
 
 		un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
@@ -1807,7 +1807,7 @@
 	 * mirror is marked as "Needs Maintenance" and that an optimized
 	 * resync will be done when we resync the mirror, Also clear the
 	 * PREVENT_CHANGE flag and remove the last resync region from the
-	 * overlap chain.
+	 * overlap tree.
 	 */
 	if (p->d.owner == 0) {
 		md_mps_t	*ps;
@@ -1839,9 +1839,9 @@
 		mutex_enter(&un->un_owner_mx);
 		un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE;
 		mutex_exit(&un->un_owner_mx);
-		ps = un->un_rs_prev_ovrlap;
+		ps = un->un_rs_prev_overlap;
 		if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
-			mirror_overlap_chain_remove(ps);
+			mirror_overlap_tree_remove(ps);
 			ps->ps_firstblk = 0;
 			ps->ps_lastblk = 0;
 		}
--- a/usr/src/uts/common/io/lvm/mirror/mirror_resync.c	Wed Jun 18 00:57:00 2008 -0700
+++ b/usr/src/uts/common/io/lvm/mirror/mirror_resync.c	Wed Jun 18 08:22:31 2008 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
  */
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -190,7 +189,7 @@
 
 		if (un->c.un_status & MD_UN_KEEP_DIRTY)
 			if (IS_KEEPDIRTY(i, un))
-			    continue;
+				continue;
 
 		if (!IS_REGION_DIRTY(i, un))
 			continue;
@@ -308,7 +307,7 @@
 	if (!mirror_timeout.dr_pending) {
 		mirror_timeout.dr_pending = 1;
 		daemon_request(&md_mstr_daemon, check_resync_regions,
-				(daemon_queue_t *)&mirror_timeout, REQ_OLD);
+		    (daemon_queue_t *)&mirror_timeout, REQ_OLD);
 	}
 
 	if (mirror_md_ops.md_head != NULL)
@@ -345,7 +344,7 @@
 	for (i = 0; i < NMIRROR; i++) {
 		if (SMS_BY_INDEX_IS(un, i, SMS_OFFLINE)) {
 			mirror_set_sm_state(&un->un_sm[i],
-				&un->un_smic[i], SMS_ATTACHED, 1);
+			    &un->un_smic[i], SMS_ATTACHED, 1);
 			changed++;
 		}
 		if (SMS_BY_INDEX_IS(un, i, SMS_OFFLINE_RESYNC)) {
@@ -415,7 +414,7 @@
 	    mirror_md_ops.md_driver.md_drivername);
 
 	recid =  mddb_createrec(size, typ1, RESYNC_REC,
-			MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
+	    MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
 	if (recid < 0) {
 		if (snarfing && !(md_get_setstatus(setno) & MD_SET_STALE)) {
 			md_set_setstatus(setno, MD_SET_STALE);
@@ -497,18 +496,14 @@
 			return (err);
 	}
 
-	un->un_goingclean_bm =
-	    (uchar_t *)kmem_zalloc((uint_t)(howmany(un->un_rrd_num, NBBY)),
-		KM_SLEEP);
-	un->un_goingdirty_bm =
-	    (uchar_t *)kmem_zalloc((uint_t)(howmany(un->un_rrd_num, NBBY)),
-		KM_SLEEP);
-	un->un_outstanding_writes =
-	    (short *)kmem_zalloc((uint_t)un->un_rrd_num * sizeof (short),
-		KM_SLEEP);
-	un->un_resync_bm =
-	    (uchar_t *)kmem_zalloc((uint_t)(howmany(un->un_rrd_num, NBBY)),
-		KM_SLEEP);
+	un->un_goingclean_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany(
+	    un->un_rrd_num, NBBY)), KM_SLEEP);
+	un->un_goingdirty_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany(
+	    un->un_rrd_num, NBBY)), KM_SLEEP);
+	un->un_outstanding_writes = (short *)kmem_zalloc(
+	    (uint_t)un->un_rrd_num * sizeof (short), KM_SLEEP);
+	un->un_resync_bm = (uchar_t *)kmem_zalloc((uint_t)(howmany(
+	    un->un_rrd_num, NBBY)), KM_SLEEP);
 
 	if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)
 		return (0);
@@ -819,7 +814,7 @@
 	CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo);
 
 	rval = mdmn_ksend_message(setno, MD_MN_MSG_RESYNC_NEXT, MD_MSGF_NO_LOG,
-		(char *)rmsg, sizeof (md_mn_msg_resync_t), kres);
+	    (char *)rmsg, sizeof (md_mn_msg_resync_t), kres);
 
 	CALLB_CPR_SAFE_END(&un->un_rs_cprinfo, &un->un_rs_cpr_mx);
 	mutex_exit(&un->un_rs_cpr_mx);
@@ -830,12 +825,11 @@
 	}
 	kmem_free(kres, sizeof (md_mn_kresult_t));
 	(void) md_unit_readerlock(ui);
-	ps = un->un_rs_prev_ovrlap;
+	ps = un->un_rs_prev_overlap;
 
 	/* Allocate previous overlap reference if needed */
 	if (ps == NULL) {
-		ps = kmem_cache_alloc(mirror_parent_cache,
-			MD_ALLOCFLAGS);
+		ps = kmem_cache_alloc(mirror_parent_cache, MD_ALLOCFLAGS);
 		ps->ps_un = un;
 		ps->ps_ui = ui;
 		ps->ps_firstblk = 0;
@@ -843,7 +837,7 @@
 		ps->ps_flags = 0;
 		md_unit_readerexit(ui);
 		(void) md_unit_writerlock(ui);
-		un->un_rs_prev_ovrlap = ps;
+		un->un_rs_prev_overlap = ps;
 		md_unit_writerexit(ui);
 		(void) md_unit_readerlock(ui);
 	}
@@ -910,10 +904,10 @@
 			 */
 			if (un->un_rs_type != rs_type)
 				return (0);
-			if (un->un_rs_prev_ovrlap->ps_firstblk >
+			if (un->un_rs_prev_overlap->ps_firstblk >
 			    rs_startblk) {
 				currentblk =
-				    un->un_rs_prev_ovrlap->ps_firstblk;
+				    un->un_rs_prev_overlap->ps_firstblk;
 				continue;
 			}
 		}
@@ -940,10 +934,10 @@
 				 */
 				if (un->un_rs_type != rs_type)
 					return (0);
-				if (un->un_rs_prev_ovrlap->ps_firstblk >
+				if (un->un_rs_prev_overlap->ps_firstblk >
 				    rs_startblk)
 					currentblk =
-					    un->un_rs_prev_ovrlap->ps_firstblk;
+					    un->un_rs_prev_overlap->ps_firstblk;
 			}
 		}
 	}
@@ -1623,7 +1617,7 @@
 	 */
 	if (MD_MNSET_SETNO(setno)) {
 		chunk = ((chunk + MD_DEF_RESYNC_BLK_SZ)/MD_DEF_RESYNC_BLK_SZ)
-			* MD_DEF_RESYNC_BLK_SZ;
+		    * MD_DEF_RESYNC_BLK_SZ;
 		if (chunk > un->c.un_total_blocks)
 			chunk = un->c.un_total_blocks;
 	}
@@ -1762,8 +1756,7 @@
 		smic = &un->un_smic[i];
 		if (!SMS_IS(sm, SMS_RUNNING | SMS_LIMPING))
 			continue;
-		compcnt = (*(smic->sm_get_component_count))
-			(sm->sm_dev, sm);
+		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm);
 		for (ci = 0; ci < compcnt; ci++) {
 			SET_RS_SMI(un->un_rs_type, i);
 			SET_RS_CI(un->un_rs_type, ci);
@@ -1809,8 +1802,7 @@
 		smic = &un->un_smic[i];
 		if (!SMS_IS(sm, SMS_INUSE))
 			continue;
-		compcnt = (*(smic->sm_get_component_count))
-			(sm->sm_dev, sm);
+		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, sm);
 		for (ci = 0; ci < compcnt; ci++) {
 			shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
 			    (sm->sm_dev, sm, ci);
@@ -2113,8 +2105,7 @@
 	 */
 	ASSERT(un->un_rs_resync_to_id == 0);
 	un->un_rs_resync_to_id = timeout(resync_progress, un,
-		(clock_t)(drv_usectohz(60000000) *
-		    md_mirror_resync_update_intvl));
+	    (clock_t)(drv_usectohz(60000000) * md_mirror_resync_update_intvl));
 
 	/*
 	 * Handle resync restart from the last logged position. The contents
@@ -2343,15 +2334,15 @@
 
 			un->c.un_status &= ~(MD_UN_RESYNC_CANCEL |
 			    MD_UN_RESYNC_ACTIVE);
-			ps = un->un_rs_prev_ovrlap;
+			ps = un->un_rs_prev_overlap;
 			if (ps != NULL) {
 				/* Remove previous overlap resync region */
 				if (ps->ps_flags & MD_MPS_ON_OVERLAP)
-				mirror_overlap_chain_remove(ps);
+					mirror_overlap_tree_remove(ps);
 				/*
 				 * Release the overlap range reference
 				 */
-				un->un_rs_prev_ovrlap = NULL;
+				un->un_rs_prev_overlap = NULL;
 				kmem_cache_free(mirror_parent_cache,
 				    ps);
 			}
@@ -2822,7 +2813,7 @@
 	typ1 = (mddb_type_t)md_getshared_key(setno,
 	    mirror_md_ops.md_driver.md_drivername);
 	recid = mddb_createrec(size, typ1, RESYNC_REC,
-			MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
+	    MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
 	if (recid < 0)
 		return (-1);
 
@@ -2913,7 +2904,7 @@
 	    mirror_md_ops.md_driver.md_drivername);
 
 	recid = mddb_createrec(size, typ1, RESYNC_REC,
-			MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
+	    MD_CRO_OPTIMIZE|MD_CRO_32BIT, setno);
 	if (recid < 0)
 		return (-1);
 
--- a/usr/src/uts/common/sys/lvm/md_mirror.h	Wed Jun 18 00:57:00 2008 -0700
+++ b/usr/src/uts/common/sys/lvm/md_mirror.h	Wed Jun 18 08:22:31 2008 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -68,9 +67,9 @@
  * macro to test if the current block is within the current resync region
  */
 #define	IN_RESYNC_REGION(un, ps) \
-	((un->un_rs_prev_ovrlap != NULL) && (ps->ps_firstblk >= \
-	    un->un_rs_prev_ovrlap->ps_firstblk) && \
-	    (ps->ps_lastblk <=  un->un_rs_prev_ovrlap->ps_lastblk))
+	((un->un_rs_prev_overlap != NULL) && (ps->ps_firstblk >= \
+	    un->un_rs_prev_overlap->ps_firstblk) && \
+	    (ps->ps_lastblk <=  un->un_rs_prev_overlap->ps_lastblk))
 /*
  * Default resync update interval (in minutes).
  */
@@ -108,8 +107,8 @@
 /*
  * Define for argument in function wait_for_overlaps()
  */
-#define	MD_OVERLAP_ALLOW_REPEAT	0x1	/* Allow if ps already on chain */
-#define	MD_OVERLAP_NO_REPEAT	0	/* ps must not already be on chain */
+#define	MD_OVERLAP_ALLOW_REPEAT	0x1	/* Allow if ps already in tree */
+#define	MD_OVERLAP_NO_REPEAT	0	/* ps must not already be in tree */
 
 /*
  * Define for max retries of mirror_owner
@@ -153,10 +152,10 @@
 	uint_t		un_changecnt;
 	ushort_t	un_nsm;			/* number of submirrors */
 	mm_submirror32_od_t un_sm[NMIRROR];
-	int		un_ovrlap_chn_flg;
-	int		xx_un_ovrlap_chn_mx[2];	/* replaces mutex */
-	ushort_t	xx_un_ovrlap_chn_cv;
-	caddr32_t	xx_un_ovrlap_chn;
+	int		un_overlap_tree_flag;
+	int		xx_un_overlap_tree_mx[2];	/* replaces mutex */
+	ushort_t	xx_un_overlap_tree_cv;
+	caddr32_t	xx_un_overlap_root;
 	mm_rd_opt_t	un_read_option;		/* mirror read option */
 	mm_wr_opt_t	un_write_option;	/* mirror write option */
 	mm_pass_num_t	un_pass_num;		/* resync pass number */
@@ -270,10 +269,11 @@
 	uint_t		 ps_active_cnt;
 	int		 ps_frags;
 	uint_t		 ps_changecnt;
-	struct md_mps	*ps_ovrlap_next;
-	struct md_mps	*ps_ovrlap_prev;
+	struct md_mps	*ps_unused1;
+	struct md_mps	*ps_unused2;
 	void		 (*ps_call)();
 	kmutex_t	 ps_mx;
+	avl_node_t	 ps_overlap_node;
 } md_mps_t;
 
 #define	MD_MPS_ON_OVERLAP	0x0001
@@ -309,9 +309,9 @@
 } md_mcs_t;
 
 typedef struct  mm_mirror_ic {
-	kmutex_t	un_ovrlap_chn_mx;
-	kcondvar_t	un_ovrlap_chn_cv;
-	md_mps_t	un_ovrlap_chn;		/* Sentinel for overlaps */
+	kmutex_t	un_overlap_tree_mx;
+	kcondvar_t	un_overlap_tree_cv;
+	avl_tree_t	un_overlap_root;
 	kmutex_t	un_resync_mx;
 	kcondvar_t	un_resync_cv;
 	short		*un_outstanding_writes; /* outstanding write array */
@@ -348,7 +348,7 @@
 	uint_t		un_changecnt;
 	ushort_t	un_nsm;			/* number of submirrors */
 	mm_submirror_t	un_sm[NMIRROR];
-	int		un_ovrlap_chn_flg;
+	int		un_overlap_tree_flag;
 	mm_rd_opt_t	un_read_option;		/* mirror read option */
 	mm_wr_opt_t	un_write_option;	/* mirror write option */
 	mm_pass_num_t	un_pass_num;		/* resync pass number */
@@ -383,7 +383,7 @@
 	kmutex_t	un_rs_thread_mx;	/* Thread cv mutex */
 	kcondvar_t	un_rs_thread_cv;	/* Cond. Var. for thread */
 	uint_t		un_rs_thread_flags;	/* Thread control flags */
-	md_mps_t	*un_rs_prev_ovrlap;	/* existing overlap request */
+	md_mps_t	*un_rs_prev_overlap;	/* existing overlap request */
 	timeout_id_t	un_rs_resync_to_id;	/* resync progress timeout */
 	kmutex_t	un_rs_progress_mx;	/* Resync progress mutex */
 	kcondvar_t	un_rs_progress_cv;	/* Cond. Var. for progress */
@@ -391,9 +391,9 @@
 	void		*un_rs_msg;		/* Intra-node resync message */
 } mm_unit_t;
 
-#define	un_ovrlap_chn_mx	un_mmic.un_ovrlap_chn_mx
-#define	un_ovrlap_chn_cv	un_mmic.un_ovrlap_chn_cv
-#define	un_ovrlap_chn		un_mmic.un_ovrlap_chn
+#define	un_overlap_tree_mx	un_mmic.un_overlap_tree_mx
+#define	un_overlap_tree_cv	un_mmic.un_overlap_tree_cv
+#define	un_overlap_root		un_mmic.un_overlap_root
 #define	un_resync_mx		un_mmic.un_resync_mx
 #define	un_resync_cv		un_mmic.un_resync_cv
 #define	un_outstanding_writes	un_mmic.un_outstanding_writes
@@ -554,7 +554,7 @@
 extern void		mirror_check_failfast(minor_t mnum);
 extern int		check_comp_4_hotspares(mm_unit_t *, int, int, uint_t,
 			    mddb_recid_t, IOLOCK *);
-extern void		mirror_overlap_chain_remove(md_mps_t *ps);
+extern void		mirror_overlap_tree_remove(md_mps_t *ps);
 extern void		mirror_child_init(md_mcs_t *cs);
 
 /* Externals from mirror_ioctl.c */