Mercurial > illumos > onarm
diff usr/src/cmd/lvm/rpc.metamhd/mhd_set.c @ 0:c9caec207d52 b86
Initial porting based on b86
author | Koji Uno <koji.uno@sun.com> |
---|---|
date | Tue, 02 Jun 2009 18:56:50 +0900 |
parents | |
children | 1a15d5aaf794 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/lvm/rpc.metamhd/mhd_set.c Tue Jun 02 18:56:50 2009 +0900 @@ -0,0 +1,815 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)mhd_set.c 2.6 05/06/08 SMI" + +#include "mhd_local.h" + +/* + * manipulate set list + */ + +/* + * global set list + */ +static mutex_t mhd_set_mx = DEFAULTMUTEX; +static uint_t mhd_nset = 0; +static mhd_drive_set_t **mhd_sets = NULL; + +/* + * add drive to set + */ +void +mhd_add_drive_to_set( + mhd_drive_set_t *sp, + mhd_drive_t *dp +) +{ + mhd_drive_list_t *dlp = &sp->sr_drives; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + assert(DRIVE_IS_IDLE(dp)); + + /* add to set */ + mhd_add_drive(dlp, dp); + + /* adjust backlink */ + dp->dr_sp = sp; +} + +/* + * delete drive from set + */ +void +mhd_del_drive_from_set( + mhd_drive_t *dp +) +{ + mhd_drive_set_t *sp = dp->dr_sp; + mhd_drive_list_t *dlp = &sp->sr_drives; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + assert(DRIVE_IS_IDLE(dp)); + + /* delete from set */ + mhd_del_drive(dlp, dp); + + /* adjust backlink */ + dp->dr_sp = NULL; +} + +/* + * find set in list + */ +static mhd_drive_set_t * +mhd_find_set( + char *setname +) +{ + uint_t i; + + /* check lock */ + assert(MUTEX_HELD(&mhd_set_mx)); + + /* look for set */ + for (i = 0; (i < mhd_nset); ++i) { + mhd_drive_set_t *sp = mhd_sets[i]; + + if (strcmp(setname, sp->sr_name) == 0) + return (sp); + } + + /* not found */ + return (NULL); +} + +/* + * wait for operation to complete + */ +static void +mhd_wait_set( + mhd_drive_set_t *sp, + mhd_drive_list_t *dlp, + mhd_state_t state +) +{ + /* check lock */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + + /* wait for complete */ + for (;;) { + uint_t cnt = 0; + uint_t i; + + /* kick threads */ + for (i = 0; (i < dlp->dl_ndrive); ++i) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + /* IDLE or ERRORED */ + if (state == DRIVE_IDLE) { + if (DRIVE_IS_IDLE(dp)) + continue; + } + + /* operation complete */ + else { + if (! (dp->dr_state & state)) + continue; + } + + /* kick thread */ + mhd_cv_broadcast(&dp->dr_cv); + ++cnt; + } + + /* if complete, quit */ + if (cnt == 0) + break; + + /* wait for something to happen */ + (void) mhd_cv_wait(&sp->sr_cv, &sp->sr_mx); + } +} + +/* + * idle set + */ +static int +mhd_idle_set( + mhd_drive_set_t *sp, + mhd_drive_list_t *dlp, + mhd_error_t *mhep +) +{ + uint_t i; + + /* check lock */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + + /* disarm any failfast */ + if (dlp->dl_ndrive >= sp->sr_drives.dl_ndrive) { + if (mhd_ff_disarm(sp, mhep) != 0) + return (-1); + } + + /* set IDLING */ + for (i = 0; (i < dlp->dl_ndrive); ++i) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (! DRIVE_IS_IDLE(dp)) { + if (mhd_state(dp, DRIVE_IDLING, mhep) != 0) + return (-1); + } + } + + /* wait for IDLE */ + mhd_wait_set(sp, dlp, DRIVE_IDLE); + + /* return success */ + return (0); +} + +/* + * create or update new set + */ +mhd_drive_set_t * +mhd_create_set( + mhd_set_t *mhsp, + mhd_opts_t options, + mhd_drive_list_t *dlp, + mhd_error_t *mhep +) +{ + char *setname; + mhd_drive_set_t *sp; + mhd_drive_list_t *sp_dlp; + mhd_drive_set_t *null_sp; + uint_t i; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + + /* get setname */ + if (mhsp == NULL) + setname = ""; + else + setname = mhsp->setname; + + /* find or create set */ + if ((sp = mhd_find_set(setname)) == NULL) { + /* allocate and initialize set */ + sp = Zalloc(sizeof (*sp)); + sp->sr_name = Strdup(setname); + mhd_mx_init(&sp->sr_mx); + mhd_cv_init(&sp->sr_cv); + sp->sr_ff = -1; + + /* append to set list */ + ++mhd_nset; + mhd_sets = Realloc(mhd_sets, (mhd_nset * sizeof (*mhd_sets))); + mhd_sets[mhd_nset - 1] = sp; + } + sp_dlp = &sp->sr_drives; + + /* if just grabbing null set, return */ + if (mhsp == NULL) + return (sp); + assert(strcmp(setname, "") != 0); + assert(mhep != NULL); + + /* get null set */ + null_sp = mhd_create_set(NULL, 0, NULL, NULL); + assert(null_sp != NULL); + assert(sp != null_sp); + + /* grab set lock */ + mhd_mx_lock(&sp->sr_mx); + + /* save options */ + if (options & MHD_SERIAL) + sp->sr_options |= MHD_SERIAL; + else + sp->sr_options &= ~MHD_SERIAL; + + /* move drives no longer in set to null set */ + if (! (options & MHD_PARTIAL_SET)) { + for (i = 0; (i < sp_dlp->dl_ndrive); /* void */) { + mhd_drive_t *dp = sp_dlp->dl_drives[i]; + uint_t j; + + /* check still there */ + for (j = 0; (j < mhsp->drives.drives_len); ++j) { + mhd_drivename_t mhdp; + + mhdp = mhsp->drives.drives_val[j]; + if (strcmp(dp->dr_rname, mhdp) == 0) + break; + } + if (j < mhsp->drives.drives_len) { + ++i; + continue; + } + + /* idle the drive */ + if (mhd_idle(dp, mhep) != 0) + mhd_clrerror(mhep); + + /* move to null set */ + mhd_del_drive_from_set(dp); + mhd_mx_unlock(&sp->sr_mx); + mhd_mx_lock(&null_sp->sr_mx); + mhd_add_drive_to_set(null_sp, dp); + mhd_mx_unlock(&null_sp->sr_mx); + mhd_mx_lock(&sp->sr_mx); + } + } + + /* add new drives to lists */ + for (i = 0; (i < mhsp->drives.drives_len); ++i) { + mhd_drivename_t mhdp = mhsp->drives.drives_val[i]; + uint_t j; + mhd_drive_t *dp; + + /* check already there */ + for (j = 0; (j < dlp->dl_ndrive); ++j) { + dp = dlp->dl_drives[j]; + if (strcmp(mhdp, dp->dr_rname) == 0) + break; + } + if (j < dlp->dl_ndrive) { + mhd_add_drive(dlp, dp); + continue; + } + + /* add drive to set */ + if ((dp = mhd_create_drive(sp, mhdp, NULL, mhep)) == NULL) { + mhde_perror(mhep, "mhd_create_drive: %s", mhdp); + continue; + } + mhd_add_drive(dlp, dp); + } + + /* debug */ +#ifdef MHD_DEBUG + if (mhd_debug > 0) { + for (i = 0; (i < mhd_nset); ++i) { + mhd_drive_set_t *sp = mhd_sets[i]; + mhd_drive_list_t *dlp = &sp->sr_drives; + char buf[10240]; + uint_t j; + + (void) snprintf(buf, sizeof (buf), "set '%s':", + sp->sr_name); + for (j = 0; (j < dlp->dl_ndrive); ++j) { + mhd_drive_t *dp = dlp->dl_drives[j]; + char *p; + + if ((p = strrchr(dp->dr_rname, '/')) != NULL) + ++p; + else + p = dp->dr_rname; + (void) strncat(buf, " ", sizeof (buf)); + (void) strncat(buf, p, sizeof (buf)); + } + buf[sizeof (buf) - 1] = '\0'; + mhd_eprintf("%s\n", buf); + } + } +#endif /* MHD_DEBUG */ + + /* unlock, return set */ + mhd_mx_unlock(&sp->sr_mx); + return (sp); +} + +/* + * find drive + */ +mhd_drive_t * +mhd_find_drive( + char *rname +) +{ + uint_t i; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + + /* for each set */ + for (i = 0; (i < mhd_nset); ++i) { + mhd_drive_set_t *sp = mhd_sets[i]; + mhd_drive_list_t *dlp = &sp->sr_drives; + uint_t j; + + /* for each drive */ + for (j = 0; (j < dlp->dl_ndrive); ++j) { + mhd_drive_t *dp = dlp->dl_drives[j]; + + if (strcmp(rname, dp->dr_rname) == 0) + return (dp); + } + } + + /* not found */ + return (NULL); +} + +/* + * list all the drives + */ +int +mhd_list_drives( + char *path, + mhd_did_flags_t flags, + mhd_list_res_t *resultsp, + mhd_error_t *mhep +) +{ + mhd_state_t state; + uint_t ndrive, i, j, c; + + /* grab lock */ + mhd_mx_lock(&mhd_set_mx); + + /* add path to list */ + if (mhd_create_drives(path, mhep) != 0) { + mhd_mx_unlock(&mhd_set_mx); + return (-1); + } + + /* get what we want */ + state = 0; + if (flags & MHD_DID_SERIAL) + state |= DRIVE_SERIALING; + if (flags & MHD_DID_TIME) + state |= DRIVE_VTOCING; + if (flags & MHD_DID_CINFO) + state |= DRIVE_CINFOING; + + /* ident and count drives */ + for (ndrive = 0, i = 0; (i < mhd_nset); ++i) { + mhd_drive_set_t *sp = mhd_sets[i]; + mhd_drive_list_t *dlp = &sp->sr_drives; + + /* count drives */ + ndrive += dlp->dl_ndrive; + + /* ident drives */ + if (state != 0) { + mhd_mx_lock(&sp->sr_mx); + for (j = 0; (j < dlp->dl_ndrive); ++j) { + mhd_drive_t *dp = dlp->dl_drives[j]; + + if (mhd_state_set(dp, state, mhep) != 0) { + mhd_mx_unlock(&sp->sr_mx); + mhd_mx_unlock(&mhd_set_mx); + return (-1); + } + } + mhd_wait_set(sp, dlp, state); + mhd_mx_unlock(&sp->sr_mx); + } + } + + /* build list */ + assert(resultsp->results.mhd_drive_info_list_t_len == 0); + assert(resultsp->results.mhd_drive_info_list_t_val == NULL); + resultsp->results.mhd_drive_info_list_t_len = ndrive; + resultsp->results.mhd_drive_info_list_t_val = Zalloc( + ndrive * sizeof (*resultsp->results.mhd_drive_info_list_t_val)); + for (c = 0, i = 0; (i < mhd_nset); ++i) { + mhd_drive_set_t *sp = mhd_sets[i]; + mhd_drive_list_t *dlp = &sp->sr_drives; + + mhd_mx_lock(&sp->sr_mx); + for (j = 0; (j < dlp->dl_ndrive); ++j) { + mhd_drive_t *dp = dlp->dl_drives[j]; + mhd_drive_info_t *ip = + &resultsp->results.mhd_drive_info_list_t_val[c++]; + + ip->dif_name = Strdup(dp->dr_rname); + ip->dif_id = dp->dr_drive_id; + } + mhd_mx_unlock(&sp->sr_mx); + } + assert(c == ndrive); + + /* unlock, return count */ + mhd_mx_unlock(&mhd_set_mx); + return (ndrive); +} + +/* + * release drives + */ +static int +mhd_release_set( + mhd_drive_set_t *sp, + mhd_drive_list_t *dlp, + mhd_error_t *mhep +) +{ + uint_t i; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + + /* idle set */ + if (mhd_idle_set(sp, dlp, mhep) != 0) + return (-1); + + /* release drives */ + for (i = 0; (i < dlp->dl_ndrive); i++) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (mhd_state(dp, DRIVE_RELEASING, mhep) != 0) + return (-1); + } + mhd_wait_set(sp, dlp, DRIVE_IDLE); + + /* return success */ + return (0); +} + +/* + * release drives in set + */ +int +mhd_release_drives( + mhd_set_t *mhsp, + mhd_opts_t options, + mhd_error_t *mhep +) +{ + mhd_drive_list_t dl = mhd_null_list; + mhd_drive_set_t *sp; + int rval; + + /* grab global lock */ + mhd_mx_lock(&mhd_set_mx); + + /* create or update set */ + if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (-1); + } + + /* lock set */ + mhd_mx_lock(&sp->sr_mx); + + /* release drives */ + rval = mhd_release_set(sp, &dl, mhep); + + /* unlock, return success */ +out: + mhd_mx_unlock(&sp->sr_mx); + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (rval); +} + +/* + * reserve drives + */ +static int +mhd_reserve_set( + mhd_drive_set_t *sp, + mhd_drive_list_t *dlp, + mhd_error_t *mhep +) +{ + mhd_msec_t ff = sp->sr_timeouts.mh_ff; + uint_t retry, i, ok; + int rval = 0; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + + /* idle set, idle everyone if cancelling failfast */ + if (ff == 0) { + if (mhd_idle_set(sp, &sp->sr_drives, mhep) != 0) + return (-1); + } else { + if (mhd_idle_set(sp, dlp, mhep) != 0) + return (-1); + } + + /* + * Try to take ownership of the drives twice. This helps + * to avoid the situation where the other machine retakes + * ownership of a majority drives back, but then kills itself + * leaving no owners. + */ + for (retry = 0; (retry < 2); ++retry) { + for (i = 0; (i < dlp->dl_ndrive); i++) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if ((retry == 0) || + ((dp->dr_state == DRIVE_ERRORED) && + (dp->dr_errnum == EACCES))) { + if (mhd_state(dp, DRIVE_RESERVING, mhep) != 0) + return (-1); + } + } + mhd_wait_set(sp, dlp, DRIVE_IDLE); + } + + /* + * Did the take ownership succeed on a majority of the drives? + */ + ok = 0; + for (i = 0; (i < dlp->dl_ndrive); ++i) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (dp->dr_state == DRIVE_IDLE) + ++ok; + } + + /* + * Let the replica majority be the deciding factor, if able to get + * at least a single drive reserved. + */ + if (ok == 0) { + rval = mhd_error(mhep, MHD_E_MAJORITY, sp->sr_name); + goto out; + } + + /* + * Enable the failfast probes if we haven't given up yet. + */ + switch (sp->sr_ff_mode) { + + /* do nothing */ + default: + assert(0); + /* FALLTHROUGH */ + case MHD_FF_NONE: + goto out; + + /* old style per drive failfast */ + case MHD_FF_DRIVER: + for (i = 0; (i < dlp->dl_ndrive); i++) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (dp->dr_state != DRIVE_ERRORED) { + if (mhd_state(dp, DRIVE_FAILFASTING, + mhep) != 0) { + rval = -1; + goto out; + } + } + } + mhd_wait_set(sp, dlp, DRIVE_IDLE); + break; + + /* failfast probe threads */ + case MHD_FF_DEBUG: + case MHD_FF_HALT: + case MHD_FF_PANIC: + if (ff != 0) { + if (mhd_ff_open(sp, mhep) != 0) { + rval = -1; + goto out; + } + for (i = 0; (i < dlp->dl_ndrive); i++) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (mhd_state_set(dp, DRIVE_PROBING, + mhep) != 0) { + rval = -1; + goto out; + } + dp->dr_time = mhd_time(); + } + (void) mhd_ff_rearm(sp, mhep); + } + break; + } + + /* cleanup, return success */ +out: + if (rval != 0) { + mhd_error_t status = mhd_null_error; + + (void) mhd_release_set(sp, dlp, &status); + mhd_clrerror(&status); + } + return (rval); +} + +/* + * reserve drives in set + */ +int +mhd_reserve_drives( + mhd_set_t *mhsp, + mhd_mhiargs_t *timeoutp, + mhd_ff_mode_t ff_mode, + mhd_opts_t options, + mhd_error_t *mhep +) +{ + mhd_drive_list_t dl = mhd_null_list; + mhd_drive_set_t *sp; + int rval; + + /* grab global lock */ + mhd_mx_lock(&mhd_set_mx); + + /* create or update set */ + if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (-1); + } + + /* lock set */ + mhd_mx_lock(&sp->sr_mx); + + /* can't change mode or timeouts of partial set */ + if ((dl.dl_ndrive != sp->sr_drives.dl_ndrive) && + (options & MHD_PARTIAL_SET)) { + if (ff_mode != sp->sr_ff_mode) { + mhd_eprintf("%s: invalid ff_mode %d now %d\n", + sp->sr_name, ff_mode, sp->sr_ff_mode); + ff_mode = sp->sr_ff_mode; + } + if (timeoutp->mh_ff < sp->sr_timeouts.mh_ff) { + mhd_eprintf("%s: invalid mh_ff %d now %d\n", + sp->sr_name, timeoutp->mh_ff, + sp->sr_timeouts.mh_ff); + timeoutp->mh_ff = sp->sr_timeouts.mh_ff; + } + } + + /* save timouts and mode */ + sp->sr_timeouts = *timeoutp; + sp->sr_ff_mode = ff_mode; + + /* reserve drives */ + rval = mhd_reserve_set(sp, &dl, mhep); + + /* unlock, return success */ +out: + mhd_mx_unlock(&sp->sr_mx); + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (rval); +} + +/* + * status drives + */ +static int +mhd_status_set( + mhd_drive_set_t *sp, + mhd_drive_list_t *dlp, + mhd_error_t *mhep +) +{ + uint_t i; + + /* check locks */ + assert(MUTEX_HELD(&mhd_set_mx)); + assert(MUTEX_HELD(&sp->sr_mx)); + + /* status drives */ + for (i = 0; (i < dlp->dl_ndrive); i++) { + mhd_drive_t *dp = dlp->dl_drives[i]; + + if (mhd_state_set(dp, DRIVE_STATUSING, mhep) != 0) + return (-1); + } + mhd_wait_set(sp, dlp, DRIVE_STATUSING); + + /* return success */ + return (0); +} + +/* + * status drives in set + */ +int +mhd_status_drives( + mhd_set_t *mhsp, + mhd_opts_t options, + mhd_drive_status_t **status, + mhd_error_t *mhep +) +{ + mhd_drive_list_t dl = mhd_null_list; + mhd_drive_list_t *dlp = &dl; + mhd_drive_set_t *sp; + uint_t i; + int rval = 0; + + /* grab global lock */ + mhd_mx_lock(&mhd_set_mx); + + /* create or update set */ + if ((sp = mhd_create_set(mhsp, options, &dl, mhep)) == NULL) { + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (-1); + } + + /* lock set */ + mhd_mx_lock(&sp->sr_mx); + + /* status drives */ + if (mhd_status_set(sp, &dl, mhep) != 0) { + rval = -1; + goto out; + } + + /* build list */ + *status = Zalloc(dlp->dl_ndrive * sizeof (**status)); + for (i = 0; (i < dlp->dl_ndrive); ++i) { + mhd_drive_t *dp = dlp->dl_drives[i]; + mhd_drive_status_t *statusp = &(*status)[i]; + + statusp->drive = Strdup(dp->dr_rname); + statusp->errnum = dp->dr_errnum; + } + assert(i == dlp->dl_ndrive); + rval = dlp->dl_ndrive; + + /* unlock, return count */ +out: + mhd_mx_unlock(&sp->sr_mx); + mhd_mx_unlock(&mhd_set_mx); + mhd_free_list(&dl); + return (rval); +}