Mercurial > illumos > illumos-gate
view usr/src/uts/common/avs/ns/dsw/dsw_dev.c @ 13603:19698f035619
2061 uts homebrew offsetofs cause various pointer-cast warnings
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Darren Reed <avalon@coombs.anu.edu.au>
Approved by: Garrett D'Amore <garrett@damore.org>
author | Richard Lowe <richlowe@richlowe.net> |
---|---|
date | Mon, 30 Jan 2012 19:38:22 -0500 |
parents | cd587b0bd19c |
children |
line wrap: on
line source
/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include <sys/types.h> #include <sys/time.h> #include <sys/ksynch.h> #include <sys/kmem.h> #include <sys/errno.h> #include <sys/cmn_err.h> #include <sys/debug.h> #include <sys/ddi.h> #include <sys/nsc_thread.h> #include <sys/sysmacros.h> #include <sys/unistat/spcs_s.h> #include <sys/unistat/spcs_errors.h> #include <sys/unistat/spcs_s_k.h> #include <sys/nsctl/nsctl.h> #include "dsw.h" #include "dsw_dev.h" #include "../rdc/rdc_update.h" #include <sys/nskernd.h> #include <sys/sdt.h> /* dtrace is S10 or later */ #ifdef DS_DDICT #include "../contract.h" #endif /* * Instant Image * * This file contains the core implementation of II. * * II is implemented as a simple filter module that pushes itself between * user (SV, STE, etc.) and SDBC or NET. * */ #define REMOTE_VOL(s, ip) (((s) && ((ip->bi_flags)&DSW_SHDEXPORT)) || \ (!(s)&&((ip->bi_flags)&DSW_SHDIMPORT))) #define total_ref(ip) ((ip->bi_shdref + ip->bi_shdrref + ip->bi_bmpref) + \ (NSHADOWS(ip) ? 0 : ip->bi_mstref + ip->bi_mstrref)) #define II_TAIL_COPY(d, s, m, t) bcopy(&(s.m), &(d.m), \ sizeof (d) - (uintptr_t)&((t *)0)->m) extern dev_info_t *ii_dip; #define II_LINK_CLUSTER(ip, cluster) \ _ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \ &ip->bi_cluster) #define II_UNLINK_CLUSTER(ip) \ _ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster) #define II_LINK_GROUP(ip, group) \ _ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group) #define II_UNLINK_GROUP(ip) \ _ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group) _ii_info_t *_ii_info_top; _ii_info_t *_ii_mst_top = 0; _ii_overflow_t *_ii_overflow_top; _ii_lsthead_t *_ii_cluster_top; _ii_lsthead_t *_ii_group_top; int ii_debug; /* level of cmn_err noise */ int ii_bitmap; /* bitmap operations switch */ uint_t ii_header = 16; /* Undocumented tunable (with adb!), start */ /* of area cleared in volume when a dependent */ /* shadow is disabled. */ /* max # of chunks in copy loop before delay */ int ii_throttle_unit = MIN_THROTTLE_UNIT; /* length of delay during update loop */ int ii_throttle_delay = MIN_THROTTLE_DELAY; int ii_copy_direct = 1; int ii_nconcopy = 10; /* default value when starting with no cache */ kmutex_t _ii_cluster_mutex; kmutex_t _ii_group_mutex; static int _ii_shutting_down = 0; static nsc_io_t *_ii_io, *_ii_ior; static nsc_mem_t *_ii_local_mem; static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[]; static kmutex_t _ii_info_mutex; static kmutex_t _ii_overflow_mutex; static kmutex_t _ii_config_mutex; static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp; static nsc_svc_t *ii_volume_update; /* IIVolumeUpdate token */ static nsc_svc_t *ii_report_luns; /* IIReportLuns token */ static nsc_svc_t *ii_get_initiators; /* IIGetInitiators token */ static ksema_t _ii_concopy_sema; static int _ii_concopy_init = 0; static int _ii_instance = 0; void _ii_deinit_dev(); static void _ii_info_free(_ii_info_t *ip); static void _ii_info_freeshd(_ii_info_t *ip); static void ii_sibling_free(_ii_info_t *ip); ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp); int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp); static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp); static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int); static void _ii_stopvol(_ii_info_t *ip); static int _ii_stopcopy(_ii_info_t *ip); static _ii_info_t *_ii_find_set(char *volume); static _ii_info_t *_ii_find_vol(char *, int); static _ii_overflow_t *_ii_find_overflow(char *volume); static void _ii_ioctl_done(_ii_info_t *ip); static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t); static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int); void _ii_error(_ii_info_t *ip, int error_type); static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd); static int _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd); extern nsc_size_t ii_btsize(nsc_size_t); extern int ii_tinit(_ii_info_t *); extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t); extern void ii_tdelete(_ii_info_t *, chunkid_t); extern void ii_reclaim_overflow(_ii_info_t *); static void ii_overflow_free(_ii_info_t *ip, int disable); static int ii_overflow_attach(_ii_info_t *, char *, int); int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *, nsc_size_t); static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io); static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type); static int _ii_reserve_begin(_ii_info_t *ip); static int _ii_wait_for_it(_ii_info_t *ip); static void _ii_reserve_end(_ii_info_t *ip); static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op); static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *, char **); static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **); #define _ii_unlock_chunk(ip, chunk) _ii_unlock_chunks(ip, chunk, 1) extern const int dsw_major_rev; extern const int dsw_minor_rev; extern const int dsw_micro_rev; extern const int dsw_baseline_rev; /* * These constants are used by ii_overflow_free() to indicate how the * reclamation should take place. * NO_RECLAIM: just detach the overflow from the set; do not * attempt to reclaim chunks, do not decrement the * used-by count * RECLAIM: reclaim all chunks before decrementing the used-by count * INIT_OVR: decrement the used-by count only; do not reclaim chunks */ #define NO_RECLAIM 0 #define RECLAIM 1 #define INIT_OVR 2 struct copy_args { /* arguments passed to copy process */ _ii_info_t *ip; int flag; int rtype; int wait; spcs_s_info_t kstatus; int rc; }; /* set-specific kstats info */ ii_kstat_set_t ii_kstat_set = { { DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG }, { DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG }, { DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG }, { DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG }, { DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG }, { DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG }, { DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG }, { DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG }, { DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG }, { DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR }, { DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR }, { DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR }, { DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR }, { DSW_SKSTAT_SETA, KSTAT_DATA_CHAR }, { DSW_SKSTAT_SETB, KSTAT_DATA_CHAR }, { DSW_SKSTAT_SETC, KSTAT_DATA_CHAR }, { DSW_SKSTAT_SETD, KSTAT_DATA_CHAR }, { DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR }, { DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR }, { DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR }, { DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR }, { DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR }, { DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR }, { DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR }, { DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR }, { DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR }, { DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR }, { DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR }, { DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR }, }; /* * _ii_init_dev * Initialise the shadow driver * */ int _ii_init_dev() { _ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER, _ii_io_def); if (_ii_io == NULL) cmn_err(CE_WARN, "!ii: nsc_register_io failed."); _ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER, _ii_ior_def); if (_ii_ior == NULL) cmn_err(CE_WARN, "!ii: nsc_register_io r failed."); _ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0); if (_ii_local_mem == NULL) cmn_err(CE_WARN, "!ii: nsc_register_mem failed."); if (!_ii_io || !_ii_ior || !_ii_local_mem) { _ii_deinit_dev(); return (ENOMEM); } mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL); ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0); ii_report_luns = nsc_register_svc("IIReportLuns", 0); ii_get_initiators = nsc_register_svc("IIGetInitiators", 0); if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) { _ii_deinit_dev(); return (ENOMEM); } return (0); } /* * _ii_deinit_dev * De-initialise the shadow driver * */ void _ii_deinit_dev() { if (_ii_io) (void) nsc_unregister_io(_ii_io, 0); if (_ii_ior) (void) nsc_unregister_io(_ii_ior, 0); if (_ii_local_mem) (void) nsc_unregister_mem(_ii_local_mem); if (ii_volume_update) (void) nsc_unregister_svc(ii_volume_update); if (ii_report_luns) (void) nsc_unregister_svc(ii_report_luns); if (ii_get_initiators) (void) nsc_unregister_svc(ii_get_initiators); mutex_destroy(&_ii_info_mutex); mutex_destroy(&_ii_overflow_mutex); mutex_destroy(&_ii_config_mutex); mutex_destroy(&_ii_cluster_mutex); mutex_destroy(&_ii_group_mutex); if (_ii_concopy_init) sema_destroy(&_ii_concopy_sema); _ii_concopy_init = 0; } static char * ii_pathname(nsc_fd_t *fd) { char *rc; if (fd == NULL || (rc = nsc_pathname(fd)) == NULL) return (""); else return (rc); } /* * _ii_rlse_d * Internal mechanics of _ii_rlse_devs(). Takes care of * resetting the ownership information as required. */ static void _ii_rlse_d(ip, mst, raw) _ii_info_t *ip; int mst, raw; { _ii_info_dev_t *cip; _ii_info_dev_t *rip; rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev); cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev); DTRACE_PROBE2(_ii_rlse_d_type, _ii_info_dev_t *, rip, _ii_info_dev_t *, cip); if (RSRV(cip)) { if (raw) { ASSERT(cip->bi_orsrv > 0); cip->bi_orsrv--; } else { ASSERT(cip->bi_rsrv > 0); cip->bi_rsrv--; } if (cip->bi_rsrv > 0) { nsc_set_owner(cip->bi_fd, cip->bi_iodev); } else if (cip->bi_orsrv > 0) { nsc_set_owner(cip->bi_fd, rip->bi_iodev); } else { nsc_set_owner(cip->bi_fd, NULL); } if (!RSRV(cip)) { nsc_release(cip->bi_fd); } } else { if (raw) { ASSERT(rip->bi_rsrv > 0); rip->bi_rsrv--; } else { ASSERT(rip->bi_orsrv > 0); rip->bi_orsrv--; } if (rip->bi_rsrv > 0) { nsc_set_owner(rip->bi_fd, rip->bi_iodev); } else if (rip->bi_orsrv > 0) { nsc_set_owner(rip->bi_fd, cip->bi_iodev); } else { nsc_set_owner(rip->bi_fd, NULL); } if (!RSRV(rip)) { rip->bi_flag = 0; nsc_release(rip->bi_fd); cv_broadcast(&ip->bi_releasecv); } } } /* * _ii_rlse_devs * Release named underlying devices. * * NOTE: the 'devs' argument must be the same as that passed to * the preceding _ii_rsrv_devs call. */ void _ii_rlse_devs(ip, devs) _ii_info_t *ip; int devs; { ASSERT(!(devs & (MST|SHD))); ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef); if (!ip) { cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip"); return; } mutex_enter(&ip->bi_rsrvmutex); DTRACE_PROBE(_ii_rlse_devs_mutex); if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) { if (NSHADOWS(ip) && ip != ip->bi_master) _ii_rlse_devs(ip->bi_master, devs&(MST|MSTR)); else _ii_rlse_d(ip, 1, (devs&MSTR)); } if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) { _ii_rlse_d(ip, 0, (devs&SHDR)); } if ((devs&BMP) != 0 && ip->bi_bmpfd) { if (--(ip->bi_bmprsrv) == 0) nsc_release(ip->bi_bmpfd); } ASSERT(ip->bi_bmprsrv >= 0); ASSERT(ip->bi_shdrsrv >= 0); ASSERT(ip->bi_shdrrsrv >= 0); mutex_exit(&ip->bi_rsrvmutex); } /* * _ii_rsrv_d * Reserve device flagged, unless its companion is already reserved, * in that case increase the reserve on the companion. */ static int _ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag, _ii_info_t *ip) { _ii_info_dev_t *p = NULL; int other = 0; int rc; /* * If user wants to do a cache reserve and it's already * raw reserved, we need to do a real nsc_reserve, so wait * until the release has been done. */ if (RSRV(rid) && (flag == II_EXTERNAL) && (raw == 0) && (rid->bi_flag != II_EXTERNAL)) { ip->bi_release++; while (RSRV(rid)) { DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid); cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex); DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid); } ip->bi_release--; } if (RSRV(rid)) { p = rid; if (!raw) { other = 1; } } else if (RSRV(cid)) { p = cid; if (raw) { other = 1; } } if (p) { if (other) { p->bi_orsrv++; } else { p->bi_rsrv++; } if (p->bi_iodev) { nsc_set_owner(p->bi_fd, p->bi_iodev); } return (0); } p = raw ? rid : cid; if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) { if (p->bi_iodev) { nsc_set_owner(p->bi_fd, p->bi_iodev); } p->bi_rsrv++; if (raw) p->bi_flag = flag; } return (rc); } /* * _ii_rsrv_devs * Reserve named underlying devices. * */ int _ii_rsrv_devs(_ii_info_t *ip, int devs, int flag) { int rc = 0; int got = 0; ASSERT(!(devs & (MST|SHD))); if (!ip) { cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip"); return (EINVAL); } mutex_enter(&ip->bi_rsrvmutex); DTRACE_PROBE(_ii_rsrv_devs_mutex); if (rc == 0 && (devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) { DTRACE_PROBE(_ii_rsrv_devs_master); if (NSHADOWS(ip) && ip != ip->bi_master) { if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR), flag)) != 0) { cmn_err(CE_WARN, "!ii: nsc_reserve multi-master failed"); } else { got |= devs&(MST|MSTR); } } else { if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev, ip->bi_mstdev, flag, ip)) != 0) { cmn_err(CE_WARN, "!ii: nsc_reserve master failed %d", rc); } else { got |= (devs&(MST|MSTR)); } } } if (rc == 0 && (devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) { DTRACE_PROBE(_ii_rsrv_devs_shadow); if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev, &ip->bi_shddev, flag, ip)) != 0) { cmn_err(CE_WARN, "!ii: nsc_reserve shadow failed %d", rc); } else { got |= (devs&(SHD|SHDR)); } } if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) { DTRACE_PROBE(_ii_rsrv_devs_bitmap); if ((ip->bi_bmprsrv == 0) && (rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) { cmn_err(CE_WARN, "!ii: nsc_reserve bitmap failed %d", rc); } else { (ip->bi_bmprsrv)++; got |= BMP; } } mutex_exit(&ip->bi_rsrvmutex); if (rc != 0 && got != 0) _ii_rlse_devs(ip, got); return (rc); } static int _ii_reserve_begin(_ii_info_t *ip) { int rc; mutex_enter(&ip->bi_rlsemutex); if ((rc = _ii_wait_for_it(ip)) == 0) { ++ip->bi_rsrvcnt; } mutex_exit(&ip->bi_rlsemutex); return (rc); } static int _ii_wait_for_it(_ii_info_t *ip) { int nosig; nosig = 1; while (ip->bi_rsrvcnt > 0) { nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex); if (!nosig) { break; } } return (nosig? 0 : EINTR); } static void _ii_reserve_end(_ii_info_t *ip) { mutex_enter(&ip->bi_rlsemutex); if (ip->bi_rsrvcnt <= 0) { mutex_exit(&ip->bi_rlsemutex); return; } --ip->bi_rsrvcnt; mutex_exit(&ip->bi_rlsemutex); cv_broadcast(&ip->bi_reservecv); } static int ii_fill_copy_bmp(_ii_info_t *ip) { int rc; chunkid_t max_chunk, chunk_num; if ((rc = II_FILL_COPY_BMP(ip)) != 0) return (rc); /* * make certain that the last bits of the last byte of the bitmap * aren't filled as they may be copied out to the user. */ chunk_num = ip->bi_size / DSW_SIZE; if ((ip->bi_size % DSW_SIZE) != 0) ++chunk_num; max_chunk = chunk_num; if ((max_chunk & 0x7) != 0) max_chunk = (max_chunk + 7) & ~7; DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num, chunkid_t, max_chunk); for (; chunk_num < max_chunk; chunk_num++) { (void) II_CLR_COPY_BIT(ip, chunk_num); } return (0); } static int ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus, int direction, int all) { rdc_update_t update; int size; unsigned char *bmp; update.volume = direction == CV_SHD2MST ? ii_pathname(MSTFD(ip)) : ip->bi_keyname; update.denied = 0; update.protocol = RDC_SVC_ONRETURN; update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); update.status = kstatus; update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP); if (bmp == NULL) { spcs_s_add(kstatus, ENOMEM); return (1); } DTRACE_PROBE2(_ii_update_denied, int, all, int, size); if (all) { while (size-- > 0) *bmp++ = (unsigned char)0xff; } else { if (II_CHANGE_BMP(ip, update.bitmap) != 0) { /* failed to read bitmap */ spcs_s_add(kstatus, EIO); update.denied = 1; } } /* check that no user of volume objects */ if (update.denied == 0) { (void) nsc_call_svc(ii_volume_update, (intptr_t)&update); } kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size))); return (update.denied); } static int ii_need_same_size(_ii_info_t *ip) { rdc_update_t update; update.volume = ip->bi_keyname; update.denied = 0; update.protocol = RDC_SVC_VOL_ENABLED; (void) nsc_call_svc(ii_volume_update, (intptr_t)&update); return (update.denied); } /* * ii_volume: check if vol is already known to Instant Image and return * volume type if it is. */ static int ii_volume(char *vol, int locked) { _ii_info_t *ip; _ii_overflow_t *op; int rc = NONE; /* scan overflow volume list */ mutex_enter(&_ii_overflow_mutex); DTRACE_PROBE(_ii_volume_mutex); for (op = _ii_overflow_top; op; op = op->ii_next) { if (strcmp(vol, op->ii_volname) == 0) break; } mutex_exit(&_ii_overflow_mutex); if (op) { return (OVR); } if (!locked) { mutex_enter(&_ii_info_mutex); } DTRACE_PROBE(_ii_volume_mutex2); for (ip = _ii_info_top; ip; ip = ip->bi_next) { if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) { rc = MST; break; } if (strcmp(vol, ip->bi_keyname) == 0) { rc = SHD; break; } if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) { rc = BMP; break; } } DTRACE_PROBE1(_ii_volume_data, int, rc); if (!locked) { mutex_exit(&_ii_info_mutex); } return (rc); } /* * ii_open_shadow: open shadow volume for both cached and raw access, * if the normal device open fails attempt a file open to allow * shadowing into a file. */ static int ii_open_shadow(_ii_info_t *ip, char *shadow_vol) { int rc = 0; int file_rc = 0; ip->bi_shdfd = nsc_open(shadow_vol, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, (blind_t)&(ip->bi_shddev), &rc); if (!ip->bi_shdfd) { ip->bi_shdfd = nsc_open(shadow_vol, NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def, (blind_t)&(ip->bi_shddev), &file_rc); file_rc = 1; if (!ip->bi_shdfd) { return (rc); } DTRACE_PROBE(_ii_open_shadow); } else DTRACE_PROBE(_ii_open_shadow); if (file_rc == 0) { ip->bi_shdrfd = nsc_open(shadow_vol, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, (blind_t)&(ip->bi_shdrdev), &rc); DTRACE_PROBE(_ii_open_shadow); } else { ip->bi_shdrfd = nsc_open(shadow_vol, NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def, (blind_t)&(ip->bi_shdrdev), &rc); DTRACE_PROBE(_ii_open_shadow); } if (!ip->bi_shdrfd) { (void) nsc_close(ip->bi_shdfd); DTRACE_PROBE(_ii_open_shadow); return (rc); } return (0); } static void ii_register_shd(_ii_info_t *ip) { ip->bi_shd_tok = _ii_register_path(ip->bi_keyname, NSC_CACHE, _ii_io); ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname, NSC_DEVICE, _ii_ior); } static void ii_register_mst(_ii_info_t *ip) { ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd), NSC_CACHE, _ii_io); ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd), NSC_DEVICE, _ii_ior); } static int ii_register_ok(_ii_info_t *ip) { int rc; int sibling; int exported; rc = 1; sibling = NSHADOWS(ip) && ip != ip->bi_head; exported = ip->bi_flags & DSW_SHDEXPORT; if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && ( !ip->bi_shd_tok || !ip->bi_shdr_tok))) rc = 0; else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok)) rc = 0; return (rc); } #ifndef DISABLE_KSTATS /* * _ii_kstat_create * Create and install kstat_io data * * Calling/Exit State: * Returns 0 if kstats couldn't be created, otherwise it returns * a pointer to the created kstat_t. */ static kstat_t * _ii_kstat_create(_ii_info_t *ip, char *type) { kstat_t *result; char name[ IOSTAT_NAME_LEN ]; int setnum; char *nptr; static int mstnum = 0; static int shdbmpnum = -1; switch (*type) { case 'm': setnum = mstnum++; nptr = ip->bi_kstat_io.mstio; break; case 's': /* assumption: shadow kstats created before bitmap */ setnum = ++shdbmpnum; nptr = ip->bi_kstat_io.shdio; break; case 'b': setnum = shdbmpnum; nptr = ip->bi_kstat_io.bmpio; break; default: cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type); setnum = -1; break; } /* * The name of the kstat, defined below, is designed to work * with the 'iostat -x' command. This command leaves only * 9 characters for the name, and the kstats built in to Solaris * all seem to be of the form <service><number>. For that * reason, we have chosen ii<type><number>, where <type> is * m, s, b, or o (for master, shadow, bitmap, and overflow * respectively), and the number is monotonically increasing from * 0 for each time one of those <type>s are created. Note that * the shadow and bitmap are always created in pairs and so, for * any given set, they will have the same <number>. */ (void) sprintf(name, "ii%c%d", *type, setnum); (void) strncpy(nptr, name, IOSTAT_NAME_LEN); result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0); if (result) { result->ks_private = ip; result->ks_lock = &ip->bi_kstat_io.statmutex; kstat_install(result); } else { cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type, ip->bi_keyname); } return (result); } /* * _ii_overflow_kstat_create * Create and install kstat_io data for an overflow volume * * Calling/Exit State: * Returns 0 if kstats couldn't be created, otherwise it returns * a pointer to the created kstat_t. * * See comments in _ii_kstat_create for additional information. * */ static kstat_t * _ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op) { kstat_t *result; char *nptr; char name [IOSTAT_NAME_LEN]; static int ovrnum = 0; int setnum = ovrnum++; nptr = ip->bi_kstat_io.ovrio; (void) sprintf(name, "iio%d", setnum); (void) strncpy(nptr, name, IOSTAT_NAME_LEN); mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL); if ((result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) { result->ks_private = ip; result->ks_lock = &op->ii_kstat_mutex; kstat_install(result); } else { mutex_destroy(&op->ii_kstat_mutex); cmn_err(CE_WARN, "!Unabled to create overflow kstat for set " "%s", ip->bi_keyname); } return (result); } #endif static void ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4) { static int whinged = 0; char *part[ 4 ]; char fulldata[ DSW_NAMELEN ]; int i, offset, remain; int num_parts; int leftover; int kscharsize = KSTAT_DATA_CHAR_LEN - 1; /* * NOTE: the following lines must be changed if DSW_NAMELEN * ever changes. You'll need a part[] for every kscharsize * characters (or fraction thereof). The ii_kstat_set_t * definition in dsw_dev.h will also need new ovr_? entries. */ part[ 0 ] = p1; part[ 1 ] = p2; part[ 2 ] = p3; part[ 3 ] = p4; bzero(fulldata, DSW_NAMELEN); if (str) { (void) strncpy(fulldata, str, DSW_NAMELEN); } num_parts = DSW_NAMELEN / kscharsize; leftover = DSW_NAMELEN % kscharsize; if (leftover) { ++num_parts; } if (num_parts > sizeof (part) / sizeof (part[0])) { /* * DSW_NAMELEN is 64 and kscharsize is 15. * It's always "whinged" */ if (!whinged) { #ifdef DEBUG cmn_err(CE_WARN, "!May not have enough room " "to store volume name in kstats"); #endif whinged = 1; } num_parts = sizeof (part) / sizeof (part[0]); } offset = 0; remain = DSW_NAMELEN; for (i = 0; i < num_parts; i++) { int to_copy = remain > kscharsize? kscharsize : remain; bcopy(&fulldata[ offset ], part[ i ], to_copy); offset += to_copy; remain -= to_copy; } } static int ii_set_stats_update(kstat_t *ksp, int rw) { _ii_info_t *ip = (_ii_info_t *)ksp->ks_private; ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data; if (KSTAT_WRITE == rw) { return (EACCES); } /* copy values over */ kp->size.value.ul = ip->bi_size; kp->flags.value.ul = ip->bi_flags; kp->unit.value.ul = ip->bi_throttle_unit; kp->delay.value.ul = ip->bi_throttle_delay; kp->mtime.value.ul = ip->bi_mtime; /* update bitmap counters if necessary */ if (ip->bi_state & DSW_CNTCPYBITS) { ip->bi_copybits = 0; if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) { ip->bi_state &= ~DSW_CNTCPYBITS; II_CNT_BITS(ip, ip->bi_copyfba, &ip->bi_copybits, DSW_BM_SIZE_BYTES(ip)); _ii_rlse_devs(ip, BMP); } } if (ip->bi_state & DSW_CNTSHDBITS) { ip->bi_shdbits = 0; if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) { ip->bi_state &= ~DSW_CNTSHDBITS; II_CNT_BITS(ip, ip->bi_shdfba, &ip->bi_shdbits, DSW_BM_SIZE_BYTES(ip)); _ii_rlse_devs(ip, BMP); } } kp->copybits.value.ul = ip->bi_copybits; kp->shdbits.value.ul = ip->bi_shdbits; /* copy volume names */ ii_str_kstat_copy(ii_pathname(MSTFD(ip)), kp->mst_a.value.c, kp->mst_b.value.c, kp->mst_c.value.c, kp->mst_d.value.c); ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c, kp->set_c.value.c, kp->set_d.value.c); ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd), kp->bmp_a.value.c, kp->bmp_b.value.c, kp->bmp_c.value.c, kp->bmp_d.value.c); if (ip->bi_overflow) { ii_str_kstat_copy(ip->bi_overflow->ii_volname, kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c, kp->ovr_d.value.c); (void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio, KSTAT_DATA_CHAR_LEN); } else { ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c, kp->ovr_d.value.c); bzero(kp->ovr_io.value.c, KSTAT_DATA_CHAR_LEN); } if ((ip->bi_flags) & DSW_TREEMAP) { kp->shdchks.value.ul = ip->bi_shdchks; kp->shdchkused.value.ul = ip->bi_shdchkused; } else { kp->shdchks.value.ul = 0; kp->shdchkused.value.ul = 0; } /* make sure value.c are always null terminated */ (void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN); (void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio, KSTAT_DATA_CHAR_LEN); (void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio, KSTAT_DATA_CHAR_LEN); return (0); } /* * _ii_config * Configure an II device pair * * Calling/Exit State: * Returns 0 if the pairing was configured, otherwise an * error code. The ioctl data stucture is copied out to the user * and contains any additional error information, and the master * and shadow volume names if not supplied by the user. * * Description: * Reads the user configuration structure and attempts * to establish an II pairing. The snapshot of the master * device is established at this point in time. */ int _ii_config(intptr_t arg, int ilp32, int *rvp, int iflags) { dsw_config_t uconf; dsw_config32_t *uconf32; _ii_info_t *ip, *hip, **ipp; int rc; int type; int nshadows; int add_to_mst_top; int import; int existing; int resized; nsc_size_t mst_size, shd_size, bmp_size; nsc_off_t shdfba; nsc_off_t copyfba; int keylen, keyoffset; ii_header_t *bm_header; nsc_buf_t *tmp; spcs_s_info_t kstatus; spcs_s_info32_t ustatus32; int rtype; uint_t hints; /* Import is a once only operation like an enable */ ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT)); existing = (iflags&II_EXISTING) != 0; import = (iflags&II_IMPORT) != 0; *rvp = 0; if (ilp32) { uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP); if (uconf32 == NULL) { return (ENOMEM); } if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0) return (EFAULT); II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t); uconf.status = (spcs_s_info_t)uconf32->status; ustatus32 = uconf32->status; kmem_free(uconf32, sizeof (dsw_config32_t)); } else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0) return (EFAULT); DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol, char *, uconf.shadow_vol, char *, uconf.bitmap_vol); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (_ii_shutting_down) return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_ESHUTDOWN)); if (uconf.bitmap_vol[0] == 0) return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY)); mutex_enter(&_ii_config_mutex); ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem); if (!ip) { mutex_exit(&_ii_config_mutex); return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM)); } ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP, _ii_local_mem); ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP, _ii_local_mem); if (ip->bi_mstdev == NULL || ip->bi_mstrdev == NULL) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM)); } ip->bi_disabled = 1; /* mark as disabled until we are ready to go */ mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL); cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL); cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL); cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL); cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL); cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL); cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL); rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL); rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL); (void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN); ip->bi_keyname[DSW_NAMELEN-1] = '\0'; ip->bi_throttle_unit = ii_throttle_unit; ip->bi_throttle_delay = ii_throttle_delay; /* First check the list to see if uconf.bitmap_vol's already there */ if (ii_volume(uconf.bitmap_vol, 0) != NONE) { DTRACE_PROBE(_ii_config_bmp_found); mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); } ip->bi_bmpfd = nsc_open(uconf.bitmap_vol, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc); if (!ip->bi_bmpfd) ip->bi_bmpfd = nsc_open(uconf.bitmap_vol, NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc); if (!ip->bi_bmpfd && !existing) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); spcs_s_add(kstatus, rc); DTRACE_PROBE(_ii_config_no_bmp); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } if (import) { uconf.flag = DSW_GOLDEN; II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip); } if (existing) { DTRACE_PROBE(_ii_config_existing); /* * ii_config is used by enable, import and resume (existing) * If not importing or resuming, then this must be enable. * Indicate this fact for SNMP use. */ if (!ip->bi_bmpfd) { /* * Couldn't read bitmap, mark master and shadow as * unusable. */ II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE| DSW_SHDOFFLINE, ip); /* * Set cluster tag for this element so it can * be suspended later */ (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); /* need to check on master, might be shared */ goto header_checked; } /* check the header */ (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL); /* get first block of bit map */ mutex_enter(&ip->bi_mutex); bm_header = _ii_bm_header_get(ip, &tmp); mutex_exit(&ip->bi_mutex); if (bm_header == NULL) { if (ii_debug > 0) cmn_err(CE_WARN, "!ii: _ii_bm_header_get returned NULL"); mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EHDRBMP)); } if (bm_header->ii_magic != DSW_DIRTY && bm_header->ii_magic != DSW_CLEAN) { mutex_exit(&_ii_config_mutex); _ii_bm_header_free(bm_header, ip, tmp); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINVALBMP)); } II_FLAG_ASSIGN(bm_header->ii_state, ip); /* Restore copy throttle parameters, if header version is 3 */ if (bm_header->ii_version >= 3) { /* II_HEADER_VERSION */ ip->bi_throttle_delay = bm_header->ii_throttle_delay; ip->bi_throttle_unit = bm_header->ii_throttle_unit; } /* Restore cluster & group names, if header version is 4 */ if (bm_header->ii_version >= 4) { /* cluster */ if (*bm_header->clstr_name) { (void) strncpy(uconf.cluster_tag, bm_header->clstr_name, DSW_NAMELEN); (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); } /* group */ if (*bm_header->group_name) { (void) strncpy(uconf.group_name, bm_header->group_name, DSW_NAMELEN); (void) II_LINK_GROUP(ip, uconf.group_name); } } /* restore latest modification time, if header version >= 5 */ if (bm_header->ii_version >= 5) { ip->bi_mtime = bm_header->ii_mtime; } /* Fetch master and shadow names from bitmap header */ if (uconf.master_vol[0] == 0) (void) strncpy(uconf.master_vol, bm_header->master_vol, DSW_NAMELEN); if (uconf.shadow_vol[0] == 0) (void) strncpy(uconf.shadow_vol, bm_header->shadow_vol, DSW_NAMELEN); /* return the fetched names to the user */ if (ilp32) { uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP); if (uconf32 == NULL) { mutex_exit(&_ii_config_mutex); _ii_bm_header_free(bm_header, ip, tmp); _ii_rlse_devs(ip, BMP); _ii_info_free(ip); return (ENOMEM); } uconf32->status = ustatus32; II_TAIL_COPY((*uconf32), uconf, master_vol, dsw_config32_t); rc = copyout(uconf32, (void *)arg, sizeof (*uconf32)); kmem_free(uconf32, sizeof (dsw_config32_t)); } else { rc = copyout(&uconf, (void *)arg, sizeof (uconf)); } if (rc) { mutex_exit(&_ii_config_mutex); _ii_bm_header_free(bm_header, ip, tmp); _ii_rlse_devs(ip, BMP); _ii_info_free(ip); return (EFAULT); } if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol, DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) && strncmp(bm_header->master_vol, uconf.master_vol, DSW_NAMELEN)) || strncmp(bm_header->shadow_vol, uconf.shadow_vol, DSW_NAMELEN)) { mutex_exit(&_ii_config_mutex); _ii_bm_header_free(bm_header, ip, tmp); _ii_rlse_devs(ip, BMP); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EMISMATCH)); } shdfba = bm_header->ii_shdfba; copyfba = bm_header->ii_copyfba; if ((ip->bi_flags)&DSW_TREEMAP) { if (ii_debug > 0) cmn_err(CE_NOTE, "!II: Resuming short shadow volume"); ip->bi_mstchks = bm_header->ii_mstchks; ip->bi_shdchks = bm_header->ii_shdchks; ip->bi_shdchkused = bm_header->ii_shdchkused; ip->bi_shdfchk = bm_header->ii_shdfchk; if (bm_header->overflow_vol[0] != 0) if ((rc = ii_overflow_attach(ip, bm_header->overflow_vol, 0)) != 0) { mutex_exit(&_ii_config_mutex); _ii_bm_header_free(bm_header, ip, tmp); _ii_rlse_devs(ip, BMP); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); } } _ii_bm_header_free(bm_header, ip, tmp); _ii_rlse_devs(ip, BMP); } header_checked: if (ip->bi_flags&DSW_SHDIMPORT) (void) strcpy(uconf.master_vol, "<imported shadow>"); if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY)); } /* check that no volume has been given twice */ if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } /* check that master is not already a bitmap, shadow or overflow */ type = ii_volume(uconf.master_vol, 1); if (type != NONE && type != MST) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); } /* check that shadow is not used as anything else */ type = ii_volume(uconf.shadow_vol, 1); if (type != NONE && type != SHD) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); } /* Setup the table bitmap operations table */ switch (ii_bitmap) { case II_KMEM: if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: using volatile bitmaps"); ip->bi_bitmap_ops = &kmem_buf_bmp; break; case II_FWC: hints = 0; (void) nsc_node_hints(&hints); if ((hints & NSC_FORCED_WRTHRU) == 0) ip->bi_bitmap_ops = &kmem_buf_bmp; else ip->bi_bitmap_ops = &alloc_buf_bmp; if (ii_debug > 0) { cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps", ip->bi_bitmap_ops == &kmem_buf_bmp ? "volatile" : "persistent"); } break; case II_WTHRU: default: if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: using persistent bitmaps"); ip->bi_bitmap_ops = &alloc_buf_bmp; break; } /* * If we found aother shadow volume with the same name, * If this is an resume operation, * If this shadow is in the exported state * then try an on the fly join instead */ for (hip = _ii_info_top; hip; hip = hip->bi_next) if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0) break; if ((hip) && (type == SHD) && existing && (ip->bi_flags & DSW_SHDEXPORT)) { /* * Stop any copy in progress */ while (_ii_stopcopy(hip) == EINTR) ; /* * Start the imported shadow teardown */ mutex_enter(&hip->bi_mutex); /* disable accesss to imported shadow */ hip->bi_disabled = 1; /* Wait for any I/O's to complete */ while (hip->bi_ioctl) { hip->bi_state |= DSW_IOCTL; cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex); } mutex_exit(&hip->bi_mutex); /* this rw_enter forces us to drain all active IO */ rw_enter(&hip->bi_linkrw, RW_WRITER); rw_exit(&hip->bi_linkrw); /* remove ip from _ii_info_top linked list */ mutex_enter(&_ii_info_mutex); for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) { if (hip == *ipp) { *ipp = hip->bi_next; break; } } if (hip->bi_kstat) { kstat_delete(hip->bi_kstat); hip->bi_kstat = NULL; } mutex_exit(&_ii_info_mutex); /* Gain access to both bitmap volumes */ rtype = BMP; if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) || ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); } /* Merge imported bitmap */ rc = II_JOIN_BMP(ip, hip); /* Release access to bitmap volume */ _ii_rlse_devs(hip, rtype); ii_sibling_free(hip); /* Clear the fact that we are exported */ mutex_enter(&ip->bi_mutex); II_FLAG_CLR(DSW_SHDEXPORT, ip); /* Release resources */ mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, BMP); } else if (type != NONE) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE)); } /* * Handle non-exported shadow */ if ((ip->bi_flags & DSW_SHDEXPORT) == 0) { if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } } /* * allocate _ii_concopy_sema and set to a value that won't allow * all cache to be allocated by copy loops. */ if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) { int asize = 0, wsize; nsc_size_t cfbas, maxfbas; (void) nsc_cache_sizes(&asize, &wsize); if (asize > 0) { cfbas = FBA_NUM(asize); (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL); rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas); _ii_rlse_devs(ip, BMP); if (!II_SUCCESS(rc)) maxfbas = 1024; /* i.e. _SD_MAX_FBAS */ ii_nconcopy = cfbas / (maxfbas * 2) / 3; } if (ii_nconcopy < 2) ii_nconcopy = 2; ASSERT(ii_nconcopy > 0); sema_init(&_ii_concopy_sema, ii_nconcopy, NULL, SEMA_DRIVER, NULL); _ii_concopy_init = 1; } /* check for shared master volume */ for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst) if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0) break; add_to_mst_top = (hip == NULL); if (!hip) for (hip = _ii_info_top; hip; hip = hip->bi_next) if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0) break; nshadows = (hip != NULL); /* Check if master is offline */ if (hip) { if (hip->bi_flags & DSW_MSTOFFLINE) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOFFLINE)); } } if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) { ip->bi_mstfd = nsc_open(uconf.master_vol, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, (blind_t)(ip->bi_mstdev), &rc); if (!ip->bi_mstfd) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } ip->bi_mstrfd = nsc_open(uconf.master_vol, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def, (blind_t)(ip->bi_mstrdev), &rc); if (!ip->bi_mstrfd) { mutex_exit(&_ii_config_mutex); _ii_info_free(ip); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN)); } } ip->bi_head = ip; ip->bi_master = ip; mutex_enter(&_ii_info_mutex); ip->bi_next = _ii_info_top; _ii_info_top = ip; if (nshadows) { /* link new shadow group together with others sharing master */ if (ii_debug > 0) cmn_err(CE_NOTE, "!II: shadow %s shares master %s with other shadow" " groups", uconf.shadow_vol, uconf.master_vol); hip = hip->bi_head; nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev)); nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev)); ip->bi_mstrdev = hip->bi_mstrdev; ip->bi_mstdev = hip->bi_mstdev; ip->bi_head = hip; ip->bi_sibling = hip->bi_sibling; if (add_to_mst_top) { hip->bi_nextmst = _ii_mst_top; _ii_mst_top = hip; } hip->bi_sibling = ip; ip->bi_master = ip->bi_head->bi_master; } mutex_exit(&_ii_info_mutex); mutex_exit(&_ii_config_mutex); keylen = strlen(ip->bi_keyname); if (keylen > KSTAT_STRLEN - 1) { keyoffset = keylen + 1 - KSTAT_STRLEN; } else { keyoffset = 0; } ip->bi_kstat = kstat_create("ii", _ii_instance++, &ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED, sizeof (ii_kstat_set) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (ip->bi_kstat) { ip->bi_kstat->ks_data = &ii_kstat_set; ip->bi_kstat->ks_update = ii_set_stats_update; ip->bi_kstat->ks_private = ip; kstat_install(ip->bi_kstat); } else { cmn_err(CE_WARN, "!Unable to create set-specific kstats"); } #ifndef DISABLE_KSTATS /* create kstats information */ mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL); if (ip == ip->bi_master) { ip->bi_kstat_io.master = _ii_kstat_create(ip, "master"); } else { ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master; (void) strlcpy(ip->bi_kstat_io.mstio, ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN); } ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow"); ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap"); #endif (void) _ii_reserve_begin(ip); rtype = MSTR|SHDR|BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { spcs_s_add(kstatus, rc); rc = DSW_ERSRVFAIL; goto fail; } if (ip->bi_flags&DSW_SHDIMPORT) { rc = 0; /* no master for imported volumes */ mst_size = 0; } else rc = nsc_partsize(MSTFD(ip), &mst_size); if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) rc = nsc_partsize(SHDFD(ip), &shd_size); if (!ip->bi_bmpfd) rc = EINVAL; if (rc == 0) rc = nsc_partsize(ip->bi_bmpfd, &bmp_size); if (ip->bi_flags&DSW_SHDIMPORT) ip->bi_size = shd_size; else ip->bi_size = mst_size; if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) && (mst_size < 1)) || (((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) && (shd_size < 1)) || ((rc == 0) && (bmp_size < 1))) { /* could be really zero, or could be > 1 TB; fail the enable */ rc = EINVAL; } if (rc != 0) { /* rc set means an nsc_partsize() failed */ /* * If existing group, mark bitmap as offline and set * bmp_size to "right size". */ if (existing) { bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET; goto no_more_bmp_tests; } spcs_s_add(kstatus, rc); rc = DSW_EPARTSIZE; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } if (ip->bi_flags&DSW_SHDIMPORT) mst_size = shd_size; if (ip->bi_flags&DSW_SHDEXPORT) shd_size = mst_size; /* * Check with RDC if the master & shadow sizes are different. * Once II is enabled, the shadow size will be made to appear * the same as the master, and this will panic RDC if we're * changing sizes on it. */ resized = (shd_size != mst_size); if (resized && ii_need_same_size(ip)) { cmn_err(CE_WARN, "!Cannot enable II set: would change volume " "size on RDC"); rc = DSW_EOPACKAGE; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) { /* bitmap volume too small */ if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: invalid sizes: bmp %" NSC_SZFMT " mst %" NSC_SZFMT " %" NSC_SZFMT "", bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size)); rc = DSW_EBMPSIZE; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) { /* shadow volume too small */ if (ii_debug > 0) cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT ") for independent set's master (%" NSC_SZFMT ")", shd_size, mst_size); rc = DSW_ESHDSIZE; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)), KM_SLEEP); if (!ip->bi_busy) { rc = ENOMEM; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } if (existing == 0) { DTRACE_PROBE(_ii_config); /* first time this shadow has been set up */ mutex_enter(&ip->bi_mutex); bm_header = _ii_bm_header_get(ip, &tmp); mutex_exit(&ip->bi_mutex); if (bm_header == NULL) { if (ii_debug > 0) cmn_err(CE_WARN, "!ii: _ii_bm_header_get returned NULL"); rc = DSW_EHDRBMP; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } bzero(bm_header, sizeof (*bm_header)); /* copy pathnames into it */ (void) strncpy(bm_header->master_vol, uconf.master_vol, DSW_NAMELEN); (void) strncpy(bm_header->shadow_vol, uconf.shadow_vol, DSW_NAMELEN); (void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol, DSW_NAMELEN); (void) strncpy(bm_header->clstr_name, uconf.cluster_tag, DSW_NAMELEN); (void) strncpy(bm_header->group_name, uconf.group_name, DSW_NAMELEN); if (uconf.cluster_tag[0] != 0) (void) II_LINK_CLUSTER(ip, uconf.cluster_tag); if (uconf.group_name[0] != 0) (void) II_LINK_GROUP(ip, uconf.group_name); bm_header->ii_state = (uconf.flag & DSW_GOLDEN); II_FLAG_ASSIGN(bm_header->ii_state, ip); if (import) { II_FLAG_SETX(DSW_SHDIMPORT, ip); bm_header->ii_state |= DSW_SHDIMPORT; } if (resized) { II_FLAG_SETX(DSW_RESIZED, ip); bm_header->ii_state |= DSW_RESIZED; } bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ? DSW_GOLDEN_TYPE : DSW_QUICK_TYPE; bm_header->ii_magic = DSW_DIRTY; bm_header->ii_version = II_HEADER_VERSION; bm_header->ii_shdfba = DSW_SHD_BM_OFFSET; bm_header->ii_copyfba = DSW_COPY_BM_OFFSET; bm_header->ii_throttle_delay = ip->bi_throttle_delay; bm_header->ii_throttle_unit = ip->bi_throttle_unit; ip->bi_shdfba = bm_header->ii_shdfba; ip->bi_copyfba = bm_header->ii_copyfba; ip->bi_mtime = ddi_get_time(); /* write it to disk */ mutex_enter(&ip->bi_mutex); rc = _ii_bm_header_put(bm_header, ip, tmp); mutex_exit(&ip->bi_mutex); if (!II_SUCCESS(rc)) { spcs_s_add(kstatus, rc); rc = DSW_EHDRBMP; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) { /* * shadow volume smaller than master, must use a dependent * copy with a bitmap file stored mapping for chunk locations. */ /* number of chunks in shadow volume */ nsc_size_t shd_chunks; nsc_size_t bmp_chunks; nsc_size_t tmp_chunks; if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: using tree index on %s", uconf.master_vol); shd_chunks = shd_size / DSW_SIZE; /* do not add in partial chunk at end */ ip->bi_mstchks = mst_size / DSW_SIZE; if (mst_size % DSW_SIZE != 0) ip->bi_mstchks++; bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba - DSW_BM_FBA_LEN(ip->bi_size)); tmp_chunks = ip->bi_copyfba + DSW_BM_FBA_LEN(ip->bi_size); if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) { if (ii_debug > -1) { cmn_err(CE_NOTE, "!ii: bitmap vol too" "small: %" NSC_SZFMT " vs. %" NSC_SZFMT, bmp_size, tmp_chunks); } spcs_s_add(kstatus, rc); rc = DSW_EHDRBMP; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } mutex_enter(&ip->bi_mutex); II_FLAG_SET(DSW_TREEMAP, ip); mutex_exit(&ip->bi_mutex); /* following values are written to header by ii_tinit */ #if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE)) ASSERT(shd_chunks <= INT32_MAX); ASSERT(mst_size / DSW_SIZE <= INT32_MAX); #endif ip->bi_mstchks = mst_size / DSW_SIZE; if (mst_size % DSW_SIZE != 0) ip->bi_mstchks++; #ifdef II_MULTIMULTI_TERABYTE ip->bi_shdchks = shd_chunks; #else /* still have 31 bit chunkid's */ ip->bi_shdchks = (chunkid_t)shd_chunks; #endif ip->bi_shdchkused = 0; rc = ii_tinit(ip); } else { ip->bi_shdchks = shd_size / DSW_SIZE; ip->bi_shdchkused = 0; } if (rc == 0) rc = II_LOAD_BMP(ip, 1); if (rc == 0) rc = II_ZEROBM(ip); if (rc == 0) rc = II_COPYBM(ip); /* also clear copy bitmap */ if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import) rc = ii_fill_copy_bmp(ip); if (rc) { spcs_s_add(kstatus, rc); rc = DSW_EHDRBMP; _ii_rlse_devs(ip, rtype); goto fail; } /* check that changing shadow won't upset RDC */ if (ii_update_denied(ip, kstatus, 0, 1)) { rc = DSW_EOPACKAGE; _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); goto fail; } ip->bi_disabled = 0; /* all okay and ready, we can go now */ _ii_rlse_devs(ip, rtype); /* no _ii_reserve_end() here - we must register first */ ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd), NSC_CACHE|NSC_DEVICE, _ii_io); if (!nshadows) ii_register_mst(ip); ii_register_shd(ip); if (!ii_register_ok(ip)) { ip->bi_disabled = 1; /* argh */ rc = DSW_EREGISTER; goto fail; } /* no _ii_reserve_begin() here -- we're still in process */ (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL); if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: config: master %s shadow %s", uconf.master_vol, uconf.shadow_vol); rc = 0; if ((uconf.flag & DSW_GOLDEN) && !import) { mutex_enter(&ip->bi_mutex); II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip); ip->bi_ioctl++; /* we are effectively in an ioctl */ mutex_exit(&ip->bi_mutex); rc = _ii_copyvol(ip, 0, rtype, kstatus, 1); } _ii_rlse_devs(ip, rtype); _ii_reserve_end(ip); ++iigkstat.num_sets.value.ul; return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); } ip->bi_shdchks = shd_size / DSW_SIZE; ip->bi_shdfba = shdfba; ip->bi_copyfba = copyfba; rc = II_LOAD_BMP(ip, 0); /* reload saved bitmap */ mutex_enter(&ip->bi_mutex); if (rc == 0) bm_header = _ii_bm_header_get(ip, &tmp); mutex_exit(&ip->bi_mutex); if (rc || bm_header == NULL) { if (existing) { goto no_more_bmp_tests; } rc = DSW_EHDRBMP; goto fail; } /* * If the header is dirty and it wasn't kept on persistent storage * then the bitmaps must be assumed to be bad. */ if (bm_header->ii_magic == DSW_DIRTY && ip->bi_bitmap_ops != &alloc_buf_bmp) { type = bm_header->ii_type; _ii_bm_header_free(bm_header, ip, tmp); if (type == DSW_GOLDEN_TYPE) { if ((ip->bi_flags & DSW_COPYINGM) != 0) _ii_error(ip, DSW_SHDOFFLINE); else if ((ip->bi_flags & DSW_COPYINGS) != 0) _ii_error(ip, DSW_MSTOFFLINE); else { /* No copying, so they're just different */ rc = ii_fill_copy_bmp(ip); if (rc) { spcs_s_add(kstatus, rc); rc = DSW_EHDRBMP; goto fail; } } } else _ii_error(ip, DSW_SHDOFFLINE); mutex_enter(&ip->bi_mutex); bm_header = _ii_bm_header_get(ip, &tmp); mutex_exit(&ip->bi_mutex); if (bm_header == NULL) { rc = DSW_EHDRBMP; goto fail; } } bm_header->ii_magic = DSW_DIRTY; mutex_enter(&ip->bi_mutex); rc = _ii_bm_header_put(bm_header, ip, tmp); mutex_exit(&ip->bi_mutex); if (!II_SUCCESS(rc)) { spcs_s_add(kstatus, rc); rc = DSW_EHDRBMP; goto fail; } ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd), NSC_CACHE|NSC_DEVICE, _ii_io); no_more_bmp_tests: _ii_rlse_devs(ip, rtype); ip->bi_disabled = 0; /* all okay and ready, we can go now */ if (!nshadows) ii_register_mst(ip); if ((ip->bi_flags & DSW_SHDEXPORT) == 0) ii_register_shd(ip); if (!ii_register_ok(ip)) { rc = DSW_EREGISTER; goto fail; } _ii_reserve_end(ip); if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: config: master %s shadow %s", uconf.master_vol, uconf.shadow_vol); rc = 0; if (ip->bi_flags & DSW_COPYINGP) { /* Copy was in progress, so continue it */ (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL); mutex_enter(&ip->bi_mutex); ip->bi_ioctl++; /* we are effectively in an ioctl */ mutex_exit(&ip->bi_mutex); rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ? CV_SHD2MST : 0, rtype, kstatus, 0); } ++iigkstat.num_sets.value.ul; return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); fail: /* remove ip from _ii_info_top linked list */ mutex_enter(&_ii_info_mutex); for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) { if (ip == *ipp) { *ipp = ip->bi_next; break; } } mutex_exit(&_ii_info_mutex); ii_sibling_free(ip); return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc)); } static int _ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim) { _ii_info_t **xip, *ip; _ii_overflow_t *op; nsc_buf_t *tmp = NULL; int rc; ii_header_t *bm_header; int rtype; mutex_enter(&_ii_info_mutex); ip = _ii_find_set(setname); if (ip == NULL) { mutex_exit(&_ii_info_mutex); return (DSW_ENOTFOUND); } if ((ip->bi_flags & DSW_GOLDEN) && ((ip->bi_flags & DSW_COPYINGP) != 0)) { /* * Cannot disable an independent copy while still copying * as it means that a data dependency exists. */ mutex_exit(&_ii_info_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY); return (DSW_EDEPENDENCY); } if ((ip->bi_flags & DSW_GOLDEN) == 0 && ii_update_denied(ip, *kstatusp, 0, 1)) { /* Cannot disable a dependent shadow while RDC is unsure */ mutex_exit(&_ii_info_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); DTRACE_PROBE(DSW_EOPACKAGE); return (DSW_EOPACKAGE); } if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) && ii_need_same_size(ip)) { /* We can't disable the set whilst RDC is using it */ mutex_exit(&_ii_info_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); cmn_err(CE_WARN, "!Cannot disable II set: would change " "volume size on RDC"); DTRACE_PROBE(DSW_EOPACKAGE_resize); return (DSW_EOPACKAGE); } ip->bi_disabled = 1; if (NSHADOWS(ip) && (ip->bi_master == ip)) { ip->bi_flags &= (~DSW_COPYING); ip->bi_state |= DSW_MULTIMST; } mutex_exit(&_ii_info_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_stopvol(ip); rtype = SHDR|BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { spcs_s_add(*kstatusp, rc); DTRACE_PROBE(DSW_ERSRVFAIL); return (DSW_ERSRVFAIL); } if ((ii_header < 128) && (((ip->bi_flags & DSW_GOLDEN) == 0) || (ip->bi_flags & DSW_COPYING))) { /* * Not a full copy so attempt to prevent use of partial copy * by clearing where the first ufs super-block would be * located. Solaris often incorporates the disk header into * the start of the first slice, so avoid clearing the very * first 16 blocks of the volume. */ if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: Shadow copy invalidated"); II_READ_START(ip, shadow); rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header, NSC_RDWRBUF, &tmp); II_READ_END(ip, shadow, rc, 128 - ii_header); if (II_SUCCESS(rc)) { rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, shadow, rc, tmp, ii_header, (128 - ii_header), 0); } } if (tmp) (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) _ii_error(ip, DSW_SHDOFFLINE); } /* this rw_enter forces us to drain all active IO */ rw_enter(&ip->bi_linkrw, RW_WRITER); rw_exit(&ip->bi_linkrw); /* remove ip from _ii_info_top linked list */ mutex_enter(&_ii_info_mutex); for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) { if (ip == *xip) { *xip = ip->bi_next; break; } } if (ip->bi_kstat) { kstat_delete(ip->bi_kstat); ip->bi_kstat = NULL; } mutex_exit(&_ii_info_mutex); rc = II_SAVE_BMP(ip, 1); mutex_enter(&ip->bi_mutex); if (rc == 0) bm_header = _ii_bm_header_get(ip, &tmp); if (rc == 0 && bm_header) { if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: Invalid header written"); bm_header->ii_magic = DSW_INVALID; /* write it to disk */ (void) _ii_bm_header_put(bm_header, ip, tmp); } mutex_exit(&ip->bi_mutex); op = ip->bi_overflow; if (op && (reclaim == -1)) { reclaim = (op->ii_drefcnt == 1? NO_RECLAIM : RECLAIM); } if ((op != NULL) && (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) { mutex_enter(&_ii_overflow_mutex); if (ip->bi_flags & DSW_OVRHDRDRTY) { mutex_enter(&ip->bi_mutex); ip->bi_flags &= ~DSW_OVRHDRDRTY; mutex_exit(&ip->bi_mutex); ASSERT(op->ii_urefcnt > 0); op->ii_urefcnt--; } if (op->ii_urefcnt == 0) { op->ii_flags &= ~IIO_CNTR_INVLD; op->ii_unused = op->ii_nchunks - 1; } mutex_exit(&_ii_overflow_mutex); } ii_overflow_free(ip, reclaim); _ii_rlse_devs(ip, rtype); ii_sibling_free(ip); --iigkstat.num_sets.value.ul; return (0); } /* * _ii_disable * Deconfigures an II pair * * Calling/Exit State: * Returns 0 if the pair was disabled. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: * Reads the user configuration structure and attempts to * deconfigure that pairing based on the master device pathname. */ int _ii_disable(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_overflow_t *op; int rc, rerr; spcs_s_info_t kstatus; uint64_t hash; int reclaim; _ii_lsthead_t *oldhead, **head; _ii_lstinfo_t *np, **xnp, *oldp; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol, int, uparms.flags); /* group or single set? */ if (uparms.flags & CV_IS_GROUP) { hash = nsc_strhash(uparms.shadow_vol); mutex_enter(&_ii_group_mutex); for (head = &_ii_group_top; *head; head = &((*head)->lst_next)) { if ((hash == (*head)->lst_hash) && strncmp((*head)->lst_name, uparms.shadow_vol, DSW_NAMELEN) == 0) break; } if (!*head) { mutex_exit(&_ii_group_mutex); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EGNOTFOUND)); } /* clear any overflow vol usage counts */ for (np = (*head)->lst_start; np; np = np->lst_next) { if (np->lst_ip->bi_overflow) { np->lst_ip->bi_overflow->ii_detachcnt = 0; } } /* now increment */ for (np = (*head)->lst_start; np; np = np->lst_next) { if (np->lst_ip->bi_overflow) { ++np->lst_ip->bi_overflow->ii_detachcnt; } } /* finally, disable all group members */ rerr = 0; xnp = &(*head)->lst_start; while (*xnp) { op = (*xnp)->lst_ip->bi_overflow; if (op) { reclaim = (op->ii_drefcnt == op->ii_detachcnt? NO_RECLAIM : RECLAIM); --op->ii_detachcnt; } /* clear out the group pointer */ (*xnp)->lst_ip->bi_group = NULL; rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname, &kstatus, reclaim); if (rc) { /* restore group name */ (*xnp)->lst_ip->bi_group = (*head)->lst_name; /* restore detachcnt */ if (op) { ++op->ii_detachcnt; } /* don't delete branch */ ++rerr; spcs_s_add(kstatus, rc); /* move forward in linked list */ xnp = &(*xnp)->lst_next; } else { oldp = (*xnp); *xnp = (*xnp)->lst_next; kmem_free(oldp, sizeof (_ii_lstinfo_t)); } } if (rerr) { mutex_exit(&_ii_group_mutex); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EDISABLE)); } /* no errors, all sets disabled, OK to free list head */ oldhead = *head; *head = (*head)->lst_next; kmem_free(oldhead, sizeof (_ii_lsthead_t)); mutex_exit(&_ii_group_mutex); } else { /* only a single set is being disabled */ rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1); if (rc) return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } spcs_s_kfree(kstatus); return (0); } /* * _ii_stat * Get state of the shadow. * * Calling/Exit State: * Returns 0 on success, otherwise an error code is returned * and any additional error information is copied out to the user. * The size variable in the dsw_stat_t is set to the FBA size * of the volume, the stat variable is set to the state, and * the structure is copied out. */ /*ARGSUSED*/ int _ii_stat(intptr_t arg, int ilp32, int *rvp) { dsw_stat_t ustat; dsw_stat32_t ustat32; _ii_info_t *ip; spcs_s_info_t kstatus; char *group, *cluster; if (ilp32) { if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0) return (EFAULT); II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t); ustat.status = (spcs_s_info_t)ustat32.status; } else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ustat.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ustat.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_ENOTFOUND)); ustat.stat = ip->bi_flags; ustat.size = ip->bi_size; ustat.mtime = ip->bi_mtime; if (ilp32) bzero(ustat32.overflow_vol, DSW_NAMELEN); else bzero(ustat.overflow_vol, DSW_NAMELEN); if (ip->bi_overflow) { (void) strncpy(ilp32 ? ustat32.overflow_vol : ustat.overflow_vol, ip->bi_overflow->ii_volname, DSW_NAMELEN); } ustat.shdsize = ip->bi_shdchks; if ((ip->bi_flags) & DSW_TREEMAP) { ustat.shdused = ip->bi_shdchkused; } else { ustat.shdused = 0; } /* copy over group and cluster associations */ group = ilp32? ustat32.group_name : ustat.group_name; cluster = ilp32? ustat32.cluster_tag : ustat.cluster_tag; bzero(group, DSW_NAMELEN); bzero(cluster, DSW_NAMELEN); if (ip->bi_group) (void) strncpy(group, ip->bi_group, DSW_NAMELEN); if (ip->bi_cluster) (void) strncpy(cluster, ip->bi_cluster, DSW_NAMELEN); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_kfree(kstatus); if (ilp32) { ustat32.stat = ustat.stat; ustat32.size = ustat.size; ustat32.shdsize = ustat.shdsize; ustat32.shdused = ustat.shdused; ustat32.mtime = ustat.mtime; if (copyout(&ustat32, (void *)arg, sizeof (ustat32))) return (EFAULT); } else if (copyout(&ustat, (void *)arg, sizeof (ustat))) return (EFAULT); return (0); } /* * _ii_list * List what shadow sets are currently configured. * * Calling/Exit State: * Returns 0 on success, otherwise an error code is returned * and any additional error information is copied out to the user. */ /*ARGSUSED*/ int _ii_list(intptr_t arg, int ilp32, int *rvp) { dsw_list_t ulist; dsw_list32_t ulist32; _ii_info_t *ip; dsw_config_t cf, *cfp; dsw_config32_t cf32, *cf32p; int rc; int used; spcs_s_info_t kstatus; if (ilp32) { if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0) return (EFAULT); II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t); ulist.status = (spcs_s_info_t)ulist32.status; } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); cf32p = (dsw_config32_t *)(unsigned long)ulist32.list; cfp = ulist.list; ulist.list_used = 0; mutex_enter(&_ii_info_mutex); ip = _ii_info_top; DTRACE_PROBE1(_ii_list_count, int, ulist.list_size); for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) { if (ip->bi_disabled) continue; mutex_enter(&ip->bi_mutex); ip->bi_ioctl++; if (ilp32) { bzero(&cf32, sizeof (cf32)); cf32.flag = ip->bi_flags; (void) strncpy(cf32.master_vol, ii_pathname(ip->bi_mstfd), DSW_NAMELEN); (void) strncpy(cf32.shadow_vol, ip->bi_keyname, DSW_NAMELEN); (void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd) ? ii_pathname(ip->bi_bmpfd) : "<offline_bitmap>", DSW_NAMELEN); if (copyout(&cf32, (void *)cf32p, sizeof (cf32))) rc = EFAULT; cf32p++; } else { bzero(&cf, sizeof (cf)); cf.flag = ip->bi_flags; (void) strncpy(cf.master_vol, ii_pathname(ip->bi_mstfd), DSW_NAMELEN); (void) strncpy(cf.shadow_vol, ip->bi_keyname, DSW_NAMELEN); (void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd) ? ii_pathname(ip->bi_bmpfd) : "<offline_bitmap>", DSW_NAMELEN); if (copyout(&cf, (void *)cfp, sizeof (cf))) rc = EFAULT; cfp++; } _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); used++; } mutex_exit(&_ii_info_mutex); spcs_s_kfree(kstatus); if (rc) return (rc); ulist.list_used = used; if (ilp32) { ulist32.list_used = ulist.list_used; if (copyout(&ulist32, (void *)arg, sizeof (ulist32))) return (EFAULT); } else if (copyout(&ulist, (void *)arg, sizeof (ulist))) return (EFAULT); return (0); } /* * _ii_listlen * Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST * ioctl calls would return. * * Calling/Exit State: * Returns 0 on success, otherwise an error code is returned. * Result is returned as successful ioctl value. */ /*ARGSUSED*/ int _ii_listlen(int cmd, int ilp32, int *rvp) { _ii_info_t *ip; _ii_overflow_t *op; int count = 0; switch (cmd) { case DSWIOC_LISTLEN: mutex_enter(&_ii_info_mutex); for (ip = _ii_info_top; ip; ip = ip->bi_next) { if (ip->bi_disabled == 0) { count++; } } mutex_exit(&_ii_info_mutex); break; case DSWIOC_OLISTLEN: mutex_enter(&_ii_overflow_mutex); for (op = _ii_overflow_top; op; op = op->ii_next) count++; mutex_exit(&_ii_overflow_mutex); break; default: return (EINVAL); } *rvp = count; return (0); } /* * _ii_report_bmp * * Report to the user daemon that the bitmap has gone bad */ static int _ii_report_bmp(_ii_info_t *ip) { int rc; struct nskernd *nsk; nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP); if (!nsk) { return (ENOMEM); } nsk->command = NSKERND_IIBITMAP; nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE); (void) strncpy(nsk->char1, ip->bi_keyname, min(DSW_NAMELEN, NSC_MAXPATH)); rc = nskernd_get(nsk); if (rc == 0) { rc = (int)nsk->data1; } if (rc == 0) { DTRACE_PROBE(_ii_report_bmp_end); } else { DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc); } kmem_free(nsk, sizeof (*nsk)); return (rc); } /* * _ii_offline * Set volume offline flag(s) for a shadow. * * Calling/Exit State: * Returns 0 on success, otherwise an error code is returned * and any additional error information is copied out to the user. */ /*ARGSUSED*/ int _ii_offline(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_info_t *ip; int rc; spcs_s_info_t kstatus; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uparms.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ENOTFOUND)); if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ERSRVFAIL)); } mutex_exit(&ip->bi_mutex); _ii_error(ip, uparms.flags & DSW_OFFLINE); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, BMP); spcs_s_kfree(kstatus); return (0); } /* * _ii_wait * Wait for a copy to complete. * * Calling/Exit State: * Returns 0 if the copy completed, otherwise error code. * */ /*ARGSUSED*/ int _ii_wait(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_info_t *ip; int rc = 0; spcs_s_info_t kstatus; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; uparms.pid = uparms32.pid; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uparms.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ENOTFOUND)); while (ip->bi_flags & DSW_COPYINGP) { if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) { /* Awoken by a signal */ rc = EINTR; break; } } /* Is this an attempt to unlock the copy/update PID? */ if (uparms.flags & CV_LOCK_PID) { if (ip->bi_locked_pid == 0) { rc = DSW_ENOTLOCKED; } else if (uparms.pid == -1) { cmn_err(CE_WARN, "!ii: Copy/Update PID %d, cleared", ip->bi_locked_pid); ip->bi_locked_pid = 0; } else if (uparms.pid != ip->bi_locked_pid) { rc = DSW_EINUSE; } else { ip->bi_locked_pid = 0; } } _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } static int _ii_reset_mstvol(_ii_info_t *ip) { _ii_info_t *xip; if (!NSHADOWS(ip)) return (DSW_COPYINGS | DSW_COPYINGP); /* check for siblings updating master */ for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { if (xip == ip) continue; /* check if master is okay */ if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) { return (0); } } return (DSW_COPYINGS | DSW_COPYINGP); } /* * _ii_reset * Reset offlined underlying volumes * * Calling/Exit State: * Returns 0 on success, otherwise an error code is returned * and any additional error information is copied out to the user. */ /*ARGSUSED*/ int _ii_reset(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_info_t *ip; nsc_buf_t *tmp = NULL; int rc; int flags; ii_header_t *bm_header; spcs_s_info_t kstatus; int rtype; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uparms.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ENOTFOUND)); mutex_exit(&ip->bi_mutex); /* Figure out what to do according to what was flagged as */ if ((ip->bi_flags & DSW_OFFLINE) == 0) { /* Nothing offline, so no op */ mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_kfree(kstatus); return (0); } if (!ip->bi_bmpfd) { /* No bitmap fd, can't do anything */ mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_kfree(kstatus); return (DSW_EHDRBMP); } rtype = MSTR|SHDR|BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ERSRVFAIL)); } /* * Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE */ II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, &tmp); II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t))); if (!II_SUCCESS(rc)) { _ii_rlse_devs(ip, rtype); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); } bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr; if (bm_header == NULL) { _ii_rlse_devs(ip, rtype); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (tmp) (void) nsc_free_buf(tmp); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); } flags = ip->bi_flags & ~DSW_COPY_FLAGS; if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) { if (((flags & DSW_SHDOFFLINE) == 0) && ((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) { /* Shadow was OK but master was offline */ flags |= _ii_reset_mstvol(ip); } else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) { /* Shadow was offline, don't care what the master was */ flags |= (DSW_COPYINGM | DSW_COPYINGP); } } if (ip->bi_flags & DSW_VOVERFLOW) { ip->bi_flags &= ~DSW_VOVERFLOW; ip->bi_flags |= DSW_FRECLAIM; } flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW); if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) { /* free any overflow allocation */ ii_overflow_free(ip, INIT_OVR); /* Bitmap now OK, so set up new bitmap header */ (void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd), DSW_NAMELEN); (void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd), DSW_NAMELEN); (void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd), DSW_NAMELEN); if (ip->bi_cluster) { (void) strncpy(bm_header->clstr_name, ip->bi_cluster, DSW_NAMELEN); } if (ip->bi_group) { (void) strncpy(bm_header->group_name, ip->bi_group, DSW_NAMELEN); } bm_header->ii_type = (flags & DSW_GOLDEN) ? DSW_GOLDEN_TYPE : DSW_QUICK_TYPE; bm_header->ii_magic = DSW_DIRTY; bm_header->ii_version = II_HEADER_VERSION; bm_header->ii_shdfba = DSW_SHD_BM_OFFSET; bm_header->ii_copyfba = DSW_COPY_BM_OFFSET; bm_header->ii_throttle_delay = ip->bi_throttle_delay; bm_header->ii_throttle_unit = ip->bi_throttle_unit; ip->bi_shdfba = bm_header->ii_shdfba; ip->bi_copyfba = bm_header->ii_copyfba; } else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) { /* bitmap didn't go offline, but shadow did */ if (ip->bi_overflow) { ii_overflow_free(ip, RECLAIM); } } _ii_lock_chunk(ip, II_NULLCHUNK); mutex_enter(&ip->bi_mutex); II_FLAG_ASSIGN(flags, ip); mutex_exit(&ip->bi_mutex); rc = ii_fill_copy_bmp(ip); if (rc == 0) rc = II_ZEROBM(ip); if (rc == 0) { if ((ip->bi_flags&(DSW_GOLDEN)) == 0) { /* just clear bitmaps for dependent copy */ if (ip->bi_flags & DSW_TREEMAP) { bm_header->ii_state = ip->bi_flags; mutex_enter(&ip->bi_mutex); rc = _ii_bm_header_put(bm_header, ip, tmp); mutex_exit(&ip->bi_mutex); tmp = NULL; if (rc == 0) { rc = ii_tinit(ip); if (rc == 0) { mutex_enter(&ip->bi_mutex); bm_header = _ii_bm_header_get(ip, &tmp); mutex_exit(&ip->bi_mutex); } } } if (rc == 0) II_FLAG_CLRX(DSW_COPY_FLAGS, ip); /* * if copy flags were set, another process may be * waiting */ if (rc == 0 && (flags & DSW_COPYINGP)) cv_broadcast(&ip->bi_copydonecv); if (rc == 0) rc = II_COPYBM(ip); } } _ii_unlock_chunk(ip, II_NULLCHUNK); if (rc) { if (tmp) _ii_bm_header_free(bm_header, ip, tmp); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); } bm_header->ii_state = ip->bi_flags; mutex_enter(&ip->bi_mutex); rc = _ii_bm_header_put(bm_header, ip, tmp); if (!II_SUCCESS(rc)) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP)); } /* check with RDC */ if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ? CV_SHD2MST : 0, 1)) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } /* don't perform copy for dependent shadows */ if ((ip->bi_flags&(DSW_GOLDEN)) == 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } mutex_exit(&ip->bi_mutex); /* _ii_copyvol calls _ii_ioctl_done() */ if (ip->bi_flags & DSW_COPYINGS) rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1); else if (ip->bi_flags & DSW_COPYINGM) rc = _ii_copyvol(ip, 0, rtype, kstatus, 1); else { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); } _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } /* * _ii_version * Get version of the InstantImage module. * * Calling/Exit State: * Returns 0 on success, otherwise EFAULT is returned. * The major and minor revisions are copied out to the user if * successful. */ /*ARGSUSED*/ int _ii_version(intptr_t arg, int ilp32, int *rvp) { dsw_version_t uversion; dsw_version32_t uversion32; if (ilp32) { if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0) return (EFAULT); uversion32.major = dsw_major_rev; uversion32.minor = dsw_minor_rev; uversion32.micro = dsw_micro_rev; uversion32.baseline = dsw_baseline_rev; if (copyout(&uversion32, (void *)arg, sizeof (uversion32))) return (EFAULT); } else { if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0) return (EFAULT); uversion.major = dsw_major_rev; uversion.minor = dsw_minor_rev; uversion.micro = dsw_micro_rev; uversion.baseline = dsw_baseline_rev; if (copyout(&uversion, (void *)arg, sizeof (uversion))) return (EFAULT); } return (0); } /* * _ii_copyparm * Get and set copy parameters. * * Calling/Exit State: * Returns 0 on success, otherwise EFAULT is returned. * The previous values are returned to the user. */ /*ARGSUSED*/ int _ii_copyparm(intptr_t arg, int ilp32, int *rvp) { dsw_copyp_t copyp; dsw_copyp32_t copyp32; spcs_s_info_t kstatus; _ii_info_t *ip; int rc = 0; int tmp; if (ilp32) { if (copyin((void *)arg, ©p32, sizeof (copyp32)) < 0) return (EFAULT); II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t); copyp.status = (spcs_s_info_t)copyp32.status; } else if (copyin((void *)arg, ©p, sizeof (copyp)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!copyp.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(copyp.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_ENOTFOUND)); tmp = ip->bi_throttle_delay; if (copyp.copy_delay != -1) { if (copyp.copy_delay >= MIN_THROTTLE_DELAY && copyp.copy_delay <= MAX_THROTTLE_DELAY) ip->bi_throttle_delay = copyp.copy_delay; else { cmn_err(CE_WARN, "!ii: delay out of range %d", copyp.copy_delay); rc = EINVAL; } } copyp.copy_delay = tmp; tmp = ip->bi_throttle_unit; if (copyp.copy_unit != -1) { if (copyp.copy_unit >= MIN_THROTTLE_UNIT && copyp.copy_unit <= MAX_THROTTLE_UNIT) { if (rc != EINVAL) ip->bi_throttle_unit = copyp.copy_unit; } else { cmn_err(CE_WARN, "!ii: unit out of range %d", copyp.copy_unit); if (rc != EINVAL) { rc = EINVAL; ip->bi_throttle_delay = copyp.copy_delay; } } } copyp.copy_unit = tmp; _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (ilp32) { copyp32.copy_delay = copyp.copy_delay; copyp32.copy_unit = copyp.copy_unit; if (copyout(©p32, (void *)arg, sizeof (copyp32)) < 0) return (EFAULT); } else if (copyout(©p, (void *)arg, sizeof (copyp))) return (EFAULT); return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc)); } /* * _ii_suspend_vol * suspend an individual InstantImage group * * Calling/Exit State: * Returns 0 on success, nonzero otherwise */ int _ii_suspend_vol(_ii_info_t *ip) { _ii_info_t **xip; int copy_flag; int rc; nsc_buf_t *tmp = NULL; ii_header_t *bm_header; copy_flag = ip->bi_flags & DSW_COPY_FLAGS; _ii_stopvol(ip); ASSERT(total_ref(ip) == 0); if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) return (rc); /* this rw_enter forces us to drain all active IO */ rw_enter(&ip->bi_linkrw, RW_WRITER); rw_exit(&ip->bi_linkrw); mutex_enter(&_ii_info_mutex); for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { if (ip == *xip) break; } *xip = ip->bi_next; mutex_exit(&_ii_info_mutex); rc = II_SAVE_BMP(ip, 1); mutex_enter(&ip->bi_mutex); if (rc == 0) bm_header = _ii_bm_header_get(ip, &tmp); if (rc == 0 && bm_header) { bm_header->ii_magic = DSW_CLEAN; bm_header->ii_state |= copy_flag; bm_header->ii_throttle_delay = ip->bi_throttle_delay; bm_header->ii_throttle_unit = ip->bi_throttle_unit; /* copy over the mtime */ bm_header->ii_mtime = ip->bi_mtime; /* write it to disk */ rc = _ii_bm_header_put(bm_header, ip, tmp); } --iigkstat.num_sets.value.ul; mutex_exit(&ip->bi_mutex); ii_overflow_free(ip, NO_RECLAIM); _ii_rlse_devs(ip, BMP); ii_sibling_free(ip); return (rc); } /* * _ii_suspend_cluster * Cluster resource group is switching over to another node, so * all shadowed volumes in that group are suspended. * * Returns 0 on success, or ESRCH if the name of the cluster resource * group couldn't be found. */ int _ii_suspend_cluster(char *shadow_vol) { int found, last; uint64_t hash; _ii_info_t *ip; _ii_lsthead_t **cp, *xcp; _ii_lstinfo_t **np, *xnp; /* find appropriate cluster list */ mutex_enter(&_ii_cluster_mutex); hash = nsc_strhash(shadow_vol); for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) { if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol, (*cp)->lst_name, DSW_NAMELEN) == 0) break; } if (!*cp) { mutex_exit(&_ii_cluster_mutex); return (DSW_ECNOTFOUND); } found = 1; last = 0; while (found && !last) { found = 0; mutex_enter(&_ii_info_mutex); for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) { ip = (*np)->lst_ip; if (ip->bi_disabled) continue; found++; ip->bi_disabled = 1; if (NSHADOWS(ip) && (ip->bi_master == ip)) { ip->bi_flags &= (~DSW_COPYING); ip->bi_state |= DSW_MULTIMST; } mutex_exit(&_ii_info_mutex); xnp = *np; *np = (*np)->lst_next; kmem_free(xnp, sizeof (_ii_lstinfo_t)); ip->bi_cluster = NULL; (void) _ii_suspend_vol(ip); break; } if (found == 0) mutex_exit(&_ii_info_mutex); else if (!(*cp)->lst_start) { xcp = *cp; *cp = (*cp)->lst_next; kmem_free(xcp, sizeof (_ii_lsthead_t)); last = 1; } } mutex_exit(&_ii_cluster_mutex); return (0); } /* * _ii_shutdown * System is shutting down, so all shadowed volumes are suspended. * * This always succeeds, so always returns 0. */ /* ARGSUSED */ int _ii_shutdown(intptr_t arg, int *rvp) { _ii_info_t **xip, *ip; int found; *rvp = 0; _ii_shutting_down = 1; /* Go through the list until only disabled entries are found */ found = 1; while (found) { found = 0; mutex_enter(&_ii_info_mutex); for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { ip = *xip; if (ip->bi_disabled) { /* Also covers not fully configured yet */ continue; } found++; ip->bi_disabled = 1; mutex_exit(&_ii_info_mutex); (void) _ii_suspend_vol(ip); break; } if (found == 0) mutex_exit(&_ii_info_mutex); } _ii_shutting_down = 0; return (0); } /* * _ii_suspend * Suspend an InstantImage, saving its state to allow a subsequent resume. * * Calling/Exit State: * Returns 0 if the pair was suspended. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ /* ARGSUSED */ int _ii_suspend(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_info_t *ip; int rc; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); if ((uparms.flags & CV_IS_CLUSTER) != 0) { rc = _ii_suspend_cluster(uparms.shadow_vol); } else { mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uparms.shadow_vol); if (ip == NULL) { mutex_exit(&_ii_info_mutex); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ENOTFOUND)); } ip->bi_disabled = 1; if (NSHADOWS(ip) && (ip->bi_master == ip)) { ip->bi_flags &= (~DSW_COPYING); ip->bi_state |= DSW_MULTIMST; } mutex_exit(&_ii_info_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); rc = _ii_suspend_vol(ip); } return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } /* * _ii_abort * Stop any copying process for shadow. * * Calling/Exit State: * Returns 0 if the abort succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ /* ARGSUSED */ int _ii_abort(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uabort; dsw_ioctl32_t uabort32; _ii_info_t *ip; int rc; spcs_s_info_t kstatus; if (ilp32) { if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0) return (EFAULT); II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t); uabort.status = (spcs_s_info_t)uabort32.status; } else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uabort.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uabort.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_ENOTFOUND)); mutex_exit(&ip->bi_mutex); rc = _ii_stopcopy(ip); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc)); } /* * _ii_segment * Copy out II pair bitmaps (cpy, shd, idx) in segments * * Calling/Exit State: * Returns 0 if the operation succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. * */ int _ii_segment(intptr_t arg, int ilp32, int *rvp) { dsw_segment_t usegment; dsw_segment32_t usegment32; _ii_info_t *ip; int rc, size; spcs_s_info_t kstatus; int32_t bi_idxfba; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &usegment32, sizeof (usegment32))) return (EFAULT); usegment.status = (spcs_s_info_t)usegment32.status; bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN); usegment.seg_number = (unsigned)usegment32.seg_number; usegment.shd_bitmap = (unsigned char *)(unsigned long)usegment32.shd_bitmap; usegment.shd_size = usegment32.shd_size; usegment.cpy_bitmap = (unsigned char *)(unsigned long)usegment32.cpy_bitmap; usegment.cpy_size = usegment32.cpy_size; usegment.idx_bitmap = (unsigned char *)(unsigned long)usegment32.idx_bitmap; usegment.idx_size = usegment32.idx_size; } else if (copyin((void *)arg, &usegment, sizeof (usegment))) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (usegment.shadow_vol[0]) { mutex_enter(&_ii_info_mutex); ip = _ii_find_set(usegment.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_ENOTFOUND)); } else return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EEMPTY)); mutex_exit(&ip->bi_mutex); size = ((((ip->bi_size + (DSW_SIZE-1)) / DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS; bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba); if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) || (usegment.shd_size > size) || (usegment.cpy_size > size) || (!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) { _ii_ioctl_done(ip); return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EMISMATCH)); } if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_ERSRVFAIL)); } if (usegment.shd_bitmap && usegment.shd_size > 0) rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number, usegment.shd_bitmap, usegment.shd_size); if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0) rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number, usegment.cpy_bitmap, usegment.cpy_size); if (!(ip->bi_flags & DSW_GOLDEN)) { if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0) rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32, usegment.idx_bitmap, usegment.idx_size); } _ii_rlse_devs(ip, BMP); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO)); } spcs_s_kfree(kstatus); return (0); } /* * _ii_bitmap * Copy out II pair bitmaps to user program * * Calling/Exit State: * Returns 0 if the operation succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ int _ii_bitmap(intptr_t arg, int ilp32, int *rvp) { dsw_bitmap_t ubitmap; dsw_bitmap32_t ubitmap32; _ii_info_t *ip; int rc; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32))) return (EFAULT); ubitmap.status = (spcs_s_info_t)ubitmap32.status; bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN); ubitmap.shd_bitmap = (unsigned char *)(unsigned long)ubitmap32.shd_bitmap; ubitmap.shd_size = ubitmap32.shd_size; ubitmap.copy_bitmap = (unsigned char *)(unsigned long)ubitmap32.copy_bitmap; ubitmap.copy_size = ubitmap32.copy_size; } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap))) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ubitmap.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ubitmap.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ENOTFOUND)); mutex_exit(&ip->bi_mutex); if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ERSRVFAIL)); } if (ubitmap.shd_bitmap && ubitmap.shd_size > 0) rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap, ubitmap.shd_size); if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0) rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap, ubitmap.copy_size); _ii_rlse_devs(ip, BMP); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); } spcs_s_kfree(kstatus); return (0); } /* * _ii_export * Exports the shadow volume * * Calling/Exit State: * Returns 0 if the shadow was exported. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: */ int _ii_export(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uparms; dsw_ioctl32_t uparms32; _ii_info_t *ip; nsc_fd_t *fd; int rc = 0; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0) return (EFAULT); II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t); uparms.status = (spcs_s_info_t)uparms32.status; } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uparms.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uparms.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ENOTFOUND)); if ((ip->bi_flags & DSW_GOLDEN) == 0 || ((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) { /* * Cannot export a dependent copy or while still copying or * the shadow is already in an exported state */ rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT) ? DSW_EALREADY : DSW_EDEPENDENCY; _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc)); } if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_ERSRVFAIL)); } II_FLAG_SET(DSW_SHDEXPORT, ip); mutex_exit(&ip->bi_mutex); /* this rw_enter forces us to drain all active IO */ rw_enter(&ip->bi_linkrw, RW_WRITER); rw_exit(&ip->bi_linkrw); mutex_enter(&ip->bi_mutex); _ii_rlse_devs(ip, BMP); /* Shut shadow volume. */ if (ip->bi_shdfd) { if (ip->bi_shdrsrv) { nsc_release(ip->bi_shdfd); ip->bi_shdrsrv = NULL; } fd = ip->bi_shdfd; ip->bi_shdfd = NULL; mutex_exit(&ip->bi_mutex); (void) nsc_close(fd); mutex_enter(&ip->bi_mutex); } if (ip->bi_shdrfd) { if (ip->bi_shdrrsrv) { nsc_release(ip->bi_shdrfd); ip->bi_shdrrsrv = NULL; } fd = ip->bi_shdrfd; ip->bi_shdrfd = NULL; mutex_exit(&ip->bi_mutex); (void) nsc_close(fd); mutex_enter(&ip->bi_mutex); } _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); (void) _ii_reserve_begin(ip); if (ip->bi_shd_tok) { (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); ip->bi_shd_tok = NULL; } if (ip->bi_shdr_tok) { (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow"); ip->bi_shdr_tok = NULL; } _ii_reserve_end(ip); spcs_s_kfree(kstatus); return (0); } /* * _ii_join * Rejoins the shadow volume * * Calling/Exit State: * Returns 0 if the shadow was exported. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: */ int _ii_join(intptr_t arg, int ilp32, int *rvp) { dsw_bitmap_t ubitmap; dsw_bitmap32_t ubitmap32; _ii_info_t *ip; uint64_t bm_size; int rc = 0; int rtype = 0; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0) return (EFAULT); II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t); ubitmap.status = (spcs_s_info_t)ubitmap32.status; ubitmap.shd_bitmap = (unsigned char *)(unsigned long)ubitmap32.shd_bitmap; ubitmap.shd_size = ubitmap32.shd_size; } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ubitmap.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ubitmap.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ENOTFOUND)); /* * Check that group has shadow exported. */ if ((ip->bi_flags & DSW_SHDEXPORT) == 0) { /* * Cannot join if the shadow isn't exported. */ _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ENOTEXPORTED)); } /* check bitmap is at least large enough for master volume size */ bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); if (ubitmap.shd_size < bm_size) { /* bitmap is to small */ _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EINVALBMP)); } /* read in bitmap and or with differences bitmap */ rtype = BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ERSRVFAIL)); } rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap, ubitmap.shd_size); /* open up shadow */ if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN)); } ii_register_shd(ip); if (!rc) II_FLAG_CLR(DSW_SHDEXPORT, ip); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); } spcs_s_kfree(kstatus); return (0); } /* * _ii_ocreate * Configures a volume suitable for use as an overflow volume. * * Calling/Exit State: * Returns 0 if the volume was configured successfully. Otherwise * an error code is returned and any additional error information * is copied out to the user. * * Description: */ int _ii_ocreate(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t uioctl; dsw_ioctl32_t uioctl32; _ii_overflow_t ov; _ii_overflow_t *op = &ov; int rc = 0; nsc_fd_t *fd; nsc_iodev_t *iodev; nsc_size_t vol_size; char *overflow_vol; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0) return (EFAULT); II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t); uioctl.status = (spcs_s_info_t)uioctl32.status; } else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0) return (EFAULT); overflow_vol = uioctl.shadow_vol; kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!overflow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY)); if (ii_volume(overflow_vol, 0) != NONE) return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE)); fd = nsc_open(overflow_vol, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc); if (!fd) fd = nsc_open(uioctl.shadow_vol, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc); if (fd == NULL) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); } if ((rc = nsc_reserve(fd, 0)) != 0) { spcs_s_add(kstatus, rc); (void) nsc_close(fd); return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_ERSRVFAIL)); } /* setup magic number etc; */ rc = nsc_partsize(fd, &vol_size); if (rc) { spcs_s_add(kstatus, rc); (void) nsc_close(fd); return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); } op->ii_hmagic = II_OMAGIC; /* take 1 off as chunk 0 contains header */ op->ii_nchunks = (vol_size / DSW_SIZE) -1; op->ii_drefcnt = 0; op->ii_used = 1; /* we have used the header */ op->ii_unused = op->ii_nchunks - op->ii_used; op->ii_freehead = II_NULLNODE; op->ii_hversion = OV_HEADER_VERSION; op->ii_flags = 0; op->ii_urefcnt = 0; (void) strncpy(op->ii_volname, uioctl.shadow_vol, DSW_NAMELEN); rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); (void) nsc_release(fd); (void) nsc_close(fd); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO)); } spcs_s_kfree(kstatus); return (0); } /* * _ii_oattach * Attaches the volume in the "bitmap_vol" field as an overflow volume. * * Calling/Exit State: * Returns 0 if the volume was attached. Fails if the shadow group * is of the wrong type (eg independent) or already has an overflow * volume attached. * * Description: */ int _ii_oattach(intptr_t arg, int ilp32, int *rvp) { dsw_config_t uconfig; dsw_config32_t uconfig32; _ii_info_t *ip; int rc = 0; int rtype = 0; ii_header_t *bm_header; nsc_buf_t *tmp = NULL; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0) return (EFAULT); II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t); uconfig.status = (spcs_s_info_t)uconfig32.status; } else if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!uconfig.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY)); switch (ii_volume(uconfig.bitmap_vol, 0)) { case NONE: case OVR: break; default: return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE)); } mutex_enter(&_ii_info_mutex); ip = _ii_find_set(uconfig.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_ENOTFOUND)); /* check shadow doesn't already have an overflow volume */ if (ip->bi_overflow) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EALREADY)); } /* check shadow is mapped so can have an overflow */ if ((ip->bi_flags&DSW_TREEMAP) == 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EWRONGTYPE)); } rtype = BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_ERSRVFAIL)); } /* attach volume */ if ((rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, uconfig.status, rc)); } /* re-write header so shadow can be restarted with overflow volume */ bm_header = _ii_bm_header_get(ip, &tmp); if (bm_header == NULL) { /* detach volume */ ii_overflow_free(ip, RECLAIM); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EHDRBMP)); } (void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol, DSW_NAMELEN); (void) _ii_bm_header_put(bm_header, ip, tmp); _ii_rlse_devs(ip, rtype); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_kfree(kstatus); return (0); } /* * _ii_odetach * Breaks the link with the overflow volume. * * Calling/Exit State: * Returns 0 if the overflow volume was detached. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: */ int _ii_odetach(intptr_t arg, int ilp32, int *rvp) { dsw_bitmap_t ubitmap; dsw_bitmap32_t ubitmap32; _ii_info_t *ip; int rc = 0; int rtype = 0; ii_header_t *bm_header; nsc_buf_t *tmp = NULL; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0) return (EFAULT); II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t); ubitmap.status = (spcs_s_info_t)ubitmap32.status; } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ubitmap.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ubitmap.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ENOTFOUND)); if ((ip->bi_flags&DSW_VOVERFLOW) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EODEPENDENCY)); } rtype = BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_ERSRVFAIL)); } ii_overflow_free(ip, RECLAIM); /* re-write header to break link with overflow volume */ bm_header = _ii_bm_header_get(ip, &tmp); if (bm_header == NULL) { _ii_rlse_devs(ip, rtype); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EHDRBMP)); } bzero(bm_header->overflow_vol, DSW_NAMELEN); (void) _ii_bm_header_put(bm_header, ip, tmp); _ii_rlse_devs(ip, rtype); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO)); } spcs_s_kfree(kstatus); --iigkstat.assoc_over.value.ul; return (0); } /* * _ii_gc_list * Returns a list of all lists, or all entries in a list * */ int _ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex, _ii_lsthead_t *lst) { dsw_aioctl_t ulist; dsw_aioctl32_t ulist32; size_t name_offset; int i; spcs_s_info_t kstatus; char *carg = (char *)arg; uint64_t hash; _ii_lsthead_t *cp; _ii_lstinfo_t *np; *rvp = 0; name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); if (ilp32) { if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0) return (EFAULT); II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t); ulist.status = (spcs_s_info_t)ulist32.status; name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); } else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); mutex_enter(mutex); if (ulist.shadow_vol[ 0 ] != 0) { /* search for specific list */ hash = nsc_strhash(ulist.shadow_vol); for (cp = lst; cp; cp = cp->lst_next) { if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol, cp->lst_name, DSW_NAMELEN) == 0) { break; } } if (cp) { for (i = 0, np = cp->lst_start; i < ulist.count && np; np = np->lst_next, carg += DSW_NAMELEN, i++) { if (copyout(np->lst_ip->bi_keyname, carg + name_offset, DSW_NAMELEN)) { mutex_exit(mutex); return (spcs_s_ocopyoutf(&kstatus, ulist.status, EFAULT)); } } } else { i = 0; } } else { /* return full list */ for (i = 0, cp = lst; i < ulist.count && cp; carg += DSW_NAMELEN, i++, cp = cp->lst_next) { if (copyout(cp->lst_name, carg + name_offset, DSW_NAMELEN)) { mutex_exit(mutex); return (spcs_s_ocopyoutf(&kstatus, ulist.status, EFAULT)); } } } mutex_exit(mutex); ulist32.count = ulist.count = i; if (ilp32) { if (copyout(&ulist32, (void *) arg, name_offset)) return (EFAULT); } else { if (copyout(&ulist, (void*) arg, name_offset)) return (EFAULT); } return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0)); } /* * _ii_olist * Breaks the link with the overflow volume. * * Calling/Exit State: * Returns 0 if the overflow volume was detached. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: */ int _ii_olist(intptr_t arg, int ilp32, int *rvp) { dsw_aioctl_t ulist; dsw_aioctl32_t ulist32; _ii_overflow_t *op; size_t name_offset; int rc = 0; int i; char *carg = (char *)arg; spcs_s_info_t kstatus; *rvp = 0; name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); if (ilp32) { if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0) return (EFAULT); II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t); ulist.status = (spcs_s_info_t)ulist32.status; name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); i = 0; mutex_enter(&_ii_overflow_mutex); for (op = _ii_overflow_top; i < ulist.count && op; carg += DSW_NAMELEN) { if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) { mutex_exit(&_ii_overflow_mutex); return (spcs_s_ocopyoutf(&kstatus, ulist.status, EFAULT)); } i++; op = op->ii_next; } mutex_exit(&_ii_overflow_mutex); ulist32.count = ulist.count = i; /* return count of items listed to user */ if (ilp32) { if (copyout(&ulist32, (void *)arg, name_offset)) return (EFAULT); } else { if (copyout(&ulist, (void *)arg, name_offset)) return (EFAULT); } return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc)); } /* * _ii_ostat * Breaks the link with the overflow volume. * * Calling/Exit State: * Returns 0 if the overflow volume was detached. Otherwise an error code * is returned and any additional error information is copied * out to the user. * * Description: */ int _ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2) { dsw_ostat_t ustat; dsw_ostat32_t ustat32; _ii_overflow_t *op; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0) return (EFAULT); II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t); ustat.status = (spcs_s_info_t)ustat32.status; } else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ustat.overflow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY)); op = _ii_find_overflow(ustat.overflow_vol); if (op == NULL) return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_ENOTFOUND)); ustat.nchunks = op->ii_nchunks; ustat.used = op->ii_used; ustat.unused = op->ii_unused; ustat.drefcnt = op->ii_drefcnt; ustat.crefcnt = op->ii_crefcnt; if (is_iost_2) { ustat.hversion = op->ii_hversion; ustat.flags = op->ii_flags; ustat.hmagic = op->ii_hmagic; } spcs_s_kfree(kstatus); if (ilp32) { ustat32.nchunks = ustat.nchunks; ustat32.used = ustat.used; ustat32.unused = ustat.unused; ustat32.drefcnt = ustat.drefcnt; ustat32.crefcnt = ustat.crefcnt; if (is_iost_2) { ustat32.hversion = ustat.hversion; ustat32.flags = ustat.flags; ustat32.hmagic = ustat.hmagic; } if (copyout(&ustat32, (void *)arg, sizeof (ustat32))) return (EFAULT); } else { if (copyout(&ustat, (void *)arg, sizeof (ustat))) return (EFAULT); } return (0); } /* * _ii_move_grp() * Move a set from one group to another, possibly creating the new * group. */ int _ii_move_grp(intptr_t arg, int ilp32, int *rvp) { dsw_movegrp_t umove; dsw_movegrp32_t umove32; spcs_s_info_t kstatus; _ii_info_t *ip; int rc = 0; nsc_buf_t *tmp; ii_header_t *bm_header; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0) return (EFAULT); II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t); umove.status = (spcs_s_info_t)umove32.status; } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!umove.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(umove.shadow_vol); mutex_exit(&_ii_info_mutex); if (!ip) return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_ENOTFOUND)); if (!umove.new_group[0]) { /* are we clearing the group association? */ if (ip->bi_group) { DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname, char *, ip->bi_group); rc = II_UNLINK_GROUP(ip); } } else if (!ip->bi_group) { rc = II_LINK_GROUP(ip, umove.new_group); DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname, char *, ip->bi_group); } else { /* remove it from one group and add it to the other */ DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname, char *, ip->bi_group, char *, umove.new_group); rc = II_UNLINK_GROUP(ip); if (!rc) rc = II_LINK_GROUP(ip, umove.new_group); } /* ** BEGIN UPDATE BITMAP HEADER ** */ if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_ERSRVFAIL)); } bm_header = _ii_bm_header_get(ip, &tmp); if (bm_header) { (void) strncpy(bm_header->group_name, umove.new_group, DSW_NAMELEN); (void) _ii_bm_header_put(bm_header, ip, tmp); } _ii_rlse_devs(ip, BMP); /* ** END UPDATE BITMAP HEADER ** */ _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, umove.status, rc)); } /* * _ii_change_tag() * Move a set from one group to another, possibly creating the new * group. */ int _ii_change_tag(intptr_t arg, int ilp32, int *rvp) { dsw_movegrp_t umove; dsw_movegrp32_t umove32; spcs_s_info_t kstatus; _ii_info_t *ip; int rc = 0; nsc_buf_t *tmp; ii_header_t *bm_header; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0) return (EFAULT); II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t); umove.status = (spcs_s_info_t)umove32.status; } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!umove.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(umove.shadow_vol); mutex_exit(&_ii_info_mutex); if (!ip) return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_ENOTFOUND)); if (!umove.new_group[0]) { /* are we clearing the group association? */ if (ip->bi_cluster) { DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname, char *, ip->bi_cluster); rc = II_UNLINK_CLUSTER(ip); } } else if (!ip->bi_cluster) { /* are we adding it to a group for the first time? */ rc = II_LINK_CLUSTER(ip, umove.new_group); DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname, char *, ip->bi_cluster); } else { /* remove it from one group and add it to the other */ DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname, char *, ip->bi_cluster, char *, umove.new_group); rc = II_UNLINK_CLUSTER(ip); if (!rc) rc = II_LINK_CLUSTER(ip, umove.new_group); } /* ** BEGIN UPDATE BITMAP HEADER ** */ if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_ERSRVFAIL)); } bm_header = _ii_bm_header_get(ip, &tmp); if (bm_header) { (void) strncpy(bm_header->clstr_name, umove.new_group, DSW_NAMELEN); (void) _ii_bm_header_put(bm_header, ip, tmp); } _ii_rlse_devs(ip, BMP); /* ** END UPDATE BITMAP HEADER ** */ _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(&kstatus, umove.status, rc)); } /* * _ii_spcs_s_ocopyoutf() * Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits * the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this * functionality as it is sometimes called by _ii_control_copy() which * has no user context to copy any errors into. At all other times a NULL * spcs_s_info_t argument would indicate a bug in the calling function. */ static int _ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err) { if (ustatus) return (spcs_s_ocopyoutf(kstatusp, ustatus, err)); spcs_s_kfree(*kstatusp); return (err); } static int _ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid, spcs_s_info_t ustatus) { _ii_info_t *xip; int rc; int rtype; if ((ip->bi_flags & DSW_COPYINGP) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); } if (ip->bi_flags & DSW_OFFLINE) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE)); } if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EISEXPORTED)); } if ((flags & CV_SHD2MST) == CV_SHD2MST) { if ((ip->bi_flags & DSW_COPYINGM) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); } /* check if any sibling shadow is copying towards this master */ for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); } } } if (((flags & CV_SHD2MST) == 0) && ((ip->bi_flags & DSW_COPYINGS) != 0)) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING)); } if (ip->bi_flags & DSW_TREEMAP) { if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINCOMPLETE)); } } /* Assure that no other PID owns this copy/update */ if (ip->bi_locked_pid == 0) { if (flags & CV_LOCK_PID) ip->bi_locked_pid = pid; } else if (ip->bi_locked_pid != pid) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE)); } mutex_exit(&ip->bi_mutex); rtype = MSTR|SHDR|BMP; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(*kstatusp, rc); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ERSRVFAIL)); } if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOPACKAGE)); } return (0); } static int _ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag) { int rc = 0; int rtype = MSTR|SHDR|BMP; _ii_overflow_t *op; int quick_update = 0; waitflag = (waitflag != 0); /* * a copy of a tree-mapped device must be downgraded to * an update. */ if (ip->bi_flags & DSW_TREEMAP) flags |= CV_BMP_ONLY; /* * If we want to update the dependent shadow we only need to zero * the shadow bitmap. */ if (((ip->bi_flags & DSW_GOLDEN) == 0) && (flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) { DTRACE_PROBE(DEPENDENT); /* assign updating time */ ip->bi_mtime = ddi_get_time(); if (ip->bi_flags & DSW_TREEMAP) { DTRACE_PROBE(COMPACT_DEPENDENT); if (ip->bi_overflow && (ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) { /* attempt to do a quick update */ quick_update = 1; ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE; ip->bi_overflow->ii_detachcnt = 1; } rc = ii_tinit(ip); if (quick_update && ip->bi_overflow) { /* clean up */ ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE); ip->bi_overflow->ii_detachcnt = 0; } } if (rc == 0) rc = II_ZEROBM(ip); /* update copy of shadow */ if (((op = ip->bi_overflow) != NULL) && (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) { mutex_enter(&_ii_overflow_mutex); if (ip->bi_flags & DSW_OVRHDRDRTY) { mutex_enter(&ip->bi_mutex); ip->bi_flags &= ~DSW_OVRHDRDRTY; mutex_exit(&ip->bi_mutex); ASSERT(op->ii_urefcnt > 0); op->ii_urefcnt--; } if (op->ii_urefcnt == 0) { op->ii_flags &= ~IIO_CNTR_INVLD; op->ii_unused = op->ii_nchunks - 1; } mutex_exit(&_ii_overflow_mutex); } mutex_enter(&ip->bi_mutex); II_FLAG_CLR(DSW_OVERFLOW, ip); mutex_exit(&ip->bi_mutex); _ii_unlock_chunk(ip, II_NULLCHUNK); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); if (rc) { spcs_s_add(kstatus, rc); return (DSW_EIO); } else { DTRACE_PROBE(_ii_do_copy_end); return (0); } } /* * need to perform an actual copy. */ /* * Perform bitmap copy if asked or from dependent shadow to master. */ if ((flags & CV_BMP_ONLY) || ((flags & CV_SHD2MST) && ((ip->bi_flags & DSW_GOLDEN) == 0))) { DTRACE_PROBE(INDEPENDENT_fast); rc = II_ORBM(ip); /* save shadow bits for copy */ } else { DTRACE_PROBE(INDEPENDENT_slow); rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */ } if (rc == 0) rc = II_ZEROBM(ip); _ii_unlock_chunk(ip, II_NULLCHUNK); if (rc == 0) { mutex_enter(&ip->bi_mutex); if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) { rc = (ip->bi_flags & DSW_COPYINGP) ? DSW_ECOPYING : DSW_EISEXPORTED; _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); _ii_rlse_devs(ip, rtype); return (rc); } /* assign copying time */ ip->bi_mtime = ddi_get_time(); if (flags & CV_SHD2MST) II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip); else II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip); mutex_exit(&ip->bi_mutex); rc = _ii_copyvol(ip, (flags & CV_SHD2MST), rtype, kstatus, waitflag); } else { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); } if (waitflag) _ii_rlse_devs(ip, rtype); return (rc); } /* * _ii_copy * Copy or update (take snapshot) II volume. * * Calling/Exit State: * Returns 0 if the operation succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ int _ii_copy(intptr_t arg, int ilp32, int *rvp) { dsw_ioctl_t ucopy; dsw_ioctl32_t ucopy32; _ii_info_t *ip; int rc = 0; spcs_s_info_t kstatus; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0) return (EFAULT); II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t); ucopy.status = (spcs_s_info_t)ucopy32.status; } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ucopy.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ucopy.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_ENOTFOUND)); /* Check that the copy/update makes sense */ if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid, ucopy.status)) == 0) { /* perform the copy */ _ii_lock_chunk(ip, II_NULLCHUNK); /* _ii_do_copy() calls _ii_ioctl_done() */ rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1); return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc)); } return (rc); } /* * _ii_mass_copy * Copies/updates the sets pointed to in the ipa array. * * Calling/Exit State: * Returns 0 if the operations was successful. Otherwise an * error code. */ int _ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait) { int i; int rc = 0; int failed; int rtype = MSTR|SHDR|BMP; _ii_info_t *ip; spcs_s_info_t kstatus; kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); /* Check copy validitity */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid, ucopy->status); if (rc) { /* Clean up the mess */ DTRACE_PROBE1(_ii_mass_copy_end1, int, rc); /* * The array ipa now looks like: * 0..(i-1): needs mutex_enter/ioctl_done/mutex_exit * i: needs nothing (_ii_chk_copy does cleanup) * (i+1)..n: needs just ioctl_done/mutex_exit */ failed = i; for (i = 0; i < failed; i++) { mutex_enter(&(ipa[i]->bi_mutex)); _ii_ioctl_done(ipa[i]); mutex_exit(&(ipa[i]->bi_mutex)); _ii_rlse_devs(ipa[i], rtype); } /* skip 'failed', start with failed + 1 */ for (i = failed + 1; i < ucopy->count; i++) { _ii_ioctl_done(ipa[i]); mutex_exit(&(ipa[i]->bi_mutex)); } return (rc); } } /* Check for duplicate shadows in same II group */ if (ucopy->flags & CV_SHD2MST) { /* Reset the state of all masters */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; ip->bi_master->bi_state &= ~DSW_MSTTARGET; } for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; /* * Check the state of the master. If DSW_MSTTARGET is * set, it's because this master is attached to another * shadow within this set. */ if (ip->bi_master->bi_state & DSW_MSTTARGET) { rc = EINVAL; break; } /* * Set the DSW_MSTTARGET bit on the master associated * with this shadow. This will allow us to detect * multiple shadows pointing to this master within * this loop. */ ip->bi_master->bi_state |= DSW_MSTTARGET; } } /* Handle error */ if (rc) { DTRACE_PROBE1(_ii_mass_copy_end2, int, rc); for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; _ii_rlse_devs(ip, rtype); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); } return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc)); } /* Lock bitmaps & prepare counts */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; _ii_lock_chunk(ip, II_NULLCHUNK); if (ip->bi_overflow) { ip->bi_overflow->ii_detachcnt = 0; } } /* determine which volumes we're dealing with */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; if (ip->bi_overflow) { ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE; if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) { ++ip->bi_overflow->ii_detachcnt; } } } /* Perform copy */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait); /* Hum... what to do if one of these fails? */ } /* clear out flags so as to prevent any accidental reuse */ for (i = 0; i < ucopy->count; i++) { ip = ipa[i]; if (ip->bi_overflow) ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE); } /* * We can only clean up the kstatus structure if there are * no waiters. If someone's waiting for the information, * _ii_copyvolp() uses spcs_s_add to write to kstatus. Panic * would ensue if we freed it up now. */ if (!wait) rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc); return (rc); } /* * _ii_list_copy * Retrieve a list from a character array and use _ii_mass_copy to * initiate a copy/update operation on all of the specified sets. * * Calling/Exit State: * Returns 0 if the operations was successful. Otherwise an * error code. */ int _ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait) { int i; int rc = 0; char *name; _ii_info_t *ip; _ii_info_t **ipa; ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP); /* Reserve devices */ name = list; mutex_enter(&_ii_info_mutex); for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) { ip = _ii_find_set(name); if (ip == NULL) { rc = DSW_ENOTFOUND; break; } ipa[i] = ip; } if (rc != 0) { /* Failed to find all sets, release those we do have */ while (i-- > 0) { ip = ipa[i]; mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); } } else { /* Begin copy operation */ rc = _ii_mass_copy(ipa, ucopy, wait); } mutex_exit(&_ii_info_mutex); kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count); return (rc); } /* * _ii_group_copy * Retrieve list of sets in a group and use _ii_mass_copy to initiate * a copy/update of all of them. * * Calling/Exit State: * Returns 0 if the operations was successful. Otherwise an * error code. */ int _ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait) { int i; int rc; uint64_t hash; _ii_info_t **ipa; _ii_lsthead_t *head; _ii_lstinfo_t *np; /* find group */ hash = nsc_strhash(name); mutex_enter(&_ii_group_mutex); for (head = _ii_group_top; head; head = head->lst_next) { if (hash == head->lst_hash && strncmp(head->lst_name, name, DSW_NAMELEN) == 0) break; } if (!head) { mutex_exit(&_ii_group_mutex); DTRACE_PROBE(_ii_group_copy); return (DSW_EGNOTFOUND); } /* Count entries */ for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next) ++ucopy->count; if (ucopy->count == 0) { mutex_exit(&_ii_group_mutex); return (DSW_EGNOTFOUND); } ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP); if (ipa == NULL) { mutex_exit(&_ii_group_mutex); return (ENOMEM); } /* Create list */ mutex_enter(&_ii_info_mutex); np = head->lst_start; for (i = 0; i < ucopy->count; i++) { ASSERT(np != 0); ipa[i] = np->lst_ip; mutex_enter(&ipa[i]->bi_mutex); ipa[i]->bi_ioctl++; np = np->lst_next; } /* Begin copy operation */ rc = _ii_mass_copy(ipa, ucopy, wait); mutex_exit(&_ii_info_mutex); mutex_exit(&_ii_group_mutex); kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count); return (rc); } /* * _ii_acopy * Copy or update (take snapshot) II multiple volumes. * * Calling/Exit State: * Returns 0 if the operation succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ int _ii_acopy(intptr_t arg, int ilp32, int *rvp) { int rc; size_t name_offset; char *list; char *nptr; char name[DSW_NAMELEN]; dsw_aioctl_t ucopy; dsw_aioctl32_t ucopy32; spcs_s_info_t kstatus; *rvp = 0; name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]); if (ilp32) { if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0) return (EFAULT); II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t); ucopy.status = (spcs_s_info_t)ucopy32.status; name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]); } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); nptr = (char *)arg + name_offset; rc = 0; if (ucopy.flags & CV_IS_GROUP) { if (copyin(nptr, name, DSW_NAMELEN) < 0) return (spcs_s_ocopyoutf(&kstatus, ucopy.status, EFAULT)); /* kstatus information is handled within _ii_group_copy */ rc = _ii_group_copy(name, &ucopy, 0); } else if (ucopy.count > 0) { list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP); if (list == NULL) return (spcs_s_ocopyoutf(&kstatus, ucopy.status, ENOMEM)); if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0) return (spcs_s_ocopyoutf(&kstatus, ucopy.status, EFAULT)); rc = _ii_list_copy(list, &ucopy, 0); kmem_free(list, DSW_NAMELEN * ucopy.count); } return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc)); } /* * _ii_bitsset * Copy out II pair bitmaps to user program * * Calling/Exit State: * Returns 0 if the operation succeeded. Otherwise an error code * is returned and any additional error information is copied * out to the user. */ int _ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp) { dsw_bitsset_t ubitsset; dsw_bitsset32_t ubitsset32; nsc_size_t nbitsset; _ii_info_t *ip; int rc; spcs_s_info_t kstatus; int bitmap_size; *rvp = 0; if (ilp32) { if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32))) return (EFAULT); ubitsset.status = (spcs_s_info_t)ubitsset32.status; bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN); } else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset))) return (EFAULT); kstatus = spcs_s_kcreate(); if (kstatus == NULL) return (ENOMEM); if (!ubitsset.shadow_vol[0]) return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EEMPTY)); mutex_enter(&_ii_info_mutex); ip = _ii_find_set(ubitsset.shadow_vol); mutex_exit(&_ii_info_mutex); if (ip == NULL) return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_ENOTFOUND)); mutex_exit(&ip->bi_mutex); if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_ERSRVFAIL)); } ubitsset.tot_size = ip->bi_size / DSW_SIZE; if ((ip->bi_size % DSW_SIZE) != 0) ++ubitsset.tot_size; bitmap_size = (ubitsset.tot_size + 7) / 8; if (cmd == DSWIOC_SBITSSET) rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size); else rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size); ubitsset.tot_set = nbitsset; _ii_rlse_devs(ip, BMP); mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); if (rc) { spcs_s_add(kstatus, rc); return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO)); } spcs_s_kfree(kstatus); /* return the fetched names to the user */ if (ilp32) { ubitsset32.status = (spcs_s_info32_t)ubitsset.status; ubitsset32.tot_size = ubitsset.tot_size; ubitsset32.tot_set = ubitsset.tot_set; rc = copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32)); } else { rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset)); } return (rc); } /* * _ii_stopvol * Stop any copying process for shadow, and stop shadowing * */ static void _ii_stopvol(_ii_info_t *ip) { nsc_path_t *mst_tok; nsc_path_t *mstr_tok; nsc_path_t *shd_tok; nsc_path_t *shdr_tok; nsc_path_t *bmp_tok; int rc; while (_ii_stopcopy(ip) == EINTR) ; DTRACE_PROBE(_ii_stopvol); mutex_enter(&ip->bi_mutex); mst_tok = ip->bi_mst_tok; mstr_tok = ip->bi_mstr_tok; shd_tok = ip->bi_shd_tok; shdr_tok = ip->bi_shdr_tok; bmp_tok = ip->bi_bmp_tok; ip->bi_shd_tok = 0; ip->bi_shdr_tok = 0; if (!NSHADOWS(ip)) { ip->bi_mst_tok = 0; ip->bi_mstr_tok = 0; } ip->bi_bmp_tok = 0; /* Wait for any _ii_open() calls to complete */ while (ip->bi_ioctl) { ip->bi_state |= DSW_IOCTL; cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex); } mutex_exit(&ip->bi_mutex); rc = _ii_reserve_begin(ip); if (rc) { cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc); } if (!NSHADOWS(ip)) { if (mst_tok) { rc = _ii_unregister_path(mst_tok, NSC_PCATCH, "master"); if (rc) cmn_err(CE_WARN, "!ii: unregister master %d", rc); } if (mstr_tok) { rc = _ii_unregister_path(mstr_tok, NSC_PCATCH, "raw master"); if (rc) cmn_err(CE_WARN, "!ii: unregister raw " "master %d", rc); } } if (shd_tok) { rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow"); if (rc) cmn_err(CE_WARN, "!ii: unregister shadow %d", rc); } if (shdr_tok) { rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow"); if (rc) cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc); } if (bmp_tok) { rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap"); if (rc) cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc); } _ii_reserve_end(ip); /* Wait for all necessary _ii_close() calls to complete */ mutex_enter(&ip->bi_mutex); while (total_ref(ip) != 0) { ip->bi_state |= DSW_CLOSING; cv_wait(&ip->bi_closingcv, &ip->bi_mutex); } if (!NSHADOWS(ip)) { nsc_set_owner(ip->bi_mstfd, NULL); nsc_set_owner(ip->bi_mstrfd, NULL); } nsc_set_owner(ip->bi_shdfd, NULL); nsc_set_owner(ip->bi_shdrfd, NULL); mutex_exit(&ip->bi_mutex); } /* * _ii_ioctl_done * If this is the last one to complete, wakeup all processes waiting * for ioctls to complete * */ static void _ii_ioctl_done(_ii_info_t *ip) { ASSERT(ip->bi_ioctl > 0); ip->bi_ioctl--; if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) { ip->bi_state &= ~DSW_IOCTL; cv_broadcast(&ip->bi_ioctlcv); } } /* * _ii_find_vol * Search the configured shadows list for the supplied volume. * If found, flag an ioctl in progress and return the locked _ii_info_t. * * The caller must check to see if the bi_disable flag is set and * treat it appropriately. * * ASSUMPTION: * _ii_info_mutex must be locked prior to calling this function * */ static _ii_info_t * _ii_find_vol(char *volume, int vol) { _ii_info_t **xip, *ip; for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) { if ((*xip)->bi_disabled) continue; if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) : (*xip)->bi_keyname) == 0) { break; } } if (!*xip) { DTRACE_PROBE(VolNotFound); return (NULL); } ip = *xip; if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) { /* Not fully configured until bi_shd_tok is set */ DTRACE_PROBE(SetNotConfiged); return (NULL); } mutex_enter(&ip->bi_mutex); ip->bi_ioctl++; return (ip); } static _ii_info_t * _ii_find_set(char *volume) { return (_ii_find_vol(volume, SHD)); } /* * _ii_find_overflow * Search the configured shadows list for the supplied overflow volume. * */ static _ii_overflow_t * _ii_find_overflow(char *volume) { _ii_overflow_t **xop, *op; mutex_enter(&_ii_overflow_mutex); DTRACE_PROBE(_ii_find_overflowmutex); for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) { if (strcmp(volume, (*xop)->ii_volname) == 0) { break; } } if (!*xop) { mutex_exit(&_ii_overflow_mutex); return (NULL); } op = *xop; mutex_exit(&_ii_overflow_mutex); return (op); } /* * _ii_bm_header_get * Fetch the bitmap volume header * */ ii_header_t * _ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp) { ii_header_t *hdr; nsc_off_t read_fba; int rc; ASSERT(ip->bi_bmprsrv); /* assert bitmap is reserved */ ASSERT(MUTEX_HELD(&ip->bi_mutex)); if ((ip->bi_flags & DSW_BMPOFFLINE) != 0) return (NULL); *tmp = NULL; read_fba = 0; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba, FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp); II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t))); if (!II_SUCCESS(rc)) { if (ii_debug > 2) cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x", rc); if (*tmp) (void) nsc_free_buf(*tmp); *tmp = NULL; mutex_exit(&ip->bi_mutex); _ii_error(ip, DSW_BMPOFFLINE); mutex_enter(&ip->bi_mutex); return (NULL); } hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr; return (hdr); } /* * _ii_bm_header_free * Free the bitmap volume header * */ /* ARGSUSED */ void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp) { (void) nsc_free_buf(tmp); } /* * _ii_bm_header_put * Write out the modified bitmap volume header and free it * */ /* ARGSUSED */ int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp) { nsc_off_t write_fba; int rc; ASSERT(MUTEX_HELD(&ip->bi_mutex)); write_fba = 0; II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba, FBA_LEN(sizeof (ii_header_t)), 0); (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) { mutex_exit(&ip->bi_mutex); _ii_error(ip, DSW_BMPOFFLINE); mutex_enter(&ip->bi_mutex); DTRACE_PROBE(_ii_bm_header_put); return (rc); } else { DTRACE_PROBE(_ii_bm_header_put_end); return (0); } } /* * _ii_flag_op * Clear or set a flag in bi_flags and dsw_state. * This relies on the ownership of the header block's nsc_buf * for locking. * */ void _ii_flag_op(and, or, ip, update) int and, or; _ii_info_t *ip; int update; { ii_header_t *bm_header; nsc_buf_t *tmp; ip->bi_flags &= and; ip->bi_flags |= or; if (update == TRUE) { /* * No point trying to access bitmap header if it's offline * or has been disassociated from set via DSW_HANGING */ if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) { bm_header = _ii_bm_header_get(ip, &tmp); if (bm_header == NULL) { if (tmp) (void) nsc_free_buf(tmp); DTRACE_PROBE(_ii_flag_op_end); return; } bm_header->ii_state &= and; bm_header->ii_state |= or; /* copy over the mtime */ bm_header->ii_mtime = ip->bi_mtime; (void) _ii_bm_header_put(bm_header, ip, tmp); } } } /* * _ii_nsc_io * Perform read or write on an underlying nsc device * fd - nsc file descriptor * flag - nsc io direction and characteristics flag * fba_pos - offset from beginning of device in FBAs * io_addr - pointer to data buffer * io_len - length of io in bytes */ int _ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos, unsigned char *io_addr, nsc_size_t io_len) { nsc_buf_t *tmp = NULL; nsc_vec_t *vecp; uchar_t *vaddr; size_t copy_len; int64_t vlen; int rc; nsc_size_t fba_req, fba_len; nsc_size_t maxfbas = 0; nsc_size_t tocopy; unsigned char *toaddr; rc = nsc_maxfbas(fd, 0, &maxfbas); if (!II_SUCCESS(rc)) { #ifdef DEBUG cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc); #endif maxfbas = DSW_CBLK_FBA; } toaddr = io_addr; fba_req = FBA_LEN(io_len); #ifdef DEBUG_SPLIT_IO cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas); cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x", toaddr, io_len, fba_req); #endif loop: tmp = NULL; fba_len = min(fba_req, maxfbas); tocopy = min(io_len, FBA_SIZE(fba_len)); DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos, nsc_size_t, fba_len); #ifdef DEBUG_SPLIT_IO cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x", fba_pos, fba_len); #endif #ifndef DISABLE_KSTATS if (flag & NSC_READ) { switch (ks) { case KS_MST: II_READ_START(ip, master); break; case KS_SHD: II_READ_START(ip, shadow); break; case KS_BMP: II_READ_START(ip, bitmap); break; case KS_OVR: II_READ_START(ip, overflow); break; default: cmn_err(CE_WARN, "!Invalid kstats type %d", ks); break; } } #endif rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp); #ifndef DISABLE_KSTATS if (flag & NSC_READ) { switch (ks) { case KS_MST: II_READ_END(ip, master, rc, fba_len); break; case KS_SHD: II_READ_END(ip, shadow, rc, fba_len); break; case KS_BMP: II_READ_END(ip, bitmap, rc, fba_len); break; case KS_OVR: II_READ_END(ip, overflow, rc, fba_len); break; } } #endif if (!II_SUCCESS(rc)) { if (tmp) { (void) nsc_free_buf(tmp); } return (EIO); } if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE && (FBA_OFF(io_len) != 0)) { /* * Not overwriting all of the last FBA, so read in the * old contents now before we overwrite it with the new * data. */ #ifdef DEBUG_SPLIT_IO cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x", fba_pos+FBA_NUM(io_len)); #endif #ifdef DISABLE_KSTATS rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); #else switch (ks) { case KS_MST: II_NSC_READ(ip, master, rc, tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; case KS_SHD: II_NSC_READ(ip, shadow, rc, tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; case KS_BMP: II_NSC_READ(ip, bitmap, rc, tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; case KS_OVR: II_NSC_READ(ip, overflow, rc, tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; case KS_NA: rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; default: cmn_err(CE_WARN, "!Invalid kstats type %d", ks); rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); break; } #endif if (!II_SUCCESS(rc)) { (void) nsc_free_buf(tmp); return (EIO); } } vecp = tmp->sb_vec; vlen = vecp->sv_len; vaddr = vecp->sv_addr; while (tocopy > 0) { if (vecp->sv_addr == 0 || vecp->sv_len == 0) { #ifdef DEBUG cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle"); #endif break; } copy_len = (size_t)min(vlen, tocopy); DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len); if (flag & NSC_WRITE) bcopy(io_addr, vaddr, copy_len); else bcopy(vaddr, io_addr, copy_len); toaddr += copy_len; tocopy -= copy_len; io_addr += copy_len; io_len -= copy_len; vaddr += copy_len; vlen -= copy_len; if (vlen <= 0) { vecp++; vaddr = vecp->sv_addr; vlen = vecp->sv_len; } } if (flag & NSC_WRITE) { #ifdef DISABLE_KSTATS rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); #else switch (ks) { case KS_MST: II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos, tmp->sb_len, 0); break; case KS_SHD: II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos, tmp->sb_len, 0); break; case KS_BMP: II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos, tmp->sb_len, 0); break; case KS_OVR: II_NSC_WRITE(ip, overflow, rc, tmp, tmp->sb_pos, tmp->sb_len, 0); break; case KS_NA: rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); break; default: cmn_err(CE_WARN, "!Invalid kstats type %d", ks); rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); break; } #endif if (!II_SUCCESS(rc)) { (void) nsc_free_buf(tmp); return (rc); } } (void) nsc_free_buf(tmp); fba_pos += fba_len; fba_req -= fba_len; if (fba_req > 0) goto loop; return (0); } /* * ii_overflow_attach */ static int ii_overflow_attach(_ii_info_t *ip, char *name, int first) { _ii_overflow_t *op; int rc = 0; int reserved = 0; int mutex_set = 0; int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */ mutex_enter(&_ii_overflow_mutex); /* search for name in list */ for (op = _ii_overflow_top; op; op = op->ii_next) { if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0) break; } if (op) { ip->bi_overflow = op; op->ii_crefcnt++; op->ii_drefcnt++; if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) { if (!first) mutex_enter(&ip->bi_mutex); ip->bi_flags |= DSW_OVRHDRDRTY; if (!first) mutex_exit(&ip->bi_mutex); op->ii_urefcnt++; } #ifndef DISABLE_KSTATS ip->bi_kstat_io.overflow = op->ii_overflow; (void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname, KSTAT_DATA_CHAR_LEN); #endif /* write header */ if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) { rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); (void) nsc_release(op->ii_dev->bi_fd); ++iigkstat.assoc_over.value.ul; } mutex_exit(&_ii_overflow_mutex); return (rc); } if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) { mutex_exit(&_ii_overflow_mutex); return (ENOMEM); } if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP)) == NULL) { kmem_free(op, sizeof (*op)); mutex_exit(&_ii_overflow_mutex); return (ENOMEM); } #ifndef DISABLE_KSTATS if ((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) { ip->bi_kstat_io.overflow = op->ii_overflow; (void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio, KSTAT_DATA_CHAR_LEN); } else { goto fail; } #endif /* open overflow volume */ op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(op->ii_dev->bi_iodev), &rc); if (!op->ii_dev->bi_fd) op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL, (blind_t)&(op->ii_dev->bi_iodev), &rc); if (op->ii_dev->bi_fd == NULL) { goto fail; } if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0) goto fail; reserved = 1; /* register path */ op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE, _ii_ior); if (!op->ii_dev->bi_tok) { goto fail; } /* read header */ rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_OVROFFLINE); goto fail; } /* On resume, check for old hmagic */ if (strncmp(op->ii_volname, name, DSW_NAMELEN) || ((op->ii_hmagic != II_OLD_OMAGIC) && (op->ii_hmagic != II_OMAGIC))) { rc = DSW_EOMAGIC; goto fail; } /* set up counts */ op->ii_crefcnt = 1; op->ii_drefcnt = 0; op->ii_urefcnt = 0; op->ii_hmagic = II_OMAGIC; if (!first) { /* if header version > 0, check if header written */ if (((op->ii_flags & IIO_HDR_WRTN) == 0) && (op->ii_hversion >= 1)) { op->ii_flags |= IIO_CNTR_INVLD; mutex_enter(&ip->bi_mutex); ip->bi_flags |= DSW_OVRHDRDRTY; mutex_exit(&ip->bi_mutex); op->ii_urefcnt++; } } op->ii_flags &= ~IIO_HDR_WRTN; op->ii_drefcnt++; /* write header */ rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); nsc_release(op->ii_dev->bi_fd); reserved = 0; if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_OVROFFLINE); goto fail; } mutex_init(&op->ii_mutex, NULL, MUTEX_DRIVER, NULL); mutex_set++; /* link onto list */ op->ii_next = _ii_overflow_top; _ii_overflow_top = op; ip->bi_overflow = op; ++iigkstat.assoc_over.value.ul; mutex_exit(&_ii_overflow_mutex); DTRACE_PROBE(_ii_overflow_attach_end); return (0); fail: #ifndef DISABLE_KSTATS /* Clean-up kstat stuff */ if (op->ii_overflow) { kstat_delete(op->ii_overflow); mutex_destroy(&op->ii_kstat_mutex); } #endif /* clean up mutex if we made it that far */ if (mutex_set) { mutex_destroy(&op->ii_mutex); } if (op->ii_dev) { if (op->ii_dev->bi_tok) { (void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow"); } if (reserved) (void) nsc_release(op->ii_dev->bi_fd); if (op->ii_dev->bi_fd) (void) nsc_close(op->ii_dev->bi_fd); kmem_free(op->ii_dev, sizeof (_ii_info_dev_t)); } kmem_free(op, sizeof (*op)); mutex_exit(&_ii_overflow_mutex); return (rc); } /* * ii_overflow_free * Assumes that ip is locked for I/O */ static void ii_overflow_free(_ii_info_t *ip, int reclaim) { _ii_overflow_t *op, **xp; if ((op = ip->bi_overflow) == NULL) return; ip->bi_kstat_io.overflow = NULL; mutex_enter(&_ii_overflow_mutex); switch (reclaim) { case NO_RECLAIM: if (--(op->ii_drefcnt) == 0) { /* indicate header written */ op->ii_flags |= IIO_HDR_WRTN; /* write out header */ ASSERT(op->ii_dev->bi_fd); (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); nsc_release(op->ii_dev->bi_fd); } break; case RECLAIM: ii_reclaim_overflow(ip); /* FALLTHRU */ case INIT_OVR: if (--(op->ii_drefcnt) == 0) { /* reset to new condition, c.f. _ii_ocreate() */ op->ii_used = 1; op->ii_unused = op->ii_nchunks - op->ii_used; op->ii_freehead = II_NULLNODE; } /* write out header */ ASSERT(op->ii_dev->bi_fd); (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI); (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF, II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do)); nsc_release(op->ii_dev->bi_fd); } if (--(op->ii_crefcnt) == 0) { /* Close fd and unlink from active chain; */ (void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow"); (void) nsc_close(op->ii_dev->bi_fd); for (xp = &_ii_overflow_top; *xp && *xp != op; xp = &((*xp)->ii_next)) /* NULL statement */; *xp = op->ii_next; if (op->ii_overflow) { kstat_delete(op->ii_overflow); } /* Clean up ii_overflow_t mutexs */ mutex_destroy(&op->ii_kstat_mutex); mutex_destroy(&op->ii_mutex); if (op->ii_dev) kmem_free(op->ii_dev, sizeof (_ii_info_dev_t)); kmem_free(op, sizeof (*op)); } ip->bi_overflow = NULL; --iigkstat.assoc_over.value.ul; mutex_exit(&_ii_overflow_mutex); } /* * ii_sibling_free * Free resources and unlink the sibling chains etc. */ static void ii_sibling_free(_ii_info_t *ip) { _ii_info_t *hip, *yip; if (!ip) return; if (ip->bi_shdr_tok) (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow"); if (ip->bi_shd_tok) (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); rw_enter(&ip->bi_linkrw, RW_WRITER); ip->bi_shd_tok = NULL; ip->bi_shdr_tok = NULL; if (NSHADOWS(ip)) { mutex_enter(&_ii_info_mutex); if (ip->bi_head == ip) { /* removing head of list */ hip = ip->bi_sibling; for (yip = hip; yip; yip = yip->bi_sibling) yip->bi_head = hip; } else { /* removing member of list */ hip = ip->bi_head; for (yip = ip->bi_head; yip; yip = yip->bi_sibling) { if (yip->bi_sibling == ip) { yip->bi_sibling = ip->bi_sibling; break; } } } hip->bi_master->bi_head = hip; if (ip->bi_master == ip) { /* master I/O goes through this */ mutex_exit(&_ii_info_mutex); _ii_info_freeshd(ip); rw_exit(&ip->bi_linkrw); return; } mutex_exit(&_ii_info_mutex); } else { if (ip->bi_master != ip) /* last ref to master side ip */ _ii_info_free(ip->bi_master); /* ==A== */ } if (ip->bi_master != ip) { /* info_free ==A== will close these */ /* * Null out any pointers to shared master side resources * that should only be freed once when the last reference * to this master is freed and calls _ii_info_free(). */ ip->bi_mstdev = NULL; ip->bi_mstrdev = NULL; ip->bi_kstat_io.master = NULL; } rw_exit(&ip->bi_linkrw); _ii_info_free(ip); } /* * _ii_info_freeshd * Free shadow side resources * * Calling/Exit State: * No mutexes should be held on entry to this function. * * Description: * Frees the system resources associated with the shadow * access, leaving the master side alone. This allows the * original master side to continue in use while there are * outstanding references to this _ii_info_t. */ static void _ii_info_freeshd(_ii_info_t *ip) { if (!ip) return; if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING) return; /* this work has already been completed */ II_FLAG_SETX(DSW_HANGING, ip); if (ip->bi_cluster) (void) II_UNLINK_CLUSTER(ip); if (ip->bi_group) (void) II_UNLINK_GROUP(ip); if (ip->bi_shdfd && ip->bi_shdrsrv) nsc_release(ip->bi_shdfd); if (ip->bi_shdrfd && ip->bi_shdrrsrv) nsc_release(ip->bi_shdrfd); if (ip->bi_bmpfd && ip->bi_bmprsrv) nsc_release(ip->bi_bmpfd); if (ip->bi_bmp_tok) (void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap"); if (ip->bi_shdr_tok) (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow"); if (ip->bi_shd_tok) (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow"); ip->bi_shd_tok = NULL; ip->bi_shdr_tok = NULL; if (ip->bi_shdfd) (void) nsc_close(ip->bi_shdfd); if (ip->bi_shdrfd) (void) nsc_close(ip->bi_shdrfd); if (ip->bi_bmpfd) (void) nsc_close(ip->bi_bmpfd); ip->bi_shdfd = NULL; ip->bi_shdrfd = NULL; ip->bi_bmpfd = NULL; if (ip->bi_busy) kmem_free(ip->bi_busy, 1 + (ip->bi_size / (DSW_SIZE * DSW_BITS))); ip->bi_busy = NULL; if (ip->bi_kstat_io.shadow) { kstat_delete(ip->bi_kstat_io.shadow); ip->bi_kstat_io.shadow = NULL; } if (ip->bi_kstat_io.bitmap) { kstat_delete(ip->bi_kstat_io.bitmap); ip->bi_kstat_io.bitmap = NULL; } if (ip->bi_kstat) { kstat_delete(ip->bi_kstat); ip->bi_kstat = NULL; } } /* * _ii_info_free * Free resources * * Calling/Exit State: * No mutexes should be held on entry to this function. * * Description: * Frees the system resources associated with the specified * II information structure. */ static void _ii_info_free(_ii_info_t *ip) { _ii_info_t **xip; if (!ip) return; mutex_enter(&_ii_info_mutex); for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) { if (ip == *xip) { *xip = ip->bi_nextmst; break; } } mutex_exit(&_ii_info_mutex); /* this rw_enter forces us to wait until all nsc_buffers are freed */ rw_enter(&ip->bi_linkrw, RW_WRITER); if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv) nsc_release(ip->bi_mstfd); if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv) nsc_release(ip->bi_mstrfd); if (ip->bi_mstdev && ip->bi_mst_tok) (void) _ii_unregister_path(ip->bi_mst_tok, 0, "master"); if (ip->bi_mstrdev && ip->bi_mstr_tok) (void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master"); if (ip->bi_mstdev && ip->bi_mstfd) (void) nsc_close(ip->bi_mstfd); if (ip->bi_mstrdev && ip->bi_mstrfd) (void) nsc_close(ip->bi_mstrfd); rw_exit(&ip->bi_linkrw); if (ip->bi_mstdev) { nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev)); } if (ip->bi_mstrdev) { nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev)); } if (ip->bi_kstat_io.master) { kstat_delete(ip->bi_kstat_io.master); } if (ip->bi_kstat_io.shadow) { kstat_delete(ip->bi_kstat_io.shadow); ip->bi_kstat_io.shadow = 0; } if (ip->bi_kstat_io.bitmap) { kstat_delete(ip->bi_kstat_io.bitmap); ip->bi_kstat_io.bitmap = 0; } if (ip->bi_kstat) { kstat_delete(ip->bi_kstat); ip->bi_kstat = NULL; } /* this rw_enter forces us to wait until all nsc_buffers are freed */ rw_enter(&ip->bi_linkrw, RW_WRITER); rw_exit(&ip->bi_linkrw); mutex_destroy(&ip->bi_mutex); mutex_destroy(&ip->bi_rsrvmutex); mutex_destroy(&ip->bi_rlsemutex); mutex_destroy(&ip->bi_bmpmutex); mutex_destroy(&ip->bi_chksmutex); cv_destroy(&ip->bi_copydonecv); cv_destroy(&ip->bi_reservecv); cv_destroy(&ip->bi_releasecv); cv_destroy(&ip->bi_ioctlcv); cv_destroy(&ip->bi_closingcv); cv_destroy(&ip->bi_busycv); rw_destroy(&ip->bi_busyrw); rw_destroy(&ip->bi_linkrw); _ii_info_freeshd(ip); #ifdef DEBUG ip->bi_head = (_ii_info_t *)0xdeadbeef; #endif nsc_kmem_free(ip, sizeof (*ip)); } /* * _ii_copy_chunks * Perform a copy of some chunks * * Calling/Exit State: * Returns 0 if the data was copied successfully, otherwise * error code. * * Description: * flag is set to CV_SHD2MST if the data is to be copied from the shadow * to the master, 0 if it is to be copied from the master to the shadow. */ static int _ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks) { int mst_flag; int shd_flag; int ovr_flag; nsc_off_t pos; nsc_size_t len; int rc; nsc_off_t shd_pos; chunkid_t shd_chunk; nsc_buf_t *mst_tmp = NULL; nsc_buf_t *shd_tmp = NULL; if (ip->bi_flags & DSW_MSTOFFLINE) { DTRACE_PROBE(_ii_copy_chunks_end); return (EIO); } if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) { DTRACE_PROBE(_ii_copy_chunks_end); return (EIO); } if (flag == CV_SHD2MST) { mst_flag = NSC_WRBUF|NSC_WRTHRU; shd_flag = NSC_RDBUF; } else { shd_flag = NSC_WRBUF|NSC_WRTHRU; mst_flag = NSC_RDBUF; } pos = DSW_CHK2FBA(chunk_num); len = DSW_SIZE * nchunks; if (pos + len > ip->bi_size) len = ip->bi_size - pos; if (ip->bi_flags & DSW_TREEMAP) { ASSERT(nchunks == 1); shd_chunk = ii_tsearch(ip, chunk_num); if (shd_chunk == II_NULLNODE) { /* shadow is full */ mutex_enter(&ip->bi_mutex); II_FLAG_SET(DSW_OVERFLOW, ip); mutex_exit(&ip->bi_mutex); DTRACE_PROBE(_ii_copy_chunks_end); return (EIO); } ovr_flag = II_ISOVERFLOW(shd_chunk); shd_pos = DSW_CHK2FBA((ovr_flag) ? II_2OVERFLOW(shd_chunk) : shd_chunk); } else { ovr_flag = FALSE; shd_chunk = chunk_num; shd_pos = pos; } /* * Always allocate the master side before the shadow to * avoid deadlocks on the same chunk. */ DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len); II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp); if (!II_SUCCESS(rc)) { if (mst_tmp) (void) nsc_free_buf(mst_tmp); _ii_error(ip, DSW_MSTOFFLINE); DTRACE_PROBE(_ii_copy_chunks_end); return (rc); } if (ovr_flag) { /* use overflow volume */ (void) nsc_reserve(OVRFD(ip), NSC_MULTI); II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len, shd_flag, &shd_tmp); } else { II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag, &shd_tmp); } if (!II_SUCCESS(rc)) { (void) nsc_free_buf(mst_tmp); if (shd_tmp) (void) nsc_free_buf(shd_tmp); if (ovr_flag) nsc_release(OVRFD(ip)); _ii_error(ip, DSW_SHDOFFLINE); if (ovr_flag) _ii_error(ip, DSW_OVROFFLINE); DTRACE_PROBE(_ii_copy_chunks_end); return (rc); } /* * The direction of copy is determined by the mst_flag. */ DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct, int, mst_flag); if (ii_copy_direct) { if (mst_flag & NSC_WRBUF) { if (ovr_flag) { II_NSC_COPY_DIRECT(ip, overflow, master, rc, shd_tmp, mst_tmp, shd_pos, pos, len) } else { II_NSC_COPY_DIRECT(ip, shadow, master, rc, shd_tmp, mst_tmp, shd_pos, pos, len) } if (!II_SUCCESS(rc)) { /* A copy has failed - something is wrong */ _ii_error(ip, DSW_MSTOFFLINE); _ii_error(ip, DSW_SHDOFFLINE); if (ovr_flag) _ii_error(ip, DSW_OVROFFLINE); } } else { if (ovr_flag) { II_NSC_COPY_DIRECT(ip, master, overflow, rc, mst_tmp, shd_tmp, pos, shd_pos, len); } else { II_NSC_COPY_DIRECT(ip, master, shadow, rc, mst_tmp, shd_tmp, pos, shd_pos, len); } if (!II_SUCCESS(rc)) { /* * A failure has occurred during the above copy. * The macro calls nsc_copy_direct, which will * never return a read failure, only a write * failure. With this assumption, we should * take only the target volume offline. */ _ii_error(ip, DSW_SHDOFFLINE); if (ovr_flag) _ii_error(ip, DSW_OVROFFLINE); } } } else { if (mst_flag & NSC_WRBUF) { rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len, 0); if (!II_SUCCESS(rc)) _ii_error(ip, DSW_MSTOFFLINE); } else { /* A copy has failed - something is wrong */ _ii_error(ip, DSW_MSTOFFLINE); _ii_error(ip, DSW_SHDOFFLINE); } } else { rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len); if (II_SUCCESS(rc)) { if (ovr_flag) { II_NSC_WRITE(ip, overflow, rc, shd_tmp, shd_pos, len, 0); } else { II_NSC_WRITE(ip, shadow, rc, shd_tmp, shd_pos, len, 0); } if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_SHDOFFLINE); if (ovr_flag) _ii_error(ip, DSW_OVROFFLINE); } } else { /* A copy has failed - something is wrong */ _ii_error(ip, DSW_MSTOFFLINE); _ii_error(ip, DSW_SHDOFFLINE); } } } (void) nsc_free_buf(mst_tmp); (void) nsc_free_buf(shd_tmp); if (ovr_flag) nsc_release(OVRFD(ip)); DTRACE_PROBE(_ii_copy_chunks); if (II_SUCCESS(rc)) { (void) II_CLR_COPY_BITS(ip, chunk_num, nchunks); rc = 0; } return (rc); } /* * _ii_copy_on_write * * Calling/Exit State: * Returns 0 on success, otherwise error code. * * Description: * Determines if a copy on write is necessary, and performs it. * A copy on write is necessary in the following cases: * - No copy is in progress and the shadow bit is clear, which * means this is the first write to this track. * - A copy is in progress and the copy bit is set, which means * that a track copy is required. * If a copy to the master is to be done, make a recursive call to this * function to do any necessary copy on write on other InstantImage groups * that share the same master volume. */ static int _ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks) { int rc = 0; int rtype; int hanging = (ip->bi_flags&DSW_HANGING); if (hanging || (flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) { _ii_info_t *xip; /* * Preserve copy of master for all other shadows of this master * before writing our data onto the master. */ /* * Avoid deadlock with COW on same chunk of sibling shadow * by unlocking this chunk before copying all other sibling * chunks. */ /* * Only using a single chunk when copying to master avoids * complex code here. */ ASSERT(nchunks == 1); if (!hanging) _ii_unlock_chunk(ip, chunk_num); for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { if (xip == ip) /* don't copy ourselves again */ continue; DTRACE_PROBE(_ii_copy_on_write); rw_enter(&xip->bi_linkrw, RW_READER); mutex_enter(&xip->bi_mutex); if (xip->bi_disabled) { mutex_exit(&xip->bi_mutex); rw_exit(&xip->bi_linkrw); continue; /* this set is stopping */ } xip->bi_shdref++; mutex_exit(&xip->bi_mutex); /* don't waste time asking for MST as ip shares it */ rtype = SHDR|BMP; (void) _ii_rsrv_devs(xip, rtype, II_INTERNAL); _ii_lock_chunk(xip, chunk_num); rc = _ii_copy_on_write(xip, flag | CV_SIBLING, chunk_num, 1); /* * See comments in _ii_shadow_write() */ if (rc == 0 || (rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0)) (void) II_SET_SHD_BIT(xip, chunk_num); _ii_unlock_chunk(xip, chunk_num); _ii_rlse_devs(xip, rtype); mutex_enter(&xip->bi_mutex); xip->bi_shdref--; if (xip->bi_state & DSW_CLOSING) { if (total_ref(xip) == 0) { cv_signal(&xip->bi_closingcv); } } mutex_exit(&xip->bi_mutex); rw_exit(&xip->bi_linkrw); } if (hanging) { DTRACE_PROBE(_ii_copy_on_write_end); return (0); } /* * Reacquire chunk lock and check that a COW by a sibling * has not already copied this chunk. */ _ii_lock_chunk(ip, chunk_num); rc = II_TST_SHD_BIT(ip, chunk_num); if (rc < 0) { DTRACE_PROBE(_ii_copy_on_write_end); return (EIO); } if (rc != 0) { DTRACE_PROBE(_ii_copy_on_write_end); return (0); } } if ((ip->bi_flags & DSW_COPYING) == 0) { /* Not copying at all */ if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) { /* No copy-on-write as it is independent */ DTRACE_PROBE(_ii_copy_on_write_end); return (0); } /* Dependent, so depends on shadow bit */ if ((flag == CV_SHD2MST) && ((ip->bi_flags & DSW_SHDOFFLINE) != 0)) { /* * Writing master but shadow is offline, so * no need to copy on write or set shadow bit */ DTRACE_PROBE(_ii_copy_on_write_end); return (0); } if (ip->bi_flags & DSW_BMPOFFLINE) { DTRACE_PROBE(_ii_copy_on_write_end); return (EIO); } rc = II_TST_SHD_BIT(ip, chunk_num); if (rc < 0) { DTRACE_PROBE(_ii_copy_on_write_end); return (EIO); } if (rc == 0) { /* Shadow bit clear, copy master to shadow */ rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks); } } else { /* Copying one way or the other */ if (ip->bi_flags & DSW_BMPOFFLINE) { DTRACE_PROBE(_ii_copy_on_write_end); return (EIO); } rc = II_TST_COPY_BIT(ip, chunk_num); if (rc < 0) { DTRACE_PROBE(_ii_copy_on_write_end); return (EIO); } if (rc) { /* Copy bit set, do a copy */ if ((ip->bi_flags & DSW_COPYINGS) == 0) { /* Copy master to shadow */ rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks); } else { /* Copy shadow to master */ rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num, nchunks); } } } return (rc); } #ifdef DEBUG int ii_maxchunks = 0; #endif /* * _ii_copyvolp() * Copy volume process. * * Calling/Exit State: * Passes 0 back to caller when the copy is complete or has been aborted, * otherwise error code. * * Description: * According to the flag, copy the master to the shadow volume or the * shadow to the master volume. Upon return wakeup all processes waiting * for this copy. * */ static void _ii_copyvolp(struct copy_args *ca) { chunkid_t chunk_num; int rc = 0; chunkid_t max_chunk; nsc_size_t nc_max; int nc_try, nc_got; nsc_size_t mst_max, shd_max; _ii_info_t *ip; int flag; nsc_size_t bitmap_size; nsc_size_t shadow_set, copy_set; int chunkcount = 0; int rsrv = 1; spcs_s_info_t kstatus; ip = ca->ip; flag = ca->flag; kstatus = ca->kstatus; if (ip->bi_disabled) { rc = DSW_EABORTED; goto skip; } max_chunk = ip->bi_size / DSW_SIZE; if ((ip->bi_size % DSW_SIZE) != 0) ++max_chunk; if ((ip->bi_flags&DSW_TREEMAP)) nc_max = 1; else { mst_max = shd_max = 0; (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max); (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max); nc_max = (mst_max < shd_max) ? mst_max : shd_max; nc_max /= DSW_SIZE; ASSERT(nc_max > 0 && nc_max < 1000); } #ifdef DEBUG if (ii_maxchunks > 0) nc_max = ii_maxchunks; #endif for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) { if ((flag & CV_SHD2MST) && NSHADOWS(ip)) nc_try = 1; else nc_try = (int)nc_max; chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got, max_chunk, nc_try, &nc_got); if (chunk_num >= max_chunk) /* loop complete */ break; if (ip->bi_flags & DSW_COPYINGX) { /* request to abort copy */ _ii_unlock_chunks(ip, chunk_num, nc_got); rc = DSW_EABORTED; break; } sema_p(&_ii_concopy_sema); rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num, nc_got); sema_v(&_ii_concopy_sema); if (ip->bi_flags & DSW_TREEMAP) ii_tdelete(ip, chunk_num); _ii_unlock_chunks(ip, chunk_num, nc_got); if (!II_SUCCESS(rc)) { if (ca->wait) spcs_s_add(kstatus, rc); rc = DSW_EIO; break; } if (ip->bi_release || (++chunkcount % ip->bi_throttle_unit) == 0) { _ii_rlse_devs(ip, (ca->rtype&(~BMP))); rsrv = 0; delay(ip->bi_throttle_delay); ca->rtype = MSTR|SHDR|(ca->rtype&BMP); if ((rc = _ii_rsrv_devs(ip, (ca->rtype&(~BMP)), II_INTERNAL)) != 0) { if (ca->wait) spcs_s_add(kstatus, rc); rc = DSW_EIO; break; } rsrv = 1; if (nc_max > 1) { /* * maxfbas could have changed during the * release/reserve, so recalculate the size * of transfer we can do. */ (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max); (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max); nc_max = (mst_max < shd_max) ? mst_max : shd_max; nc_max /= DSW_SIZE; } } } skip: mutex_enter(&ip->bi_mutex); if (ip->bi_flags & DSW_COPYINGX) II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip); else II_FLAG_CLR(DSW_COPY_FLAGS, ip); if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) && (ip->bi_flags & DSW_VOVERFLOW)) { int rs; bitmap_size = ip->bi_size / DSW_SIZE; if ((ip->bi_size % DSW_SIZE) != 0) ++bitmap_size; bitmap_size += 7; bitmap_size /= 8; /* Count the number of copy bits set */ rs = II_CNT_BITS(ip, ip->bi_copyfba, ©_set, bitmap_size); if ((rs == 0) && (copy_set == 0)) { /* * If we counted successfully and completed the copy * see if any writes have forced the set into the * overflow */ rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set, bitmap_size); if ((rs == 0) && (shadow_set < (nsc_size_t)ip->bi_shdchks)) { II_FLAG_CLR(DSW_VOVERFLOW, ip); --iigkstat.spilled_over.value.ul; } } } ca->rc = rc; cv_broadcast(&ip->bi_copydonecv); mutex_exit(&ip->bi_mutex); if (!ca->wait) { if (rsrv) _ii_rlse_devs(ip, ca->rtype); kmem_free(ca, sizeof (*ca)); } } /* * _ii_copyvol() * Copy a volume. * * Calling/Exit State: * Returns 0 when the copy is complete or has been aborted, * otherwise error code. * * Description: * According to the flag, copy the master to the shadow volume or the * shadow to the master volume. Upon return wakeup all processes waiting * for this copy. Uses a separate process (_ii_copyvolp) to allow the * caller to be interrupted. */ static int _ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus, int wait) { struct copy_args *ca; int rc; /* * start copy in separate process. */ ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP); ca->ip = ip; ca->flag = flag; ca->rtype = rtype; ca->kstatus = kstatus; ca->wait = wait; ca->rc = 0; if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp, (void *)ca, FALSE)) { mutex_enter(&ip->bi_mutex); _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); cmn_err(CE_NOTE, "!Can't create II copy process"); kmem_free(ca, sizeof (*ca)); return (rc); } mutex_enter(&ip->bi_mutex); if (wait == 0) { _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); return (0); } while (ip->bi_flags & DSW_COPYINGP) { (void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex); } _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); rc = ca->rc; kmem_free(ca, sizeof (*ca)); return (rc); } /* * _ii_stopcopy * Stops any copy process on ip. * * Calling/Exit State: * Returns 0 if the copy was stopped, otherwise error code. * * Description: * Stop an in-progress copy by setting the DSW_COPYINGX flag, then * wait for the copy to complete. */ static int _ii_stopcopy(_ii_info_t *ip) { mutex_enter(&ip->bi_mutex); DTRACE_PROBE1(_ii_stopcopy_flags, uint_t, ip->bi_flags); while (ip->bi_flags & DSW_COPYINGP) { DTRACE_PROBE(_ii_stopcopy); II_FLAG_SET(DSW_COPYINGX, ip); if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) { /* Awoken by a signal */ mutex_exit(&ip->bi_mutex); DTRACE_PROBE(_ii_stopcopy); return (EINTR); } } mutex_exit(&ip->bi_mutex); return (0); } /* * _ii_error * Given the error type that occurred, and the current state of the * shadowing, set the appropriate error condition(s). * */ void _ii_error(_ii_info_t *ip, int error_type) { int copy_flags; int golden; int flags; int recursive_call = (error_type & DSW_OVERFLOW) != 0; int offline_bits = DSW_OFFLINE; _ii_info_t *xip; int rc; error_type &= ~DSW_OVERFLOW; mutex_enter(&ip->bi_mutex); flags = (ip->bi_flags) & offline_bits; if ((flags ^ error_type) == 0) { /* nothing new offline */ mutex_exit(&ip->bi_mutex); return; } if (error_type == DSW_BMPOFFLINE && (ip->bi_flags & DSW_BMPOFFLINE) == 0) { /* first, let nskerd know */ rc = _ii_report_bmp(ip); if (rc) { if (ii_debug > 0) { cmn_err(CE_WARN, "!Unable to mark bitmap bad in" " config DB; rc = %d", rc); } ip->bi_flags |= DSW_CFGOFFLINE; } } flags = ip->bi_flags; golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN); copy_flags = flags & DSW_COPYING; switch (error_type) { case DSW_BMPOFFLINE: /* prevent further use of bitmap */ flags |= DSW_BMPOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Bitmap offline"); switch (copy_flags) { case DSW_COPYINGM: /* Bitmap offline, copying master to shadow */ flags |= DSW_SHDOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied shadow offline"); break; case DSW_COPYINGS: /* Bitmap offline, copying shadow to master */ if (golden) { /* Shadow is still usable */ if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied master offline"); flags |= DSW_MSTOFFLINE; } else { /* * Snapshot restore from shadow to master * is a dumb thing to do anyway. Lose both. */ flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "ii: Implied master and " "shadow offline"); } break; case 0: /* Bitmap offline, no copying in progress */ if (!golden) { if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied shadow offline"); flags |= DSW_SHDOFFLINE; } break; } break; case DSW_OVROFFLINE: flags |= DSW_OVROFFLINE; ASSERT(ip->bi_overflow); if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Overflow offline"); /* FALLTHRU */ case DSW_SHDOFFLINE: flags |= DSW_SHDOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Shadow offline"); if (copy_flags == DSW_COPYINGS) { /* Shadow offline, copying shadow to master */ if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied master offline"); flags |= DSW_MSTOFFLINE; } break; case DSW_MSTOFFLINE: flags |= DSW_MSTOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Master offline"); switch (copy_flags) { case DSW_COPYINGM: /* Master offline, copying master to shadow */ flags |= DSW_SHDOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied shadow offline"); break; case DSW_COPYINGS: /* Master offline, copying shadow to master */ if (!golden) { flags |= DSW_SHDOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied shadow offline"); } break; case 0: /* Master offline, no copying in progress */ if (!golden) { flags |= DSW_SHDOFFLINE; if (ii_debug > 0) cmn_err(CE_NOTE, "!ii: Implied shadow offline"); } break; } break; default: break; } II_FLAG_SET(flags, ip); mutex_exit(&ip->bi_mutex); if (!recursive_call && NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) { /* take master offline for all other sibling shadows */ for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { if (xip == ip) continue; if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0) continue; /* overload DSW_OVERFLOW */ _ii_error(xip, DSW_MSTOFFLINE|DSW_OVERFLOW); _ii_rlse_devs(xip, BMP); } } } /* * _ii_lock_chunk * Locks access to the specified chunk * */ static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk) { if (chunk == II_NULLCHUNK) { DTRACE_PROBE(_ii_lock_chunk_type); rw_enter(&ip->bi_busyrw, RW_WRITER); } else { DTRACE_PROBE(_ii_lock_chunk_type); if (ip->bi_busy == NULL) { DTRACE_PROBE(_ii_lock_chunk_end); return; } rw_enter(&ip->bi_busyrw, RW_READER); mutex_enter(&ip->bi_mutex); while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) cv_wait(&ip->bi_busycv, &ip->bi_mutex); DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS); mutex_exit(&ip->bi_mutex); } } /* * _ii_trylock_chunk * Tries to lock access to the specified chunk * Returns non-zero on success. * */ static int _ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk) { int rc; ASSERT(chunk != II_NULLCHUNK); if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) { DTRACE_PROBE(_ii_trylock_chunk); return (0); } if (ip->bi_busy == NULL) { DTRACE_PROBE(_ii_trylock_chunk_end); return (0); } mutex_enter(&ip->bi_mutex); if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) { rw_exit(&ip->bi_busyrw); /* RW_READER */ rc = 0; } else { DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS); rc = 1; } mutex_exit(&ip->bi_mutex); return (rc); } /* * _ii_unlock_chunks * Unlocks access to the specified chunks * */ static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t chunk, int n) { if (chunk == II_NULLCHUNK) { DTRACE_PROBE(_ii_unlock_chunks); rw_exit(&ip->bi_busyrw); /* RW_WRITER */ } else { if (ip->bi_busy == NULL) { DTRACE_PROBE(_ii_unlock_chunks_end); return; } mutex_enter(&ip->bi_mutex); DTRACE_PROBE(_ii_unlock_chunks); for (; n-- > 0; chunk++) { ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)); DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS); rw_exit(&ip->bi_busyrw); /* RW_READER */ } cv_broadcast(&ip->bi_busycv); mutex_exit(&ip->bi_mutex); } } /* * Copyout the bit map. */ static int _ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, int user_bm_size) { nsc_off_t last_fba; nsc_buf_t *tmp; nsc_vec_t *nsc_vecp; nsc_off_t fba_pos; int buf_fba_len; int buf_byte_len; size_t co_len; int rc; DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset, nsc_size_t, user_bm_size); if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); /* First calculate the size of the shadow and copy bitmaps */ co_len = DSW_BM_FBA_LEN(ip->bi_size); ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len); /* Are we in the ranges of the various bitmaps/indexes? */ if (bm_offset < ip->bi_shdfba) return (EIO); else if (bm_offset < (last_fba = ip->bi_shdfba + co_len)) /*EMPTY*/; else if (bm_offset < (last_fba = ip->bi_copyfba + co_len)) /*EMPTY*/; else if ((ip->bi_flags & DSW_TREEMAP) && (bm_offset < (last_fba = last_fba + (co_len * 32)))) /*EMPTY*/; else return (EIO); /* Are we within the size of the segment being copied? */ if (FBA_LEN(user_bm_size) > last_fba - bm_offset) return (EIO); for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0; fba_pos += DSW_CBLK_FBA) { tmp = NULL; buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? DSW_CBLK_FBA : last_fba - fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, buf_fba_len); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (EIO); } /* copyout each nsc_vec's worth of data */ buf_byte_len = FBA_SIZE(buf_fba_len); for (nsc_vecp = tmp->sb_vec; buf_byte_len > 0 && user_bm_size > 0; nsc_vecp++) { co_len = (user_bm_size > nsc_vecp->sv_len) ? nsc_vecp->sv_len : user_bm_size; if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) { (void) nsc_free_buf(tmp); return (EFAULT); } user_bm += co_len; user_bm_size -= co_len; buf_byte_len -= co_len; } (void) nsc_free_buf(tmp); } return (0); } /* * Copyin a bit map and or with differences bitmap. */ static int _ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, int user_bm_size) { nsc_off_t last_fba; nsc_buf_t *tmp; nsc_vec_t *nsc_vecp; nsc_off_t fba_pos; int buf_fba_len; int buf_byte_len; size_t ci_len; int rc; int n; unsigned char *tmp_buf, *tmpp, *tmpq; DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset, nsc_size_t, user_bm_size); if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); tmp_buf = NULL; last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size); for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0; fba_pos += DSW_CBLK_FBA) { tmp = NULL; buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? DSW_CBLK_FBA : last_fba - fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, NSC_RDWRBUF, &tmp); II_READ_END(ip, bitmap, rc, buf_fba_len); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (EIO); } /* copyin each nsc_vec's worth of data */ buf_byte_len = FBA_SIZE(buf_fba_len); for (nsc_vecp = tmp->sb_vec; buf_byte_len > 0 && user_bm_size > 0; nsc_vecp++) { ci_len = (user_bm_size > nsc_vecp->sv_len) ? nsc_vecp->sv_len : user_bm_size; tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP); tmpq = nsc_vecp->sv_addr; if (copyin(user_bm, tmpp, ci_len)) { (void) nsc_free_buf(tmp); kmem_free(tmp_buf, ci_len); return (EFAULT); } for (n = ci_len; n-- > 0; /* CSTYLED */) *tmpq++ |= *tmpp++; user_bm += ci_len; user_bm_size -= ci_len; buf_byte_len -= ci_len; kmem_free(tmp_buf, ci_len); } II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0); if (!II_SUCCESS(rc)) { (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (EIO); } (void) nsc_free_buf(tmp); } ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * Completely zero the bit map. * * Returns 0 if no error * Returns non-zero if there was an error */ static int _ii_ab_zerobm(_ii_info_t *ip) { nsc_off_t fba_pos; int rc; nsc_size_t len; nsc_size_t size; nsc_buf_t *tmp; size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) { tmp = NULL; len = fba_pos + DSW_CBLK_FBA < size ? DSW_CBLK_FBA : size - fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF, &tmp); II_READ_END(ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } rc = nsc_zero(tmp, fba_pos, len, 0); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0); } (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } } ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * Copy shadow bitmap to copy bitmap */ static int _ii_ab_copybm(_ii_info_t *ip) { nsc_off_t copy_fba_pos, shd_fba_pos; int rc; nsc_size_t len; nsc_off_t size; nsc_buf_t *copy_tmp, *shd_tmp; size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; copy_fba_pos = ip->bi_copyfba; for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size; copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) { shd_tmp = NULL; len = shd_fba_pos + DSW_CBLK_FBA < size ? DSW_CBLK_FBA : size - shd_fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF, &shd_tmp); II_READ_END(ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { if (shd_tmp) (void) nsc_free_buf(shd_tmp); _ii_error(ip, DSW_BMPOFFLINE); if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d", rc); return (rc); } copy_tmp = NULL; rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF, ©_tmp); if (!II_SUCCESS(rc)) { (void) nsc_free_buf(shd_tmp); if (copy_tmp) (void) nsc_free_buf(copy_tmp); _ii_error(ip, DSW_BMPOFFLINE); if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d", rc); return (rc); } rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos, len); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos, len, 0); } (void) nsc_free_buf(shd_tmp); (void) nsc_free_buf(copy_tmp); if (!II_SUCCESS(rc)) { if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d", rc); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } } ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * stolen from nsc_copy_h() */ static int _ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2, nsc_size_t len) { unsigned char *a1, *a2; unsigned char *b1, *b2; nsc_vec_t *v1, *v2; int i, sz, l1, l2; if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len || pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len) return (EINVAL); if (!len) return (0); /* find starting point in "from" vector */ v1 = h1->sb_vec; pos1 -= h1->sb_pos; for (; pos1 >= FBA_NUM(v1->sv_len); v1++) pos1 -= FBA_NUM(v1->sv_len); a1 = v1->sv_addr + FBA_SIZE(pos1); l1 = v1->sv_len - FBA_SIZE(pos1); /* find starting point in "to" vector */ v2 = h2->sb_vec; pos2 -= h2->sb_pos; for (; pos2 >= FBA_NUM(v2->sv_len); v2++) pos2 -= FBA_NUM(v2->sv_len); a2 = v2->sv_addr + FBA_SIZE(pos2); l2 = v2->sv_len - FBA_SIZE(pos2); /* copy required data */ len = FBA_SIZE(len); while (len) { sz = min(l1, l2); sz = (int)min((nsc_size_t)sz, len); b1 = a1; b2 = a2; for (i = sz; i-- > 0; /* CSTYLED */) *b2++ |= *b1++; l1 -= sz; l2 -= sz; a1 += sz; a2 += sz; len -= sz; if (!l1) { a1 = (++v1)->sv_addr; l1 = v1->sv_len; } if (!l2) { a2 = (++v2)->sv_addr; l2 = v2->sv_len; } } return (0); } /* * Or the shadow bitmap in to the copy bitmap, clear the * shadow bitmap. */ static int _ii_ab_orbm(_ii_info_t *ip) { nsc_off_t copy_fba_pos, shd_fba_pos; int rc; nsc_size_t len; size_t size; nsc_buf_t *copy_tmp, *shd_tmp; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba; copy_fba_pos = ip->bi_copyfba; for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size; copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) { shd_tmp = NULL; len = shd_fba_pos + DSW_CBLK_FBA < size ? DSW_CBLK_FBA : size - shd_fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF|NSC_WRBUF, &shd_tmp); II_READ_END(ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { if (shd_tmp) (void) nsc_free_buf(shd_tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } copy_tmp = NULL; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_RDBUF|NSC_WRBUF, ©_tmp); II_READ_END(ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { (void) nsc_free_buf(shd_tmp); if (copy_tmp) (void) nsc_free_buf(copy_tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos, len); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos, len, 0); } if (II_SUCCESS(rc)) rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0); if (II_SUCCESS(rc)) { II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len, 0); } (void) nsc_free_buf(shd_tmp); (void) nsc_free_buf(copy_tmp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } } ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * _ii_ab_tst_shd_bit * Determine if a chunk has been copied to the shadow device * Relies on the alloc_buf/free_buf semantics for locking. * * Calling/Exit State: * Returns 1 if the modified bit has been set for the shadow device, * Returns 0 if the modified bit has not been set for the shadow device, * Returns -1 if there was an error */ static int _ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk) { int rc; nsc_off_t fba; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS); chunk %= FBA_SIZE(1) * DSW_BITS; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); if (tmp) (void) nsc_free_buf(tmp); return (-1); } rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS); (void) nsc_free_buf(tmp); return (rc); } /* * _ii_ab_set_shd_bit * Records that a chunk has been copied to the shadow device * * Returns non-zero if an error is encountered * Returns 0 if no error */ static int _ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk) { int rc; nsc_off_t fba; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS); chunk %= FBA_SIZE(1) * DSW_BITS; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); if (tmp) (void) nsc_free_buf(tmp); return (rc); } if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS); II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); if ((ip->bi_state & DSW_CNTSHDBITS) == 0) ip->bi_shdbits++; } (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } return (0); } /* * _ii_ab_tst_copy_bit * Determine if a chunk needs to be copied during updates. * * Calling/Exit State: * Returns 1 if the copy bit for the chunk is set * Returns 0 if the copy bit for the chunk is not set * Returns -1 if an error is encountered */ static int _ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk) { int rc; nsc_off_t fba; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (-1); fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); chunk %= FBA_SIZE(1) * DSW_BITS; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (-1); } rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS); (void) nsc_free_buf(tmp); return (rc); } /* * _ii_ab_set_copy_bit * Records that a chunk has been copied to the shadow device * * Returns non-zero if an error is encountered * Returns 0 if no error */ static int _ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk) { int rc; nsc_off_t fba; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); chunk %= FBA_SIZE(1) * DSW_BITS; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS); if ((ip->bi_state & DSW_CNTCPYBITS) == 0) ip->bi_copybits++; II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); } (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } return (0); } /* * _ii_ab_clr_copy_bits * Records that a chunk has been cleared on the shadow device, this * function assumes that the bits to clear are all in the same fba, * as is the case when they were generated by _ii_ab_next_copy_bit(). * * Returns non-zero if an error is encountered * Returns 0 if no error */ static int _ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks) { int rc; nsc_off_t fba; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS); chunk %= FBA_SIZE(1) * DSW_BITS; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } for (; nchunks-- > 0; chunk++) { DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS); if (ip->bi_copybits > 0) ip->bi_copybits--; } II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); (void) nsc_free_buf(tmp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } return (0); } /* * _ii_ab_fill_copy_bmp * Fills the copy bitmap with 1's. * * Returns non-zero if an error is encountered * Returns 0 if no error */ static int _ii_ab_fill_copy_bmp(_ii_info_t *ip) { int rc; nsc_off_t fba; nsc_buf_t *tmp; unsigned char *p; int i, j; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); fba = ip->bi_copyfba; for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) { tmp = NULL; rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } p = (unsigned char *)tmp->sb_vec->sv_addr; for (j = FBA_SIZE(1); j-- > 0; p++) *p = (unsigned char)0xff; II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); (void) nsc_free_buf(tmp); return (rc); } (void) nsc_free_buf(tmp); } ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * _ii_ab_load_bmp * Load bitmap from persistent storage. */ static int _ii_ab_load_bmp(_ii_info_t *ip, int flag) /* ARGSUSED */ { if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * _ii_ab_next_copy_bit * Find next set copy bit. * * Returns the next bits set in the copy bitmap, with the corresponding chunks * locked. Used to avoid having to reread the same bit map block as each bit * is tested. */ static chunkid_t _ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk, int wanted, int *got) { chunkid_t rc; nsc_off_t fba; chunkid_t chunk; int bits_per_fba = FBA_SIZE(1) * DSW_BITS; int high; chunkid_t nextchunk; nsc_buf_t *tmp = NULL; *got = 0; again: if (ip->bi_flags & DSW_BMPOFFLINE) return (maxchunk + 1); while (startchunk < maxchunk) { tmp = NULL; fba = ip->bi_copyfba + startchunk / bits_per_fba; chunk = startchunk % bits_per_fba; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (maxchunk + 1); } high = startchunk + bits_per_fba - startchunk%bits_per_fba; if (high > maxchunk) high = maxchunk; for (; startchunk < high; chunk++, startchunk++) { if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS], chunk%DSW_BITS)) { /* * trylock won't sleep so can use while * holding the buf. */ if (!_ii_trylock_chunk(ip, startchunk)) { (void) nsc_free_buf(tmp); _ii_lock_chunk(ip, startchunk); if (_ii_ab_tst_copy_bit(ip, startchunk) != 1) { /* * another process copied this * chunk while we were acquiring * the chunk lock. */ _ii_unlock_chunk(ip, startchunk); DTRACE_PROBE( _ii_ab_next_copy_bit_again); goto again; } *got = 1; DTRACE_PROBE(_ii_ab_next_copy_bit_end); return (startchunk); } *got = 1; nextchunk = startchunk + 1; chunk++; for (; --wanted > 0 && nextchunk < high; nextchunk++, chunk++) { if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr [chunk/DSW_BITS], chunk%DSW_BITS)) { break; /* end of bit run */ } if (_ii_trylock_chunk(ip, nextchunk)) (*got)++; else break; } (void) nsc_free_buf(tmp); DTRACE_PROBE(_ii_ab_next_copy_bit); return (startchunk); } } (void) nsc_free_buf(tmp); } return (maxchunk + 1); } /* * _ii_ab_save_bmp * Save bitmap to persistent storage. */ static int _ii_ab_save_bmp(_ii_info_t *ip, int flag) /* ARGSUSED */ { if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); return (0); } /* * _ii_ab_change_bmp * copy change bitmap to memory */ static int _ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr) /* ARGSUSED */ { int bm_size; int i, j, fba; int rc; unsigned char *p; nsc_buf_t *tmp = NULL; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba, ptr, bm_size); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_BMPOFFLINE); return (rc); } fba = ip->bi_copyfba; for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) { tmp = NULL; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (rc); } p = (unsigned char *)tmp->sb_vec->sv_addr; for (j = FBA_SIZE(1); j-- > 0; p++) *ptr |= *p; (void) nsc_free_buf(tmp); } return (0); } /* * Count bits set in the bit map. */ static int _ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter, int bm_size) { nsc_size_t last_fba; nsc_buf_t *tmp; nsc_vec_t *sd_vecp; nsc_off_t fba_pos; int buf_fba_len; int buf_byte_len; int co_len; int i; unsigned int j, k; unsigned char *cp; int rc; *counter = 0; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size); for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0; fba_pos += DSW_CBLK_FBA) { tmp = NULL; buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ? DSW_CBLK_FBA : last_fba - fba_pos; II_READ_START(ip, bitmap); rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len, NSC_RDBUF, &tmp); II_READ_END(ip, bitmap, rc, 1); if (!II_SUCCESS(rc)) { if (tmp) (void) nsc_free_buf(tmp); _ii_error(ip, DSW_BMPOFFLINE); return (EIO); } /* count each sd_vec's worth of data */ buf_byte_len = FBA_SIZE(buf_fba_len); for (sd_vecp = tmp->sb_vec; buf_byte_len > 0 && bm_size > 0; sd_vecp++) { co_len = (bm_size > sd_vecp->sv_len) ? sd_vecp->sv_len : bm_size; cp = sd_vecp->sv_addr; for (i = k = 0; i < co_len; i++) for (j = (unsigned)*cp++; j; j &= j - 1) k++; *counter += k; bm_size -= co_len; buf_byte_len -= co_len; } (void) nsc_free_buf(tmp); } return (0); } /* * OR the bitmaps as part of a join operation */ static int _ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip) { int rc; nsc_size_t len; nsc_size_t size; nsc_buf_t *dest_tmp, *src_tmp; nsc_off_t src_fba_pos; if ((src_ip->bi_flags & DSW_BMPOFFLINE) || (dest_ip->bi_flags & DSW_BMPOFFLINE)) return (EIO); size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba; for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size; src_fba_pos += DSW_CBLK_FBA) { src_tmp = NULL; len = src_fba_pos + DSW_CBLK_FBA < size ? DSW_CBLK_FBA : size - src_fba_pos; II_READ_START(src_ip, bitmap); rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len, NSC_RDWRBUF, &src_tmp); II_READ_END(src_ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { if (src_tmp) (void) nsc_free_buf(src_tmp); _ii_error(src_ip, DSW_BMPOFFLINE); return (rc); } dest_tmp = NULL; II_READ_START(dest_ip, bitmap); rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len, NSC_RDWRBUF, &dest_tmp); II_READ_END(dest_ip, bitmap, rc, len); if (!II_SUCCESS(rc)) { (void) nsc_free_buf(src_tmp); if (dest_tmp) (void) nsc_free_buf(dest_tmp); _ii_error(dest_ip, DSW_BMPOFFLINE); return (rc); } rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos, len); if (II_SUCCESS(rc)) { II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp, src_fba_pos, len, 0); } (void) nsc_free_buf(src_tmp); (void) nsc_free_buf(dest_tmp); if (!II_SUCCESS(rc)) { _ii_error(dest_ip, DSW_BMPOFFLINE); return (rc); } } dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } static _ii_bmp_ops_t alloc_buf_bmp = { _ii_ab_co_bmp, _ii_ab_ci_bmp, _ii_ab_zerobm, _ii_ab_copybm, _ii_ab_orbm, _ii_ab_tst_shd_bit, _ii_ab_set_shd_bit, _ii_ab_tst_copy_bit, _ii_ab_set_copy_bit, _ii_ab_clr_copy_bits, _ii_ab_next_copy_bit, _ii_ab_fill_copy_bmp, _ii_ab_load_bmp, _ii_ab_save_bmp, _ii_ab_change_bmp, _ii_ab_cnt_bits, _ii_ab_join_bmp }; /* * Copyout the bit map. */ static int _ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, int user_bm_size) { int start_offset; int bm_size; size_t co_len; nsc_off_t last_fba; /* First calculate the size of the shadow and copy bitmaps */ co_len = DSW_BM_FBA_LEN(ip->bi_size); ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len); /* Are we in the ranges of the various bitmaps/indexes? */ if (bm_offset < ip->bi_shdfba) return (EIO); else if (bm_offset < (last_fba = ip->bi_shdfba + co_len)) /*EMPTY*/; else if (bm_offset < (last_fba = ip->bi_copyfba + co_len)) /*EMPTY*/; else if ((ip->bi_flags & DSW_TREEMAP) && (bm_offset < (last_fba = last_fba + (co_len * 32)))) /*EMPTY*/; else return (EIO); if (FBA_LEN(user_bm_size) > last_fba - bm_offset) return (EIO); start_offset = FBA_SIZE(bm_offset); bm_size = FBA_SIZE(last_fba); co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size; if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len)) return (EFAULT); return (0); } /* * Copyin a bit map and or with differences bitmap. */ static int _ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm, int user_bm_size) { unsigned char *tmp_buf; unsigned char *dest; unsigned char *p; size_t tmp_size; int n; int start_offset; int bm_size; size_t ci_len; int rc = 0; start_offset = FBA_SIZE(bm_offset); bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); tmp_buf = NULL; tmp_size = FBA_SIZE(1); tmp_buf = kmem_alloc(tmp_size, KM_SLEEP); start_offset = FBA_SIZE(bm_offset); dest = ip->bi_bitmap + start_offset; bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size; while (ci_len > 0) { n = (tmp_size > ci_len) ? ci_len : tmp_size; if (copyin(user_bm, tmp_buf, n)) { rc = EFAULT; break; } user_bm += n; for (p = tmp_buf; n--> 0; ci_len--) *dest++ |= *p++; } if (tmp_buf) kmem_free(tmp_buf, tmp_size); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (rc); } /* * Completely zero the bit map. */ static int _ii_km_zerobm(_ii_info_t *ip) { int start_offset = FBA_SIZE(ip->bi_shdfba); int len; len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); mutex_enter(&ip->bi_bmpmutex); bzero(ip->bi_bitmap+start_offset, len); mutex_exit(&ip->bi_bmpmutex); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * Copy shadow bitmap to copy bitmap */ static int _ii_km_copybm(_ii_info_t *ip) { int copy_offset, shd_offset; int len; len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); shd_offset = FBA_SIZE(ip->bi_shdfba); copy_offset = FBA_SIZE(ip->bi_copyfba); mutex_enter(&ip->bi_bmpmutex); bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len); mutex_exit(&ip->bi_bmpmutex); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * Or the shadow bitmap in to the copy bitmap, clear the * shadow bitmap. */ static int _ii_km_orbm(_ii_info_t *ip) { unsigned char *copy, *shd; int copy_offset, shd_offset; int len; len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); shd_offset = FBA_SIZE(ip->bi_shdfba); copy_offset = FBA_SIZE(ip->bi_copyfba); shd = ip->bi_bitmap + shd_offset; copy = ip->bi_bitmap + copy_offset; mutex_enter(&ip->bi_bmpmutex); while (len-- > 0) *copy++ |= *shd++; mutex_exit(&ip->bi_bmpmutex); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * _ii_km_tst_shd_bit * Determine if a chunk has been copied to the shadow device * * Calling/Exit State: * Returns 1 if the modified bit has been set for the shadow device, * otherwise returns 0. */ static int _ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk) { unsigned char *bmp; int bmp_offset; int rc; bmp_offset = FBA_SIZE(ip->bi_shdfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); mutex_exit(&ip->bi_bmpmutex); return (rc); } /* * _ii_km_set_shd_bit * Records that a chunk has been copied to the shadow device */ static int _ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk) { unsigned char *bmp; int bmp_offset; bmp_offset = FBA_SIZE(ip->bi_shdfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); if ((ip->bi_state & DSW_CNTSHDBITS) == 0) ip->bi_shdbits++; } mutex_exit(&ip->bi_bmpmutex); return (0); } /* * _ii_km_tst_copy_bit * Determine if a chunk needs to be copied during updates. * * Calling/Exit State: * Returns 1 if the copy bit for the chunk is set, * otherwise returns 0 */ static int _ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk) { unsigned char *bmp; int bmp_offset; int rc; bmp_offset = FBA_SIZE(ip->bi_copyfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); mutex_exit(&ip->bi_bmpmutex); return (rc); } /* * _ii_km_set_copy_bit * Records that a chunk has been copied to the shadow device */ static int _ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk) { unsigned char *bmp; int bmp_offset; bmp_offset = FBA_SIZE(ip->bi_copyfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) { DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS); if ((ip->bi_state & DSW_CNTCPYBITS) == 0) ip->bi_copybits++; } mutex_exit(&ip->bi_bmpmutex); return (0); } /* * _ii_km_clr_copy_bits * Records that a chunk has been cleared on the shadow device */ static int _ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks) { unsigned char *bmp; int bmp_offset; bmp_offset = FBA_SIZE(ip->bi_copyfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); for (; nchunks-- > 0; chunk++) { DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS); if (ip->bi_copybits > 0) ip->bi_copybits--; } mutex_exit(&ip->bi_bmpmutex); return (0); } /* * _ii_km_fill_copy_bmp * Fills the copy bitmap with 1's. */ static int _ii_km_fill_copy_bmp(_ii_info_t *ip) { int len; unsigned char *bmp; int bmp_offset; bmp_offset = FBA_SIZE(ip->bi_copyfba); bmp = ip->bi_bitmap + bmp_offset; len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba); mutex_enter(&ip->bi_bmpmutex); while (len-- > 0) *bmp++ = (unsigned char)0xff; mutex_exit(&ip->bi_bmpmutex); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } /* * _ii_km_load_bmp * Load bitmap from persistent storage. */ static int _ii_km_load_bmp(_ii_info_t *ip, int flag) { nsc_off_t bmp_offset; nsc_size_t bitmap_size; int rc; if (ip->bi_flags & DSW_BMPOFFLINE) return (EIO); if (ip->bi_bitmap == NULL) { bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) + ip->bi_shdfba); ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP, _ii_local_mem); } if (flag) return (0); /* just create an empty bitmap */ bmp_offset = FBA_SIZE(ip->bi_shdfba); rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba, ip->bi_bitmap + bmp_offset, 2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba)); if (!II_SUCCESS(rc)) _ii_error(ip, DSW_BMPOFFLINE); ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (rc); } /* * _ii_km_save_bmp * Save bitmap to persistent storage. */ static int _ii_km_save_bmp(_ii_info_t *ip, int flag) { int bmp_offset; int bitmap_size; int rc; bmp_offset = FBA_SIZE(ip->bi_shdfba); if (ip->bi_flags & DSW_BMPOFFLINE) rc = EIO; else { rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF, ip->bi_shdfba, ip->bi_bitmap + bmp_offset, 2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba)); if (!II_SUCCESS(rc)) _ii_error(ip, DSW_BMPOFFLINE); } if (flag && ip->bi_bitmap) { /* dispose of bitmap memory */ bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) + ip->bi_shdfba); nsc_kmem_free(ip->bi_bitmap, bitmap_size); ip->bi_bitmap = NULL; } return (rc); } /* * _ii_km_next_copy_bit * Find next set copy bit. * * Returns the next bits set in the copy bitmap, with the corresponding chunks * locked. Used to cut down on the number of times the bmpmutex is acquired. */ static chunkid_t _ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk, int want, int *got) { unsigned char *bmp; int bmp_offset; int nextchunk; *got = 0; bmp_offset = FBA_SIZE(ip->bi_copyfba); bmp = ip->bi_bitmap + bmp_offset; mutex_enter(&ip->bi_bmpmutex); for (; chunk < maxchunk; chunk++) { if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) { /* * trylock won't sleep so can use while * holding bi_bmpmutex. */ if (!_ii_trylock_chunk(ip, chunk)) { mutex_exit(&ip->bi_bmpmutex); _ii_lock_chunk(ip, chunk); *got = 1; DTRACE_PROBE(_ii_km_next_copy_bit); return (chunk); } *got = 1; for (nextchunk = chunk + 1; *got < want && nextchunk < maxchunk; nextchunk++) { if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS], nextchunk%DSW_BITS)) break; if (_ii_trylock_chunk(ip, nextchunk)) (*got)++; else break; } mutex_exit(&ip->bi_bmpmutex); DTRACE_PROBE(_ii_km_next_copy_bit); return (chunk); } } mutex_exit(&ip->bi_bmpmutex); return (maxchunk + 1); } /* * _ii_km_change_bmp * copy change bitmap to memory */ static int _ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr) /* ARGSUSED */ { int start_offset; int bm_size; unsigned char *q; bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)); start_offset = FBA_SIZE(ip->bi_shdfba); bcopy(ip->bi_bitmap + start_offset, ptr, bm_size); start_offset = FBA_SIZE(ip->bi_copyfba); q = ip->bi_bitmap + start_offset; while (bm_size-- > 0) *ptr |= *q; return (0); } /* * Count bits set in the bit map. */ static int _ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter, int bm_size) { int start_offset; int i; nsc_size_t j, k; unsigned char *cp; start_offset = FBA_SIZE(bm_offset); cp = ip->bi_bitmap + start_offset; for (i = k = 0; i < bm_size; i++) for (j = (unsigned)*cp++; j; j &= j - 1) k++; *counter = k; return (0); } /* * Or the shadow bitmap in to the copy bitmap, clear the * shadow bitmap. */ static int _ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip) { uchar_t *dest, *src; nsc_size_t bm_size; dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba); src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba); bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size)); while (bm_size-- > 0) *dest++ |= *src++; dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS); return (0); } static _ii_bmp_ops_t kmem_buf_bmp = { _ii_km_co_bmp, _ii_km_ci_bmp, _ii_km_zerobm, _ii_km_copybm, _ii_km_orbm, _ii_km_tst_shd_bit, _ii_km_set_shd_bit, _ii_km_tst_copy_bit, _ii_km_set_copy_bit, _ii_km_clr_copy_bits, _ii_km_next_copy_bit, _ii_km_fill_copy_bmp, _ii_km_load_bmp, _ii_km_save_bmp, _ii_km_change_bmp, _ii_km_cnt_bits, _ii_km_join_bmp }; static int ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf, nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len) { int rc; nsc_buf_t *tmp; nsc_off_t mapped_fba; chunkid_t mapped_chunk; int overflow; if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) { /* simple read with optional copy */ if (mst_src) { II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0); } else { II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0); } if (dstbuf && II_SUCCESS(rc)) { rc = nsc_copy(srcbuf, dstbuf, fba, fba, len); } return (rc); } /* read from mapped shadow into final buffer */ mapped_chunk = ii_tsearch(ip, chunk_num); if (mapped_chunk == II_NULLNODE) return (EIO); overflow = II_ISOVERFLOW(mapped_chunk); if (overflow) mapped_chunk = II_2OVERFLOW(mapped_chunk); /* convert chunk number from tsearch into final fba */ mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE); tmp = NULL; if (overflow) { (void) nsc_reserve(OVRFD(ip), NSC_MULTI); II_READ_START(ip, overflow); rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp); II_READ_END(ip, overflow, rc, len); } else { II_READ_START(ip, shadow); rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp); II_READ_END(ip, shadow, rc, len); } if (II_SUCCESS(rc)) { if (dstbuf == NULL) dstbuf = srcbuf; rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len); (void) nsc_free_buf(tmp); } if (overflow) nsc_release(OVRFD(ip)); return (rc); } /* * _ii_fill_buf * Read data from the required device * * Calling/Exit State: * Returns 0 if the data was read successfully, otherwise * error code. * * Description: * Reads the data from fba_pos for length fba_len from the * required device. This data may be a mix of data from the master * device and the shadow device, depending on the state of the * bitmaps. */ static int _ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag, nsc_buf_t **handle, nsc_buf_t **handle2) { _ii_info_t *ip = bfd->ii_info; _ii_info_t *xip; int second_shd = 0; nsc_off_t temp_fba; nsc_size_t temp_len; nsc_size_t bmp_len; chunkid_t chunk_num; int rc; int fill_from_pair; int rtype = SHDR|BMP; nsc_buf_t *second_buf = NULL; if (flag&NSC_RDAHEAD) return (NSC_DONE); chunk_num = fba_pos / DSW_SIZE; temp_fba = fba_pos; temp_len = fba_len; /* * If the master is being updated from a shadow we need to fill from * the correct shadow volume. */ if (NSHADOWS(ip) && bfd->ii_shd == 0) { for (xip = ip->bi_head; xip; xip = xip->bi_sibling) { if (xip == ip) continue; if (xip->bi_flags &DSW_COPYINGS) { second_shd = 1; ip = xip; if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) return (EIO); rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len, (flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf); if (!II_SUCCESS(rc)) { rc = EIO; goto out; } handle2 = &second_buf; break; } } } while (temp_len > 0) { if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) { bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba; temp_len -= bmp_len; } else { bmp_len = temp_len; temp_len = 0; } fill_from_pair = 0; if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) { rc = II_TST_COPY_BIT(ip, chunk_num); /* Treat a failed bitmap volume as a clear bit */ if (rc > 0) { /* Copy bit set */ if (bfd->ii_shd) { if (*handle2) fill_from_pair = 1; else { rc = EIO; goto out; } } } } if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) { rc = II_TST_COPY_BIT(ip, chunk_num); /* Treat a failed bitmap volume as a clear bit */ if (rc > 0) { /* Copy bit set */ if (bfd->ii_shd == 0) { if (*handle2 || (ip->bi_flags&DSW_TREEMAP)) fill_from_pair = 1; else { rc = EIO; goto out; } } } } if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) { /* Dependent shadow read */ rc = II_TST_SHD_BIT(ip, chunk_num); if (rc < 0) { rc = EIO; goto out; } if (rc == 0) { /* Shadow bit clear */ if (*handle2) fill_from_pair = 1; else { rc = EIO; goto out; } } } if (fill_from_pair) { /* it matters now */ if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) { rc = EIO; goto out; } if (*handle2 == NULL && (ip->bi_flags&DSW_TREEMAP) == 0) { rc = EIO; goto out; } rc = ii_read_volume(ip, bfd->ii_shd, *handle2, *handle, chunk_num, temp_fba, bmp_len); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_MSTOFFLINE); _ii_error(ip, DSW_SHDOFFLINE); goto out; } } else { if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) { rc = EIO; goto out; } if ((bfd->ii_shd == 0) && (ip->bi_flags & DSW_MSTOFFLINE)) { rc = EIO; goto out; } rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL, chunk_num, temp_fba, bmp_len); if (!II_SUCCESS(rc)) { if (bfd->ii_shd) _ii_error(ip, DSW_SHDOFFLINE); else _ii_error(ip, DSW_MSTOFFLINE); goto out; } } temp_fba += bmp_len; chunk_num++; } rc = 0; out: if (second_buf) (void) nsc_free_buf(second_buf); if (second_shd) _ii_rlse_devs(ip, rtype); return (rc); } /* * _ii_shadow_write * Perform any copy on write required by a write buffer request * * Calling/Exit State: * Returns 0 on success, otherwise error code. * */ static int _ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len) { _ii_info_t *ip = bfd->ii_info; chunkid_t chunk_num; int rc; int flag; int hanging; DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len); /* fail immediately if config DB is unavailable */ if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) { return (EIO); } chunk_num = pos / DSW_SIZE; if (bfd->ii_shd) flag = 0; /* To shadow */ else flag = CV_SHD2MST; /* To master */ mutex_enter(&ip->bi_mutex); ip->bi_shdref++; mutex_exit(&ip->bi_mutex); hanging = (ip->bi_flags&DSW_HANGING) != 0; for (; (chunk_num >= 0) && DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) { if (!hanging) _ii_lock_chunk(ip, chunk_num); rc = _ii_copy_on_write(ip, flag, chunk_num, 1); /* * Set the shadow bit when a small shadow has overflowed so * that ii_read_volume can return an error if an attempt is * made to read that chunk. */ if (!hanging) { if (rc == 0 || (rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0)) (void) II_SET_SHD_BIT(ip, chunk_num); _ii_unlock_chunk(ip, chunk_num); } } mutex_enter(&ip->bi_mutex); ip->bi_shdref--; if (ip->bi_state & DSW_CLOSING) { if (total_ref(ip) == 0) { cv_signal(&ip->bi_closingcv); } } mutex_exit(&ip->bi_mutex); /* did the bitmap fail during this process? */ return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0); } /* * _ii_alloc_buf * Allocate a buffer of data * * Calling/Exit State: * Returns 0 for success, < 0 for async I/O, > 0 is an error code. * * Description: * For a write buffer, calls dsw_shadow_write to perform any necessary * copy on write operations, then allocates the real buffers from the * underlying devices. * For a read buffer, allocates the real buffers from the underlying * devices, then calls _ii_fill_buf to fill the required buffer. * For a buffer that is neither read nor write, just allocate the * buffers so that a _ii_fill_buf can be done later by _ii_read. */ static int _ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag, ii_buf_t **ptr) { _ii_info_t *ip = bfd->ii_info; ii_buf_t *h; int raw = II_RAW(bfd); int rc = 0; int ioflag; int fbuf = 0, fbuf2 = 0, abuf = 0; int rw_ent = 0; if (bfd->ii_bmp) { DTRACE_PROBE(_ii_alloc_buf_end); /* any I/O to the bitmap device is barred */ return (EIO); } if (len == 0) { DTRACE_PROBE(_ii_alloc_buf_end); return (EINVAL); } /* Bounds checking */ if (pos + len > ip->bi_size) { if (ii_debug > 1) cmn_err(CE_NOTE, "!ii: Attempt to access beyond end of ii volume"); DTRACE_PROBE(_ii_alloc_buf_end); return (EIO); } h = *ptr; if (h == NULL) { h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd); if (h == NULL) { DTRACE_PROBE(_ii_alloc_buf_end); return (ENOMEM); } } /* * Temporary nsc_reserve of bitmap and other device. * This device has already been reserved by the preceding _ii_attach. * Corresponding nsc_release is in _ii_free_buf. */ h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR) : (bfd->ii_shd ? MST : SHD)); if (!bfd->ii_shd) ip = ip->bi_master; rw_enter(&ip->bi_linkrw, RW_READER); rw_ent = 1; if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) == DSW_SHDEXPORT) h->ii_rsrv &= ~(SHD|SHDR); if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) { rw_exit(&ip->bi_linkrw); rw_ent = 0; h->ii_rsrv = NULL; goto error; } if (flag & NSC_WRBUF) { rc = _ii_shadow_write(bfd, pos, len); if (!II_SUCCESS(rc)) goto error; } if (!(flag & NSC_RDAHEAD)) ioflag = flag & ~(NSC_RDBUF); else ioflag = flag; if (bfd->ii_shd) { /* * SHADOW */ if (ip->bi_flags & DSW_SHDEXPORT) { rc = EIO; goto error; } /* * The master device buffer has to be allocated first * so that deadlocks are avoided. */ DTRACE_PROBE(AllocBufFor_SHADOW); if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) { rc = nsc_alloc_buf(MSTFD(ip), pos, len, (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2); if (!II_SUCCESS(rc)) { if (ii_debug > 2) cmn_err(CE_WARN, "!ii: " "Join/write-S race detected\n"); if (h->ii_bufp2) (void) nsc_free_buf(h->ii_bufp2); h->ii_bufp2 = NULL; /* * Carry on as this will not matter if * _ii_fill_buf is not called, or if * it is called but doesn't need to read this * volume. */ rc = 0; } fbuf2 = 1; } if (ip->bi_flags & DSW_SHDOFFLINE) { rc = EIO; goto error; } if ((ip->bi_flags)&DSW_TREEMAP) { rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp); if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_SHDOFFLINE); goto error; } abuf = 1; } else { II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len, ioflag, &h->ii_bufp); /* do not read yet */ if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_SHDOFFLINE); goto error; } fbuf = 1; } } else { /* * MASTER */ /* * The master device buffer has to be allocated first * so that deadlocks are avoided. */ if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) { rc = EIO; goto error; } DTRACE_PROBE(AllocBufFor_MASTER); II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag, &h->ii_bufp); /* do not read yet */ if (!II_SUCCESS(rc)) { _ii_error(ip, DSW_MSTOFFLINE); goto error; } fbuf = 1; /* * If shadow FD and (dependent set OR copying) and * not (compact dependent && shadow offline && shadow exported) */ if ((ip->bi_shdfd) && ((ip->bi_flags & DSW_COPYINGP) || (!(ip->bi_flags & DSW_GOLDEN))) && (!(ip->bi_flags & (DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) { rc = nsc_alloc_buf(SHDFD(ip), pos, len, (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2); if (!II_SUCCESS(rc)) { if (ii_debug > 2) cmn_err(CE_WARN, "!ii: " "Join/write-M race detected\n"); if (h->ii_bufp2) (void) nsc_free_buf(h->ii_bufp2); h->ii_bufp2 = NULL; /* * Carry on as this will not matter if * _ii_fill_buf is not called, or if * it is called but doesn't need to read this * volume. */ rc = 0; } fbuf2 = 1; } } if (flag & NSC_RDBUF) rc = _ii_fill_buf(bfd, pos, len, flag, h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2); error: if (II_SUCCESS(rc)) { h->ii_bufh.sb_vec = h->ii_abufp ? h->ii_abufp->sb_vec : h->ii_bufp->sb_vec; h->ii_bufh.sb_error = 0; h->ii_bufh.sb_flag |= flag; h->ii_bufh.sb_pos = pos; h->ii_bufh.sb_len = len; } else { h->ii_bufh.sb_error = rc; if (h->ii_bufp2 && fbuf2) { (void) nsc_free_buf(h->ii_bufp2); h->ii_bufp2 = NULL; } if (h->ii_bufp && fbuf) { (void) nsc_free_buf(h->ii_bufp); h->ii_bufp = NULL; } if (h->ii_abufp && abuf) { (void) nsc_free_buf(h->ii_abufp); h->ii_abufp = NULL; } if (h->ii_rsrv) { /* * Release temporary reserve - reserved above. */ _ii_rlse_devs(ip, h->ii_rsrv); h->ii_rsrv = NULL; } if (rw_ent) rw_exit(&ip->bi_linkrw); } return (rc); } /* * _ii_free_buf */ static int _ii_free_buf(ii_buf_t *h) { ii_fd_t *bfd; int rsrv; int rc; if (h->ii_abufp == NULL) { rc = nsc_free_buf(h->ii_bufp); } else { rc = nsc_free_buf(h->ii_abufp); h->ii_abufp = NULL; } if (!II_SUCCESS(rc)) return (rc); if (h->ii_bufp2) { rc = nsc_free_buf(h->ii_bufp2); h->ii_bufp2 = NULL; if (!II_SUCCESS(rc)) return (rc); } bfd = h->ii_fd; rsrv = h->ii_rsrv; if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) { rc = _ii_free_handle(h, h->ii_fd); if (!II_SUCCESS(rc)) return (rc); } else { h->ii_bufh.sb_flag = NSC_HALLOCATED; h->ii_bufh.sb_vec = NULL; h->ii_bufh.sb_error = 0; h->ii_bufh.sb_pos = 0; h->ii_bufh.sb_len = 0; h->ii_rsrv = NULL; } /* * Release temporary reserve - reserved in _ii_alloc_buf. */ if (rsrv) _ii_rlse_devs(bfd->ii_info, rsrv); rw_exit(&bfd->ii_info->bi_linkrw); return (0); } /* * _ii_open * Open a device * * Calling/Exit State: * Returns a token to identify the shadow device. * * Description: * Performs the housekeeping operations associated with an upper layer * of the nsc stack opening a shadowed device. */ /* ARGSUSED */ static int _ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) { _ii_info_t *ip; _ii_overflow_t *op; ii_fd_t *bfd; int is_mst = 0; int is_shd = 0; int raw = (flag & NSC_CACHE) == 0; bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem); if (!bfd) return (ENOMEM); DTRACE_PROBE1(_ii_open_mutex, ii_fd_t *, bfd); mutex_enter(&_ii_info_mutex); for (ip = _ii_info_top; ip; ip = ip->bi_next) { if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) { is_mst = 1; break; } else if (strcmp(path, ip->bi_keyname) == 0) { is_shd = 1; break; } else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0) break; } if (is_mst) ip = ip->bi_master; if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) { DTRACE_PROBE(_ii_open_Disabled); mutex_exit(&_ii_info_mutex); return (EINTR); } if (!ip) { /* maybe it's an overflow */ mutex_exit(&_ii_info_mutex); mutex_enter(&_ii_overflow_mutex); for (op = _ii_overflow_top; op; op = op->ii_next) { if (strcmp(path, op->ii_volname) == 0) break; } mutex_exit(&_ii_overflow_mutex); if (!op) { nsc_kmem_free(bfd, sizeof (*bfd)); DTRACE_PROBE(_ii_open_end_EINVAL); return (EINVAL); } bfd->ii_ovr = 1; bfd->ii_oflags = flag; bfd->ii_optr = op; *cdp = (blind_t)bfd; DTRACE_PROBE(_ii_open_end_overflow); return (0); } mutex_enter(&ip->bi_mutex); ip->bi_ioctl++; mutex_exit(&_ii_info_mutex); if (is_mst) { if (raw) { ip->bi_mstr_iodev = NULL; /* set in attach */ ip->bi_mstrref++; } else { ip->bi_mst_iodev = NULL; /* set in attach */ ip->bi_mstref++; } ip->bi_master->bi_iifd = bfd; } else if (is_shd) { if (raw) { ip->bi_shdr_iodev = NULL; /* set in attach */ ip->bi_shdrref++; } else { ip->bi_shd_iodev = NULL; /* set in attach */ ip->bi_shdref++; } bfd->ii_shd = 1; } else { ip->bi_bmpref++; ip->bi_bmp_iodev = NULL; /* set in attach */ bfd->ii_bmp = 1; } _ii_ioctl_done(ip); mutex_exit(&ip->bi_mutex); bfd->ii_info = ip; bfd->ii_oflags = flag; *cdp = (blind_t)bfd; return (0); } static int _ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) { return (_ii_open(path, NSC_CACHE|flag, cdp, iodev)); } static int _ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) { return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev)); } /* * _ii_close * Close a device * * Calling/Exit State: * Always succeeds - returns 0 * * Description: * Performs the housekeeping operations associated with an upper layer * of the nsc stack closing a shadowed device. */ static int _ii_close(bfd) ii_fd_t *bfd; { _ii_info_t *ip = bfd->ii_info; _ii_info_dev_t *dip; int raw; if (!ip) { ASSERT(bfd->ii_ovr); return (0); } raw = II_RAW(bfd); mutex_enter(&ip->bi_mutex); if (bfd->ii_shd && raw) { dip = &ip->bi_shdrdev; } else if (bfd->ii_shd) { dip = &ip->bi_shddev; } else if (bfd->ii_bmp) { dip = &ip->bi_bmpdev; } else if (raw) { dip = ip->bi_mstrdev; } else { dip = ip->bi_mstdev; } if (dip) { dip->bi_ref--; if (dip->bi_ref == 0) dip->bi_iodev = NULL; } if (ip->bi_state & DSW_CLOSING) { if (total_ref(ip) == 0) { cv_signal(&ip->bi_closingcv); } } else if ((ip->bi_flags & DSW_HANGING) && (ip->bi_head->bi_state & DSW_CLOSING)) cv_signal(&ip->bi_head->bi_closingcv); if (!(bfd->ii_shd || bfd->ii_bmp)) /* is master device */ ip->bi_master->bi_iifd = NULL; mutex_exit(&ip->bi_mutex); nsc_kmem_free(bfd, sizeof (*bfd)); return (0); } /* * _ii_alloc_handle * Allocate a handle * */ static nsc_buf_t * _ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd) { ii_buf_t *h; if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) return (NULL); h = kmem_alloc(sizeof (*h), KM_SLEEP); if (!h) return (NULL); h->ii_abufp = NULL; h->ii_bufp = nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb); if (!h->ii_bufp) { kmem_free(h, sizeof (*h)); return (NULL); } h->ii_bufp2 = NULL; h->ii_bufh.sb_flag = NSC_HALLOCATED; h->ii_fd = bfd; h->ii_rsrv = NULL; return ((nsc_buf_t *)h); } /* * _ii_free_handle * Free a handle * */ static int /*ARGSUSED*/ _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd) { int rc; if (h->ii_abufp) (void) nsc_free_buf(h->ii_abufp); rc = nsc_free_handle(h->ii_bufp); if (!II_SUCCESS(rc)) { return (rc); } kmem_free(h, sizeof (ii_buf_t)); return (0); } /* * _ii_attach * Attach * * Calling/Exit State: * Returns 0 for success, errno on failure. * * Description: */ static int _ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev) { _ii_info_t *ip; int dev; int raw; int rc; _ii_info_dev_t *infop; raw = II_RAW(bfd); DTRACE_PROBE2(_ii_attach_info, char *, bfd->ii_shd? "shadow" : "master", int, raw); if (bfd->ii_ovr) return (EINVAL); ip = bfd->ii_info; if (ip == NULL) return (EINVAL); mutex_enter(&ip->bi_mutex); if (bfd->ii_bmp) { infop = &ip->bi_bmpdev; } else if (bfd->ii_shd) { if (raw) { infop = &ip->bi_shdrdev; } else { infop = &ip->bi_shddev; } } else if (!bfd->ii_ovr) { if (raw) { infop = ip->bi_mstrdev; } else { infop = ip->bi_mstdev; } } if (iodev) { infop->bi_iodev = iodev; nsc_set_owner(infop->bi_fd, infop->bi_iodev); } mutex_exit(&ip->bi_mutex); if (bfd->ii_bmp) return (EINVAL); if (raw) dev = bfd->ii_shd ? SHDR : MSTR; else dev = bfd->ii_shd ? SHD : MST; rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL); return (rc); } /* * _ii_detach * Detach * * Calling/Exit State: * Returns 0 for success, always succeeds * * Description: */ static int _ii_detach(bfd) ii_fd_t *bfd; { int dev; int raw; raw = II_RAW(bfd); DTRACE_PROBE2(_ii_detach_info, char *, bfd->ii_shd? "shadow" : "master", int, raw); if (bfd->ii_bmp) return (0); ASSERT(bfd->ii_info); dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST); _ii_rlse_devs(bfd->ii_info, dev); return (0); } /* * _ii_get_pinned * */ static int _ii_get_pinned(ii_fd_t *bfd) { int rc; if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) return (EIO); rc = nsc_get_pinned(II_FD(bfd)); return (rc); } /* * _ii_discard_pinned * */ static int _ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len) { int rc; if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info)) return (EIO); rc = nsc_discard_pinned(II_FD(bfd), pos, len); return (rc); } /* * _ii_partsize * */ static int _ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr) { /* Always return saved size */ *ptr = bfd->ii_info->bi_size; return (0); } /* * _ii_maxfbas * */ static int _ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr) { int rc; int rs; int dev; _ii_info_t *ip; ip = bfd->ii_info; if (REMOTE_VOL(bfd->ii_shd, ip)) return (EIO); dev = ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR; DTRACE_PROBE1(_ii_maxfbas_info, char *, dev == SHDR? "shadow" : "master"); rs = _ii_rsrv_devs(ip, dev, II_INTERNAL); rc = nsc_maxfbas((dev == MSTR) ? MSTFD(ip) : SHDFD(ip), flag, ptr); if (rs == 0) _ii_rlse_devs(ip, dev); return (rc); } /* * ii_get_group_list */ _ii_info_t ** ii_get_group_list(char *group, int *count) { int i; int nip; uint64_t hash; _ii_info_t **ipa; _ii_lsthead_t *head; _ii_lstinfo_t *np; hash = nsc_strhash(group); for (head = _ii_group_top; head; head = head->lst_next) { if (hash == head->lst_hash && strncmp(head->lst_name, group, DSW_NAMELEN) == 0) break; } if (!head) { return (NULL); } /* Count entries */ for (nip = 0, np = head->lst_start; np; np = np->lst_next) ++nip; ASSERT(nip > 0); ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP); np = head->lst_start; for (i = 0; i < nip; i++) { ASSERT(np != 0); ipa[i] = np->lst_ip; np = np->lst_next; } *count = nip; return (ipa); } /* * _ii_pinned * */ static void _ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) { DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev, nsc_off_t, pos, nsc_size_t, len); nsc_pinned_data(dip->bi_iodev, pos, len); } /* * _ii_unpinned * */ static void _ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) { nsc_unpinned_data(dip->bi_iodev, pos, len); } /* * _ii_read */ static int _ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) { int rc; void *sb_vec; nsc_vec_t **src; if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info)) rc = EIO; else { src = h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec; sb_vec = *src; *src = h->ii_bufh.sb_vec; rc = _ii_fill_buf(h->ii_fd, pos, len, flag, h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2); *src = sb_vec; } if (!II_SUCCESS(rc)) h->ii_bufh.sb_error = rc; return (rc); } /* * _ii_write */ static int _ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) { int rc; ii_fd_t *bfd = h->ii_fd; _ii_info_t *ip = bfd->ii_info; chunkid_t chunk_num; nsc_size_t copy_len; nsc_off_t mapped_fba; chunkid_t mapped_chunk; int overflow; nsc_buf_t *tmp; void *sb_vec; if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info)) rc = EIO; else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) { sb_vec = h->ii_bufp->sb_vec; h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; if (bfd->ii_shd) { II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len, flag); } else { II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len, flag); } h->ii_bufp->sb_vec = sb_vec; } else { /* write of mapped shadow buffer */ rc = 0; chunk_num = pos / DSW_SIZE; while (len > 0 && II_SUCCESS(rc)) { /* * don't need to test bitmaps as allocating the * write buffer will c-o-write the chunk. */ mapped_chunk = ii_tsearch(ip, chunk_num); if (mapped_chunk == II_NULLNODE) { rc = EIO; break; } overflow = II_ISOVERFLOW(mapped_chunk); if (overflow) mapped_chunk = II_2OVERFLOW(mapped_chunk); mapped_fba = DSW_CHK2FBA(mapped_chunk) + (pos % DSW_SIZE); copy_len = DSW_SIZE - (pos % DSW_SIZE); if (copy_len > len) copy_len = len; tmp = NULL; if (overflow) { (void) nsc_reserve(OVRFD(ip), NSC_MULTI); rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, copy_len, NSC_WRBUF, &tmp); } else rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, copy_len, NSC_WRBUF, &tmp); sb_vec = h->ii_abufp->sb_vec; h->ii_abufp->sb_vec = h->ii_bufh.sb_vec; if (II_SUCCESS(rc)) { rc = nsc_copy(h->ii_abufp, tmp, pos, mapped_fba, copy_len); } if (overflow) { II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba, copy_len, flag); } else { II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba, copy_len, flag); } h->ii_abufp->sb_vec = sb_vec; (void) nsc_free_buf(tmp); if (overflow) nsc_release(OVRFD(ip)); /* move on to next chunk */ pos += copy_len; len -= copy_len; chunk_num++; } } if (!II_SUCCESS(rc)) h->ii_bufh.sb_error = rc; return (rc); } /* * _ii_zero */ static int _ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) { int rc; void *sb_vec; sb_vec = h->ii_bufp->sb_vec; h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; rc = nsc_zero(h->ii_bufp, pos, len, flag); h->ii_bufp->sb_vec = sb_vec; if (!II_SUCCESS(rc)) h->ii_bufh.sb_error = rc; return (rc); } /* * _ii_uncommit */ static int _ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) { int rc; void *sb_vec; sb_vec = h->ii_bufp->sb_vec; h->ii_bufp->sb_vec = h->ii_bufh.sb_vec; rc = nsc_uncommit(h->ii_bufp, pos, len, flag); h->ii_bufp->sb_vec = sb_vec; if (!II_SUCCESS(rc)) h->ii_bufh.sb_error = rc; return (rc); } /* * _ii_trksize */ static int _ii_trksize(ii_fd_t *bfd, int trksize) { int rc; rc = nsc_set_trksize(II_FD(bfd), trksize); return (rc); } /* * _ii_register_path */ static nsc_path_t * _ii_register_path(char *path, int type, nsc_io_t *io) { nsc_path_t *tok; tok = nsc_register_path(path, type, io); return (tok); } /* * _ii_unregister_path */ /*ARGSUSED*/ static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type) { int rc; rc = nsc_unregister_path(sp, flag); return (rc); } int _ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name, char **key) { _ii_lsthead_t **head; _ii_lstinfo_t *node; uint64_t hash; ASSERT(key && !*key); ASSERT(ip && mutex && lst && name); node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP); if (!node) { cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM"); DTRACE_PROBE(_ii_ll_add_end_ENOMEM); return (ENOMEM); } node->lst_ip = ip; /* find out where we should insert it */ hash = nsc_strhash(name); mutex_enter(mutex); for (head = lst; *head; head = &((*head)->lst_next)) { if (((*head)->lst_hash == hash) && strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) { node->lst_next = (*head)->lst_start; (*head)->lst_start = node; break; } } if (!*head) { /* create a new entry */ *head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP); if (!*head) { /* bother */ cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM"); kmem_free(node, sizeof (_ii_lstinfo_t)); DTRACE_PROBE(_ii_ll_add_end_2); return (ENOMEM); } (*head)->lst_hash = hash; (void) strncpy((*head)->lst_name, name, DSW_NAMELEN); (*head)->lst_start = node; } mutex_exit(mutex); *key = (*head)->lst_name; return (0); } int _ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key) { _ii_lsthead_t **head, *oldhead = 0; _ii_lstinfo_t **node, *oldnode = 0; uint64_t hash; int found; ASSERT(key && *key); ASSERT(ip && lst); hash = nsc_strhash(*key); mutex_enter(mutex); for (head = lst; *head; head = &((*head)->lst_next)) { if (((*head)->lst_hash == hash) && strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0) break; } if (!*head) { /* no such link (!) */ mutex_exit(mutex); return (0); } found = 0; for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) { if (ip == (*node)->lst_ip) { oldnode = *node; *node = (*node)->lst_next; kmem_free(oldnode, sizeof (_ii_lstinfo_t)); found = 1; break; } } ASSERT(found); if (!found) { mutex_exit(mutex); return (0); } /* did we just delete the last set in this resource group? */ if (!(*head)->lst_start) { oldhead = *head; *head = (*head)->lst_next; kmem_free(oldhead, sizeof (_ii_lsthead_t)); } mutex_exit(mutex); *key = NULL; return (0); } static nsc_def_t _ii_fd_def[] = { "Pinned", (uintptr_t)_ii_pinned, 0, "Unpinned", (uintptr_t)_ii_unpinned, 0, 0, 0, 0 }; static nsc_def_t _ii_io_def[] = { "Open", (uintptr_t)_ii_openc, 0, "Close", (uintptr_t)_ii_close, 0, "Attach", (uintptr_t)_ii_attach, 0, "Detach", (uintptr_t)_ii_detach, 0, "AllocHandle", (uintptr_t)_ii_alloc_handle, 0, "FreeHandle", (uintptr_t)_ii_free_handle, 0, "AllocBuf", (uintptr_t)_ii_alloc_buf, 0, "FreeBuf", (uintptr_t)_ii_free_buf, 0, "GetPinned", (uintptr_t)_ii_get_pinned, 0, "Discard", (uintptr_t)_ii_discard_pinned, 0, "PartSize", (uintptr_t)_ii_partsize, 0, "MaxFbas", (uintptr_t)_ii_maxfbas, 0, "Read", (uintptr_t)_ii_read, 0, "Write", (uintptr_t)_ii_write, 0, "Zero", (uintptr_t)_ii_zero, 0, "Uncommit", (uintptr_t)_ii_uncommit, 0, "TrackSize", (uintptr_t)_ii_trksize, 0, "Provide", 0, 0, 0, 0, 0 }; static nsc_def_t _ii_ior_def[] = { "Open", (uintptr_t)_ii_openr, 0, "Close", (uintptr_t)_ii_close, 0, "Attach", (uintptr_t)_ii_attach, 0, "Detach", (uintptr_t)_ii_detach, 0, "AllocHandle", (uintptr_t)_ii_alloc_handle, 0, "FreeHandle", (uintptr_t)_ii_free_handle, 0, "AllocBuf", (uintptr_t)_ii_alloc_buf, 0, "FreeBuf", (uintptr_t)_ii_free_buf, 0, "GetPinned", (uintptr_t)_ii_get_pinned, 0, "Discard", (uintptr_t)_ii_discard_pinned, 0, "PartSize", (uintptr_t)_ii_partsize, 0, "MaxFbas", (uintptr_t)_ii_maxfbas, 0, "Read", (uintptr_t)_ii_read, 0, "Write", (uintptr_t)_ii_write, 0, "Zero", (uintptr_t)_ii_zero, 0, "Uncommit", (uintptr_t)_ii_uncommit, 0, "TrackSize", (uintptr_t)_ii_trksize, 0, "Provide", 0, 0, 0, 0, 0 };