comparison usr/src/uts/common/fs/zfs/spa.c @ 10974:32d689ba6466

6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
6898598 dsl needs to be more aware of dedup space
author Jeff Bonwick <Jeff.Bonwick@Sun.COM>
date Thu, 05 Nov 2009 18:44:56 -0800
parents 0c81acaaf614
children 63ab26072e41
comparing 10973:6969e719525a (old, left line numbers) with 10974:32d689ba6466 (new, right line numbers); '-' marks deleted lines, '+' marks inserted lines, '[...]' marks elided context
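To make the shape of the change easier to follow before reading the hunks, here is a condensed sketch of the new (right-hand) log-device removal path in spa_vdev_remove(), stitched together from the insertions below. The enclosing guard is written as "else if (vd != NULL && vd->vdev_islog)" purely as an assumption, since that line falls outside this excerpt, and the spare/l2cache handling is omitted.

        } else if (vd != NULL && vd->vdev_islog) {      /* guard assumed; not in this excerpt */
                mg = vd->vdev_mg;

                /*
                 * Stop allocating from this vdev.
                 */
                metaslab_group_passivate(mg);

                /*
                 * Wait for the youngest allocations and frees to sync,
                 * and then wait for the deferral of those frees to finish.
                 */
                spa_vdev_config_exit(spa, NULL,
                    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

                /*
                 * Attempt to evacuate the vdev.
                 */
                error = spa_vdev_remove_evacuate(spa, vd);

                txg = spa_vdev_config_enter(spa);

                /*
                 * If we couldn't evacuate the vdev, unwind by letting the
                 * metaslab group accept allocations again.
                 */
                if (error) {
                        metaslab_group_activate(mg);
                        return (spa_vdev_exit(spa, NULL, txg, error));
                }

                /*
                 * Clean up the vdev namespace.
                 */
                spa_vdev_remove_from_namespace(spa, vd);
        }

The old spa_vdev_remove_start()/spa_vdev_remove_done() pairing disappears; passivating the metaslab group up front and re-activating it on failure gives the same stop-allocations behaviour plus a clean unwind path when evacuation returns an error.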
3787 3787    * grab and release the spa_config_lock while still holding the namespace
3788 3788    * lock. During each step the configuration is synced out.
3789 3789    */
3790 3790
3791 3791   /*
3792      -  * Initial phase of device removal - stop future allocations from this device.
3793      -  */
3794      - void
3795      - spa_vdev_remove_start(spa_t *spa, vdev_t *vd)
3796      - {
3797      -         metaslab_group_t *mg = vd->vdev_mg;
3798      -
3799      -         ASSERT(MUTEX_HELD(&spa_namespace_lock));
3800      -         ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
3801      -         ASSERT(vd == vd->vdev_top);
3802      -
3803      -         /*
3804      -          * Remove our vdev from the allocatable vdevs
3805      -          */
3806      -         if (mg)
3807      -                 metaslab_class_remove(mg->mg_class, mg);
3808      - }
3809      -
3810      - /*
3811 3792    * Evacuate the device.
3812 3793    */
3813 3794   int
3814 3795   spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
3815 3796   {
     3797 +         int error = 0;
3816 3798           uint64_t txg;
3817      -         int error;
3818 3799
3819 3800           ASSERT(MUTEX_HELD(&spa_namespace_lock));
3820 3801           ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
3821 3802           ASSERT(vd == vd->vdev_top);
3822 3803
[...]
3825 3806            * since we need to do I/O but we do keep the
3826 3807            * spa_namespace_lock held. Once this completes the device
3827 3808            * should no longer have any blocks allocated on it.
3828 3809            */
3829 3810           if (vd->vdev_islog) {
3830      -                 /*
3831      -                  * Evacuate the device.
3832      -                  */
3833      -                 if (error = dmu_objset_find(spa_name(spa),
3834      -                     zil_vdev_offline, NULL, DS_FIND_CHILDREN)) {
3835      -                         uint64_t txg;
3836      -
3837      -                         txg = spa_vdev_config_enter(spa);
3838      -                         metaslab_class_add(spa->spa_log_class,
3839      -                             vd->vdev_mg);
3840      -                         return (spa_vdev_exit(spa, NULL, txg, error));
3841      -                 }
3842      -                 txg_wait_synced(spa_get_dsl(spa), 0);
3843      -         }
3844      -
3845      -         /*
3846      -          * Remove any remaining MOS metadata associated with the device.
     3811 +                 error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
     3812 +                     NULL, DS_FIND_CHILDREN);
     3813 +         } else {
     3814 +                 error = ENOTSUP;        /* until we have bp rewrite */
     3815 +         }
     3816 +
     3817 +         txg_wait_synced(spa_get_dsl(spa), 0);
     3818 +
     3819 +         if (error)
     3820 +                 return (error);
     3821 +
     3822 +         /*
     3823 +          * The evacuation succeeded. Remove any remaining MOS metadata
     3824 +          * associated with this vdev, and wait for these changes to sync.
3847 3825            */
3848 3826           txg = spa_vdev_config_enter(spa);
3849 3827           vd->vdev_removing = B_TRUE;
3850 3828           vdev_dirty(vd, 0, NULL, txg);
3851 3829           vdev_config_dirty(vd);
[...]
3856 3834
3857 3835   /*
3858 3836    * Complete the removal by cleaning up the namespace.
3859 3837    */
3860 3838   void
3861      - spa_vdev_remove_done(spa_t *spa, vdev_t *vd)
     3839 + spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
3862 3840   {
3863 3841           vdev_t *rvd = spa->spa_root_vdev;
3864      -         metaslab_group_t *mg = vd->vdev_mg;
3865 3842           uint64_t id = vd->vdev_id;
3866 3843           boolean_t last_vdev = (id == (rvd->vdev_children - 1));
3867 3844
3868 3845           ASSERT(MUTEX_HELD(&spa_namespace_lock));
3869 3846           ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
[...]
3875 3852           vdev_state_clean(vd);
3876 3853           if (list_link_active(&vd->vdev_config_dirty_node))
3877 3854                   vdev_config_clean(vd);
3878 3855
3879 3856           vdev_free(vd);
3880      -
3881      -         /*
3882      -          * It's possible that another thread is trying todo a spa_vdev_add()
3883      -          * at the same time we're trying remove it. As a result the
3884      -          * added vdev may not have initialized its metaslabs yet.
3885      -          */
3886      -         if (mg != NULL)
3887      -                 metaslab_group_destroy(mg);
3888 3857
3889 3858           if (last_vdev) {
3890 3859                   vdev_compact_children(rvd);
3891 3860           } else {
3892 3861                   vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
[...]
3906 3875    */
3907 3876   int
3908 3877   spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
3909 3878   {
3910 3879           vdev_t *vd;
     3880 +         metaslab_group_t *mg;
3911 3881           nvlist_t **spares, **l2cache, *nv;
3912 3882           uint64_t txg = 0;
3913 3883           uint_t nspares, nl2cache;
3914 3884           int error = 0;
3915 3885           boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
[...]
3953 3923                   /*
3954 3924                    * XXX - Once we have bp-rewrite this should
3955 3925                    * become the common case.
3956 3926                    */
3957 3927
     3928 +                 mg = vd->vdev_mg;
     3929 +
3958 3930                   /*
3959      -                  * 1. Stop allocations
3960      -                  * 2. Evacuate the device (i.e. kill off stubby and
3961      -                  *    metadata) and wait for it to complete (i.e. sync).
3962      -                  * 3. Cleanup the vdev namespace.
     3931 +                  * Stop allocating from this vdev.
3963 3932                    */
3964      -                 spa_vdev_remove_start(spa, vd);
     3933 +                 metaslab_group_passivate(mg);
3965 3934
3966 3935                   /*
3967 3936                    * Wait for the youngest allocations and frees to sync,
3968 3937                    * and then wait for the deferral of those frees to finish.
3969 3938                    */
3970 3939                   spa_vdev_config_exit(spa, NULL,
3971 3940                       txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
3972 3941
3973      -                 if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0)
3974      -                         return (error);
     3942 +                 /*
     3943 +                  * Attempt to evacuate the vdev.
     3944 +                  */
     3945 +                 error = spa_vdev_remove_evacuate(spa, vd);
     3946 +
3975 3947                   txg = spa_vdev_config_enter(spa);
3976 3948
3977      -                 spa_vdev_remove_done(spa, vd);
     3949 +                 /*
     3950 +                  * If we couldn't evacuate the vdev, unwind.
     3951 +                  */
     3952 +                 if (error) {
     3953 +                         metaslab_group_activate(mg);
     3954 +                         return (spa_vdev_exit(spa, NULL, txg, error));
     3955 +                 }
     3956 +
     3957 +                 /*
     3958 +                  * Clean up the vdev namespace.
     3959 +                  */
     3960 +                 spa_vdev_remove_from_namespace(spa, vd);
3978 3961
3979 3962           } else if (vd != NULL) {
3980 3963                   /*
3981 3964                    * Normal vdevs cannot be removed (yet).
3982 3965                    */
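Likewise, here is spa_vdev_remove_evacuate() as it reads after this change, condensed from the right-hand column above. The comment whose opening lines fall outside the excerpted hunks is replaced by a shorter descriptive one, and the tail of the function (not shown in this comparison) is left elided rather than reconstructed.

int
spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
{
        int error = 0;
        uint64_t txg;

        ASSERT(MUTEX_HELD(&spa_namespace_lock));
        ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
        ASSERT(vd == vd->vdev_top);

        /*
         * Intent log devices are evacuated by visiting every dataset
         * with zil_vdev_offline(); anything else has to wait for
         * bp rewrite.
         */
        if (vd->vdev_islog) {
                error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
                    NULL, DS_FIND_CHILDREN);
        } else {
                error = ENOTSUP;        /* until we have bp rewrite */
        }

        txg_wait_synced(spa_get_dsl(spa), 0);

        if (error)
                return (error);

        /*
         * The evacuation succeeded. Remove any remaining MOS metadata
         * associated with this vdev, and wait for these changes to sync.
         */
        txg = spa_vdev_config_enter(spa);
        vd->vdev_removing = B_TRUE;
        vdev_dirty(vd, 0, NULL, txg);
        vdev_config_dirty(vd);

        /* ... remainder of the function not shown in this comparison ... */
}

Note that txg_wait_synced() now runs before the error check, so the youngest allocations and frees are synced regardless of whether the evacuation is about to fail with ENOTSUP.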