Mercurial > illumos > illumos-gate
comparison usr/src/uts/common/fs/zfs/spa.c @ 10974:32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
6898598 dsl needs to be more aware of dedup space
author | Jeff Bonwick <Jeff.Bonwick@Sun.COM> |
---|---|
date | Thu, 05 Nov 2009 18:44:56 -0800 |
parents | 0c81acaaf614 |
children | 63ab26072e41 |
comparison
equal
deleted
inserted
replaced
10973:6969e719525a | 10974:32d689ba6466 |
---|---|
3787 * grab and release the spa_config_lock while still holding the namespace | 3787 * grab and release the spa_config_lock while still holding the namespace |
3788 * lock. During each step the configuration is synced out. | 3788 * lock. During each step the configuration is synced out. |
3789 */ | 3789 */ |
3790 | 3790 |
3791 /* | 3791 /* |
3792 * Initial phase of device removal - stop future allocations from this device. | |
3793 */ | |
3794 void | |
3795 spa_vdev_remove_start(spa_t *spa, vdev_t *vd) | |
3796 { | |
3797 metaslab_group_t *mg = vd->vdev_mg; | |
3798 | |
3799 ASSERT(MUTEX_HELD(&spa_namespace_lock)); | |
3800 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); | |
3801 ASSERT(vd == vd->vdev_top); | |
3802 | |
3803 /* | |
3804 * Remove our vdev from the allocatable vdevs | |
3805 */ | |
3806 if (mg) | |
3807 metaslab_class_remove(mg->mg_class, mg); | |
3808 } | |
3809 | |
3810 /* | |
3811 * Evacuate the device. | 3792 * Evacuate the device. |
3812 */ | 3793 */ |
3813 int | 3794 int |
3814 spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) | 3795 spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) |
3815 { | 3796 { |
3797 int error = 0; | |
3816 uint64_t txg; | 3798 uint64_t txg; |
3817 int error; | |
3818 | 3799 |
3819 ASSERT(MUTEX_HELD(&spa_namespace_lock)); | 3800 ASSERT(MUTEX_HELD(&spa_namespace_lock)); |
3820 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); | 3801 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); |
3821 ASSERT(vd == vd->vdev_top); | 3802 ASSERT(vd == vd->vdev_top); |
3822 | 3803 |
3825 * since we need to do I/O but we do keep the | 3806 * since we need to do I/O but we do keep the |
3826 * spa_namespace_lock held. Once this completes the device | 3807 * spa_namespace_lock held. Once this completes the device |
3827 * should no longer have any blocks allocated on it. | 3808 * should no longer have any blocks allocated on it. |
3828 */ | 3809 */ |
3829 if (vd->vdev_islog) { | 3810 if (vd->vdev_islog) { |
3830 /* | 3811 error = dmu_objset_find(spa_name(spa), zil_vdev_offline, |
3831 * Evacuate the device. | 3812 NULL, DS_FIND_CHILDREN); |
3832 */ | 3813 } else { |
3833 if (error = dmu_objset_find(spa_name(spa), | 3814 error = ENOTSUP; /* until we have bp rewrite */ |
3834 zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { | 3815 } |
3835 uint64_t txg; | 3816 |
3836 | 3817 txg_wait_synced(spa_get_dsl(spa), 0); |
3837 txg = spa_vdev_config_enter(spa); | 3818 |
3838 metaslab_class_add(spa->spa_log_class, | 3819 if (error) |
3839 vd->vdev_mg); | 3820 return (error); |
3840 return (spa_vdev_exit(spa, NULL, txg, error)); | 3821 |
3841 } | 3822 /* |
3842 txg_wait_synced(spa_get_dsl(spa), 0); | 3823 * The evacuation succeeded. Remove any remaining MOS metadata |
3843 } | 3824 * associated with this vdev, and wait for these changes to sync. |
3844 | |
3845 /* | |
3846 * Remove any remaining MOS metadata associated with the device. | |
3847 */ | 3825 */ |
3848 txg = spa_vdev_config_enter(spa); | 3826 txg = spa_vdev_config_enter(spa); |
3849 vd->vdev_removing = B_TRUE; | 3827 vd->vdev_removing = B_TRUE; |
3850 vdev_dirty(vd, 0, NULL, txg); | 3828 vdev_dirty(vd, 0, NULL, txg); |
3851 vdev_config_dirty(vd); | 3829 vdev_config_dirty(vd); |
3856 | 3834 |
3857 /* | 3835 /* |
3858 * Complete the removal by cleaning up the namespace. | 3836 * Complete the removal by cleaning up the namespace. |
3859 */ | 3837 */ |
3860 void | 3838 void |
3861 spa_vdev_remove_done(spa_t *spa, vdev_t *vd) | 3839 spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) |
3862 { | 3840 { |
3863 vdev_t *rvd = spa->spa_root_vdev; | 3841 vdev_t *rvd = spa->spa_root_vdev; |
3864 metaslab_group_t *mg = vd->vdev_mg; | |
3865 uint64_t id = vd->vdev_id; | 3842 uint64_t id = vd->vdev_id; |
3866 boolean_t last_vdev = (id == (rvd->vdev_children - 1)); | 3843 boolean_t last_vdev = (id == (rvd->vdev_children - 1)); |
3867 | 3844 |
3868 ASSERT(MUTEX_HELD(&spa_namespace_lock)); | 3845 ASSERT(MUTEX_HELD(&spa_namespace_lock)); |
3869 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); | 3846 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); |
3875 vdev_state_clean(vd); | 3852 vdev_state_clean(vd); |
3876 if (list_link_active(&vd->vdev_config_dirty_node)) | 3853 if (list_link_active(&vd->vdev_config_dirty_node)) |
3877 vdev_config_clean(vd); | 3854 vdev_config_clean(vd); |
3878 | 3855 |
3879 vdev_free(vd); | 3856 vdev_free(vd); |
3880 | |
3881 /* | |
3882 * It's possible that another thread is trying todo a spa_vdev_add() | |
3883 * at the same time we're trying remove it. As a result the | |
3884 * added vdev may not have initialized its metaslabs yet. | |
3885 */ | |
3886 if (mg != NULL) | |
3887 metaslab_group_destroy(mg); | |
3888 | 3857 |
3889 if (last_vdev) { | 3858 if (last_vdev) { |
3890 vdev_compact_children(rvd); | 3859 vdev_compact_children(rvd); |
3891 } else { | 3860 } else { |
3892 vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); | 3861 vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); |
3906 */ | 3875 */ |
3907 int | 3876 int |
3908 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) | 3877 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) |
3909 { | 3878 { |
3910 vdev_t *vd; | 3879 vdev_t *vd; |
3880 metaslab_group_t *mg; | |
3911 nvlist_t **spares, **l2cache, *nv; | 3881 nvlist_t **spares, **l2cache, *nv; |
3912 uint64_t txg = 0; | 3882 uint64_t txg = 0; |
3913 uint_t nspares, nl2cache; | 3883 uint_t nspares, nl2cache; |
3914 int error = 0; | 3884 int error = 0; |
3915 boolean_t locked = MUTEX_HELD(&spa_namespace_lock); | 3885 boolean_t locked = MUTEX_HELD(&spa_namespace_lock); |
3953 /* | 3923 /* |
3954 * XXX - Once we have bp-rewrite this should | 3924 * XXX - Once we have bp-rewrite this should |
3955 * become the common case. | 3925 * become the common case. |
3956 */ | 3926 */ |
3957 | 3927 |
3928 mg = vd->vdev_mg; | |
3929 | |
3958 /* | 3930 /* |
3959 * 1. Stop allocations | 3931 * Stop allocating from this vdev. |
3960 * 2. Evacuate the device (i.e. kill off stubby and | |
3961 * metadata) and wait for it to complete (i.e. sync). | |
3962 * 3. Cleanup the vdev namespace. | |
3963 */ | 3932 */ |
3964 spa_vdev_remove_start(spa, vd); | 3933 metaslab_group_passivate(mg); |
3965 | 3934 |
3966 /* | 3935 /* |
3967 * Wait for the youngest allocations and frees to sync, | 3936 * Wait for the youngest allocations and frees to sync, |
3968 * and then wait for the deferral of those frees to finish. | 3937 * and then wait for the deferral of those frees to finish. |
3969 */ | 3938 */ |
3970 spa_vdev_config_exit(spa, NULL, | 3939 spa_vdev_config_exit(spa, NULL, |
3971 txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); | 3940 txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); |
3972 | 3941 |
3973 if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0) | 3942 /* |
3974 return (error); | 3943 * Attempt to evacuate the vdev. |
3944 */ | |
3945 error = spa_vdev_remove_evacuate(spa, vd); | |
3946 | |
3975 txg = spa_vdev_config_enter(spa); | 3947 txg = spa_vdev_config_enter(spa); |
3976 | 3948 |
3977 spa_vdev_remove_done(spa, vd); | 3949 /* |
3950 * If we couldn't evacuate the vdev, unwind. | |
3951 */ | |
3952 if (error) { | |
3953 metaslab_group_activate(mg); | |
3954 return (spa_vdev_exit(spa, NULL, txg, error)); | |
3955 } | |
3956 | |
3957 /* | |
3958 * Clean up the vdev namespace. | |
3959 */ | |
3960 spa_vdev_remove_from_namespace(spa, vd); | |
3978 | 3961 |
3979 } else if (vd != NULL) { | 3962 } else if (vd != NULL) { |
3980 /* | 3963 /* |
3981 * Normal vdevs cannot be removed (yet). | 3964 * Normal vdevs cannot be removed (yet). |
3982 */ | 3965 */ |