|
51 | 51 | #include <sys/trace_zfs.h> |
52 | 52 |
|
53 | 53 | /* |
54 | | - * This file contains the necessary logic to remove vdevs from a |
55 | | - * storage pool. Currently, the only devices that can be removed |
56 | | - * are log, cache, and spare devices; and top level vdevs from a pool |
57 | | - * w/o raidz or mirrors. (Note that members of a mirror can be removed |
58 | | - * by the detach operation.) |
| 54 | + * This file contains the necessary logic to remove vdevs from a storage |
| 55 | + * pool. Note that members of a mirror can be removed by the detach |
| 56 | + * operation. Currently, the only devices that can be removed are: |
59 | 57 | * |
60 | | - * Log vdevs are removed by evacuating them and then turning the vdev |
61 | | - * into a hole vdev while holding spa config locks. |
| 58 | + * 1) Traditional hot spare and cache vdevs. Note that draid distributed |
| 59 | + * spares are fixed at creation time and cannot be removed. |
62 | 60 | * |
63 | | - * Top level vdevs are removed and converted into an indirect vdev via |
64 | | - * a multi-step process: |
| 61 | + * 2) Log vdevs are removed by evacuating them and then turning the vdev |
| 62 | + * into a hole vdev while holding spa config locks. |
65 | 63 | * |
66 | | - * - Disable allocations from this device (spa_vdev_remove_top). |
| 64 | + * 3) Top-level singleton and mirror vdevs, including dedup and special |
| 65 | + * vdevs, are removed and converted into an indirect vdev via a |
| 66 | + * multi-step process: |
67 | 67 | * |
68 | | - * - From a new thread (spa_vdev_remove_thread), copy data from |
69 | | - * the removing vdev to a different vdev. The copy happens in open |
70 | | - * context (spa_vdev_copy_impl) and issues a sync task |
71 | | - * (vdev_mapping_sync) so the sync thread can update the partial |
72 | | - * indirect mappings in core and on disk. |
| 68 | + * - Disable allocations from this device (spa_vdev_remove_top). |
73 | 69 | * |
74 | | - * - If a free happens during a removal, it is freed from the |
75 | | - * removing vdev, and if it has already been copied, from the new |
76 | | - * location as well (free_from_removing_vdev). |
| 70 | + * - From a new thread (spa_vdev_remove_thread), copy data from the |
| 71 | + * removing vdev to a different vdev. The copy happens in open context |
| 72 | + * (spa_vdev_copy_impl) and issues a sync task (vdev_mapping_sync) so |
| 73 | + * the sync thread can update the partial indirect mappings in core |
| 74 | + * and on disk. |
77 | 75 | * |
78 | | - * - After the removal is completed, the copy thread converts the vdev |
79 | | - * into an indirect vdev (vdev_remove_complete) before instructing |
80 | | - * the sync thread to destroy the space maps and finish the removal |
81 | | - * (spa_finish_removal). |
| 76 | + * - If a free happens during a removal, it is freed from the removing |
| 77 | + * vdev, and if it has already been copied, from the new location as |
| 78 | + * well (free_from_removing_vdev). |
| 79 | + * |
| 80 | + * - After the removal is completed, the copy thread converts the vdev |
| 81 | + * into an indirect vdev (vdev_remove_complete) before instructing |
| 82 | + * the sync thread to destroy the space maps and finish the removal |
| 83 | + * (spa_finish_removal). |
| 84 | + * |
| 85 | + * The following constraints currently apply to primary device removal: |
| 86 | + * |
| 87 | + * - All vdevs must be online, healthy, and not be missing any data |
| 88 | + * according to the DTLs. |
| 89 | + * |
| 90 | + * - A removed special/dedup vdev must have the same ashift as the |
| 91 | + * normal allocation class. Furthermore, all vdevs in the normal |
| 92 | + * class must have the same ashift to ensure the new allocation |
| 93 | + * never includes additional padding. |
| 94 | + * |
| 95 | + * - The normal allocation class cannot contain a raidz or draid |
| 96 | + * top-level vdev since segments are copied without regard for block |
| 97 | + * boundaries. This makes it impossible to calculate the required |
| 98 | + * parity columns. |
| 99 | + * |
| 100 | + * N.B. ashift and raidz/draid constraints for primary top-level device |
| 101 | + * removal could be slightly relaxed if it were possible to request that |
| 102 | + * metaslab_alloc_dva() select a DVA from a mirror or singleton in the |
| 103 | + * specified allocation class. This would be particularly useful for |
| 104 | + * raidz/draid pools which often include a mirrored special device. If |
| 105 | + * a top-level singleton were mistakenly added, it could then still |
| 106 | + * be removed at the cost of some special device capacity. This may be a |
| 107 | + * worthwhile tradeoff depending on the pool capacity and expense (cost, |
| 108 | + * complexity, time) of creating a new pool and copying all of the data |
| 109 | + * to correct the configuration. |
82 | 110 | */ |
83 | 111 |
|
84 | 112 | typedef struct vdev_copy_arg { |
|
0 commit comments