Skip to content

Commit 6fa275b

Browse files
committed
Reduce false positives and add autosit vdev property
Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
1 parent e91d007 commit 6fa275b

14 files changed

Lines changed: 246 additions & 87 deletions

File tree

include/sys/fs/zfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ typedef enum {
386386
VDEV_PROP_TRIM_ERRORS,
387387
VDEV_PROP_SLOW_IOS,
388388
VDEV_PROP_SIT_OUT,
389+
VDEV_PROP_AUTOSIT,
389390
VDEV_NUM_PROPS
390391
} vdev_prop_t;
391392

@@ -1671,6 +1672,7 @@ typedef enum {
16711672
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
16721673
ZFS_ERR_ASHIFT_MISMATCH,
16731674
ZFS_ERR_STREAM_LARGE_MICROZAP,
1675+
ZFS_ERR_TOO_MANY_SITOUTS,
16741676
} zfs_errno_t;
16751677

16761678
/*

include/sys/vdev_impl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ struct vdev {
279279
uint64_t vdev_noalloc; /* device is passivated? */
280280
uint64_t vdev_removing; /* device is being removed? */
281281
uint64_t vdev_failfast; /* device failfast setting */
282+
boolean_t vdev_autosit; /* automatic sitout management */
282283
boolean_t vdev_rz_expanding; /* raidz is being expanded? */
283284
boolean_t vdev_ishole; /* is a hole in the namespace */
284285
uint64_t vdev_top_zap;
@@ -432,8 +433,9 @@ struct vdev {
432433
hrtime_t vdev_mmp_pending; /* 0 if write finished */
433434
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
434435
uint64_t vdev_expansion_time; /* vdev's last expansion time */
436+
/* used to calculate average read latency */
437+
uint64_t *vdev_prev_histo;
435438
uint64_t vdev_outlier_count; /* read outlier amongst peers */
436-
uint64_t vdev_ewma_latency; /* moving average read latency */
437439
hrtime_t vdev_read_sit_out_expire; /* end of sit out period */
438440
list_node_t vdev_leaf_node; /* leaf vdev list */
439441

include/sys/vdev_raidz.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
6262
void vdev_raidz_reflow_copy_scratch(spa_t *);
6363
void raidz_dtl_reassessed(vdev_t *);
6464
boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t);
65+
extern void vdev_raidz_sit_child(vdev_t *svd);
6566

6667
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
6768

lib/libuutil/libuutil.abi

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,11 @@
14751475
<parameter type-id='80f4b756' name='name'/>
14761476
<return type-id='a27af98c'/>
14771477
</function-decl>
1478+
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1479+
<parameter type-id='d8d5f4ab' name='cb'/>
1480+
<parameter type-id='eaa32e2f' name='arg'/>
1481+
<return type-id='48b5725f'/>
1482+
</function-decl>
14781483
<function-decl name='zfs_tunable_set' mangled-name='zfs_tunable_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_set'>
14791484
<parameter type-id='a27af98c' name='zt'/>
14801485
<parameter type-id='80f4b756' name='val'/>
@@ -1486,11 +1491,6 @@
14861491
<parameter type-id='b59d7dce' name='valsz'/>
14871492
<return type-id='95e97e5e'/>
14881493
</function-decl>
1489-
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1490-
<parameter type-id='d8d5f4ab' name='cb'/>
1491-
<parameter type-id='eaa32e2f' name='arg'/>
1492-
<return type-id='48b5725f'/>
1493-
</function-decl>
14941494
<function-type size-in-bits='64' id='92f86508'>
14951495
<parameter type-id='a27af98c'/>
14961496
<parameter type-id='eaa32e2f'/>

lib/libzfs/libzfs.abi

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,6 +1655,11 @@
16551655
<parameter type-id='80f4b756' name='name'/>
16561656
<return type-id='a27af98c'/>
16571657
</function-decl>
1658+
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1659+
<parameter type-id='d8d5f4ab' name='cb'/>
1660+
<parameter type-id='eaa32e2f' name='arg'/>
1661+
<return type-id='48b5725f'/>
1662+
</function-decl>
16581663
<function-decl name='zfs_tunable_set' mangled-name='zfs_tunable_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_set'>
16591664
<parameter type-id='a27af98c' name='zt'/>
16601665
<parameter type-id='80f4b756' name='val'/>
@@ -1666,11 +1671,6 @@
16661671
<parameter type-id='b59d7dce' name='valsz'/>
16671672
<return type-id='95e97e5e'/>
16681673
</function-decl>
1669-
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1670-
<parameter type-id='d8d5f4ab' name='cb'/>
1671-
<parameter type-id='eaa32e2f' name='arg'/>
1672-
<return type-id='48b5725f'/>
1673-
</function-decl>
16741674
<function-type size-in-bits='64' id='92f86508'>
16751675
<parameter type-id='a27af98c'/>
16761676
<parameter type-id='eaa32e2f'/>
@@ -6114,7 +6114,8 @@
61146114
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
61156115
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
61166116
<enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
6117-
<enumerator name='VDEV_NUM_PROPS' value='53'/>
6117+
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
6118+
<enumerator name='VDEV_NUM_PROPS' value='54'/>
61186119
</enum-decl>
61196120
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
61206121
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

lib/libzfs/libzfs_util.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
776776
case ZFS_ERR_ASHIFT_MISMATCH:
777777
zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap);
778778
break;
779+
case ZFS_ERR_TOO_MANY_SITOUTS:
780+
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "too many disks "
781+
"already sitting out"));
782+
zfs_verror(hdl, EZFS_BUSY, fmt, ap);
783+
break;
779784
default:
780785
zfs_error_aux(hdl, "%s", zfs_strerror(error));
781786
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);

lib/libzfs_core/libzfs_core.abi

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,11 @@
14261426
<parameter type-id='80f4b756' name='name'/>
14271427
<return type-id='a27af98c'/>
14281428
</function-decl>
1429+
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1430+
<parameter type-id='d8d5f4ab' name='cb'/>
1431+
<parameter type-id='eaa32e2f' name='arg'/>
1432+
<return type-id='48b5725f'/>
1433+
</function-decl>
14291434
<function-decl name='zfs_tunable_set' mangled-name='zfs_tunable_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_set'>
14301435
<parameter type-id='a27af98c' name='zt'/>
14311436
<parameter type-id='80f4b756' name='val'/>
@@ -1437,11 +1442,6 @@
14371442
<parameter type-id='b59d7dce' name='valsz'/>
14381443
<return type-id='95e97e5e'/>
14391444
</function-decl>
1440-
<function-decl name='zfs_tunable_iter' mangled-name='zfs_tunable_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_tunable_iter'>
1441-
<parameter type-id='d8d5f4ab' name='cb'/>
1442-
<parameter type-id='eaa32e2f' name='arg'/>
1443-
<return type-id='48b5725f'/>
1444-
</function-decl>
14451445
<function-type size-in-bits='64' id='92f86508'>
14461446
<parameter type-id='a27af98c'/>
14471447
<parameter type-id='eaa32e2f'/>

module/zcommon/zpool_prop.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,11 +468,14 @@ vdev_prop_init(void)
468468
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING",
469469
boolean_table, sfeatures);
470470
zprop_register_index(VDEV_PROP_SIT_OUT, "sit_out", 0,
471-
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "SIT_OUT", boolean_table,
471+
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "SIT_OUT", boolean_table,
472472
sfeatures);
473473
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
474474
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
475475
boolean_table, sfeatures);
476+
zprop_register_index(VDEV_PROP_AUTOSIT, "autosit", 1,
477+
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "AUTOSIT", boolean_table,
478+
sfeatures);
476479

477480
/* default index properties */
478481
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,

module/zfs/vdev.c

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
10651065
}
10661066
}
10671067

1068+
if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))
1069+
vd->vdev_autosit =
1070+
vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);
1071+
10681072
/*
10691073
* Add ourselves to the parent's list of children.
10701074
*/
@@ -1166,6 +1170,9 @@ vdev_free(vdev_t *vd)
11661170
spa_spare_remove(vd);
11671171
if (vd->vdev_isl2cache)
11681172
spa_l2cache_remove(vd);
1173+
if (vd->vdev_prev_histo)
1174+
kmem_free(vd->vdev_prev_histo,
1175+
sizeof (uint64_t) * VDEV_L_HISTO_BUCKETS);
11691176

11701177
txg_list_destroy(&vd->vdev_ms_list);
11711178
txg_list_destroy(&vd->vdev_dtl_list);
@@ -3833,6 +3840,26 @@ vdev_load(vdev_t *vd)
38333840
}
38343841
}
38353842

3843+
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
3844+
spa_t *spa = vd->vdev_spa;
3845+
uint64_t autosit;
3846+
3847+
error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
3848+
vdev_prop_to_name(VDEV_PROP_AUTOSIT), sizeof (autosit),
3849+
1, &autosit);
3850+
if (error == 0) {
3851+
vd->vdev_autosit = autosit == 1;
3852+
} else if (error == ENOENT) {
3853+
vd->vdev_autosit = vdev_prop_default_numeric(
3854+
VDEV_PROP_AUTOSIT);
3855+
} else {
3856+
vdev_dbgmsg(vd,
3857+
"vdev_load: zap_lookup(top_zap=%llu) "
3858+
"failed [error=%d]",
3859+
(u_longlong_t)vd->vdev_top_zap, error);
3860+
}
3861+
}
3862+
38363863
/*
38373864
* Load any rebuild state from the top-level vdev zap.
38383865
*/
@@ -6085,6 +6112,52 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
60856112
}
60866113
vd->vdev_failfast = intval & 1;
60876114
break;
6115+
case VDEV_PROP_SIT_OUT:
6116+
/* Only expose this for a draid or raidz leaf */
6117+
if (!vd->vdev_ops->vdev_op_leaf ||
6118+
vd->vdev_top == NULL ||
6119+
(vd->vdev_top->vdev_ops != &vdev_raidz_ops &&
6120+
vd->vdev_top->vdev_ops != &vdev_draid_ops)) {
6121+
error = ENOTSUP;
6122+
break;
6123+
}
6124+
if (nvpair_value_uint64(elem, &intval) != 0) {
6125+
error = EINVAL;
6126+
break;
6127+
}
6128+
if (intval == 1) {
6129+
vdev_t *pvd = vd->vdev_top;
6130+
uint_t sitouts = 0;
6131+
for (int i = 0; i < pvd->vdev_children; i++) {
6132+
if (pvd->vdev_child[i] == vd)
6133+
continue;
6134+
if (vdev_sit_out_reads(
6135+
pvd->vdev_child[i], 0)) {
6136+
sitouts++;
6137+
}
6138+
}
6139+
if (sitouts >= vdev_get_nparity(pvd)) {
6140+
error = ZFS_ERR_TOO_MANY_SITOUTS;
6141+
break;
6142+
}
6143+
if (error == 0)
6144+
vdev_raidz_sit_child(vd);
6145+
} else {
6146+
vd->vdev_read_sit_out_expire = 0;
6147+
}
6148+
break;
6149+
case VDEV_PROP_AUTOSIT:
6150+
if (vd->vdev_ops != &vdev_raidz_ops &&
6151+
vd->vdev_ops != &vdev_draid_ops) {
6152+
error = ENOTSUP;
6153+
break;
6154+
}
6155+
if (nvpair_value_uint64(elem, &intval) != 0) {
6156+
error = EINVAL;
6157+
break;
6158+
}
6159+
vd->vdev_autosit = intval == 1;
6160+
break;
60886161
case VDEV_PROP_CHECKSUM_N:
60896162
if (nvpair_value_uint64(elem, &intval) != 0) {
60906163
error = EINVAL;
@@ -6497,6 +6570,29 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
64976570
vdev_prop_add_list(outnvl, propname, strval,
64986571
intval, src);
64996572
break;
6573+
case VDEV_PROP_AUTOSIT:
6574+
/* Only raidz and draid vdevs can have this property */
6575+
if (vd->vdev_ops != &vdev_raidz_ops &&
6576+
vd->vdev_ops != &vdev_draid_ops) {
6577+
src = ZPROP_SRC_NONE;
6578+
intval = ZPROP_BOOLEAN_NA;
6579+
} else {
6580+
err = vdev_prop_get_int(vd, prop,
6581+
&intval);
6582+
if (err && err != ENOENT)
6583+
break;
6584+
6585+
if (intval ==
6586+
vdev_prop_default_numeric(prop))
6587+
src = ZPROP_SRC_DEFAULT;
6588+
else
6589+
src = ZPROP_SRC_LOCAL;
6590+
}
6591+
6592+
vdev_prop_add_list(outnvl, propname, NULL,
6593+
intval, src);
6594+
break;
6595+
65006596
case VDEV_PROP_CHECKSUM_N:
65016597
case VDEV_PROP_CHECKSUM_T:
65026598
case VDEV_PROP_IO_N:

0 commit comments

Comments
 (0)