Skip to content

Commit 9579fe7

Browse files
robnlundman
authored and committed
ZIL: flag crashed LWBs so we know not to process them
If the ZIL crashed, any outstanding LWBs are no longer interesting, so if they return, we need to just clean them up and return, not try to do any work on them. This is true even if they return success, as that may be long after the pool suspended and resumed, depending on when/if the kernel decides to return the IO to us. In particular, we must not try to get the "next" LWB from zl_lwb_list, since they're no longer on that list. So, we put a flag on in-flight LWBs in zil_crash() when we move them from zl_lwb_list to zl_lwb_crash_list, so we know what's going on when they return.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes openzfs#17622
1 parent 7d564e0 commit 9579fe7

2 files changed

Lines changed: 19 additions & 7 deletions

File tree

include/sys/zil_impl.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,9 @@ typedef enum {
101101
* "zl_lock" is used to protect the lwb against concurrent access.
102102
*/
103103
typedef enum {
104-
LWB_FLAG_SLIM = (1<<0), /* log block has slim format */
105-
LWB_FLAG_SLOG = (1<<1), /* lwb_blk is on SLOG device */
104+
LWB_FLAG_SLIM = (1<<0), /* log block has slim format */
105+
LWB_FLAG_SLOG = (1<<1), /* lwb_blk is on SLOG device */
106+
LWB_FLAG_CRASHED = (1<<2), /* lwb is on the crash list */
106107
} lwb_flag_t;
107108

108109
typedef struct lwb {

module/zfs/zil.c

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1591,7 +1591,7 @@ zil_lwb_write_done(zio_t *zio)
15911591
avl_tree_t *t = &lwb->lwb_vdev_tree;
15921592
void *cookie = NULL;
15931593
zil_vdev_node_t *zv;
1594-
lwb_t *nlwb;
1594+
lwb_t *nlwb = NULL;
15951595

15961596
ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0);
15971597

@@ -1611,9 +1611,11 @@ zil_lwb_write_done(zio_t *zio)
16111611
* its write ZIO a parent this ZIO. In such case we can not defer
16121612
* our flushes or below may be a race between the done callbacks.
16131613
*/
1614-
nlwb = list_next(&zilog->zl_lwb_list, lwb);
1615-
if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED)
1616-
nlwb = NULL;
1614+
if (!(lwb->lwb_flags & LWB_FLAG_CRASHED)) {
1615+
nlwb = list_next(&zilog->zl_lwb_list, lwb);
1616+
if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED)
1617+
nlwb = NULL;
1618+
}
16171619
mutex_exit(&zilog->zl_lock);
16181620

16191621
if (avl_numnodes(t) == 0)
@@ -1631,8 +1633,13 @@ zil_lwb_write_done(zio_t *zio)
16311633
* we expect that to occur in "zil_lwb_flush_vdevs_done" (thus,
16321634
* we expect any error seen here, to have been propagated to
16331635
* that function).
1636+
*
1637+
* Note that we treat a "crashed" LWB as though it was in error,
1638+
* even if it did appear to succeed, because we've already
1639+
* signaled error and cleaned up waiters and committers in
1640+
* zil_crash(); we just want to clean up and get out of here.
16341641
*/
1635-
if (zio->io_error != 0) {
1642+
if (zio->io_error != 0 || (lwb->lwb_flags & LWB_FLAG_CRASHED)) {
16361643
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
16371644
kmem_free(zv, sizeof (*zv));
16381645
return;
@@ -2747,6 +2754,7 @@ zil_crash_clean(zilog_t *zilog, uint64_t synced_txg)
27472754
}
27482755

27492756
/* This LWB is from the past, so we can clean it up now. */
2757+
ASSERT(lwb->lwb_flags & LWB_FLAG_CRASHED);
27502758
list_remove(&zilog->zl_lwb_crash_list, lwb);
27512759
if (lwb->lwb_buf != NULL)
27522760
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
@@ -3733,6 +3741,9 @@ zil_crash(zilog_t *zilog)
37333741
*/
37343742
for (lwb_t *lwb = list_head(&zilog->zl_lwb_crash_list); lwb != NULL;
37353743
lwb = list_next(&zilog->zl_lwb_crash_list, lwb)) {
3744+
ASSERT(!(lwb->lwb_flags & LWB_FLAG_CRASHED));
3745+
lwb->lwb_flags |= LWB_FLAG_CRASHED;
3746+
37363747
itx_t *itx;
37373748
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
37383749
zil_itx_destroy(itx, EIO);

0 commit comments

Comments
 (0)