Skip to content

Commit 02312f4

Browse files
oshogbo authored and Paul Dagnelie committed
Add TXG timestamp database
This feature enables tracking of when TXGs are committed to disk, providing an estimated timestamp for each TXG. With this information, it becomes possible to perform scrubs based on specific date ranges, improving the granularity of data management and recovery operations. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com> Reviewed-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com> Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Closes openzfs#16853
1 parent 99aa4c8 commit 02312f4

File tree

21 files changed

+735
-9
lines changed

21 files changed

+735
-9
lines changed

cmd/zpool/zpool_main.c

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ get_usage(zpool_help_t idx)
480480
return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
481481
"[<device> ...]\n"));
482482
case HELP_SCRUB:
483-
return (gettext("\tscrub [-e | -s | -p | -C] [-w] "
483+
return (gettext("\tscrub [-e | -s | -p | -C| -E | -S] [-w] "
484484
"<pool> ...\n"));
485485
case HELP_RESILVER:
486486
return (gettext("\tresilver <pool> ...\n"));
@@ -8330,6 +8330,8 @@ zpool_do_reopen(int argc, char **argv)
83308330
/* Per-invocation arguments that scrub_callback() reads for each pool. */
typedef struct scrub_cbdata {
83318331
int cb_type; /* scan function, e.g. POOL_SCAN_SCRUB or POOL_SCAN_RESILVER */
83328332
pool_scrub_cmd_t cb_scrub_cmd; /* scrub command, e.g. POOL_SCRUB_NORMAL */
8333+
time_t cb_date_start; /* start of the date range (-S); 0 when not given */
8334+
time_t cb_date_end; /* end of the date range (-E); 0 when not given */
83338335
} scrub_cbdata_t;
83348336

83358337
static boolean_t
@@ -8373,8 +8375,8 @@ scrub_callback(zpool_handle_t *zhp, void *data)
83738375
return (1);
83748376
}
83758377

8376-
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
8377-
8378+
err = zpool_scan_range(zhp, cb->cb_type, cb->cb_scrub_cmd,
8379+
cb->cb_date_start, cb->cb_date_end);
83788380
if (err == 0 && zpool_has_checkpoint(zhp) &&
83798381
cb->cb_type == POOL_SCAN_SCRUB) {
83808382
(void) printf(gettext("warning: will not scrub state that "
@@ -8392,10 +8394,34 @@ wait_callback(zpool_handle_t *zhp, void *data)
83928394
return (zpool_wait(zhp, *act));
83938395
}
83948396

8397+
static time_t
8398+
date_string_to_sec(const char *timestr, boolean_t rounding)
8399+
{
8400+
struct tm tm = {0};
8401+
int adjustment = rounding ? 1 : 0;
8402+
8403+
/* Allow mktime to determine timezone. */
8404+
tm.tm_isdst = -1;
8405+
8406+
if (strptime(timestr, "%Y-%m-%d %H:%M", &tm) == NULL) {
8407+
if (strptime(timestr, "%Y-%m-%d", &tm) == NULL) {
8408+
fprintf(stderr, gettext("Failed to parse the date.\n"));
8409+
usage(B_FALSE);
8410+
}
8411+
adjustment *= 24 * 60 * 60;
8412+
} else {
8413+
adjustment *= 60;
8414+
}
8415+
8416+
return (mktime(&tm) + adjustment);
8417+
}
8418+
83958419
/*
8396-
* zpool scrub [-e | -s | -p | -C] [-w] <pool> ...
8420+
* zpool scrub [-e | -s | -p | -C | -E | -S] [-w] <pool> ...
83978421
*
83988422
* -e Only scrub blocks in the error log.
8423+
* -E End date of scrub ("YYYY-MM-DD" or "YYYY-MM-DD HH:MM"), inclusive.
8424+
* -S Start date of scrub ("YYYY-MM-DD" or "YYYY-MM-DD HH:MM").
83998425
* -s Stop. Stops any in-progress scrub.
84008426
* -p Pause. Pause in-progress scrub.
84018427
* -w Wait. Blocks until scrub has completed.
@@ -8411,21 +8437,32 @@ zpool_do_scrub(int argc, char **argv)
84118437

84128438
cb.cb_type = POOL_SCAN_SCRUB;
84138439
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
8440+
cb.cb_date_start = cb.cb_date_end = 0;
84148441

84158442
boolean_t is_error_scrub = B_FALSE;
84168443
boolean_t is_pause = B_FALSE;
84178444
boolean_t is_stop = B_FALSE;
84188445
boolean_t is_txg_continue = B_FALSE;
84198446

84208447
/* check options */
8421-
while ((c = getopt(argc, argv, "spweC")) != -1) {
8448+
while ((c = getopt(argc, argv, "spweCE:S:")) != -1) {
84228449
switch (c) {
84238450
case 'e':
84248451
is_error_scrub = B_TRUE;
84258452
break;
8453+
case 'E':
8454+
/*
8455+
* Round the date. It's better to scrub more data than
8456+
* less. This also makes the date inclusive.
8457+
*/
8458+
cb.cb_date_end = date_string_to_sec(optarg, B_TRUE);
8459+
break;
84268460
case 's':
84278461
is_stop = B_TRUE;
84288462
break;
8463+
case 'S':
8464+
cb.cb_date_start = date_string_to_sec(optarg, B_FALSE);
8465+
break;
84298466
case 'p':
84308467
is_pause = B_TRUE;
84318468
break;
@@ -8485,6 +8522,19 @@ zpool_do_scrub(int argc, char **argv)
84858522
}
84868523
}
84878524

8525+
if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) &&
8526+
cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) {
8527+
(void) fprintf(stderr, gettext("invalid option combination: "
8528+
"start/end date is available only with normal scrub\n"));
8529+
usage(B_FALSE);
8530+
}
8531+
if (cb.cb_date_start != 0 && cb.cb_date_end != 0 &&
8532+
cb.cb_date_start > cb.cb_date_end) {
8533+
(void) fprintf(stderr, gettext("invalid arguments: "
8534+
"end date has to be later than start date\n"));
8535+
usage(B_FALSE);
8536+
}
8537+
84888538
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
84898539
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) {
84908540
(void) fprintf(stderr, gettext("invalid option combination: "
@@ -8525,6 +8575,7 @@ zpool_do_resilver(int argc, char **argv)
85258575

85268576
cb.cb_type = POOL_SCAN_RESILVER;
85278577
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
8578+
cb.cb_date_start = cb.cb_date_end = 0;
85288579

85298580
/* check options */
85308581
while ((c = getopt(argc, argv, "")) != -1) {

include/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ COMMON_H = \
1212
literals.h \
1313
zfeature_common.h \
1414
zfs_comutil.h \
15+
zfs_crrd.h \
1516
zfs_deleg.h \
1617
zfs_fletcher.h \
1718
zfs_namecheck.h \

include/libzfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ typedef struct trimflags {
292292
* Functions to manipulate pool and vdev state
293293
*/
294294
_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
295+
_LIBZFS_H int zpool_scan_range(zpool_handle_t *, pool_scan_func_t,
296+
pool_scrub_cmd_t, time_t, time_t);
295297
_LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
296298
nvlist_t *);
297299
_LIBZFS_H int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t,

include/sys/dmu.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,9 @@ typedef struct dmu_buf {
394394
#define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint"
395395
#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap"
396396
#define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones"
397+
#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klaraystems:txg_log_time:minutes"
398+
#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klaraystems:txg_log_time:days"
399+
#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klaraystems:txg_log_time:months"
397400

398401
/*
399402
* Allocate an object from this objset. The range of object numbers

include/sys/spa_impl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include <sys/dsl_deadlist.h>
5757
#include <zfeature_common.h>
5858

59+
#include "zfs_crrd.h"
60+
5961
#ifdef __cplusplus
6062
extern "C" {
6163
#endif
@@ -357,6 +359,12 @@ struct spa {
357359
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
358360
zthr_t *spa_checkpoint_discard_zthr;
359361

362+
kmutex_t spa_txg_log_time_lock; /* for spa_txg_log_time */
363+
dbrrd_t spa_txg_log_time;
364+
uint64_t spa_last_noted_txg;
365+
uint64_t spa_last_noted_txg_time;
366+
uint64_t spa_last_flush_txg_time;
367+
360368
space_map_t *spa_syncing_log_sm; /* current log space map */
361369
avl_tree_t spa_sm_logs_by_txg;
362370
kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */

include/zfs_crrd.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// SPDX-License-Identifier: CDDL-1.0
2+
/*
3+
* CDDL HEADER START
4+
*
5+
* The contents of this file are subject to the terms of the
6+
* Common Development and Distribution License (the "License").
7+
* You may not use this file except in compliance with the License.
8+
*
9+
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10+
* or https://opensource.org/licenses/CDDL-1.0.
11+
* See the License for the specific language governing permissions
12+
* and limitations under the License.
13+
*
14+
* When distributing Covered Code, include this CDDL HEADER in each
15+
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16+
* If applicable, add the following below this CDDL HEADER, with the
17+
* fields enclosed by brackets "[]" replaced with your own identifying
18+
* information: Portions Copyright [yyyy] [name of copyright owner]
19+
*
20+
* CDDL HEADER END
21+
*/
22+
/*
23+
* Copyright (c) 2024 Klara Inc.
24+
*
25+
* This software was developed by
26+
* Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
27+
* Fred Weigel <fred.weigel@klarasystems.com>
28+
* under sponsorship from Wasabi Technology, Inc. and Klara Inc.
29+
*/
30+
31+
#ifndef _CRRD_H_
32+
#define _CRRD_H_
33+
34+
#define RRD_MAX_ENTRIES 256
35+
36+
#define RRD_ENTRY_SIZE sizeof (uint64_t)
37+
#define RRD_STRUCT_ELEM (sizeof (rrd_t) / RRD_ENTRY_SIZE)
38+
39+
typedef enum {
40+
DBRRD_FLOOR,
41+
DBRRD_CEILING
42+
} dbrrd_rounding_t;
43+
44+
typedef struct {
45+
uint64_t rrdd_time;
46+
uint64_t rrdd_txg;
47+
} rrd_data_t;
48+
49+
typedef struct {
50+
uint64_t rrd_head; /* head (beginning) */
51+
uint64_t rrd_tail; /* tail (end) */
52+
uint64_t rrd_length;
53+
54+
rrd_data_t rrd_entries[RRD_MAX_ENTRIES];
55+
} rrd_t;
56+
57+
typedef struct {
58+
rrd_t dbr_minutes;
59+
rrd_t dbr_days;
60+
rrd_t dbr_months;
61+
} dbrrd_t;
62+
63+
size_t rrd_len(rrd_t *rrd);
64+
65+
const rrd_data_t *rrd_entry(rrd_t *r, size_t i);
66+
rrd_data_t *rrd_tail_entry(rrd_t *rrd);
67+
uint64_t rrd_tail(rrd_t *rrd);
68+
uint64_t rrd_get(rrd_t *rrd, size_t i);
69+
70+
void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg);
71+
72+
void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg);
73+
uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rouding);
74+
75+
#endif

lib/libzfs/libzfs.abi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@
569569
<elf-symbol name='zpool_reguid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
570570
<elf-symbol name='zpool_reopen_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
571571
<elf-symbol name='zpool_scan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
572+
<elf-symbol name='zpool_scan_range' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
572573
<elf-symbol name='zpool_search_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
573574
<elf-symbol name='zpool_set_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
574575
<elf-symbol name='zpool_set_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -6727,6 +6728,14 @@
67276728
<parameter type-id='b51cf3c2' name='cmd'/>
67286729
<return type-id='95e97e5e'/>
67296730
</function-decl>
6731+
<function-decl name='zpool_scan_range' mangled-name='zpool_scan_range' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_scan_range'>
6732+
<parameter type-id='4c81de99' name='zhp'/>
6733+
<parameter type-id='7313fbe2' name='func'/>
6734+
<parameter type-id='b51cf3c2' name='cmd'/>
6735+
<parameter type-id='c9d12d66' name='date_start'/>
6736+
<parameter type-id='c9d12d66' name='date_end'/>
6737+
<return type-id='95e97e5e'/>
6738+
</function-decl>
67306739
<function-decl name='zpool_find_vdev_by_physpath' mangled-name='zpool_find_vdev_by_physpath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_vdev_by_physpath'>
67316740
<parameter type-id='4c81de99' name='zhp'/>
67326741
<parameter type-id='80f4b756' name='ppath'/>

lib/libzfs/libzfs_pool.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2730,7 +2730,13 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
27302730
* Scan the pool.
27312731
*/
27322732
int
2733-
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2733+
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) {
2734+
return (zpool_scan_range(zhp, func, cmd, 0, 0));
2735+
}
2736+
2737+
int
2738+
zpool_scan_range(zpool_handle_t *zhp, pool_scan_func_t func,
2739+
pool_scrub_cmd_t cmd, time_t date_start, time_t date_end)
27342740
{
27352741
char errbuf[ERRBUFLEN];
27362742
int err;
@@ -2739,6 +2745,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
27392745
nvlist_t *args = fnvlist_alloc();
27402746
fnvlist_add_uint64(args, "scan_type", (uint64_t)func);
27412747
fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd);
2748+
fnvlist_add_uint64(args, "scan_date_start", (uint64_t)date_start);
2749+
fnvlist_add_uint64(args, "scan_date_end", (uint64_t)date_end);
27422750

27432751
err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL);
27442752
fnvlist_free(args);

lib/libzpool/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ nodist_libzpool_la_SOURCES = \
182182
module/zfs/zfeature.c \
183183
module/zfs/zfs_byteswap.c \
184184
module/zfs/zfs_chksum.c \
185+
module/zfs/zfs_crrd.c \
185186
module/zfs/zfs_fm.c \
186187
module/zfs/zfs_fuid.c \
187188
module/zfs/zfs_ratelimit.c \

man/man4/zfs.4

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,21 @@ Defer frees starting in this pass.
22522252
Maximum memory used for prefetching a checkpoint's space map on each
22532253
vdev while discarding the checkpoint.
22542254
.
2255+
.It Sy zfs_spa_note_txg_time Ns = Ns Sy 600 Pq uint
2256+
This parameter defines, in seconds, how often the TXG time database will record
2257+
a new TXG if it has changed.
2258+
After the specified time interval has passed, and if the TXG number has changed,
2259+
the new value is recorded in the database.
2260+
These timestamps can later be used for more granular operations, such as
2261+
scrubbing.
2262+
.
2263+
.It Sy zfs_spa_flush_txg_time Ns = Ns Sy 600 Pq uint
2264+
This parameter defines, in seconds, how often ZFS will flush
2265+
the TXG time database to disk.
2266+
It ensures that the data is actually written to persistent storage, which helps
2267+
preserve the database in case of unexpected shutdown.
2268+
The database is also automatically flushed during the export sequence.
2269+
.
22552270
.It Sy zfs_special_class_metadata_reserve_pct Ns = Ns Sy 25 Ns % Pq uint
22562271
Only allow small data blocks to be allocated on the special and dedup vdev
22572272
types when the available free space percentage on these vdevs exceeds this

0 commit comments

Comments
 (0)