Skip to content

Commit b88bfe7

Browse files
committed
Refactor zhack label repair and fix -c regression on nonzero TXG
This commit fixes a likely regression introduced by 64db435 where the checksum repair functionality (`-c` or default behavior) will perform checks and access data associated with the newer undetach (`-u`) functionality, resulting in a failure when an uberblock's TXG is not 0 as required by `-u` but not `-c` Additionally, code is refactored for better separation of tasks. Signed-off-by: buzzingwires <buzzingwires@outlook.com>
1 parent 654f2dc commit b88bfe7

File tree

3 files changed

+111
-68
lines changed

3 files changed

+111
-68
lines changed

cmd/zhack.c

Lines changed: 79 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,23 @@ zhack_repair_read_label(const int fd, vdev_label_t *vl,
714714
return (0);
715715
}
716716

717+
static int
718+
zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
719+
{
720+
if (vdev_eck->zec_magic == ZEC_MAGIC) {
721+
*byteswap = B_FALSE;
722+
} else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
723+
*byteswap = B_TRUE;
724+
} else {
725+
(void) fprintf(stderr, "error: label %d: "
726+
"Expected the nvlist checksum magic number but instead got "
727+
"0x%" PRIx64 "\n",
728+
l, vdev_eck->zec_magic);
729+
return (1);
730+
}
731+
return (0);
732+
}
733+
717734
static void
718735
zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
719736
const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
@@ -740,33 +757,10 @@ zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
740757
}
741758

742759
static int
743-
zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
744-
const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg,
745-
uint64_t *ashift)
760+
zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
746761
{
747762
int err;
748-
749-
if (ub->ub_txg != 0) {
750-
(void) fprintf(stderr,
751-
"error: label %d: UB TXG of 0 expected, but got %"
752-
PRIu64 "\n",
753-
l, ub->ub_txg);
754-
(void) fprintf(stderr, "It would appear the device was not "
755-
"properly removed.\n");
756-
return (1);
757-
}
758-
759-
for (int i = 0; i < cfg_keys_len; i++) {
760-
uint64_t val;
761-
err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
762-
if (err) {
763-
(void) fprintf(stderr,
764-
"error: label %d, %d: "
765-
"cannot find nvlist key %s\n",
766-
l, i, cfg_keys[i]);
767-
return (err);
768-
}
769-
}
763+
nvlist_t *vdev_tree_cfg;
770764

771765
err = nvlist_lookup_nvlist(cfg,
772766
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
@@ -790,7 +784,7 @@ zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
790784
(void) fprintf(stderr,
791785
"error: label %d: nvlist key %s is zero\n",
792786
l, ZPOOL_CONFIG_ASHIFT);
793-
return (err);
787+
return (1);
794788
}
795789

796790
return (0);
@@ -805,30 +799,35 @@ zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
805799
*/
806800
if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
807801
const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
802+
int err;
803+
808804
ub->ub_txg = txg;
809805

810-
if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) {
806+
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
807+
if (err) {
811808
(void) fprintf(stderr,
812809
"error: label %d: "
813810
"Failed to remove pool creation TXG\n",
814811
l);
815-
return (1);
812+
return (err);
816813
}
817814

818-
if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) {
815+
err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
816+
if (err) {
819817
(void) fprintf(stderr,
820818
"error: label %d: Failed to remove pool TXG to "
821819
"be replaced.\n",
822820
l);
823-
return (1);
821+
return (err);
824822
}
825823

826-
if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) {
824+
err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
825+
if (err) {
827826
(void) fprintf(stderr,
828827
"error: label %d: "
829828
"Failed to add pool TXG of %" PRIu64 "\n",
830829
l, txg);
831-
return (1);
830+
return (err);
832831
}
833832
}
834833

@@ -922,6 +921,7 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data,
922921
BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
923922
const uint64_t actual_magic = vdev_eck->zec_magic;
924923
int err = 0;
924+
925925
if (actual_magic != expected_magic) {
926926
(void) fprintf(stderr, "error: label %d: "
927927
"Expected "
@@ -943,6 +943,36 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data,
943943
return (err);
944944
}
945945

946+
static int
947+
zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
948+
{
949+
const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
950+
ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
951+
int err;
952+
953+
err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
954+
VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
955+
if (err) {
956+
(void) fprintf(stderr,
957+
"error: cannot unpack nvlist label %d\n", l);
958+
return (err);
959+
}
960+
961+
for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
962+
uint64_t val;
963+
err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
964+
if (err) {
965+
(void) fprintf(stderr,
966+
"error: label %d, %d: "
967+
"cannot find nvlist key %s\n",
968+
l, i, cfg_keys[i]);
969+
return (err);
970+
}
971+
}
972+
973+
return (0);
974+
}
975+
946976
static void
947977
zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
948978
vdev_label_t *vl, const uint64_t label_offset, const int l,
@@ -956,29 +986,17 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
956986
(zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
957987
const uint64_t vdev_phys_offset =
958988
label_offset + offsetof(vdev_label_t, vl_vdev_phys);
959-
const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
960-
ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
961989
nvlist_t *cfg;
962-
nvlist_t *vdev_tree_cfg = NULL;
963990
uint64_t ashift;
964991
int byteswap;
965992

966993
err = zhack_repair_read_label(fd, vl, label_offset, l);
967994
if (err)
968995
return;
969996

970-
if (vdev_eck->zec_magic == 0) {
971-
(void) fprintf(stderr, "error: label %d: "
972-
"Expected the nvlist checksum magic number to not be zero"
973-
"\n",
974-
l);
975-
(void) fprintf(stderr, "There should already be a checksum "
976-
"for the label.\n");
997+
err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
998+
if (err)
977999
return;
978-
}
979-
980-
byteswap =
981-
(vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC));
9821000

9831001
if (byteswap) {
9841002
byteswap_uint64_array(&vdev_eck->zec_cksum,
@@ -994,23 +1012,28 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
9941012
return;
9951013
}
9961014

997-
err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
998-
VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
999-
if (err) {
1000-
(void) fprintf(stderr,
1001-
"error: cannot unpack nvlist label %d\n", l);
1002-
return;
1003-
}
1004-
1005-
err = zhack_repair_check_label(ub,
1006-
l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift);
1015+
err = zhack_repair_unpack_cfg(vl, l, &cfg);
10071016
if (err)
10081017
return;
10091018

10101019
if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
10111020
char *buf;
10121021
size_t buflen;
10131022

1023+
if (ub->ub_txg != 0) {
1024+
(void) fprintf(stderr,
1025+
"error: label %d: UB TXG of 0 expected, but got %"
1026+
PRIu64 "\n",
1027+
l, ub->ub_txg);
1028+
(void) fprintf(stderr, "It would appear the device was "
1029+
"not properly detached.\n");
1030+
return;
1031+
}
1032+
1033+
err = zhack_repair_get_ashift(cfg, l, &ashift);
1034+
if (err)
1035+
return;
1036+
10141037
err = zhack_repair_undetach(ub, cfg, l);
10151038
if (err)
10161039
return;

tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,16 @@
3333
# Test one:
3434
#
3535
# 1. Create pool on a loopback device with some test data
36-
# 2. Export the pool.
37-
# 3. Corrupt all label checksums in the pool
38-
# 4. Check that pool cannot be imported
39-
# 5. Verify that it cannot be imported after using zhack label repair -u
36+
# 2. Checksum repair should work with a valid TXG. Repeatedly write and
37+
# sync the pool so there are enough transactions for every uberblock
38+
# to have a TXG
39+
# 3. Export the pool.
40+
# 4. Corrupt all label checksums in the pool
41+
# 5. Check that pool cannot be imported
42+
# 6. Verify that it cannot be imported after using zhack label repair -u
4043
# to ensure that the -u option will quit on corrupted checksums.
41-
# 6. Use zhack label repair -c on device
42-
# 7. Check that pool can be imported and that data is intact
44+
# 7. Use zhack label repair -c on device
45+
# 8. Check that pool can be imported and that data is intact
4346
#
4447
# Test two:
4548
#
@@ -170,6 +173,17 @@ function setup_dataset
170173
check_dataset true
171174
}
172175

176+
function force_transactions
177+
{
178+
L_TIMES="$1"
179+
for ((i=0; i < L_TIMES; i++))
180+
do
181+
touch "$TESTDIR"/"test" || return $?
182+
zpool sync "$TESTPOOL" || return $?
183+
done
184+
return 0
185+
}
186+
173187
function get_practical_size
174188
{
175189
L_SIZE="$1"
@@ -257,6 +271,9 @@ function run_test_one
257271

258272
setup_dataset
259273

274+
# Force 128 extra transactions to ensure all uberblocks are assigned a TXG
275+
log_must force_transactions 128
276+
260277
log_must zpool export "$TESTPOOL"
261278

262279
corrupt_labels "$L_SIZE" "$VIRTUAL_DISK"

tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@
1818
# Strategy:
1919
#
2020
# 1. Create pool on a loopback device with some test data
21-
# 2. Export the pool.
22-
# 3. Corrupt all label checksums in the pool
23-
# 4. Check that pool cannot be imported
24-
# 5. Verify that it cannot be imported after using zhack label repair -u
21+
# 2. Checksum repair should work with a valid TXG. Repeatedly write and
22+
# sync the pool so there are enough transactions for every uberblock
23+
# to have a TXG
24+
# 3. Export the pool.
25+
# 4. Corrupt all label checksums in the pool
26+
# 5. Check that pool cannot be imported
27+
# 6. Verify that it cannot be imported after using zhack label repair -u
2528
# to ensure that the -u option will quit on corrupted checksums.
26-
# 6. Use zhack label repair -c on device
27-
# 7. Check that pool can be imported and that data is intact
29+
# 7. Use zhack label repair -c on device
30+
# 8. Check that pool can be imported and that data is intact
2831

2932
. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib
3033

0 commit comments

Comments
 (0)