Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-115112 / None / zfsd: add support for hotplugging spares #258

Draft
wants to merge 2 commits into
base: truenas/13-stable
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions cddl/usr.sbin/zfsd/case_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,26 @@ CaseFile::Find(Guid poolGUID, Guid vdevGUID)
return (NULL);
}

void
CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
{
for (CaseFileList::iterator curCase = s_activeCases.begin();
curCase != s_activeCases.end(); curCase++) {
if (((*curCase)->PoolGUID() != poolGUID &&
Guid::InvalidGuid() != poolGUID) ||
(*curCase)->VdevGUID() != vdevGUID)
continue;

/*
* We can have multiple cases for spare vdevs
*/
cases.push_back(*curCase);
if (!(*curCase)->IsSpare()) {
return;
}
}
}

CaseFile *
CaseFile::Find(const string &physPath)
{
Expand Down Expand Up @@ -217,6 +237,12 @@ CaseFile::PurgeAll()

}

int
CaseFile::IsSpare()
{
return (m_is_spare);
}

//- CaseFile Public Methods ----------------------------------------------------
bool
CaseFile::RefreshVdevState()
Expand All @@ -240,6 +266,7 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;

if (pool == NULL || !RefreshVdevState()) {
/*
Expand Down Expand Up @@ -280,9 +307,10 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
|| vdev->PoolGUID() == Guid::InvalidGuid())
&& vdev->GUID() == m_vdevGUID) {

if (IsSpare())
flags |= ZFS_ONLINE_SPARE;
if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
&m_vdevState) != 0) {
flags, &m_vdevState) != 0) {
syslog(LOG_ERR,
"Failed to online vdev(%s/%s:%s): %s: %s\n",
zpool_get_name(pool), vdev->GUIDString().c_str(),
Expand Down Expand Up @@ -791,7 +819,8 @@ CaseFile::CaseFile(const Vdev &vdev)
: m_poolGUID(vdev.PoolGUID()),
m_vdevGUID(vdev.GUID()),
m_vdevState(vdev.State()),
m_vdevPhysPath(vdev.PhysicalPath())
m_vdevPhysPath(vdev.PhysicalPath()),
m_is_spare(vdev.IsSpare())
{
stringstream guidString;

Expand Down
19 changes: 19 additions & 0 deletions cddl/usr.sbin/zfsd/case_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,19 @@ class CaseFile
*/
static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID);

/**
* \brief Find multiple CaseFile objects by a vdev's pool/vdev
* GUID tuple (special case for spare vdevs)
*
* \param poolGUID Pool GUID for the vdev of the CaseFile to find.
* If InvalidGuid, then only match the vdev GUID
* instead of both pool and vdev GUIDs.
* \param vdevGUID Vdev GUID for the vdev of the CaseFile to find.
* \param caseList List of cases associated with the vdev.
*/
static void Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID,
CaseFileList &caseList);

/**
* \brief Find a CaseFile object by a vdev's current/last known
* physical path.
Expand Down Expand Up @@ -219,6 +232,11 @@ class CaseFile
*/
bool ShouldFault() const;

/**
* \brief If this vdev is spare
*/
int IsSpare();

protected:
enum {
/**
Expand Down Expand Up @@ -384,6 +402,7 @@ class CaseFile
string m_poolGUIDString;
string m_vdevGUIDString;
string m_vdevPhysPath;
int m_is_spare;

/**
* \brief Callout activated when a grace period
Expand Down
9 changes: 9 additions & 0 deletions cddl/usr.sbin/zfsd/vdev_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ VdevIterator::Reset()
{
nvlist_t *rootVdev;
nvlist **cache_child;
nvlist **spare_child;
int result;
uint_t cache_children;
uint_t spare_children;

result = nvlist_lookup_nvlist(m_poolConfig,
ZPOOL_CONFIG_VDEV_TREE,
Expand All @@ -95,6 +97,13 @@ VdevIterator::Reset()
if (result == 0)
for (uint_t c = 0; c < cache_children; c++)
m_vdevQueue.push_back(cache_child[c]);
result = nvlist_lookup_nvlist_array(rootVdev,
ZPOOL_CONFIG_SPARES,
&spare_child,
&spare_children);
if (result == 0)
for (uint_t c = 0; c < spare_children; c++)
m_vdevQueue.push_back(spare_child[c]);
}

nvlist_t *
Expand Down
14 changes: 9 additions & 5 deletions cddl/usr.sbin/zfsd/zfsd_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,9 @@ bool
GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
nvlist_t *devConfig)
{
bool ret = false;
try {
CaseFileList case_list;
/*
* A device with ZFS label information has been
* inserted. If it matches a device for which we
Expand All @@ -238,18 +240,20 @@ GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
devPath.c_str());
Vdev vdev(devConfig);
CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(),
vdev.GUID()));
if (caseFile != NULL)
return (caseFile->ReEvaluate(devPath, physPath, &vdev));
CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
for (CaseFileList::iterator curr = case_list.begin();
curr != case_list.end(); curr++) {
ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
}
return (ret);

} catch (ZfsdException &exp) {
string context("GeomEvent::OnlineByLabel: " + devPath + ": ");

exp.GetString().insert(0, context);
exp.Log();
}
return (false);
return (ret);
}


Expand Down
8 changes: 5 additions & 3 deletions sys/contrib/openzfs/module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -5312,13 +5312,15 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
for (i = 0; i < nspares; i++) {
VERIFY(nvlist_lookup_uint64(spares[i],
ZPOOL_CONFIG_GUID, &guid) == 0);
VERIFY0(nvlist_lookup_uint64_array(spares[i],
ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
if (spa_spare_exists(guid, &pool, NULL) &&
pool != 0ULL) {
VERIFY(nvlist_lookup_uint64_array(
spares[i], ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) == 0);
vs->vs_state = VDEV_STATE_CANT_OPEN;
vs->vs_aux = VDEV_AUX_SPARED;
} else {
vs->vs_state =
spa->spa_spares.sav_vdevs[i]->vdev_state;
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions tests/sys/cddl/zfs/tests/zfsd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,7 @@ ${PACKAGE}FILES+= zfsd_import_001_pos.ksh
${PACKAGE}FILES+= zfsd_replace_001_pos.ksh
${PACKAGE}FILES+= zfsd_replace_002_pos.ksh
${PACKAGE}FILES+= zfsd_replace_003_pos.ksh
${PACKAGE}FILES+= zfsd_replace_004_pos.ksh
${PACKAGE}FILES+= zfsd_replace_005_pos.ksh

.include <bsd.test.mk>
11 changes: 8 additions & 3 deletions tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,20 @@ function wait_for_pool_dev_state_change
typeset -i timeout=$1
typeset disk=$2
typeset state=$3
typeset pool=$4

if [ -z "$pool" ]; then
pool=$TESTPOOL
fi

log_note "Waiting up to $timeout seconds for $disk to become $state ..."
for ((; $timeout > 0; timeout=$timeout-1)); do
check_state $TESTPOOL "$disk" "$state"
check_state $pool "$disk" "$state"
[ $? -eq 0 ] && return
$SLEEP 1
done
log_must $ZPOOL status $TESTPOOL
log_fail "ERROR: Disk $disk not marked as $state in $TESTPOOL"
log_must $ZPOOL status $pool
log_fail "ERROR: Disk $disk not marked as $state in $pool"
}

function wait_for_pool_removal
Expand Down
64 changes: 64 additions & 0 deletions tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_004_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/local/bin/ksh93 -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2023 Axcient. All rights reserved.
# Use is subject to license terms.
#
# $FreeBSD$

. $STF_SUITE/tests/hotspare/hotspare.kshlib
. $STF_SUITE/tests/zfsd/zfsd.kshlib
. $STF_SUITE/include/libtest.kshlib
. $STF_SUITE/include/libgnop.kshlib

log_assert "ZFSD will automatically replace a spare that disappears and reappears in the same location, with the same devname"

ensure_zfsd_running

set_disks

typeset DISK0_NOP=${DISK0}.nop
typeset DISK1_NOP=${DISK1}.nop

log_must create_gnops $DISK0 $DISK1

# Create a pool on the supplied disks
create_pool $TESTPOOL $DISK0_NOP spare $DISK1_NOP

# Disable the first disk.
log_must destroy_gnop $DISK1

# Check to make sure ZFS sees the disk as removed
wait_for_pool_dev_state_change 20 $DISK1_NOP REMOVED

# Re-enable the disk
log_must create_gnop $DISK1

# Disk should auto-join the zpool
wait_for_pool_dev_state_change 20 $DISK1_NOP AVAIL

$ZPOOL status $TESTPOOL
destroy_pool $TESTPOOL
log_must $RM -rf /$TESTPOOL

log_pass
70 changes: 70 additions & 0 deletions tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_005_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/local/bin/ksh93 -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2023 Axcient. All rights reserved.
# Use is subject to license terms.
#
# $FreeBSD$

. $STF_SUITE/tests/hotspare/hotspare.kshlib
. $STF_SUITE/tests/zfsd/zfsd.kshlib
. $STF_SUITE/include/libtest.kshlib
. $STF_SUITE/include/libgnop.kshlib

log_assert "ZFSD will automatically replace a multi-pool spare that disappears and reappears"

ensure_zfsd_running

set_disks

typeset DISK0_NOP=${DISK0}.nop
typeset DISK1_NOP=${DISK1}.nop
typeset DISK2_NOP=${DISK2}.nop

log_must create_gnops $DISK0 $DISK1 $DISK2

# Create pools on the supplied disks
create_pool $TESTPOOL $DISK0_NOP spare $DISK2_NOP
create_pool $TESTPOOL1 $DISK1_NOP spare $DISK2_NOP

# Disable the spare disk.
log_must destroy_gnop $DISK2

# Check to make sure ZFS sees the disk as removed
wait_for_pool_dev_state_change 20 $DISK2_NOP REMOVED
wait_for_pool_dev_state_change 20 $DISK2_NOP REMOVED $TESTPOOL1

# Re-enable the disk
log_must create_gnop $DISK2

# Disk should auto-join the zpools
wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL
wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL $TESTPOOL1

$ZPOOL status $TESTPOOL
$ZPOOL status $TESTPOOL1
destroy_pool $TESTPOOL
destroy_pool $TESTPOOL1
log_must $RM -rf /$TESTPOOL

log_pass
Loading