Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-130126 / 24.10 / Wipe unused boot-pool disks #89

Merged
merged 1 commit into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions truenas_installer/disks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,25 @@
MIN_DISK_SIZE = 8_000_000_000


@dataclass
class ZFSMember:
name: str
pool: str


@dataclass
class Disk:
name: str
size: int
model: str
label: str
zfs_members: list[ZFSMember]
removable: bool

@property
def device(self):
return f"/dev/{self.name}"


async def list_disks():
# need to settle so that lsblk output is stable
Expand Down Expand Up @@ -44,12 +55,15 @@ async def list_disks():
if m := re.search("Model: (.+)", (await run(["sgdisk", "-p", device], check=False)).stdout):
model = m.group(1)

zfs_members = []
if disk["fstype"] is not None:
label = disk["fstype"]
else:
children = disk.get("children", [])
if zfs_members := [child for child in children if child["fstype"] == "zfs_member"]:
label = f"zfs-\"{zfs_members[0]['label']}\""
if zfs_members := [ZFSMember(child["name"], child["label"])
for child in children
if child["fstype"] == "zfs_member"]:
label = ", ".join([f"zfs-\"{zfs_member.pool}\"" for zfs_member in zfs_members])
else:
for fstype in ["ext4", "xfs"]:
if labels := [child for child in children if child["fstype"] == fstype]:
Expand All @@ -61,6 +75,6 @@ async def list_disks():
else:
label = ""

disks.append(Disk(disk["name"], disk["size"], model, label, disk["rm"]))
disks.append(Disk(disk["name"], disk["size"], model, label, zfs_members, disk["rm"]))

return disks
61 changes: 41 additions & 20 deletions truenas_installer/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import os
import subprocess
import tempfile
from typing import Callable

from .disks import Disk
from .exception import InstallError
from .lock import installation_lock
from .utils import get_partitions, run
Expand All @@ -13,51 +15,70 @@
BOOT_POOL = "boot-pool"


async def install(disks, set_pmbr, authentication, post_install, sql, callback):
async def install(destination_disks: list[Disk], wipe_disks: list[Disk], set_pmbr: bool, authentication: dict | None,
post_install: dict | None, sql: str | None, callback: Callable):
with installation_lock:
try:
if not os.path.exists("/etc/hostid"):
await run(["zgenhostid"])

for disk in disks:
callback(0, f"Formatting disk {disk}")
await format_disk(f"/dev/{disk}", set_pmbr, callback)
for disk in destination_disks:
Copy link
Contributor

@yocalebo yocalebo Jul 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You've changed it so that you're iterating destination_disks but you're passing the disks list object into format_disks. This is sloppy. The same goes for for disk in wipe_disks. If you kept those there for logging purposes, then we should moving the logging into format_disk and wipe_disk. Also, because we're now dealing with multiple disks, we should rename the functions to wipe_disks and format_disks accordingly.

callback(0, f"Formatting disk {disk.name}")
await format_disk(disk, set_pmbr, callback)

for disk in wipe_disks:
callback(0, f"Wiping disk {disk.name}")
await wipe_disk(disk, callback)

disk_parts = list()
part_num = 3
for disk in disks:
found = (await get_partitions(disk, [part_num]))[part_num]
for disk in destination_disks:
found = (await get_partitions(disk.device, [part_num]))[part_num]
if found is None:
raise InstallError(f"Failed to find data partition on {disk!r}")
raise InstallError(f"Failed to find data partition on {disk.name}")
else:
disk_parts.append(found)

callback(0, "Creating boot pool")
await create_boot_pool(disk_parts)
try:
await run_installer(disks, authentication, post_install, sql, callback)
await run_installer(
[disk.name for disk in destination_disks],
authentication,
post_install,
sql,
callback,
)
finally:
await run(["zpool", "export", "-f", BOOT_POOL])
except subprocess.CalledProcessError as e:
raise InstallError(f"Command {' '.join(e.cmd)} failed:\n{e.stderr.rstrip()}")


async def format_disk(device, set_pmbr, callback):
if (result := await run(["wipefs", "-a", device], check=False)).returncode != 0:
callback(0, f"Warning: unable to wipe partition table for {device}: {result.stderr.rstrip()}")
async def wipe_disk(disk: Disk, callback: Callable):
for zfs_member in disk.zfs_members:
if (result := await run(["zpool", "labelclear", "-f", f"/dev/{zfs_member.name}"],
check=False)).returncode != 0:
callback(0, f"Warning: unable to wipe ZFS label from {zfs_member.name}: {result.stderr.rstrip()}")
pass

if (result := await run(["wipefs", "-a", disk.device], check=False)).returncode != 0:
callback(0, f"Warning: unable to wipe partition table for {disk.name}: {result.stderr.rstrip()}")

await run(["sgdisk", "-Z", disk.device], check=False)


# Erase both typical metadata area.
await run(["sgdisk", "-Z", device], check=False)
await run(["sgdisk", "-Z", device], check=False)
async def format_disk(disk: Disk, set_pmbr: bool, callback: Callable):
await wipe_disk(disk, callback)

# Create BIOS boot partition
await run(["sgdisk", "-a4096", "-n1:0:+1024K", "-t1:EF02", "-A1:set:2", device])
await run(["sgdisk", "-a4096", "-n1:0:+1024K", "-t1:EF02", "-A1:set:2", disk.device])

# Create EFI partition (Even if not used, allows user to switch to UEFI later)
await run(["sgdisk", "-n2:0:+524288K", "-t2:EF00", device])
await run(["sgdisk", "-n2:0:+524288K", "-t2:EF00", disk.device])

# Create data partition
await run(["sgdisk", "-n3:0:0", "-t3:BF01", device])
await run(["sgdisk", "-n3:0:0", "-t3:BF01", disk.device])

# Bad hardware is bad, but we've seen a few users
# state that by the time we run `parted` command
Expand All @@ -66,13 +87,13 @@ async def format_disk(device, set_pmbr, callback):
# be present. This is almost _exclusively_ related
# to bad hardware, but we will wait up to 30 seconds
# for the partitions to show up in sysfs.
disk_parts = await get_partitions(device, [1, 2, 3], tries=30)
disk_parts = await get_partitions(disk.device, [1, 2, 3], tries=30)
for partnum, part_device in disk_parts.items():
if part_device is None:
raise InstallError(f"Failed to find partition number {partnum} on {device!r}")
raise InstallError(f"Failed to find partition number {partnum} on {disk.name}")

if set_pmbr:
await run(["parted", "-s", device, "disk_set", "pmbr_boot", "on"], check=False)
await run(["parted", "-s", disk.device, "disk_set", "pmbr_boot", "on"], check=False)


async def create_boot_pool(devices):
Expand Down
38 changes: 35 additions & 3 deletions truenas_installer/installer_menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import humanfriendly

from .dialog import dialog_checklist, dialog_menu, dialog_msgbox, dialog_password, dialog_yesno
from .disks import list_disks
from .disks import Disk, list_disks
from .exception import InstallError
from .install import install
from .serial import serial_sql
Expand Down Expand Up @@ -72,11 +72,31 @@ async def _install_upgrade_internal(self):
)
continue

wipe_disks = [
disk.name
for disk in disks
if (
any(zfs_member.pool == "boot-pool" for zfs_member in disk.zfs_members) and
disk.name not in destination_disks
)
]
if wipe_disks:
# The presence of multiple `boot-pool` disks with different guids leads to boot pool import error
text = "\n".join([
f"Disk(s) {', '.join(wipe_disks)} contain existing TrueNAS boot pool, but they were not "
f"selected for TrueNAS installation. This configuration will not work unless these disks "
"are erased.",
"",
f"Proceed with erasing {', '.join(wipe_disks)}?"
])
if not await dialog_yesno("TrueNAS Installation", text):
continue

break

text = "\n".join([
"WARNING:",
f"- This erases ALL partitions and data on {', '.join(destination_disks)}.",
f"- This erases ALL partitions and data on {', '.join(sorted(wipe_disks + destination_disks))}.",
f"- {', '.join(destination_disks)} will be unavailable for use in storage pools.",
"",
"NOTE:",
Expand Down Expand Up @@ -112,7 +132,15 @@ async def _install_upgrade_internal(self):
sql = await serial_sql()

try:
await install(destination_disks, set_pmbr, authentication_method, None, sql, self._callback)
await install(
self._select_disks(disks, destination_disks),
self._select_disks(disks, wipe_disks),
set_pmbr,
authentication_method,
None,
sql,
self._callback,
)
except InstallError as e:
await dialog_msgbox("Installation Error", e.message)
return False
Expand All @@ -126,6 +154,10 @@ async def _install_upgrade_internal(self):
)
return True

def _select_disks(self, disks: list[Disk], disks_names: list[str]):
disks_dict = {disk.name: disk for disk in disks}
return [disks_dict[disk_name] for disk_name in disks_names]

async def _authentication_truenas_admin(self):
return await self._authentication_password(
"truenas_admin",
Expand Down
10 changes: 10 additions & 0 deletions truenas_installer/server/api/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ async def system_info(context):
"size": {"type": "number"},
"model": {"type": "string"},
"label": {"type": "string"},
"zfs_members": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"pool": {"type": "string"},
},
},
},
"removable": {"type": "boolean"},
},
},
Expand Down
28 changes: 26 additions & 2 deletions truenas_installer/server/api/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from aiohttp_rpc.protocol import JsonRpcRequest

from truenas_installer.disks import list_disks
from truenas_installer.exception import InstallError
from truenas_installer.install import install as install_
from truenas_installer.serial import serial_sql
Expand All @@ -18,6 +19,10 @@
"required": ["disks", "set_pmbr", "authentication"],
"additionalProperties": False,
"properties": {
"wipe_disks": {
"type": "array",
"items": {"type": "string"},
},
"disks": {
"type": "array",
"items": {"type": "string"},
Expand Down Expand Up @@ -76,9 +81,28 @@ async def install(context, params):
"""
Performs system installation.
"""
disks = {disk.name: disk for disk in await list_disks()}

try:
destination_disks = [disks[disk_name] for disk_name in params["disks"]]
except KeyError as e:
raise Error(f"Disk {e.args[0]!r} does not exist", errno.EFAULT)

try:
wipe_disks = [disks[disk_name] for disk_name in params.get("wipe_disks", [])]
except KeyError as e:
raise Error(f"Disk {e.args[0]!r} does not exist", errno.EFAULT)

try:
await install_(params["disks"], params["set_pmbr"], params["authentication"], params.get("post_install", None),
await serial_sql(), functools.partial(callback, context.server))
await install_(
destination_disks,
wipe_disks,
params["set_pmbr"],
params["authentication"],
params.get("post_install", None),
await serial_sql(),
functools.partial(callback, context.server),
)
except InstallError as e:
raise Error(e.message, errno.EFAULT)

Expand Down
Loading