From 7a435c110ca382fbb502c0ca1c5d3ded5e4f0b49 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 14:18:41 +0000 Subject: [PATCH 01/13] We don't require auth to fetch the hub spec, reduces auth calls made --- deployer/commands/exec/infra_components.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index a63753b2d9..f6d26c2fb0 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -40,12 +40,11 @@ def root_homes( with open(config_file_path) as f: cluster = Cluster(yaml.load(f), config_file_path.parent) - with cluster.auth(): - hubs = cluster.hubs - hub = next((hub for hub in hubs if hub.spec["name"] == hub_name), None) - if not hub: - print_colour("Hub does not exist in {cluster_name} cluster}") - return + hubs = cluster.hubs + hub = next((hub for hub in hubs if hub.spec["name"] == hub_name), None) + if not hub: + print_colour("Hub does not exist in {cluster_name} cluster}") + return server_ip = base_share_name = "" for values_file in hub.spec["helm_chart_values_files"]: From 674fc258d8f8a4b56300e9f551199209ebfaa83c Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 14:19:11 +0000 Subject: [PATCH 02/13] Ensure extra NFS args are all options --- deployer/commands/exec/infra_components.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index f6d26c2fb0..831c55a0ef 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -22,13 +22,13 @@ def root_homes( cluster_name: str = typer.Argument(..., help="Name of cluster to operate on"), hub_name: str = typer.Argument(..., help="Name of hub to operate on"), - extra_nfs_server: str = typer.Argument( + extra_nfs_server: str = typer.Option( None, help="IP address of an extra NFS server to mount" ), - extra_nfs_base_path: str = typer.Argument( + extra_nfs_base_path: str = typer.Option( None, help="Path of the extra NFS share to mount" ), - extra_nfs_mount_path: str = typer.Argument( + extra_nfs_mount_path: str = typer.Option( None, help="Mount point for the extra NFS share" ), ): From 913851f8d205e68baa78fae4a4c69dc570ed1ec7 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 14:22:07 +0000 Subject: [PATCH 03/13] Split kubectl run command into a create and exec couplet - Ensures the pod spec has the right metadata to be placed in the correct namespace - Dumps the pod spec to a temp json file - Runs kubectl create then kubectl exec to access the pod - This ensures that our bash process is NOT PID 1, which will mean the container will NOT restart if we exit that process for any reason - Cleans up the temp json file --- deployer/commands/exec/infra_components.py | 42 ++++++++++++---------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 831c55a0ef..a4bba4d403 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -1,5 +1,6 @@ import json import os +import tempfile import subprocess import typer @@ -93,6 +94,10 @@ def root_homes( pod = { "apiVersion": "v1", "kind": "Pod", + "metadata": { + "name": pod_name, + "namespace": hub_name, + }, "spec": { "terminationGracePeriodSeconds": 1, "automountServiceAccountToken": False, @@ -111,27 +116,28 @@ def root_homes( }, } - cmd = [ - "kubectl", - "-n", - hub_name, - "run", - "--rm", # Remove pod when we're done - "-it", # Give us a shell! - "--overrides", - json.dumps(pod), - "--image", - # Use ubuntu image so we get GNU rm and other tools - # Should match what we have in our pod definition - UBUNTU_IMAGE, - pod_name, - "--", - "/bin/bash", - "-l", + # Ask kube-controller to create a pod + create_cmd = ["kubectl", "create", "-f"] + + # Dump the pod spec to a temporary file + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmpf: + json.dump(pod, tmpf) + + create_cmd.append(tmpf.name) + + # Command to exec into pod + exec_cmd = [ + "kubectl", "-n", hub_name, "exec", "-it", pod_name, "--", "/bin/bash", "-l" ] with cluster.auth(): - subprocess.check_call(cmd) + subprocess.check_call(create_cmd) + subprocess.check_call(exec_cmd) + + + # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 + # Delete temporary pod spec file + os.remove(tmpf.name) @exec_app.command() From 345be9c2ab127b1d7cc7910ae51e6dd0cbb69bad Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 14:22:50 +0000 Subject: [PATCH 04/13] Adds an --rm flag to optionally remove the pod after completion --- deployer/commands/exec/infra_components.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index a4bba4d403..909f316741 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -23,6 +23,7 @@ def root_homes( cluster_name: str = typer.Argument(..., help="Name of cluster to operate on"), hub_name: str = typer.Argument(..., help="Name of hub to operate on"), + rm_pod: bool = typer.Option(False, "--rm", help="Automatically delete the pod after completing"), extra_nfs_server: str = typer.Option( None, help="IP address of an extra NFS server to mount" ), @@ -134,6 +135,9 @@ def root_homes( subprocess.check_call(create_cmd) subprocess.check_call(exec_cmd) + # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 + if rm_pod: + delete_pod(pod_name, hub_name) # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 # Delete temporary pod spec file From 6d07761f28fb22f4f22844545d9b9355cf4aad90 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:31:15 +0000 Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deployer/commands/exec/infra_components.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 909f316741..c913871f48 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -1,7 +1,7 @@ import json import os -import tempfile import subprocess +import tempfile import typer from ruamel.yaml import YAML @@ -23,7 +23,9 @@ def root_homes( cluster_name: str = typer.Argument(..., help="Name of cluster to operate on"), hub_name: str = typer.Argument(..., help="Name of hub to operate on"), - rm_pod: bool = typer.Option(False, "--rm", help="Automatically delete the pod after completing"), + rm_pod: bool = typer.Option( + False, "--rm", help="Automatically delete the pod after completing" + ), extra_nfs_server: str = typer.Option( None, help="IP address of an extra NFS server to mount" ), @@ -128,7 +130,15 @@ def root_homes( # Command to exec into pod exec_cmd = [ - "kubectl", "-n", hub_name, "exec", "-it", pod_name, "--", "/bin/bash", "-l" + "kubectl", + "-n", + hub_name, + "exec", + "-it", + pod_name, + "--", + "/bin/bash", + "-l", ] with cluster.auth(): From 7b7b0d13bc6d1aae45c6f6447213673e6a9ee51e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 14:39:04 +0000 Subject: [PATCH 06/13] Update deployer exec root-homes documentation --- deployer/README.md | 46 +++++++++++++++------------- docs/howto/features/storage-quota.md | 2 +- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/deployer/README.md b/deployer/README.md index 95b14fa0b6..8628bc08ba 100644 --- a/deployer/README.md +++ b/deployer/README.md @@ -384,6 +384,30 @@ setup or an outage), or when taking down a hub. All these commands take a cluster and hub name as parameters, and perform appropriate authentication before performing their function. +#### `exec hub` + +This subcommand gives you an interactive shell on the hub pod itself, so you +can poke around to see what's going on. Particularly useful if you want to peek +at the hub db with the `sqlite` command. + +#### `exec homes` + +This subcommand gives you a shell with the home directories of all the +users on the given hub in the given cluster mounted under `/home`. +Very helpful when doing (rare) manual operations on user home directories, +such as renames. + +When you exit the shell, the temporary pod spun up is removed. + +#### `exec root-homes` + +Similar to `exec homes` but mounts the _entire_ NFS filesystem to `/root-homes`. +You can optionally mount a secondary NFS share if required, which is useful when migrating data across servers. + +#### `exec aws` + +This sub-command can exec into a shell with appropriate AWS credentials (including MFA). + #### `exec debug` This sub-command is useful for debugging. @@ -410,28 +434,6 @@ in our 2i2c cluster that can be accessed via this command, and speeds up image b Once you run this command, run `export DOCKER_HOST=tcp://localhost:23760` in another terminal to use the faster remote docker daemon. -#### `exec shell` -This exec sub-command can be used to acquire a shell in various places of the infrastructure. - -##### `exec shell hub` - -This subcommand gives you an interactive shell on the hub pod itself, so you -can poke around to see what's going on. Particularly useful if you want to peek -at the hub db with the `sqlite` command. - -##### `exec shell homes` - -This subcommand gives you a shell with the home directories of all the -users on the given hub in the given cluster mounted under `/home`. -Very helpful when doing (rare) manual operations on user home directories, -such as renames. - -When you exit the shell, the temporary pod spun up is removed. - -##### `exec shell aws` - -This sub-command can exec into a shall with appropriate AWS credentials (including MFA). - ## Running Tests To execute tests on the `deployer`, you will need to install the development requirements and then invoke `pytest` from the root of the repository. diff --git a/docs/howto/features/storage-quota.md b/docs/howto/features/storage-quota.md index 1409c48372..88454ad5e2 100644 --- a/docs/howto/features/storage-quota.md +++ b/docs/howto/features/storage-quota.md @@ -82,7 +82,7 @@ Here's an example of how to do this: ```bash # Create a throwaway pod with both the existing home directories and the new NFS server mounted -deployer exec root-homes --extra_nfs_server= --extra_nfs_base_path=/ --extra_nfs_mount_path=/new-nfs-volume +deployer exec root-homes --extra-nfs-server= --extra-nfs-base-path=/ --extra-nfs-mount-path=/new-nfs-volume # Copy the existing home directories to the new NFS server while keeping the original permissions rsync -av --info=progress2 /root-homes/ /new-nfs-volume/ From a961ebb88498006403761dbba3e878a41523470d Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 12 Dec 2024 16:03:16 +0000 Subject: [PATCH 07/13] Reduce line count by simplifying kubectl create command --- deployer/commands/exec/infra_components.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index c913871f48..94b119d555 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -119,15 +119,10 @@ def root_homes( }, } - # Ask kube-controller to create a pod - create_cmd = ["kubectl", "create", "-f"] - # Dump the pod spec to a temporary file with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmpf: json.dump(pod, tmpf) - create_cmd.append(tmpf.name) - # Command to exec into pod exec_cmd = [ "kubectl", @@ -142,7 +137,8 @@ def root_homes( ] with cluster.auth(): - subprocess.check_call(create_cmd) + # Ask kube-controller to create a pod + subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) subprocess.check_call(exec_cmd) # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 From 7aa02046281b62e2ba21877fc8ecacb1240cd615 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 13 Dec 2024 10:50:27 +0000 Subject: [PATCH 08/13] Ensure pod is deleted when the exec command ends, even with an exit code greater than 0, and clean up tempfile use --- deployer/commands/exec/infra_components.py | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 94b119d555..5b0c0578d9 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -119,10 +119,6 @@ def root_homes( }, } - # Dump the pod spec to a temporary file - with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmpf: - json.dump(pod, tmpf) - # Command to exec into pod exec_cmd = [ "kubectl", @@ -136,18 +132,20 @@ def root_homes( "-l", ] - with cluster.auth(): - # Ask kube-controller to create a pod - subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) - subprocess.check_call(exec_cmd) - - # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 - if rm_pod: - delete_pod(pod_name, hub_name) - - # I want to ensure this code runs event if the exec cmd returns an exit code other than 0 - # Delete temporary pod spec file - os.remove(tmpf.name) + with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as tmpf: + # Dump the pod spec to a temporary file + json.dump(pod, tmpf) + tmpf.flush() + + with cluster.auth(): + try: + # Ask kube-controller to create a pod + subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) + # Exec into pod + subprocess.check_call(exec_cmd) + finally: + if rm_pod: + delete_pod(pod_name, hub_name) @exec_app.command() From 7a4261f5c2f27d93aba15affa6f65a4b8b315d0f Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 13 Dec 2024 10:59:47 +0000 Subject: [PATCH 09/13] Swap --rm flag for --persist since most times we want to delete the pod on exit --- deployer/commands/exec/infra_components.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 5b0c0578d9..2411cf42fa 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -23,8 +23,8 @@ def root_homes( cluster_name: str = typer.Argument(..., help="Name of cluster to operate on"), hub_name: str = typer.Argument(..., help="Name of hub to operate on"), - rm_pod: bool = typer.Option( - False, "--rm", help="Automatically delete the pod after completing" + persist: bool = typer.Option( + False, "--persist", help="Do not automatically delete the pod after completing. Useful for long-running processes." ), extra_nfs_server: str = typer.Option( None, help="IP address of an extra NFS server to mount" @@ -144,7 +144,7 @@ def root_homes( # Exec into pod subprocess.check_call(exec_cmd) finally: - if rm_pod: + if not persist: delete_pod(pod_name, hub_name) From 34d5dcce399a9ced46d42288aa5025de06b7fdad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:00:35 +0000 Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deployer/commands/exec/infra_components.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 2411cf42fa..1d17271bc5 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -24,7 +24,9 @@ def root_homes( cluster_name: str = typer.Argument(..., help="Name of cluster to operate on"), hub_name: str = typer.Argument(..., help="Name of hub to operate on"), persist: bool = typer.Option( - False, "--persist", help="Do not automatically delete the pod after completing. Useful for long-running processes." + False, + "--persist", + help="Do not automatically delete the pod after completing. Useful for long-running processes.", ), extra_nfs_server: str = typer.Option( None, help="IP address of an extra NFS server to mount" From 1038e43afaf6163f3d801cd2285bb4ec663dc958 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 13 Dec 2024 11:11:01 +0000 Subject: [PATCH 11/13] Improve a comment around the --rm flag to kubectl run command --- deployer/commands/exec/infra_components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 2411cf42fa..3e13843c68 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -193,7 +193,7 @@ def homes( "-n", hub_name, "run", - "--rm", # Remove pod when we're done + "--rm", # Deletes the pod when the process completes, successfully or otherwise "-it", # Give us a shell! "--overrides", json.dumps(pod), From bf7fd6266e7e6bc56a3876942482b956c6c7cf69 Mon Sep 17 00:00:00 2001 From: Sarah Gibson <44771837+sgibson91@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:45:56 +0000 Subject: [PATCH 12/13] Correct a comment Co-authored-by: Yuvi Panda --- deployer/commands/exec/infra_components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 485866a0c5..30133eec09 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -141,7 +141,7 @@ def root_homes( with cluster.auth(): try: - # Ask kube-controller to create a pod + # Ask api-server to create a pod subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) # Exec into pod subprocess.check_call(exec_cmd) From 86464bd805adf74cbca07c79a82a373a058e57d6 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 13 Dec 2024 17:52:47 +0000 Subject: [PATCH 13/13] Add a comment explaining why we're splitting kubectl run into create and exec --- deployer/commands/exec/infra_components.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deployer/commands/exec/infra_components.py b/deployer/commands/exec/infra_components.py index 30133eec09..beccd6f0d4 100644 --- a/deployer/commands/exec/infra_components.py +++ b/deployer/commands/exec/infra_components.py @@ -141,6 +141,15 @@ def root_homes( with cluster.auth(): try: + # We are splitting the previous `kubectl run` command into a + # create and exec couplet, because using run meant that the bash + # process we start would be assigned PID 1. This is a 'special' + # PID and killing it is equivalent to sending a shutdown command + # that will cause the pod to restart, killing any processes + # running in it. By using create then exec instead, we will be + # assigned a PID other than 1 and we can safely exit the pod to + # leave long-running processes if required. + # # Ask api-server to create a pod subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) # Exec into pod