Skip to content

Commit

Permalink
Add daskhub dependent values to z2jh values using if/else control flow
Browse files Browse the repository at this point in the history
  • Loading branch information
sgibson91 committed May 17, 2022
1 parent 0b2295e commit 52015cd
Showing 1 changed file with 102 additions and 0 deletions.
102 changes: 102 additions & 0 deletions helm-charts/binderhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,56 @@ binderhub:
limits:
memory: 1Gi
singleuser:
# "dask-gateway" contains a hyphen, so it must be looked up with `index`.
{{- if index .Values "dask-gateway" "enabled" }}
# Almost everyone using dask by default wants JupyterLab
defaultUrl: /lab
extraLabels:
hub.jupyter.org/network-access-proxy-http: "true"
cloudMetadata:
# Don't block access to AWS cloud metadata
# If we block it, our users can't access S3 buckets / other AWS services
# without an explicit identity
# FIXME: Provide an explicit identity for users instead
blockWithIptables: false
serviceAccountName: user-sa
extraEnv:
# About DASK_ prefixed variables we set:
#
# 1. k8s native variable expansion is applied with $(MY_ENV) syntax. The
# order in which variables are defined matters, though, and we are at
# the mercy of how KubeSpawner renders our passed dictionaries.
#
# 2. Dask loads local YAML config.
#
# 3. Dask loads environment variables prefixed DASK_.
# - DASK_ is stripped
# - Capitalization is ignored
# - Double underscore means a nested configuration
# - `ast.literal_eval` is used to parse values
#
# 4. dask-gateway and dask-distributed look at their config and expand
# expressions in {} again, sometimes only with the environment
# variables as context but sometimes also with additional variables.
#
# References:
# - K8s expansion: https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/
# - KubeSpawner issue: https://github.com/jupyterhub/kubespawner/issues/491
# - Dask config: https://docs.dask.org/en/latest/configuration.html
# - Exploration issue: https://github.com/2i2c-org/infrastructure/issues/442
#
# DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE makes the default worker image
# match the singleuser image.
DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE: "{JUPYTER_IMAGE_SPEC}"
# DASK_GATEWAY__CLUSTER__OPTIONS__ENVIRONMENT makes some environment
# variables be copied over to the worker nodes from the user nodes.
DASK_GATEWAY__CLUSTER__OPTIONS__ENVIRONMENT: '{"SCRATCH_BUCKET": "$(SCRATCH_BUCKET)", "PANGEO_SCRATCH": "$(PANGEO_SCRATCH)"}'
# DASK_DISTRIBUTED__DASHBOARD_LINK makes the suggested link to the
# dashboard account for the /user/<username> prefix in the path. Note
# that this still misbehaves if you have a named server, but now it's at
# least functional for non-named servers.
DASK_DISTRIBUTED__DASHBOARD_LINK: "/user/{JUPYTERHUB_USER}/proxy/{port}/status"
{{- end }}

extraFiles:
jupyter_notebook_config.json:
mountPath: /usr/local/etc/jupyter/jupyter_notebook_config.json
Expand Down Expand Up @@ -139,6 +189,13 @@ binderhub:
blocked_users:
- deployment-service-check
services:
# "dask-gateway" contains a hyphen, so it must be looked up with `index`.
{{- if index .Values "dask-gateway" "enabled" }}
dask-gateway:
# Don't display a dask-gateway entry under 'services',
# as dask-gateway has no UI
display: false
{{- end }}

# hub-health service helps us run health checks from the deployer script.
# The JupyterHub Helm chart will automatically generate an API token for
# services and expose it in a k8s Secret named `hub`. When we run health
Expand All @@ -153,6 +210,10 @@ binderhub:
nodeSelector:
hub.jupyter.org/node-purpose: core
networkPolicy:
# "dask-gateway" contains a hyphen, so it must be looked up with `index`.
{{- if index .Values "dask-gateway" "enabled" }}
# FIXME: Enable this when dask-gateway chart v0.9.1 or higher is used
enabled: false
{{- else }}
enabled: true
ingress:
- from:
Expand Down Expand Up @@ -186,7 +247,48 @@ binderhub:
ports:
- port: http
protocol: TCP
{{- end }}

extraConfig:
# "dask-gateway" contains a hyphen, so it must be looked up with `index`.
{{- if index .Values "dask-gateway" "enabled" }}
daskhub-01-add-dask-gateway-values: |
# Wires the hub and the singleuser servers up to Dask Gateway:
# 1. Sets the `DASK_GATEWAY__*` variables (address, proxy address, public
#    address and auth type) in the singleuser environment. `setdefault` is
#    used so values that are already configured are not overridden.
# 2. Adds the URL for the Dask Gateway JupyterHub service.
#
# NOTE(review): `c` is expected to be the config object that is in scope when
# JupyterHub evaluates this snippet; it is not defined here.
import os

# These are set by jupyterhub.
release_name = os.environ["HELM_RELEASE_NAME"]
release_namespace = os.environ["POD_NAMESPACE"]

if "PROXY_HTTP_SERVICE_HOST" in os.environ:
    # https is enabled, we want to use the internal http service.
    gateway_address = "http://{}:{}/services/dask-gateway/".format(
        os.environ["PROXY_HTTP_SERVICE_HOST"],
        os.environ["PROXY_HTTP_SERVICE_PORT"],
    )
    print("Setting DASK_GATEWAY__ADDRESS {} from HTTP service".format(gateway_address))
else:
    gateway_address = "http://proxy-public/services/dask-gateway"
    print("Setting DASK_GATEWAY__ADDRESS {}".format(gateway_address))

# Internal address to connect to the Dask Gateway.
c.KubeSpawner.environment.setdefault("DASK_GATEWAY__ADDRESS", gateway_address)
# Internal address for the Dask Gateway proxy.
c.KubeSpawner.environment.setdefault(
    "DASK_GATEWAY__PROXY_ADDRESS",
    "gateway://traefik-{}-dask-gateway.{}:80".format(release_name, release_namespace),
)
# Relative address for the dashboard link.
c.KubeSpawner.environment.setdefault("DASK_GATEWAY__PUBLIC_ADDRESS", "/services/dask-gateway/")
# Use JupyterHub to authenticate with Dask Gateway.
c.KubeSpawner.environment.setdefault("DASK_GATEWAY__AUTH__TYPE", "jupyterhub")

# Adds Dask Gateway as a JupyterHub service to make the gateway available at
# {HUB_URL}/services/dask-gateway
service_url = "http://traefik-{}-dask-gateway.{}".format(release_name, release_namespace)
for service in c.JupyterHub.services:
    if service["name"] == "dask-gateway":
        if not service.get("url"):
            print("Adding dask-gateway service URL")
            # Assign directly: `setdefault` would silently keep an existing
            # empty or None "url" entry, which is exactly the case the check
            # above is meant to repair.
            service["url"] = service_url
        break
else:
    # for/else: only reached when no service named "dask-gateway" was found.
    print("dask-gateway service not found. Did you set jupyterhub.hub.services.dask-gateway.apiToken?")
{{- end }}

01-custom-theme: |
from z2jh import get_config
c.JupyterHub.template_paths = ['/usr/local/share/jupyterhub/custom_templates/']
Expand Down

0 comments on commit 52015cd

Please sign in to comment.