diff --git a/.github/workflows/build-review-app.yaml b/.github/workflows/build-review-app.yaml
new file mode 100644
index 00000000..9f54a2d0
--- /dev/null
+++ b/.github/workflows/build-review-app.yaml
@@ -0,0 +1,33 @@
+name: Build Review App
+
+on:
+  pull_request:
+    branches: ['main']
+    types: [opened, reopened, synchronize, labeled]
+
+env:
+  PIPELINE: '17cc0239-494f-4a68-aa75-3da7c466709c'
+  REPO_URL: 'https://github.com/pangeo-forge/pangeo-forge-orchestrator'
+
+jobs:
+  build:
+    if: |
+      github.event.label.name == 'build-review-app' ||
+      contains( github.event.pull_request.labels.*.name, 'build-review-app')
+    runs-on: ubuntu-latest
+    steps:
+      # https://devcenter.heroku.com/articles/platform-api-reference#review-app-create
+      - run: |
+          curl -X POST https://api.heroku.com/review-apps \
+            -d '{
+              "branch": "${{ github.head_ref }}",
+              "pr_number": ${{ github.event.pull_request.number }},
+              "pipeline": "${{ env.PIPELINE }}",
+              "source_blob": {
+                "url": "${{ env.REPO_URL }}/tarball/${{ github.event.pull_request.head.sha }}",
+                "version": "${{ github.event.pull_request.head.sha }}"
+              }
+            }' \
+            -H "Content-Type: application/json" \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}"
diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
new file mode 100644
index 00000000..4b415833
--- /dev/null
+++ b/.github/workflows/delete-review-app.yaml
@@ -0,0 +1,32 @@
+name: Delete Review App
+
+on:
+  pull_request:
+    branches: ['main']
+    types: [unlabeled]
+
+env:
+  PIPELINE: '17cc0239-494f-4a68-aa75-3da7c466709c'
+
+jobs:
+  delete:
+    if: |
+      github.event.label.name == 'build-review-app'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get review app id & export to env
+        run: |
+          curl -s https://api.heroku.com/pipelines/${{ env.PIPELINE }}/review-apps \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}" \
+          | python3 -c "
+          import sys, json;
+          j = json.load(sys.stdin);
+          print('REVIEW_APP_ID=' + [app['id'].strip() for app in j if app['pr_number'] == ${{ github.event.pull_request.number }}].pop(0))
+          " >> $GITHUB_ENV
+      - name: Delete review app
+        run: |
+          curl -X DELETE https://api.heroku.com/review-apps/${{ env.REVIEW_APP_ID }} \
+            -H "Content-Type: application/json" \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}"
diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml
new file mode 100644
index 00000000..fc3e92bc
--- /dev/null
+++ b/.github/workflows/test-dataflow-integration.yaml
@@ -0,0 +1,184 @@
+name: Test Dataflow Integration
+
+on:
+  deployment_status:
+  # TODO: add on 'schedule' against staging deployment?
+  pull_request:
+    branches: ['main']
+    types: [labeled]
+
+jobs:
+  matrix-generate-prs:
+    # Generates the matrix of reference prs to test against. Compare:
+    # - https://blog.aspect.dev/github-actions-dynamic-matrix
+    # - https://github.com/aspect-build/bazel-lib/blob/
+    #   0c8ef86684d5a3335bb5e911a51d64e5fab39f9b/.github/workflows/ci.yaml
+    runs-on: ubuntu-latest
+    steps:
+      - id: default
+        run: echo "pr=22::gpcp-from-gcs" >> $GITHUB_OUTPUT
+
+      - id: also-test-from-deployment-status
+        if: |
+          github.event_name == 'deployment_status'
+        run: |
+          export ENVIRONMENT=${{ github.event.deployment_status.environment }} \
+          && python3 -c "
+          import os; print(os.environ['ENVIRONMENT'].split('-')[-1])" \
+          | xargs -I{} curl -s ${{ github.event.deployment_status.repository_url }}/pulls/{} \
+          | python3 -c "
+          import json, sys;
+          labels = json.load(sys.stdin)['labels'];
+          also_test = [
+              l['name'].split('also-test:')[-1] for l in labels if l['name'].startswith('also-test')
+          ]
+          if also_test:
+              for label in also_test:
+                  print(f'pr={label}')
+          " >> $GITHUB_OUTPUT
+
+      - id: also-test-from-pull-request
+        if: |
+          github.event_name == 'pull_request'
+          && contains( join(github.event.pull_request.labels.*.name), 'also-test')
+        run: |
+          python3 -c "
+          import json;
+          labels = json.loads('${{ toJSON(github.event.pull_request.labels.*.name) }}')
+          also_test = [l.split('also-test:')[-1] for l in labels if l.startswith('also-test')]
+          if also_test:
+              for label in also_test:
+                  print(f'pr={label}')
+          " >> $GITHUB_OUTPUT
+    outputs:
+      # Will look like '["22::gpcp-from-gcs", etc...]'
+      prs: ${{ toJSON(steps.*.outputs.pr) }}
+
+  test:
+    # run when:
+    # - a PR is labeled 'test-dataflow'
+    #   (assuming it is also labeled 'build-review-app'
+    #   *and* the deployment for the head sha is a success)
+    # - heroku marks a deployment with 'state' == 'success'
+    #   (assuming PR also has 'test-dataflow' label)
+    runs-on: ubuntu-latest
+
+    needs:
+      - matrix-generate-prs
+
+    strategy:
+      fail-fast: false
+      matrix:
+        prs: ${{ fromJSON(needs.matrix-generate-prs.outputs.prs) }}
+
+    steps:
+      # conditional step if triggering event is a pull_request
+      - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from pull_request
+        if: |
+          github.event_name == 'pull_request'
+          && github.event.label.name == 'test-dataflow'
+          && contains( github.event.pull_request.labels.*.name, 'build-review-app')
+        # if we get here, this is a pull request, so we need to know the statuses url
+        # for the deployment associated with the head sha. we use the **base** repo
+        # deployments url, and look for deployments associated with pr's head sha.
+        # (the head repo deployments url would cause errors, if the pr is from a fork.)
+        run: |
+          export DEPLOYMENTS_URL=\
+          ${{ github.event.pull_request.base.repo.deployments_url }}\
+          \?environment\=pforge-pr-${{ github.event.pull_request.number }}\
+          \&sha\=${{ github.event.pull_request.head.sha }}
+          curl -s $DEPLOYMENTS_URL \
+          | python3 -c "
+          import sys, json; print(json.load(sys.stdin)[0]['statuses_url'])" \
+          | xargs -I{} curl -s {} \
+          | python3 -c "
+          import sys, json;
+          d = json.load(sys.stdin)[-1];
+          print('TEST_DATAFLOW=True');
+          print('DEPLOYMENT_STATE=' + d['state']);
+          print('REVIEW_APP_URL=' + d['environment_url']);" \
+          >> $GITHUB_ENV
+
+      # conditional step if triggering event is deployment_status
+      - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from deployment_status
+        if: |
+          github.event_name == 'deployment_status'
+        # if we're here, we know this is a deployment_status event, but we don't know whether or not
+        # the PR has the 'test-dataflow' label.
+        # (it's possible the PR *only* has the 'build-review-app' label, but not the
+        # 'test-dataflow' label, in which case we do not want to deploy a dataflow job.)
+        # so before we do anything else, we need to make sure this PR is labeled 'test-dataflow'.
+        # note that the github deployment "environments" for our review apps are named according to the
+        # convention "pforge-pr-${NUMBER}". so our most direct path to get the PR number from the deployment
+        # status event is to parse the PR number out of this string.
+        run: |
+          export ENVIRONMENT=${{ github.event.deployment_status.environment }} \
+          && python3 -c "
+          import os; print(os.environ['ENVIRONMENT'].split('-')[-1])" \
+          | xargs -I{} curl -s ${{ github.event.deployment_status.repository_url }}/pulls/{} \
+          | python3 -c "
+          import json, sys;
+          labels = json.load(sys.stdin)['labels'];
+          print('TEST_DATAFLOW=' + str(any([l['name'] == 'test-dataflow' for l in labels])));
+          print('REVIEW_APP_URL=' + '${{ github.event.deployment_status.environment_url }}');
+          print('DEPLOYMENT_STATE=' + '${{ github.event.deployment_status.state }}');" \
+          >> $GITHUB_ENV
+
+      - name: Is app up?
+        if: ${{ env.DEPLOYMENT_STATE == 'success' }}
+        # Heroku marks the deployment as 'success' when the *build* succeeds, not when the
+        # *release* succeeds. So there is actually still a latency between when this status is set
+        # and when the review app is ready to receive requests. In general, the review apps take
+        # about 3 minutes to release. So here we wait 2 minutes, then start checking if the app is
+        # up, repeating every 30 seconds until it's either up or, if > 10 mins have elapsed, we
+        # assume something's gone wrong and bail out.
+        run: |
+          python3 -c "
+          import sys, time;
+          from urllib.request import urlopen;
+          start = time.time();
+          time.sleep(60 * 2);
+          while True:
+              elapsed = time.time() - start;
+              if elapsed > 60 * 10:
+                  # releases shouldn't take > 10 mins; something's gone wrong, so exit.
+                  sys.exit(1)
+              contents = urlopen('${{ env.REVIEW_APP_URL }}').read().decode()
+              if contents == '{\"status\":\"ok\"}':
+                  # if we get this response from the review app, it's up and ready to go.
+                  print('IS_UP=True')
+                  break
+              else:
+                  time.sleep(30)" \
+          >> $GITHUB_ENV
+
+      - name: Checkout the repo
+        uses: actions/checkout@v3
+
+      - name: Install deps
+        run: |
+          python3 -m pip install aiohttp PyJWT pydantic pytest pytest-asyncio gidgethub
+
+      - name: 'Authenticate to Google Cloud'
+        uses: 'google-github-actions/auth@v1'
+        with:
+          # the creds to deploy jobs to dataflow are packaged with the review app itself, but
+          # this test needs its own read-only creds so that it can poll dataflow for job status
+          credentials_json: '${{ secrets.GCP_DATAFLOW_READONLY_SERVICE_KEY }}'
+
+      - name: Run test
+        if: |
+          env.DEPLOYMENT_STATE == 'success'
+          && env.IS_UP == 'True'
+          && env.TEST_DATAFLOW == 'True'
+        # So far, here we:
+        #   - programmatically make a /run comment on an existing PR in pforgetest
+        #   - check to ensure a dataflow job was submitted within a plausible timeframe
+        # Remaining TODO:
+        #   - parametrize SOURCE_REPO_FULL_NAME and SOURCE_REPO_PR_NUMBER
+        #   - wait for the job to complete (5-6 mins)
+        #   - check to make sure the job was successful
+        run: |
+          DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}' \
+          GH_WORKFLOW_RUN_ID=${{ github.run_id }} \
+          PR_NUMBER_AND_RECIPE_ID=${{ matrix.prs }} \
+          REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \
+          pytest -vxs tests.integration/test_dataflow.py
diff --git a/Dockerfile b/Dockerfile
index c6db30af..068acf54 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,25 +39,9 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.
   && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | tee /usr/share/keyrings/cloud.google.gpg \
   && apt-get update && apt-get -y install google-cloud-cli

-COPY requirements.txt ./
-RUN python3.9 -m pip install -r requirements.txt
-
-COPY . /opt/app
-WORKDIR /opt/app
-
-# heroku can't fetch submodule contents from github:
-# https://devcenter.heroku.com/articles/github-integration#does-github-integration-work-with-git-submodules
-# so even though we have this in the repo (for development & testing convenience), we actually .dockerignore
-# it, and then clone it from github at build time (otherwise we don't actually get these contents on heroku)
-# After cloning, reset to a specific commit, so we don't end up with the wrong contents.
+# Install git, for fetching submodule contents in Dockerfile.heroku
 RUN apt-get update && apt-get -y install git
-RUN git clone -b main --single-branch https://github.com/pangeo-forge/dataflow-status-monitoring \
-  && cd dataflow-status-monitoring \
-  && git reset --hard c72a594b2aea5db45d6295fadd801673bee9746f \
-  && cd -
-# the only deploy-time process which needs pangeo_forge_orchestrator installed is the review app's
-# `postdeploy/seed_review_app_data.py`, but this shouldn't interfere with anything else.
-RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0 pip install . --no-deps
-
-RUN chmod +x scripts.deploy/release.sh
+# Install pip requirements, a time-consuming step!
+COPY requirements.txt ./
+RUN python3.9 -m pip install -r requirements.txt
diff --git a/Dockerfile.heroku b/Dockerfile.heroku
new file mode 100644
index 00000000..0afc7680
--- /dev/null
+++ b/Dockerfile.heroku
@@ -0,0 +1,21 @@
+FROM pangeo/forge-orchestrator:latest
+
+COPY . /opt/app
+WORKDIR /opt/app
+
+# heroku can't fetch submodule contents from github:
+# https://devcenter.heroku.com/articles/github-integration#does-github-integration-work-with-git-submodules
+# so even though we have this in the repo (for development & testing convenience), we actually .dockerignore
+# it, and then clone it from github at build time (otherwise we don't actually get these contents on heroku)
+# After cloning, reset to a specific commit, so we don't end up with the wrong contents.
+RUN apt-get update && apt-get -y install git
+RUN git clone -b main --single-branch https://github.com/pangeo-forge/dataflow-status-monitoring \
+  && cd dataflow-status-monitoring \
+  && git reset --hard c72a594b2aea5db45d6295fadd801673bee9746f \
+  && cd -
+
+# the only deploy-time process which needs pangeo_forge_orchestrator installed is the review app's
+# `postdeploy/seed_review_app_data.py`, but this shouldn't interfere with anything else.
+RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0 pip install . --no-deps
+
+RUN chmod +x scripts.deploy/release.sh
diff --git a/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml b/bakeries/pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml
similarity index 100%
rename from bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml
rename to bakeries/pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml
diff --git a/docker-compose.yml b/docker-compose.yml
index 7c8a5bf2..a9591264 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,7 +7,9 @@ services:
   web:
     # For platform spec, see https://stackoverflow.com/a/70238851
    platform: linux/amd64
-    build: .
+    build:
+      context: .
+      dockerfile: Dockerfile.heroku
    ports:
      - '3000:8000'
    depends_on:
diff --git a/docs/README.md b/docs/README.md
index 258fb5ed..fd3d316d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -386,59 +386,16 @@ Both staging and prod are set up with [automatic certificate management](https:/

 ## `review`

-For each PR to `pangeo-forge-recipes`, Heroku creates a Review App which will hang around for two days.
-This is a live version of the app running against an ephemeral database. It can be used for manual
-checks or further integration testing.
+To build a Heroku Review App for a PR, simply add the `build-review-app` label to the PR. This will
+trigger a build of the review app which, once deployed, will be served at the ephemeral address
+https://pforge-pr-{PR_NUMBER}.herokuapp.com/.

 > We use Heroku's Review Apps
-> [injected-environment-variables](https://devcenter.heroku.com/articles/github-integration-review-apps#injected-environment-variables)
-> feature to dynamically set the `PANGEO_FORGE_DEPLOYMENT` env var for review apps to the value of the injected env var `HEROKU_APP_NAME`, which follows the pattern `pforge-pr-${PR number}`. Take a look at both `heroku.yml` and `scripts.deploy/release.sh` to see where this happens.
-
-> We also use Heroku's Review Apps
 > [postdeploy script](https://devcenter.heroku.com/articles/github-integration-review-apps#the-postdeploy-script)
 > feature to automatically seed each review app database with the
 > https://github.com/pforgetest/test-staged-recipes feedstock.
 > To see where this happens (and/or seed additional test data), see `postdeploy/seed_review_app_data.py`.

-To ensure a successful build of a Review App for your PR:
-
-1. Generate a `secrets/pforge-pr-${PR number}.yaml` secrets config for your review app, by:
-   1. Running:
-      ```console
-      $ python3 scripts.develop/generate_api_key.py pforge-pr-${PR number}
-      ```
-   2. Then running:
-      ```console
-      $ GITHUB_PAT=${Your GitHub PAT} python3 scripts.develop/new_github_app.py ${GitHub Username} review ${PR number}
-      ```
-      and following the link to complete the in-browser OAuth flow.
-2. Encrypting the secrets config with:
-   ```console
-   $ sops -e -i secrets/pforge-pr-${PR number}.yaml
-   ```
-3. Rename the review app bakery config as follows (where `SOME_OTHER_NUMBER` is a prior review app's PR number):
-   ```console
-   $ mv bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-SOME_OTHER_NUMBER.yaml bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-${PR number}.yaml
-   ```
-4. Push these changes (the secrets and bakeries config for your review app) to your PR
-5. From https://github.com/organizations/pforgetest/settings/apps, manually install the
-   `pforge-pr-${PR number}` app on all repos in the `pforgetest` org. (Optionally, suspend
-   all other installations from `pforgetest`, so that multiple apps are not active at one time.)
-6. Update the Review App's webhook url (decrypting your review app creds first):
-   ```console
-   $ sops -d -i secrets/pforge-pr-${PR number}.yaml
-   $ python3 scripts/update_hook_url.py review http://pforge-pr-${PR number}.herokuapp.com
-   ```
-
-> **Tip**: The first review app build often fails, perhaps because the PR was opened before all
-> of the above steps were complete. If the build fails, navigate to
-> [the Heroku Pipeline dashboard](https://dashboard.heroku.com/pipelines/17cc0239-494f-4a68-aa75-3da7c466709c)
-> and manually delete the Review App. Then, manually click
-> "create review app". Typically, this build will succeed if everything has been configured
-> correctly. If it fails, there may be a deeper issue in play. _**Important**_: A failed Review
-> App build _will not be fixed_ by simply re-releasing. It must be deleted and then rebuilt from
-> scratch.
-
 ## `staging`

 Changes merged to `main` will deploy the
@@ -798,6 +755,82 @@ consider adding an alternate pathway to test against the live GitHub API and/or
 `pangeo-forge-runner`. This comes with its own challenges, of course, including fixturization &
 managing rate limits, in the case of the GitHub API.

+## Integration testing
+
+### Dataflow
+
+To run a Dataflow integration test from a PR, add the following two labels to the PR:
+
+- `build-review-app`: This builds a Heroku Review App for the PR.
+- `test-dataflow`: This deploys a job to Dataflow from the Heroku Review App for the PR.
+
+By default, the Dataflow integration test creates an automated PR on the `pforgetest/test-staged-recipes`
+repo by duplicating https://github.com/pforgetest/test-staged-recipes/pull/22, and then triggers a test
+run of the `gpcp-from-gcs` recipe in that PR by commenting `/run gpcp-from-gcs` on the auto-generated PR.
+
+To also test against additional recipes as part of this integration test, create and add a third label to
+the PR, following the format:
+
+- `also-test:{TEST_STAGED_RECIPES_PR_NUMBER}::{RECIPE_ID}`: Here, `TEST_STAGED_RECIPES_PR_NUMBER` is the
+  number of an actual PR on the `pforgetest/test-staged-recipes` repository which you would like to
+  duplicate for the test, and `RECIPE_ID` is the name of a recipe referenced in the `meta.yaml` of that PR.
+  You may need to first create a reference PR on `pforgetest/test-staged-recipes` containing the desired
+  files.
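+
+As a sketch, all three labels can be added from the command line with the GitHub CLI (assuming `gh` is
+installed and authenticated; the PR number and the `also-test` values below are placeholders):
+
+```console
+$ gh pr edit ${PR number} --add-label build-review-app --add-label test-dataflow
+$ gh pr edit ${PR number} --add-label 'also-test:23::my-recipe-id'
+```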
+
+Once the integration test is underway, you will find the automated PR on `pforgetest/test-staged-recipes`.
+During test session teardown, the auto-generated PR is closed and the branch used to create it is
+deleted automatically.
+
+```mermaid
+
+sequenceDiagram
+    autonumber
+    actor Developer
+    participant orchestrator PR
+    participant integration test
+    participant Heroku API
+    participant Heroku Review App
+    participant pforgetest GitHub org
+    participant dev app proxy
+    participant Google Dataflow
+
+
+    Developer->>orchestrator PR: adds 'build-review-app' label
+    orchestrator PR->>Heroku API: requests review app
+    Heroku API-->>Heroku API: builds review app
+    Heroku API-->>orchestrator PR: sets deployment_status == success
+    Heroku API-->>Heroku Review App: begins release (~3 min)
+
+    Developer->>orchestrator PR: adds 'test-dataflow' label
+
+
+    loop
+        orchestrator PR->>Heroku Review App: polls release status
+        Heroku Review App-->>orchestrator PR: responds with status
+    end
+
+    orchestrator PR-->orchestrator PR: release status verified as 'ok'
+
+    orchestrator PR->>integration test: calls test
+
+    integration test->>pforgetest GitHub org: makes automated recipe PR
+    integration test->>pforgetest GitHub org: labels PR 'fwd:{review app url}'
+
+    pforgetest GitHub org->>dev app proxy: sends webhook
+
+    dev app proxy-->>Heroku Review App: forwards webhook
+
+    Heroku Review App-->Heroku Review App: syncs PR to database
+
+    integration test->>pforgetest GitHub org: adds `/run {recipe_id}` comment to recipe PR
+
+    pforgetest GitHub org->>dev app proxy: sends webhook
+
+    dev app proxy-->>Heroku Review App: forwards webhook
+
+    Heroku Review App->>Google Dataflow: submits job
+```
+
 # GitHub App: manual API calls

 Situations may arise in which you want to call the GitHub API directly, authenticated as a
diff --git a/heroku.yml b/heroku.yml
index 85e0b71b..5678f265 100644
--- a/heroku.yml
+++ b/heroku.yml
@@ -4,19 +4,17 @@ setup:
     - plan: papertrail:fixa
 build:
   docker:
-    web: Dockerfile
+    web: Dockerfile.heroku
 release:
   command:
     - ./scripts.deploy/release.sh
   image: web
 run:
   # The first line of this command sets PANGEO_FORGE_DEPLOYMENT to itself, if it exists,
-  # but if it doesn't exist, PANGEO_FORGE_DEPLOYMENT is set to the value of HEROKU_APP_NAME.
-  # The latter case occurs only in the review app context. We use this method because review
-  # app names are dynaically generated based on the PR number and are therefore to cumbersome
-  # to set manually for each PR. More on this syntax in: https://stackoverflow.com/a/2013589.
+  # but if it doesn't exist, PANGEO_FORGE_DEPLOYMENT is set to the value of 'dev-app-proxy'.
+  # The latter case occurs only in the review app context.
+  web: >
+    export PANGEO_FORGE_DEPLOYMENT="${PANGEO_FORGE_DEPLOYMENT:=dev-app-proxy}"
+    && echo "PANGEO_FORGE_DEPLOYMENT set to ${PANGEO_FORGE_DEPLOYMENT}"
+    && sops -d -i secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
+    && export DATAFLOW_CREDS='./secrets/dataflow-job-submission.json'
diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py
index a1e7f593..2805f579 100644
--- a/pangeo_forge_orchestrator/routers/github_app.py
+++ b/pangeo_forge_orchestrator/routers/github_app.py
@@ -87,8 +87,15 @@ async def get_access_token(gh: GitHubAPI) -> str:


 async def get_app_webhook_url(gh: GitHubAPI) -> str:
-    response = await gh.getitem("/app/hook/config", jwt=get_jwt(), accept=ACCEPT)
-    return response["url"]
+    if heroku_app_name := os.environ.get("HEROKU_APP_NAME", None):
+        # This env var is only set on Heroku Review Apps, so if it's present, we know
+        # we need to generate the review app url here, because the GitHub App webhook
+        # url is a proxy url, and not the actual url for this review app instance.
+        return f"https://{heroku_app_name}.herokuapp.com/github/hooks/"
+    else:
+        # This is not a Review App, so we can query the GitHub App webhook url.
+        response = await gh.getitem("/app/hook/config", jwt=get_jwt(), accept=ACCEPT)
+        return response["url"]


 async def get_repo_id(repo_full_name: str, gh: GitHubAPI) -> str:
@@ -728,7 +735,7 @@ async def run(
         # Add the traceback for this deployment failure to the recipe run, otherwise it could
         # easily get buried in the server logs. TODO: Consider: is there anything of security
         # significance in the call stack captured in the trace?
-        message = json.loads(recipe_run.message)
+        message = json.loads(recipe_run.message or "{}")
         recipe_run.message = json.dumps(message | {"trace": trace})
         db_session.add(recipe_run)
         db_session.commit()
diff --git a/postdeploy/seed_review_app_data.py b/postdeploy/seed_review_app_data.py
index 56893892..c9836ea4 100644
--- a/postdeploy/seed_review_app_data.py
+++ b/postdeploy/seed_review_app_data.py
@@ -14,8 +14,18 @@ test_staged_recipes = MODELS["feedstock"].table.from_orm(
     MODELS["feedstock"].creation(spec="pforgetest/test-staged-recipes")
 )
+gpcp_from_gcs = MODELS["feedstock"].table.from_orm(
+    MODELS["feedstock"].creation(spec="pforgetest/gpcp-from-gcs-feedstock")
+)
+default_bakery = MODELS["bakery"].table.from_orm(
+    MODELS["bakery"].creation(
+        region="foo",
+        name="pangeo-ldeo-nsf-earthcube",
+        description="bar",
+    )
+)

-to_commit = [test_staged_recipes]
+to_commit = [test_staged_recipes, gpcp_from_gcs, default_bakery]
 with Session(engine) as db_session:
     for model in to_commit:
         db_session.add(model)
diff --git a/scripts.deploy/release.sh b/scripts.deploy/release.sh
index fa8e6f76..20c91995 100644
--- a/scripts.deploy/release.sh
+++ b/scripts.deploy/release.sh
@@ -11,9 +11,8 @@ python3.9 -m alembic upgrade head

 if [[ -z "${PANGEO_FORGE_DEPLOYMENT}" ]]; then
     echo "PANGEO_FORGE_DEPLOYMENT undefined, so this must be a review app..."
-    echo "Review app injected env var \$HEROKU_APP_NAME=${HEROKU_APP_NAME}..."
-    echo "Setting PANGEO_FORGE_DEPLOYMENT=\$HEROKU_APP_NAME..."
-    export PANGEO_FORGE_DEPLOYMENT=$HEROKU_APP_NAME
+    echo "Setting PANGEO_FORGE_DEPLOYMENT=dev-app-proxy..."
+    export PANGEO_FORGE_DEPLOYMENT=dev-app-proxy
 fi

 echo "setting terraform env..."
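The `${PANGEO_FORGE_DEPLOYMENT:=dev-app-proxy}` expansion in `heroku.yml` above relies on bash's
assign-default parameter expansion. A minimal standalone sketch of those semantics (illustrative
only, not part of this diff):

```bash
#!/usr/bin/env bash
# ':=' assigns the default to the variable itself when it is unset or empty,
# so the commands chained after it (echo, sops, etc.) see the assigned value.
unset PANGEO_FORGE_DEPLOYMENT
echo "${PANGEO_FORGE_DEPLOYMENT:=dev-app-proxy}"   # prints: dev-app-proxy
echo "${PANGEO_FORGE_DEPLOYMENT}"                  # still:  dev-app-proxy

PANGEO_FORGE_DEPLOYMENT=pangeo-forge-staging
echo "${PANGEO_FORGE_DEPLOYMENT:=dev-app-proxy}"   # prints: pangeo-forge-staging
```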
diff --git a/secrets/config.dev-app-proxy.yaml b/secrets/config.dev-app-proxy.yaml new file mode 100644 index 00000000..80e82a7b --- /dev/null +++ b/secrets/config.dev-app-proxy.yaml @@ -0,0 +1,26 @@ +fastapi: + PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:ywwwwN6Noco5Rt5DVHmXrFCilPWsvgWEUoy9DBGTJD8=,iv:La9RRJL1WGMR+N7IKE+XFl98LH8CUPZGTSOxRx9NXTI=,tag:pfXwKio8DWPmUVVojtkerQ==,type:str] +github_app: + app_name: ENC[AES256_GCM,data:HgV8uE8llvsEsfKxvg==,iv:QI7WQtVSQ0VVU6ou8K1G1SFndD+PptrdyPGiM243PUg=,tag:F2M9rT0Y1uTdRP7RtuyUZA==,type:str] + id: ENC[AES256_GCM,data:QTTmPboQ,iv:CCwpSsUkK146DnG7vaD34atotkW/0SdA5FD49XrFbG8=,tag:qMQmDDBgkuCVOjPJwTlisQ==,type:int] + private_key: ENC[AES256_GCM,data:9tI/81JeSSu4pdVmbCktc6gP4InyfYCYWsyNC+RWbPysshfW6/kN2NE8YpCGfhh2LhCxP6Qvq2Bh915RAz+4k1+P8/NFSI23dvg3SXgvX5fmnXNi5qyl/L6PvJVA+7HfWzttBlaO0OTca6d6Aou6xWTK90TgJ/pQ2c2ZRKCJAVu7TGpT8U8A21eDG4+a1FKtldgX59F6X8+kts7SuaOT1aPbupQmo9+xkrJfB1g+/pp+llqV2BpQdf0oYxmKzL29kwWE/5UsBC+rIvwIuHiQazviO8FEKy/tEvDZu0TzbifpvbqCL9mIbayeJ0mKSl3ug40hR8b1Ox7q0E6+InuUBp4fgOmUsBRi6y6URu6+OeWPHGo4U0PYJ+RqK6HRpfs6ef9TBifZfvS6C+s3dSWfLKcImMGDq9MUTy0FHe+8TrDvEjuDFeGdhsgBUc8LZc2PE1n7uRdfp+6d8Pi+iGjY3RSFHl4iwGdEPwUYuJ76Z11hoF72JFVh+ncASovWNa1649VvXjwdEI7ziI0uZIUnGSir0JVIgOF49s4atFtNJNc+onr+ZAx3ZjIBj+R4kkkdj2ldhNMj402vGzzZaiXtDGNe3ENhCTy+BwgZlOLXU9HuC/TFFLGIBLAZ27A+o72KWWxdAeF+rJholkH5U7/XMGdlUiAwk5boYZxs8YTDi+nLmt4qyZZ26I3H36XIFntwFOpoRHLcF75H013UeuAwF8wF1TyOJxkscxOhSV5Hjs6mEX9Tz3egCkn4n85iIJAXUE3uNOCGCLaPvR5lZXUiIAtIX+hxfFKzbqWnw0LGS6hj+xwstszNbvC2/b/q2OwMrJNtMHeP1PT0DHBJMAT60IMMQS/1jvbyeAwqxLxucdhzg72f698tfNdc8ssFzS9lkOz7tGHTfUHsw4Batx7DpJRDBVcFhZirLMFMR5wobhsVI+D4Iyimk31LHVfnFHXIhxgUVlRRb3TYyzSE4dXV4mZMSahxSROz0067QblGfuFzjkL168NrBaXW7UBSMncCx7Au5BM3JOFx1hDwxo/srKEmxC1ZoDCbeD2fjQgDbD8bWprFVyglkP5OUvENaQSX/ZxfbZPg3U/XaT3YdbZ1ORucIHCV9W6O77DVQ17ImwLR1rADh0UX8cLT3bV3zSkRAe03QbDCpHYmEkQbJdQ96EJtSnUnxRdAlcYhznNHMY9XUt8CWjtRWKD5Xo7jyCgFKC+8ZELUjrpJvsnbRIpPO4rN/AShUUlw2x1zHZTnSGFAUjNTY6zXNCVt1WtGEkKsUoHFb/92Hm7pe2pRpss7rc71UNMqWYbc31AK7jZbrtaAf5WwRKstNKUBighyymgYVkzGAL2peOiKwWokngeuRRxrXdN7qjbsyiqvQtBtMa96G1eE3hY5x5uwbhC69Gcs+fKZ32JaNLCZiFnk4GbLc665bxkEzNn9zUlhqQ+3Tbb8qkzawrpv5nsCuSYEDk3UNGWCMdyWZm3MHH0r5yJlhsSF3/OTri8s73KV+v0KggJ1m/afrjKRjl+6O7cq+Wvyr2ymI8njYYRkIU3emJwKK6y+4k09N/CyKE6mRvxrrpADpVNY2vsskKhHXJtUbx2qyoxwS4a8ObuTz7p/AN/5X8aAN9v5MOGYgH6Pr8q76b13unYYl8NPOPxG3yxWEcvFbuG4qpuxrAA0pU3ksuflcfim+k3agEuCTU3QYUcZnCBCfsllXetKoTME8+JNpfSh3Lfn99WE80weCvBcyvKDR3MJ67Mga/ifSdRjvisM3vHmbgghWXVhSRQrYg5mabFicW/rz4ojfVGtiQKFIPR/W6Dh/nJm/7v57ocMHz0exlnK8jNSRkdoB4M4sOAIOJ867Dqd8zloyJ5FqKyH+2HwnMBDCTCsj1oVtKU1LyR7JbR9RhHddRFucR3TfQzqE3sB6MZ38ri3b+AJdRbfN2Ag73/xBcH9m70BH115fu/yTio4nSOoHN4omKbd1hrQtULCBvDjG890OQqAkZGZjAyeRG7NH/jCtTsNjv0BipS0MBitq5MSZ4VCeSxIxX89SVb5ufiWoqzky8I9xJAMA+WZqUB6EEKFClwKYnvcJ6RD7Ne2ScoZaBz3xdN4zsGUOpI5/b/NH7OZBk1zesM+TMnKEYEkrUlWfB8nTquvcYVdqdeTG/bJpDQj3UFKOheh0xAhVR7/tXDcyyrylNJeWiZZ4fthpcZo2R3THvFrTppwwCbYB+lciLe8nbQv9s7LFKY=,iv:T7xjhuolzQPvHXu3My0x1zzqynGz+xBl6LtiZC5s6Qk=,tag:8sBbZd7I+cfiblfCvQSkrA==,type:str] + webhook_secret: ENC[AES256_GCM,data:IXM90fuSlZOHStTMezYtE7CiTz2QaX0jRemXuAkaC542vF4bbJbqfQ==,iv:OrOWzepafRyH5eQB70y3rzXdBN10lk0WSLjPyvt9H0M=,tag:m9Bz8usyu65DfXyKGQjjnQ==,type:str] +sops: + kms: + - arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027 + created_at: "2023-01-27T21:47:51Z" + enc: 
AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwGJYo4LG4LywIw4SzygEjqgAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQM6lVQBq8/Fu07uuNVAgEQgDvgYdES8td7nMzWbwF+h63uPjVjH161zUPNYupat6px9yaxz01Ui8v4Zn9kMXA6UcQuZCJABHMS/gp6BA== + aws_profile: "" + - arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e + created_at: "2023-01-27T21:47:51Z" + enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgE6pXG5A0C4c/AqNm+PChBnAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMhZAPYiawhTUehx+xAgEQgDuVDWSyPxnoAGCtVsEKCxWvDHTkZQBlDWdcQRYIEQQU0oyifTtT3OdYu11WRNDjo4360egs463ScV9uUQ== + aws_profile: "" + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2023-01-27T21:47:51Z" + mac: ENC[AES256_GCM,data:jw3icjVL0rn85lCFBEwcyKM00DvvJ81mvd5pM487rL86cfS8gBRewqyylK87P7l9OA9L82L50f2DwDqVtxR45yCzvy1Y9lRYqTtvC1aQCzrOqoECs0eM+6D5sPLLKKWnVUnvwTl9yD5xb8r1IIrZkW2lkRanA9GPpUjf2Vol3A8=,iv:JNw+IQs+AkLsfdjFPGndDs+uekM/n5ZMtahFoe/91Rs=,tag:8oppzfjFY8BFnD/KTT2TJQ==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/secrets/config.pforge-pr-157.yaml b/secrets/config.pforge-pr-157.yaml deleted file mode 100644 index 7084d0d8..00000000 --- a/secrets/config.pforge-pr-157.yaml +++ /dev/null @@ -1,26 +0,0 @@ -fastapi: - PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:MlO5jf+F9maWgxsaIIyUQboOq1uuhFn+6xgS1O/+Fsc=,iv:MY7iXKpKPKrd1xPiSKirSgryvjTKTUISXw7Y0nk2CEQ=,tag:vu3GUt2uI59Q0O9dK2PX1A==,type:str] -github_app: - app_name: ENC[AES256_GCM,data:B33ymW2r5KYsYHk8eg==,iv:8+1eEKt4DoIlpzTZuIqLxFQy1ybIPMn1v+2bI3UjtxU=,tag:MFXVjUpokCkSbNhQEi4Chg==,type:str] - id: ENC[AES256_GCM,data:W1QhkzVO,iv:JnVPFQ4tEDsfdGJjHEqKIxOSSYe5HcP7fVKNeQUUs3A=,tag:zc5YBn+kTjB4Gi9BXlod4g==,type:int] - private_key: 
ENC[AES256_GCM,data:hZEGGOm41u3NOeK1THdcmcOebvNx6Sw488+xPYVGg4ga/4Ea9+ptd/LSWINdm3NxRTlFjTPv4yy1KhqWO5a/xdEl8kGh0vRMQrE53ODHheHQDs4iu7YUMPFQhr4NzPsVRE0z6oGPJWo5AAvoL4jb5vdhSSfROp/eI/9JIrrtlm2ir1JowzlxsCn6FXo1VB+sj98/r+0y+zApJYc7hoz2kb5hPQiysTPsz9lHv2uyKv1/RcaTc8+mt0NR9vj14sE4rrMh33ltenSRhPXSaWcZzHS8xQofIg6obR3O84PI6yRvQAu9Dsgsz8dieaZCxfI1METMp9TyeN0TZBKllczQYtkXQuE7CBOdyRGb00ehvznNRAGinrdfEt+5OaKYxsuaBbYasELFs2/veLJ4gDx3yisje94GMxb/3jkW20ko66ztQYaEt2hgy2HMCvXqvhRYPghE3wHjS4Nilmnn53WwhSRjam/UvYguqdFJtFgUijFeqWV04sk7fWTjpjDrG9Sqk083IkxLr1lsZ14lkEcyPv3afAcqLdFmFUz/Gmt38WwedBCnJ1cICWN0LyTkqRmxRS3uEgRGzcrjonMaPpeHp+DdXpb5V+2BGdIvRNXgPbil1OJ8dYHEJEO8E3U1aFrH29O1k4/Mbzgy7wtK1nplBkVQfDCRN7kJksi/ANE+2VXDoLxGVmwbuU42A1efZmaRpyRTYz0IQvbhtCB3EJhDklphQZ0y1Zjw3EEWPo9aUcQHPRiarZzEBMPve1Mx52xTIqHeOy+9aht/vuda2My1/Kki5TK5V8Yl5gs/UN/atSkJ2aph57QKNdWbdphbfv+vkLzVpmjPrP9bKltPA1lhnhpcaxnLZgS0Cp42lv6yKJfFL+GLeMvMNRBKSSo9ff17iKkvbywYDgBubfTrfsHTw/KnwED9WDornL+TdEdBhBba7oTmBGurxohFXz6GH3HPtBUc9kQTX94yyLSrAnG1HCZ4etZDVqzbjPIAvfu55o0oDV1zmHJAuOZyd+0P1UQPv3Uiwq1poCiULT8ZQCWyEvYVatHu/euRpNdDR0ESVZSHMFGiViejz5spl8r6p0iN2Ot1KGD3fa4mESluyxADWLbE+g8RRK8O1wjaXFCTpH/I7TtY2XH5LtmZSzf11tqERGzaHsOPLC97QSOwfULnEqeA2302nGEeM/gUZITS8FCh7shdEW10v+hYQ6DYtA7xhb+olSTvqOGptFZREYWCawWE2HUn9Qo9DVP8vnYycPLxwgb4harlKX0EIegm9eGDzbbP02b11GX6bUXkEAcsAW8cI52S7IHU9wzVWbkVaMVID49d5YjRYqr6VzX6DPI3ywf9bvOL9DkX/maFLHX/A1zCntYuxCnuSi+CK2s+mX9nYVj7DwsZeE7JtyxSY+dD/WkYxuzkokGsVXVARcTfPbMtEoam6QHzunBEfIQ7/suqdG1PqkW6qL8r+ATcIGUDU07V2sA4Mlr/DdUnKSMeYW5XgHTVUUVPEWBsHDtrk14HQLNR9VtqLziGQ/nt8mIurcmJM3PwcIr3gkn5FA8hepe9vltCdQ3A7tayYhTW50F200h8Z16KjTpHj/Ryo4sQnDBu/PHeI1kOqRZxXBT2Z58nj/yTEPtPY2TKUyo+xDb6szm8LbI9a3I936ia/CiE7yXyvGHx779aT1BNMZszmmC08PSqnffm6gVgUYf/bQqWOPcByUhvYWmo3J3cltMm2FyS8EPqHxWW7Ty0inkLrPupPwjhNA/z+IWSCbsNEPDMFzc4giEk4dsH1fs01nTd4IMN6P/YL5SMxiPNZ+Nj8/NdKOhxDJxRbQywDl5nVFl9N8CgnDjA/X5aYBi7rR9YxcWPHdp9morX9/PZH3fYrtdFTIXQQinBSli/dfrezM0J14d3RFTK0nlJnISL4kIr2EgsE0Wm+REn7cMRkuzvjbREk7ui4UscJ9vPwhC4+oJhFYHEcA1yOaE7B+VnfGcb3Y9j+j8nggcqTB0RhgrgBVnTH2cg84flHJqtsv2AiPV5U+EtEpOZW8/6Hs6eDJjMOX70E4V2BADPj26/iGTRcjZgPXJwFLwox10jQeMass6xRCu4ubk3UlvPzNhZ7s+jZy9vLN4yjG87TbEUkc2/LLHsKG2PmbI+43igRI09QkKTxX01uSXBsUFtkeIuGV4BXJ0flATCsTSAdIBZuyDlgs6XV/U+fMKgkK+a4offxBRlyuOecEvUtNCuJspXVeM=,iv:pwdazy/432hwXNu4CIuuCX6XaswQDbjUR78Tcxluec8=,tag:Px5FRLUGuEAekC74dezJdg==,type:str] - webhook_secret: ENC[AES256_GCM,data:XDJoW74WAEvKIpOjggjzMPkhlZklilf2YIlvul1KK7eDZUnAiJ1B0w==,iv:z/QZI8WcIvHBXnbE6kyp3vIfFiJYSR+NMI1Xm4sTbBQ=,tag:ukuYWiur+NlHJTSpax+NvA==,type:str] -sops: - kms: - - arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027 - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwEOVvZ+Bkb8BSnkr/2qDnaMAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMo1r1NiVj8DkbjwX+AgEQgDsXHd10jxFx9KZfHFWDPONRHFP6gcDLytRzixqaOasN4pbyxNgxbXtTRgqYQyZV5UtSanQgwSMSZ9VtMw== - aws_profile: "" - - arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgHw+nXyjHtlMsZ6cHd+RSWBAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMkwCxlaOjVIdX3xb7AgEQgDtL1O5koHLP+nlxhd13r27fsZB8Co9sO4x9KmSY6+N39urR3UUp7TV5nOS7D6Uu6FfIPhW4mQNB74Mqsg== - aws_profile: "" - gcp_kms: [] - azure_kv: [] - hc_vault: [] - age: [] - lastmodified: "2022-10-03T16:05:31Z" - mac: 
ENC[AES256_GCM,data:PLgAP7yVGaq7IXmXxOplwcrLgtEoocmWesFOrqkr+7pXgtZWWr9/hUJNLwBsEz2fD5ma4KZuuFY02ateh2ae3uAgIigoLciIIki5lZ7ijxk+3C0NBaWFIIubuUkKkyNwqLLqzS9kp+TP8qHdXX9txjteDQTsB7k9Fqkx/dEX3RM=,iv:5kAHobhOB4nWJai63P9gNdvbiwsbavSttJSxTDFpWW0=,tag:Ip4rJYPllxCxyFJzljcFsw==,type:str]
-    pgp: []
-    unencrypted_suffix: _unencrypted
-    version: 3.7.3
diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py
new file mode 100644
index 00000000..d125e626
--- /dev/null
+++ b/tests.integration/test_dataflow.py
@@ -0,0 +1,432 @@
+import asyncio
+import json
+import os
+import random
+import subprocess
+import time
+from urllib.parse import urljoin, urlparse
+
+import aiohttp
+import jwt
+import pytest
+import pytest_asyncio
+from gidgethub.aiohttp import GitHubAPI
+from gidgethub.apps import get_installation_access_token
+from pydantic import BaseModel, SecretStr
+
+
+class GitHubApp(BaseModel):
+    name: str
+    id: int
+    private_key: SecretStr
+
+
+@pytest.fixture(scope="session")
+def github_app() -> GitHubApp:
+    return GitHubApp(
+        name="dev-app-proxy",
+        id=238613,
+        # the private key is passed to the env as a `\n`-delimited, single-line string from github
+        # repository secrets. when passed to the env, single backslash `\n`s become double `\\n`s,
+        # so that needs to be reversed here. this is just one of many possible ways to manage
+        # multiline private keys in the env, and, for our case, i believe the simplest option;
+        # see also: https://github.com/dwyl/learn-environment-variables/issues/17.
+        private_key=os.environ["DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY"].replace("\\n", "\n"),
+        # NOTE: ☝️ this ☝️ credential **must match** the latest version stored in the SOPS-encrypted
+        # private key for the `dev-app-proxy` app stored in pangeo-forge-orchestrator. When that key
+        # is rotated, this corresponding credential in github repository secrets must also be updated.
+        # we are duplicating this credential in two places because, for ci testing, it's much simpler
+        # to source this from github repository secrets than it would be to SOPS-decrypt from disk.
+        # the cost of that simplicity is this duplication.
+    )
+
+
+@pytest_asyncio.fixture
+async def gh(github_app: GitHubApp) -> GitHubAPI:
+    """A global gidgethub session to use throughout the integration tests."""
+
+    async with aiohttp.ClientSession() as session:
+        yield GitHubAPI(session, github_app.name)
+
+
+@pytest_asyncio.fixture
+async def gh_token(github_app: GitHubApp, gh: GitHubAPI, gh_kws: dict) -> SecretStr:
+    payload = {
+        "iat": int(time.time()),
+        "exp": int(time.time()) + (10 * 60),
+        "iss": github_app.id,
+    }
+    gh_jwt = jwt.encode(payload, github_app.private_key.get_secret_value(), algorithm="RS256")
+
+    async for installation in gh.getiter("/app/installations", jwt=gh_jwt, **gh_kws):
+        # dev-app-proxy is only installed in one org (i.e., pforgetest), so
+        # the first iteration will give us the installation_id we're after
+        installation_id = installation["id"]
+        break
+    token_response = await get_installation_access_token(
+        gh,
+        installation_id=installation_id,
+        app_id=github_app.id,
+        private_key=github_app.private_key.get_secret_value(),
+    )
+    # wrap in SecretStr to avoid leaking in failed test logs,
+    # see https://github.com/pytest-dev/pytest/issues/8613
+    return SecretStr(token_response["token"])
+
+
+@pytest.fixture
+def gh_kws() -> dict:
+    return {"accept": "application/vnd.github+json"}
+
+
+@pytest.fixture
+def app_url() -> str:
+    """The review app url as provided by Heroku."""
+    return os.environ["REVIEW_APP_URL"]
+
+
+@pytest.fixture
+def app_netloc(app_url) -> str:
+    """Netloc of review app as parsed from app_url fixture."""
+    return urlparse(app_url).netloc
+
+
+@pytest.fixture
+def app_recipe_runs_route(app_url) -> str:
+    """Route on review app under test at which recipe runs can be retrieved."""
+    return urljoin(app_url, "/recipe_runs/")
+
+
+@pytest.fixture
+def gh_workflow_run_id() -> str:
+    """Identifies the GitHub Workflow run which called this test."""
+    return os.environ["GH_WORKFLOW_RUN_ID"]
+
+
+@pytest.fixture
+def base():
+    """The base repo against which the reference (i.e. source) PR has been made."""
+    return "pforgetest/test-staged-recipes"
+
+
+@pytest.fixture
+def pr_number_and_recipe_id() -> str:
+    """The PR number and recipe_id to test, formatted as '{PR_NUMBER}::{RECIPE_ID}'."""
+    return os.environ["PR_NUMBER_AND_RECIPE_ID"]
+
+
+@pytest.fixture
+def source_pr_number(pr_number_and_recipe_id: str) -> str:
+    """The number of a PR on pforgetest/test-staged-recipes to replicate for this test."""
+    return pr_number_and_recipe_id.split("::")[0]
+
+
+@pytest.fixture
+def recipe_id(pr_number_and_recipe_id: str) -> str:
+    """The recipe_id of the recipe defined in the PR to run during this test."""
+    return pr_number_and_recipe_id.split("::")[-1]
+
+
+@pytest_asyncio.fixture
+async def pr_label(gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, app_netloc: str):
+    label_name_fmt = "fwd:{app_netloc}"
+    if "smee" not in app_netloc:
+        # smee proxy urls do not take the route path; heroku review apps do.
+        label_name_fmt += "/github/hooks/"
+
+    exists = False
+    async for label in gh.getiter(f"repos/{base}/labels", **gh_kws):
+        if label["name"] == label_name_fmt.format(app_netloc=app_netloc):
+            exists = True
+            break
+    if not exists:
+        label = await gh.post(
+            f"/repos/{base}/labels",
+            data=dict(
+                # use the same format string as the existence check above, so that
+                # smee urls (which omit the route path) are also handled correctly.
+                name=label_name_fmt.format(app_netloc=app_netloc),
+                color=f"{random.randint(0, 0xFFFFFF):06x}",
+                description="Tells dev-app-proxy GitHub App to forward webhooks to specified url.",
+            ),
+            oauth_token=gh_token.get_secret_value(),
+            **gh_kws,
+        )
+    yield label["name"]
+    # TODO: delete label after every test? it could certainly be reused multiple times if not.
+    # if we do delete the label here, then the check to see if it exists would only hit if the label
+    # had been manually created outside a test session, or if the test runner happened to have
+    # errored out on the prior test attempt (before the label had been deleted).
+
+
+@pytest_asyncio.fixture
+async def recipe_pr(
+    gh: GitHubAPI,
+    gh_token: SecretStr,
+    gh_kws: dict,
+    gh_workflow_run_id: str,
+    source_pr_number: str,
+    base: str,
+    pr_label: str,
+    pr_number_and_recipe_id: str,
+):
+    """Makes a PR to ``pforgetest/test-staged-recipes`` with labels ``f"fwd:{app_netloc}{route}"``,
+    where ``{route}`` is optionally the path at which the app running at ``app_netloc`` receives
+    GitHub Webhooks. The label ``f"fwd:{app_netloc}{route}"`` informs the ``dev-app-proxy`` GitHub
+    App where to forward webhooks originating from the PR. After the PR is created, its identifying
+    information is yielded to the test function using this fixture. When control is returned to this
+    fixture, the PR and its associated branch are closed & cleaned up.
+    """
+    # create a new branch on the test repo with a descriptive name.
+    # (in the typical contribution process, contributions will likely be from forks. the deviation
+    # from that process here may introduce some subtle differences with production. for now, we are
+    # accepting that as the cost for doing this more simply; i.e., all within a single repo.)
+    main = await gh.getitem(f"/repos/{base}/branches/main", **gh_kws)
+
+    jobs = await gh.getitem(
+        f"/repos/pangeo-forge/pangeo-forge-orchestrator/actions/runs/{gh_workflow_run_id}/jobs"
+    )
+    this_job = [j for j in jobs["jobs"] if pr_number_and_recipe_id in j["name"]].pop(0)
+    # example working branch name would be runs/4179677701/jobs/7239892586, as parsed from html url
+    # 'https://github.com/pangeo-forge/pangeo-forge-orchestrator/actions/runs/4179677701/jobs/7239892586'
+    working_branch_name = this_job["html_url"].split("/actions/")[-1]
+    working_branch = await gh.post(
+        f"/repos/{base}/git/refs",
+        data=dict(
+            ref=f"refs/heads/{working_branch_name}",
+            sha=main["commit"]["sha"],
+        ),
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+
+    # populate that branch with content files from the source pr
+    src_files = await gh.getitem(
+        f"repos/{base}/pulls/{source_pr_number}/files",
+        **gh_kws,
+    )
+
+    async def add_file(f):
+        content = await gh.getitem(f["contents_url"], **gh_kws)
+        await gh.put(
+            f"/repos/{base}/contents/{f['filename']}",
+            data=dict(
+                message=f"Adding {f['filename']}",
+                content=content["content"],
+                branch=working_branch_name,
+            ),
+            oauth_token=gh_token.get_secret_value(),
+            **gh_kws,
+        )
+
+    # add first source file to working branch. see the comment below, where `add_file` is
+    # called a second time, for why both files are not added at the same time.
+    await add_file(src_files[0])
+
+    # open a pr against pforgetest/test-staged-recipes:main
+    pr = await gh.post(
+        f"/repos/{base}/pulls",
+        data=dict(
+            title=f"[CI] Automated PR for {working_branch_name}",
+            head=working_branch_name,
+            body=(
+                f":robot: Created by test run job {this_job['html_url']}\n"
+                f":memo: Which is testing {pr_label.replace('fwd:', 'https://')}"
+            ),
+            base="main",
+        ),
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+
+    # label the pr so the dev-app-proxy knows where to forward webhooks originating from this pr
+    await gh.put(
+        f"/repos/{base}/issues/{pr['number']}/labels",
+        data=dict(labels=[pr_label]),
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+
+    # add the second source file (after labeling, so that the `synchronize` task will be forwarded)
+    # for explanation of why files are added one at a time (rather than at the same time) see:
+    # https://github.com/pangeo-forge/pangeo-forge-orchestrator/pull/226#issuecomment-1423337307
+    await add_file(src_files[1])
+
+    # wait a moment to make sure the new file is set on github, then get the pr
+    # in its current state (otherwise head_sha will not reflect the latest commit)
+    await asyncio.sleep(3)
+    completed_pr = await gh.getitem(f"/repos/{base}/pulls/{pr['number']}", **gh_kws)
+
+    print(f"\nYielding {completed_pr['head']['sha'] = } from recipe_pr fixture...")
+    yield completed_pr
+
+    # close pr and delete branch
+    await gh.patch(
+        pr["url"],
+        data=dict(state="closed"),
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+    await gh.delete(
+        working_branch["url"],
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+
+
+@pytest_asyncio.fixture
+async def recipe_run_id(recipe_pr: dict, app_recipe_runs_route: str):
+    # at the start of this test, the recipe_pr fixture has already made a pr on github, but we
+    # don't know exactly how long it will take for that pr to be synchronized to the review app,
+    # so we run a loop to check for when the synchronization is complete.
+
+    # (when heroku re-builds a review app that has previously been built, the database attached to
+    # that review app persists between builds. the database is only reset if the review app is
+    # deleted, not simply rebuilt. therefore, even though each invocation of this test creates
+    # just one recipe_run, there can easily be many recipe runs in the heroku review app database.
+    # as such, we parse which specific recipe_run we're currently testing by comparing head_shas.)
+    await asyncio.sleep(10)
+    start = time.time()
+    print("Querying review app database for recipe run id...")
+    while True:
+        elapsed = time.time() - start
+        async with aiohttp.ClientSession() as session:
+            get_runs = await session.get(app_recipe_runs_route)
+            runs = await get_runs.json()
+            if any([r["head_sha"] == recipe_pr["head"]["sha"] for r in runs]):
+                run_id = [r for r in runs if r["head_sha"] == recipe_pr["head"]["sha"]][0]["id"]
+                print(f"Found matching recipe run in review app database with recipe_{run_id = }...")
+                break
+            elif elapsed > 30:
+                # synchronization should only take a few seconds, so if more than 30
+                # seconds have elapsed, something has gone wrong and we should bail out.
+                pytest.fail(f"Time {elapsed = } on synchronization.")
+            else:
+                # if no head_shas match, the sync task may
+                # still be running, so wait 5s then retry.
+                await asyncio.sleep(5)
+    yield run_id
+
+
+@pytest_asyncio.fixture
+async def dataflow_job_id(
+    recipe_run_id: int,
+    app_recipe_runs_route: str,
+    gh: GitHubAPI,
+    gh_token: SecretStr,
+    gh_kws: dict,
+    base: str,
+    recipe_pr: dict,
+    recipe_id: str,
+):
+    # now we know the pr is synced, it's time to dispatch the `/run` command
+    comment_body = f"/run {recipe_id}"
+    print(f"Making comment on test PR with {comment_body = }")
+    await gh.post(
+        f"/repos/{base}/issues/{recipe_pr['number']}/comments",
+        data=dict(body=comment_body),
+        oauth_token=gh_token.get_secret_value(),
+        **gh_kws,
+    )
+    # start polling the review app database to see if the job has been deployed to dataflow.
+    # if the job was deployed to dataflow, a job_id field will exist in the recipe_run message.
+    print("Polling review app for dataflow job submission status...")
+    start = time.time()
+    while True:
+        elapsed = time.time() - start
+        async with aiohttp.ClientSession() as session:
+            get_run = await session.get(urljoin(app_recipe_runs_route, str(recipe_run_id)))
+            run = await get_run.json()
+            message = json.loads(run["message"] or "{}")
+            if "job_id" in message:
+                job_id = message["job_id"]
+                print(f"Confirmed dataflow job submitted with {job_id = }")
+                break
+            elif elapsed > 60 * 5:
+                # job submission is taking longer than 5 minutes, something must be wrong, so bail.
+                pytest.fail(f"Time {elapsed = } on job submission.")
+            else:
+                # if there is no job_id in the message, and less than 5 minutes has elapsed in this
+                # loop, the job submission might still be in progress, so wait 30 seconds and retry
+                await asyncio.sleep(30)
+    yield job_id
+
+
+@pytest_asyncio.fixture
+async def dataflow_job_state(dataflow_job_id: str):
+    # NOTE: much of this test is redundant with the dataflow integration test at
+    # https://github.com/pangeo-forge/pangeo-forge-runner/
+    # blob/c7c5e88c006ce5f5ea636d061423981bb9d23734/tests/integration/test_dataflow_integration.py
+
+    # 6 minutes seems like an average runtime for these jobs, so, being optimistic,
+    # let's start by waiting 5 minutes
+    start = time.time()
+    utc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(start))
+    print(f"Waiting for 5 mins, starting at {utc_time = }")
+    # use a non-blocking sleep, since this is an async fixture
+    await asyncio.sleep(60 * 5)
+    # at this point, the job has been submitted and we know the job_id, so it's time to start
+    # polling dataflow to see if it's completed.
+    show_job = f"gcloud dataflow jobs show {dataflow_job_id} --format=json".split()
+    while True:
+        elapsed = time.time() - start
+        print(f"Time {elapsed = }")
+        if elapsed > 60 * 12:
+            pytest.fail(f"Time {elapsed = } on running job.")
+
+        # check job state
+        state_proc = subprocess.run(show_job, capture_output=True)
+        assert state_proc.returncode == 0
+        state = json.loads(state_proc.stdout)["state"]
+        print(f"Current {state = }")
+        if state == "Done":
+            # on Dataflow, "Done" means success
+            break
+        elif state == "Running":
+            # still running, let's give it another 30s then check again
+            await asyncio.sleep(30)
+        else:
+            # consider any other state a failure
+            pytest.fail(f"{state = } is neither 'Done' nor 'Running'")
+    # if we get here without failing out, the yielded state should be 'Done'
+    yield state
+
+
+@pytest_asyncio.fixture
+async def job_status_notification_comment_body(
+    gh: GitHubAPI,
+    gh_kws: dict,
+    base: str,
+    recipe_pr: dict,
+    dataflow_job_state: str,
+):
+    # this value is not actually used below, but we include it as a fixture
+    # here to preserve the desired dependency chain of fixtures in this module
+    assert dataflow_job_state == "Done"
+
+    start = time.time()
+    while True:
+        elapsed = time.time() - start
+        if elapsed > 60 * 5:
+            pytest.fail(f"Time {elapsed = } waiting for job success notification comment.")
+
+        comments = await gh.getitem(
+            f"/repos/{base}/issues/{recipe_pr['number']}/comments",
+            **gh_kws,
+        )
+        if comments:
+            last_comment_body: str = comments[-1]["body"]
+            if not last_comment_body.startswith("/run"):
+                break
+        # either there are no comments yet, or the latest comment is still our own
+        # `/run` command; wait before polling again
+        await asyncio.sleep(15)
+
+    yield last_comment_body
+
+
+@pytest.mark.asyncio
+async def test_end_to_end_integration(
+    job_status_notification_comment_body: str,
+    recipe_pr: dict,
+    recipe_id: str,
+):
+    assert job_status_notification_comment_body.startswith(
+        f":tada: The test run of `{recipe_id}` at {recipe_pr['head']['sha']} succeeded!"
+    )
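For local debugging, the same entrypoint the workflow's `Run test` step uses can be invoked
directly. A sketch, assuming a review app is already deployed for your PR, `GH_WORKFLOW_RUN_ID`
references a real workflow run in `pangeo-forge-orchestrator` (the working branch name is derived
from it), the private key placeholder is replaced with the real `dev-app-proxy` credential, and
`gcloud` is authenticated with credentials that can read Dataflow job state:

```console
$ DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='...' \
  GH_WORKFLOW_RUN_ID=4179677701 \
  PR_NUMBER_AND_RECIPE_ID='22::gpcp-from-gcs' \
  REVIEW_APP_URL=https://pforge-pr-${PR number}.herokuapp.com/ \
  pytest -vxs tests.integration/test_dataflow.py
```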