From b2593be46512a563df931ca8dcc4f1c01343288b Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 10:10:56 -0800
Subject: [PATCH 01/66] add build-review-app workflow

---
 .github/workflows/build-review-app.yaml | 33 +++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/workflows/build-review-app.yaml

diff --git a/.github/workflows/build-review-app.yaml b/.github/workflows/build-review-app.yaml
new file mode 100644
index 00000000..90c90abd
--- /dev/null
+++ b/.github/workflows/build-review-app.yaml
@@ -0,0 +1,33 @@
+name: Build Review App
+
+on:
+  pull_request:
+    branches: ['main']
+    types: [opened, reopened, labeled]
+
+env:
+  PIPELINE: '17cc0239-494f-4a68-aa75-3da7c466709c'
+  REPO_URL: 'https://github.com/pangeo-forge/pangeo-forge-orchestrator'
+
+jobs:
+  build:
+    if: |
+      github.event.label.name == 'build-review-app' ||
+      contains( github.event.pull_request.labels.*.name, 'build-review-app')
+    runs-on: ubuntu-latest
+    steps:
+      # https://devcenter.heroku.com/articles/platform-api-reference#review-app-create
+      - run: |
+          curl -X POST https://api.heroku.com/review-apps \
+            -d '{
+              "branch": "${{ github.ref_name }}",
+              "pr_number": ${{ github.event.pull_request.number }},
+              "pipeline": "${{ env.PIPELINE }}",
+              "source_blob": {
+                "url": "${{ env.REPO_URL }}/tarball/${{ github.sha }}",
+                "version": "${{ github.sha }}"
+              }
+            }' \
+            -H "Content-Type: application/json" \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}"

From 941f8ac14de67652c0256574b901d7e7dc75068a Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 15:27:56 -0800
Subject: [PATCH 02/66] prebuild Dockerfile and use Dockerfile.heroku for Heroku

---
 Dockerfile.heroku | 1 +
 heroku.yml        | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile.heroku

diff --git a/Dockerfile.heroku b/Dockerfile.heroku
new file mode 100644
index 00000000..7fe4f2aa
--- /dev/null
+++ b/Dockerfile.heroku
@@ -0,0 +1 @@
+FROM pangeo/forge-orchestrator:latest

diff --git a/heroku.yml b/heroku.yml
index 85e0b71b..97a5433e 100644
--- a/heroku.yml
+++ b/heroku.yml
@@ -4,7 +4,7 @@ setup:
     - plan: papertrail:fixa
 build:
   docker:
-    web: Dockerfile
+    web: Dockerfile.heroku
 release:
   command:
     - ./scripts.deploy/release.sh

From dee1f3811f122b172e7930cf6d1b9d5980b0f346 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 16:38:45 -0800
Subject: [PATCH 03/66] delete review app workflow first commit

---
 .github/workflows/delete-review-app.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .github/workflows/delete-review-app.yaml

diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
new file mode 100644
index 00000000..e5eac7d9
--- /dev/null
+++ b/.github/workflows/delete-review-app.yaml
@@ -0,0 +1,14 @@
+name: Delete Review App
+on: push
+
+pull_request:
+  branches: ['main']
+  types: [unlabeled]
+
+jobs:
+  delete:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Dump GitHub context
+        id: github_context_step
+        run: echo '${{ toJSON(github) }}'

From 3b47970c228c0656162fcf9d7e54ba9c95c16493 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 16:47:01 -0800
Subject: [PATCH 04/66] fix delete review app on: block

---
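Note: the previous commit's `on:` block was invalid: `pull_request:` was left at the top level of the YAML file instead of being nested under `on:`, so the workflow only ever triggered on `push` and silently ignored the intended unlabel events. The hunk below nests the trigger correctly. A quick local sanity check for this class of mistake might look like the following sketch (illustrative only, not part of the patch; assumes PyYAML is installed):

    import yaml

    with open(".github/workflows/delete-review-app.yaml") as f:
        workflow = yaml.safe_load(f)

    # PyYAML parses the unquoted key `on` as boolean True, so look it up under
    # both spellings before asserting the trigger block is nested correctly.
    triggers = workflow.get("on") or workflow.get(True) or {}
    assert "pull_request" in triggers, "pull_request must be nested under the on: block"
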
 .github/workflows/delete-review-app.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
index e5eac7d9..04c4524c 100644
--- a/.github/workflows/delete-review-app.yaml
+++ b/.github/workflows/delete-review-app.yaml
@@ -1,9 +1,9 @@
 name: Delete Review App
-on: push

-pull_request:
-  branches: ['main']
-  types: [unlabeled]
+on:
+  pull_request:
+    branches: ['main']
+    types: [unlabeled]

 jobs:
   delete:

From f12d737830c7684ec21d5a727ec54aef4543834c Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 17:24:31 -0800
Subject: [PATCH 05/66] actually make delete review app workflow work

---
 .github/workflows/delete-review-app.yaml | 31 +++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
index 04c4524c..0f5dffc2 100644
--- a/.github/workflows/delete-review-app.yaml
+++ b/.github/workflows/delete-review-app.yaml
@@ -5,10 +5,35 @@ on:
     branches: ['main']
     types: [unlabeled]

+env:
+  PIPELINE: '17cc0239-494f-4a68-aa75-3da7c466709c'
+
 jobs:
   delete:
+    if: |
+      github.event.label.name == 'build-review-app'
     runs-on: ubuntu-latest
     steps:
-      - name: Dump GitHub context
-        id: github_context_step
-        run: echo '${{ toJSON(github) }}'
+      - name: Get review app id & export to env
+        run: |
+          curl -s https://api.heroku.com/pipelines/${{ env.PIPELINE }}/review-apps \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}" \
+            | python3 -c "
+          import sys, json;
+          j = json.load(sys.stdin);
+          print(
+              'REVIEW_APP_ID=',
+              [
+                  app['id'].strip()
+                  for app in j
+                  if app['pr_number'] == ${{ github.event.pull_request.number }}
+              ].pop(0)
+          )
+          " >> $GITHUB_ENV
+      - name: Delete review app
+        run: |
+          curl -X DELETE https://api.heroku.com/review-apps/${{ env.REVIEW_APP_ID }} \
+            -H "Content-Type: application/json" \
+            -H "Accept: application/vnd.heroku+json; version=3" \
+            -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}"

From b11cf40d682d01259a052ae204d39613baa4ccd0 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 17:32:27 -0800
Subject: [PATCH 06/66] fix indentation in delete workflow, run build on sync

---
 .github/workflows/build-review-app.yaml  |  2 +-
 .github/workflows/delete-review-app.yaml | 13 +++----------
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/build-review-app.yaml b/.github/workflows/build-review-app.yaml
index 90c90abd..da8af843 100644
--- a/.github/workflows/build-review-app.yaml
+++ b/.github/workflows/build-review-app.yaml
@@ -3,7 +3,7 @@ name: Build Review App
 on:
   pull_request:
     branches: ['main']
-    types: [opened, reopened, labeled]
+    types: [opened, reopened, synchronize, labeled]

 env:
   PIPELINE: '17cc0239-494f-4a68-aa75-3da7c466709c'

diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
index 0f5dffc2..604883d1 100644
--- a/.github/workflows/delete-review-app.yaml
+++ b/.github/workflows/delete-review-app.yaml
@@ -20,16 +20,9 @@ jobs:
             -H "Accept: application/vnd.heroku+json; version=3" \
             -H "Authorization: Bearer ${{ secrets.HEROKU_API_KEY }}" \
             | python3 -c "
-          import sys, json;
-          j = json.load(sys.stdin);
-          print(
-              'REVIEW_APP_ID=',
-              [
-                  app['id'].strip()
-                  for app in j
-                  if app['pr_number'] == ${{
github.event.pull_request.number }}
-              ].pop(0)
-          )
+          import sys, json;
+          j = json.load(sys.stdin);
+          print('REVIEW_APP_ID=', [app['id'].strip() for app in j if app['pr_number'] == ${{ github.event.pull_request.number }}].pop(0))
           " >> $GITHUB_ENV
       - name: Delete review app
         run: |

From 122877bef3cda1c4d5ff576c56f36b3d4a3afbd6 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 17:35:38 -0800
Subject: [PATCH 07/66] fix branch name for review app

---
 .github/workflows/build-review-app.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-review-app.yaml b/.github/workflows/build-review-app.yaml
index da8af843..0ac0144c 100644
--- a/.github/workflows/build-review-app.yaml
+++ b/.github/workflows/build-review-app.yaml
@@ -20,7 +20,7 @@ jobs:
       - run: |
           curl -X POST https://api.heroku.com/review-apps \
             -d '{
-              "branch": "${{ github.ref_name }}",
+              "branch": "${{ github.head_ref }}",
               "pr_number": ${{ github.event.pull_request.number }},
               "pipeline": "${{ env.PIPELINE }}",
               "source_blob": {

From 95fbe3dd328dfe714539f98fd360cd4012042712 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Tue, 24 Jan 2023 17:42:04 -0800
Subject: [PATCH 08/66] use + instead of , to remove whitespace in env

---
 .github/workflows/delete-review-app.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/delete-review-app.yaml b/.github/workflows/delete-review-app.yaml
index 604883d1..4b415833 100644
--- a/.github/workflows/delete-review-app.yaml
+++ b/.github/workflows/delete-review-app.yaml
@@ -22,7 +22,7 @@ jobs:
             | python3 -c "
           import sys, json;
           j = json.load(sys.stdin);
-          print('REVIEW_APP_ID=', [app['id'].strip() for app in j if app['pr_number'] == ${{ github.event.pull_request.number }}].pop(0))
+          print('REVIEW_APP_ID=' + [app['id'].strip() for app in j if app['pr_number'] == ${{ github.event.pull_request.number }}].pop(0))
           " >> $GITHUB_ENV
       - name: Delete review app
         run: |

From 8abf23cf5148af059102b0275f8930326f8aa939 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Wed, 25 Jan 2023 10:06:05 -0800
Subject: [PATCH 09/66] use head sha for review app source blob

---
 .github/workflows/build-review-app.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-review-app.yaml b/.github/workflows/build-review-app.yaml
index 0ac0144c..9f54a2d0 100644
--- a/.github/workflows/build-review-app.yaml
+++ b/.github/workflows/build-review-app.yaml
@@ -24,8 +24,8 @@ jobs:
               "pr_number": ${{ github.event.pull_request.number }},
               "pipeline": "${{ env.PIPELINE }}",
               "source_blob": {
-                "url": "${{ env.REPO_URL }}/tarball/${{ github.sha }}",
-                "version": "${{ github.sha }}"
+                "url": "${{ env.REPO_URL }}/tarball/${{ github.event.pull_request.head.sha }}",
+                "version": "${{ github.event.pull_request.head.sha }}"
               }
             }' \
             -H "Content-Type: application/json" \

From 24044660094248b3bb9616002ec409277a4e9d53 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 13:48:28 -0800
Subject: [PATCH 10/66] add dev-app-proxy secrets

---
 secrets/config.dev-app-proxy.yaml | 26 ++++++++++++++++++++++++++
 secrets/config.pforge-pr-157.yaml | 26 --------------------------
 2 files changed, 26 insertions(+), 26 deletions(-)
 create mode 100644 secrets/config.dev-app-proxy.yaml
 delete mode 100644 secrets/config.pforge-pr-157.yaml

diff --git
a/secrets/config.dev-app-proxy.yaml b/secrets/config.dev-app-proxy.yaml new file mode 100644 index 00000000..80e82a7b --- /dev/null +++ b/secrets/config.dev-app-proxy.yaml @@ -0,0 +1,26 @@ +fastapi: + PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:ywwwwN6Noco5Rt5DVHmXrFCilPWsvgWEUoy9DBGTJD8=,iv:La9RRJL1WGMR+N7IKE+XFl98LH8CUPZGTSOxRx9NXTI=,tag:pfXwKio8DWPmUVVojtkerQ==,type:str] +github_app: + app_name: ENC[AES256_GCM,data:HgV8uE8llvsEsfKxvg==,iv:QI7WQtVSQ0VVU6ou8K1G1SFndD+PptrdyPGiM243PUg=,tag:F2M9rT0Y1uTdRP7RtuyUZA==,type:str] + id: ENC[AES256_GCM,data:QTTmPboQ,iv:CCwpSsUkK146DnG7vaD34atotkW/0SdA5FD49XrFbG8=,tag:qMQmDDBgkuCVOjPJwTlisQ==,type:int] + private_key: ENC[AES256_GCM,data:9tI/81JeSSu4pdVmbCktc6gP4InyfYCYWsyNC+RWbPysshfW6/kN2NE8YpCGfhh2LhCxP6Qvq2Bh915RAz+4k1+P8/NFSI23dvg3SXgvX5fmnXNi5qyl/L6PvJVA+7HfWzttBlaO0OTca6d6Aou6xWTK90TgJ/pQ2c2ZRKCJAVu7TGpT8U8A21eDG4+a1FKtldgX59F6X8+kts7SuaOT1aPbupQmo9+xkrJfB1g+/pp+llqV2BpQdf0oYxmKzL29kwWE/5UsBC+rIvwIuHiQazviO8FEKy/tEvDZu0TzbifpvbqCL9mIbayeJ0mKSl3ug40hR8b1Ox7q0E6+InuUBp4fgOmUsBRi6y6URu6+OeWPHGo4U0PYJ+RqK6HRpfs6ef9TBifZfvS6C+s3dSWfLKcImMGDq9MUTy0FHe+8TrDvEjuDFeGdhsgBUc8LZc2PE1n7uRdfp+6d8Pi+iGjY3RSFHl4iwGdEPwUYuJ76Z11hoF72JFVh+ncASovWNa1649VvXjwdEI7ziI0uZIUnGSir0JVIgOF49s4atFtNJNc+onr+ZAx3ZjIBj+R4kkkdj2ldhNMj402vGzzZaiXtDGNe3ENhCTy+BwgZlOLXU9HuC/TFFLGIBLAZ27A+o72KWWxdAeF+rJholkH5U7/XMGdlUiAwk5boYZxs8YTDi+nLmt4qyZZ26I3H36XIFntwFOpoRHLcF75H013UeuAwF8wF1TyOJxkscxOhSV5Hjs6mEX9Tz3egCkn4n85iIJAXUE3uNOCGCLaPvR5lZXUiIAtIX+hxfFKzbqWnw0LGS6hj+xwstszNbvC2/b/q2OwMrJNtMHeP1PT0DHBJMAT60IMMQS/1jvbyeAwqxLxucdhzg72f698tfNdc8ssFzS9lkOz7tGHTfUHsw4Batx7DpJRDBVcFhZirLMFMR5wobhsVI+D4Iyimk31LHVfnFHXIhxgUVlRRb3TYyzSE4dXV4mZMSahxSROz0067QblGfuFzjkL168NrBaXW7UBSMncCx7Au5BM3JOFx1hDwxo/srKEmxC1ZoDCbeD2fjQgDbD8bWprFVyglkP5OUvENaQSX/ZxfbZPg3U/XaT3YdbZ1ORucIHCV9W6O77DVQ17ImwLR1rADh0UX8cLT3bV3zSkRAe03QbDCpHYmEkQbJdQ96EJtSnUnxRdAlcYhznNHMY9XUt8CWjtRWKD5Xo7jyCgFKC+8ZELUjrpJvsnbRIpPO4rN/AShUUlw2x1zHZTnSGFAUjNTY6zXNCVt1WtGEkKsUoHFb/92Hm7pe2pRpss7rc71UNMqWYbc31AK7jZbrtaAf5WwRKstNKUBighyymgYVkzGAL2peOiKwWokngeuRRxrXdN7qjbsyiqvQtBtMa96G1eE3hY5x5uwbhC69Gcs+fKZ32JaNLCZiFnk4GbLc665bxkEzNn9zUlhqQ+3Tbb8qkzawrpv5nsCuSYEDk3UNGWCMdyWZm3MHH0r5yJlhsSF3/OTri8s73KV+v0KggJ1m/afrjKRjl+6O7cq+Wvyr2ymI8njYYRkIU3emJwKK6y+4k09N/CyKE6mRvxrrpADpVNY2vsskKhHXJtUbx2qyoxwS4a8ObuTz7p/AN/5X8aAN9v5MOGYgH6Pr8q76b13unYYl8NPOPxG3yxWEcvFbuG4qpuxrAA0pU3ksuflcfim+k3agEuCTU3QYUcZnCBCfsllXetKoTME8+JNpfSh3Lfn99WE80weCvBcyvKDR3MJ67Mga/ifSdRjvisM3vHmbgghWXVhSRQrYg5mabFicW/rz4ojfVGtiQKFIPR/W6Dh/nJm/7v57ocMHz0exlnK8jNSRkdoB4M4sOAIOJ867Dqd8zloyJ5FqKyH+2HwnMBDCTCsj1oVtKU1LyR7JbR9RhHddRFucR3TfQzqE3sB6MZ38ri3b+AJdRbfN2Ag73/xBcH9m70BH115fu/yTio4nSOoHN4omKbd1hrQtULCBvDjG890OQqAkZGZjAyeRG7NH/jCtTsNjv0BipS0MBitq5MSZ4VCeSxIxX89SVb5ufiWoqzky8I9xJAMA+WZqUB6EEKFClwKYnvcJ6RD7Ne2ScoZaBz3xdN4zsGUOpI5/b/NH7OZBk1zesM+TMnKEYEkrUlWfB8nTquvcYVdqdeTG/bJpDQj3UFKOheh0xAhVR7/tXDcyyrylNJeWiZZ4fthpcZo2R3THvFrTppwwCbYB+lciLe8nbQv9s7LFKY=,iv:T7xjhuolzQPvHXu3My0x1zzqynGz+xBl6LtiZC5s6Qk=,tag:8sBbZd7I+cfiblfCvQSkrA==,type:str] + webhook_secret: ENC[AES256_GCM,data:IXM90fuSlZOHStTMezYtE7CiTz2QaX0jRemXuAkaC542vF4bbJbqfQ==,iv:OrOWzepafRyH5eQB70y3rzXdBN10lk0WSLjPyvt9H0M=,tag:m9Bz8usyu65DfXyKGQjjnQ==,type:str] +sops: + kms: + - arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027 + created_at: "2023-01-27T21:47:51Z" + enc: 
AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwGJYo4LG4LywIw4SzygEjqgAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQM6lVQBq8/Fu07uuNVAgEQgDvgYdES8td7nMzWbwF+h63uPjVjH161zUPNYupat6px9yaxz01Ui8v4Zn9kMXA6UcQuZCJABHMS/gp6BA== + aws_profile: "" + - arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e + created_at: "2023-01-27T21:47:51Z" + enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgE6pXG5A0C4c/AqNm+PChBnAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMhZAPYiawhTUehx+xAgEQgDuVDWSyPxnoAGCtVsEKCxWvDHTkZQBlDWdcQRYIEQQU0oyifTtT3OdYu11WRNDjo4360egs463ScV9uUQ== + aws_profile: "" + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: [] + lastmodified: "2023-01-27T21:47:51Z" + mac: ENC[AES256_GCM,data:jw3icjVL0rn85lCFBEwcyKM00DvvJ81mvd5pM487rL86cfS8gBRewqyylK87P7l9OA9L82L50f2DwDqVtxR45yCzvy1Y9lRYqTtvC1aQCzrOqoECs0eM+6D5sPLLKKWnVUnvwTl9yD5xb8r1IIrZkW2lkRanA9GPpUjf2Vol3A8=,iv:JNw+IQs+AkLsfdjFPGndDs+uekM/n5ZMtahFoe/91Rs=,tag:8oppzfjFY8BFnD/KTT2TJQ==,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/secrets/config.pforge-pr-157.yaml b/secrets/config.pforge-pr-157.yaml deleted file mode 100644 index 7084d0d8..00000000 --- a/secrets/config.pforge-pr-157.yaml +++ /dev/null @@ -1,26 +0,0 @@ -fastapi: - PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:MlO5jf+F9maWgxsaIIyUQboOq1uuhFn+6xgS1O/+Fsc=,iv:MY7iXKpKPKrd1xPiSKirSgryvjTKTUISXw7Y0nk2CEQ=,tag:vu3GUt2uI59Q0O9dK2PX1A==,type:str] -github_app: - app_name: ENC[AES256_GCM,data:B33ymW2r5KYsYHk8eg==,iv:8+1eEKt4DoIlpzTZuIqLxFQy1ybIPMn1v+2bI3UjtxU=,tag:MFXVjUpokCkSbNhQEi4Chg==,type:str] - id: ENC[AES256_GCM,data:W1QhkzVO,iv:JnVPFQ4tEDsfdGJjHEqKIxOSSYe5HcP7fVKNeQUUs3A=,tag:zc5YBn+kTjB4Gi9BXlod4g==,type:int] - private_key: 
ENC[AES256_GCM,data:hZEGGOm41u3NOeK1THdcmcOebvNx6Sw488+xPYVGg4ga/4Ea9+ptd/LSWINdm3NxRTlFjTPv4yy1KhqWO5a/xdEl8kGh0vRMQrE53ODHheHQDs4iu7YUMPFQhr4NzPsVRE0z6oGPJWo5AAvoL4jb5vdhSSfROp/eI/9JIrrtlm2ir1JowzlxsCn6FXo1VB+sj98/r+0y+zApJYc7hoz2kb5hPQiysTPsz9lHv2uyKv1/RcaTc8+mt0NR9vj14sE4rrMh33ltenSRhPXSaWcZzHS8xQofIg6obR3O84PI6yRvQAu9Dsgsz8dieaZCxfI1METMp9TyeN0TZBKllczQYtkXQuE7CBOdyRGb00ehvznNRAGinrdfEt+5OaKYxsuaBbYasELFs2/veLJ4gDx3yisje94GMxb/3jkW20ko66ztQYaEt2hgy2HMCvXqvhRYPghE3wHjS4Nilmnn53WwhSRjam/UvYguqdFJtFgUijFeqWV04sk7fWTjpjDrG9Sqk083IkxLr1lsZ14lkEcyPv3afAcqLdFmFUz/Gmt38WwedBCnJ1cICWN0LyTkqRmxRS3uEgRGzcrjonMaPpeHp+DdXpb5V+2BGdIvRNXgPbil1OJ8dYHEJEO8E3U1aFrH29O1k4/Mbzgy7wtK1nplBkVQfDCRN7kJksi/ANE+2VXDoLxGVmwbuU42A1efZmaRpyRTYz0IQvbhtCB3EJhDklphQZ0y1Zjw3EEWPo9aUcQHPRiarZzEBMPve1Mx52xTIqHeOy+9aht/vuda2My1/Kki5TK5V8Yl5gs/UN/atSkJ2aph57QKNdWbdphbfv+vkLzVpmjPrP9bKltPA1lhnhpcaxnLZgS0Cp42lv6yKJfFL+GLeMvMNRBKSSo9ff17iKkvbywYDgBubfTrfsHTw/KnwED9WDornL+TdEdBhBba7oTmBGurxohFXz6GH3HPtBUc9kQTX94yyLSrAnG1HCZ4etZDVqzbjPIAvfu55o0oDV1zmHJAuOZyd+0P1UQPv3Uiwq1poCiULT8ZQCWyEvYVatHu/euRpNdDR0ESVZSHMFGiViejz5spl8r6p0iN2Ot1KGD3fa4mESluyxADWLbE+g8RRK8O1wjaXFCTpH/I7TtY2XH5LtmZSzf11tqERGzaHsOPLC97QSOwfULnEqeA2302nGEeM/gUZITS8FCh7shdEW10v+hYQ6DYtA7xhb+olSTvqOGptFZREYWCawWE2HUn9Qo9DVP8vnYycPLxwgb4harlKX0EIegm9eGDzbbP02b11GX6bUXkEAcsAW8cI52S7IHU9wzVWbkVaMVID49d5YjRYqr6VzX6DPI3ywf9bvOL9DkX/maFLHX/A1zCntYuxCnuSi+CK2s+mX9nYVj7DwsZeE7JtyxSY+dD/WkYxuzkokGsVXVARcTfPbMtEoam6QHzunBEfIQ7/suqdG1PqkW6qL8r+ATcIGUDU07V2sA4Mlr/DdUnKSMeYW5XgHTVUUVPEWBsHDtrk14HQLNR9VtqLziGQ/nt8mIurcmJM3PwcIr3gkn5FA8hepe9vltCdQ3A7tayYhTW50F200h8Z16KjTpHj/Ryo4sQnDBu/PHeI1kOqRZxXBT2Z58nj/yTEPtPY2TKUyo+xDb6szm8LbI9a3I936ia/CiE7yXyvGHx779aT1BNMZszmmC08PSqnffm6gVgUYf/bQqWOPcByUhvYWmo3J3cltMm2FyS8EPqHxWW7Ty0inkLrPupPwjhNA/z+IWSCbsNEPDMFzc4giEk4dsH1fs01nTd4IMN6P/YL5SMxiPNZ+Nj8/NdKOhxDJxRbQywDl5nVFl9N8CgnDjA/X5aYBi7rR9YxcWPHdp9morX9/PZH3fYrtdFTIXQQinBSli/dfrezM0J14d3RFTK0nlJnISL4kIr2EgsE0Wm+REn7cMRkuzvjbREk7ui4UscJ9vPwhC4+oJhFYHEcA1yOaE7B+VnfGcb3Y9j+j8nggcqTB0RhgrgBVnTH2cg84flHJqtsv2AiPV5U+EtEpOZW8/6Hs6eDJjMOX70E4V2BADPj26/iGTRcjZgPXJwFLwox10jQeMass6xRCu4ubk3UlvPzNhZ7s+jZy9vLN4yjG87TbEUkc2/LLHsKG2PmbI+43igRI09QkKTxX01uSXBsUFtkeIuGV4BXJ0flATCsTSAdIBZuyDlgs6XV/U+fMKgkK+a4offxBRlyuOecEvUtNCuJspXVeM=,iv:pwdazy/432hwXNu4CIuuCX6XaswQDbjUR78Tcxluec8=,tag:Px5FRLUGuEAekC74dezJdg==,type:str] - webhook_secret: ENC[AES256_GCM,data:XDJoW74WAEvKIpOjggjzMPkhlZklilf2YIlvul1KK7eDZUnAiJ1B0w==,iv:z/QZI8WcIvHBXnbE6kyp3vIfFiJYSR+NMI1Xm4sTbBQ=,tag:ukuYWiur+NlHJTSpax+NvA==,type:str] -sops: - kms: - - arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027 - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwEOVvZ+Bkb8BSnkr/2qDnaMAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMo1r1NiVj8DkbjwX+AgEQgDsXHd10jxFx9KZfHFWDPONRHFP6gcDLytRzixqaOasN4pbyxNgxbXtTRgqYQyZV5UtSanQgwSMSZ9VtMw== - aws_profile: "" - - arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgHw+nXyjHtlMsZ6cHd+RSWBAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMkwCxlaOjVIdX3xb7AgEQgDtL1O5koHLP+nlxhd13r27fsZB8Co9sO4x9KmSY6+N39urR3UUp7TV5nOS7D6Uu6FfIPhW4mQNB74Mqsg== - aws_profile: "" - gcp_kms: [] - azure_kv: [] - hc_vault: [] - age: [] - lastmodified: "2022-10-03T16:05:31Z" - mac: 
ENC[AES256_GCM,data:PLgAP7yVGaq7IXmXxOplwcrLgtEoocmWesFOrqkr+7pXgtZWWr9/hUJNLwBsEz2fD5ma4KZuuFY02ateh2ae3uAgIigoLciIIki5lZ7ijxk+3C0NBaWFIIubuUkKkyNwqLLqzS9kp+TP8qHdXX9txjteDQTsB7k9Fqkx/dEX3RM=,iv:5kAHobhOB4nWJai63P9gNdvbiwsbavSttJSxTDFpWW0=,tag:Ip4rJYPllxCxyFJzljcFsw==,type:str] - pgp: [] - unencrypted_suffix: _unencrypted - version: 3.7.3 From e6cdd01f3d48dccc5dc65ee87a8955e08d88965f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 27 Jan 2023 13:53:50 -0800 Subject: [PATCH 11/66] PANGEO_FORGE_DEPLOYMENT=dev-app-proxy for release and run scripts --- heroku.yml | 8 +++----- scripts.deploy/release.sh | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/heroku.yml b/heroku.yml index 97a5433e..5678f265 100644 --- a/heroku.yml +++ b/heroku.yml @@ -11,12 +11,10 @@ release: image: web run: # The first line of this command sets PANGEO_FORGE_DEPLOYMENT to itself, if it exists, - # but if it doesn't exist, PANGEO_FORGE_DEPLOYMENT is set to the value of HEROKU_APP_NAME. - # The latter case occurs only in the review app context. We use this method because review - # app names are dynaically generated based on the PR number and are therefore to cumbersome - # to set manually for each PR. More on this syntax in: https://stackoverflow.com/a/2013589. + # but if it doesn't exist, PANGEO_FORGE_DEPLOYMENT is set to the value of 'dev-app-proxy'. + # The latter case occurs only in the review app context. web: > - export PANGEO_FORGE_DEPLOYMENT="${PANGEO_FORGE_DEPLOYMENT:=$HEROKU_APP_NAME}" + export PANGEO_FORGE_DEPLOYMENT="${PANGEO_FORGE_DEPLOYMENT:=dev-app-proxy}" && echo "PANGEO_FORGE_DEPLOYMENT set to ${PANGEO_FORGE_DEPLOYMENT}" && sops -d -i secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml && export DATAFLOW_CREDS='./secrets/dataflow-job-submission.json' diff --git a/scripts.deploy/release.sh b/scripts.deploy/release.sh index fa8e6f76..20c91995 100644 --- a/scripts.deploy/release.sh +++ b/scripts.deploy/release.sh @@ -11,9 +11,8 @@ python3.9 -m alembic upgrade head if [[ -z "${PANGEO_FORGE_DEPLOYMENT}" ]]; then echo "PANGEO_FORGE_DEPLOYMENT undefined, so this must be a review app..." - echo "Review app injected env var \$HEROKU_APP_NAME=${HEROKU_APP_NAME}..." - echo "Setting PANGEO_FORGE_DEPLOYMENT=\$HEROKU_APP_NAME..." - export PANGEO_FORGE_DEPLOYMENT=$HEROKU_APP_NAME + echo "Setting PANGEO_FORGE_DEPLOYMENT=dev-app-proxy..." + export PANGEO_FORGE_DEPLOYMENT=dev-app-proxy fi echo "setting terraform env..." From 19b41223291a5fde53613c661646ef8b2f82fb82 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:26:22 -0800 Subject: [PATCH 12/66] copy workdir into image at heroku build time --- Dockerfile | 19 +++++++------------ Dockerfile.heroku | 9 +++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index c6db30af..ffb07c3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,25 +39,20 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages. && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | tee /usr/share/keyrings/cloud.google.gpg \ && apt-get update && apt-get -y install google-cloud-cli +# Install git, for fetching submodule contents below +RUN apt-get update && apt-get -y install git + +# Install pip requirements, a time-consuming step! COPY requirements.txt ./ RUN python3.9 -m pip install -r requirements.txt -COPY . 
/opt/app
-WORKDIR /opt/app
-
 # heroku can't fetch submodule contents from github:
 # https://devcenter.heroku.com/articles/github-integration#does-github-integration-work-with-git-submodules
 # so even though we have this in the repo (for development & testing convenience), we actually .dockerignore
 # it, and then clone it from github at build time (otherwise we don't actually get these contents on heroku)
 # After cloning, reset to a specific commit, so we don't end up with the wrong contents.
-RUN apt-get update && apt-get -y install git
-RUN git clone -b main --single-branch https://github.com/pangeo-forge/dataflow-status-monitoring \
-    && cd dataflow-status-monitoring \
+RUN git clone -b main --single-branch \
+    https://github.com/pangeo-forge/dataflow-status-monitoring /opt/app \
+    && cd /opt/app/dataflow-status-monitoring \
     && git reset --hard c72a594b2aea5db45d6295fadd801673bee9746f \
     && cd -
-
-# the only deploy-time process which needs pangeo_forge_orchestrator installed is the review app's
-# `postdeploy/seed_review_app_data.py`, but this shouldn't interfere with anything else.
-RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0 pip install . --no-deps
-
-RUN chmod +x scripts.deploy/release.sh

diff --git a/Dockerfile.heroku b/Dockerfile.heroku
index 7fe4f2aa..045c18ce 100644
--- a/Dockerfile.heroku
+++ b/Dockerfile.heroku
@@ -1 +1,10 @@
 FROM pangeo/forge-orchestrator:latest
+
+COPY . /opt/app
+WORKDIR /opt/app
+
+# the only deploy-time process which needs pangeo_forge_orchestrator installed is the review app's
+# `postdeploy/seed_review_app_data.py`, but this shouldn't interfere with anything else.
+RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0 pip install . --no-deps
+
+RUN chmod +x scripts.deploy/release.sh

From 454ba8a8ad211a70c5c93324bacca5c371d619c0 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 14:36:14 -0800
Subject: [PATCH 13/66] mv git clone into Dockerfile.heroku

---
 Dockerfile        | 13 +------------
 Dockerfile.heroku | 11 +++++++++++
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ffb07c3a..068acf54 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,20 +39,9 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.
     && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | tee /usr/share/keyrings/cloud.google.gpg \
     && apt-get update && apt-get -y install google-cloud-cli

-# Install git, for fetching submodule contents below
+# Install git, for fetching submodule contents in Dockerfile.heroku
 RUN apt-get update && apt-get -y install git

 # Install pip requirements, a time-consuming step!
 COPY requirements.txt ./
 RUN python3.9 -m pip install -r requirements.txt
-
-# heroku can't fetch submodule contents from github:
-# https://devcenter.heroku.com/articles/github-integration#does-github-integration-work-with-git-submodules
-# so even though we have this in the repo (for development & testing convenience), we actually .dockerignore
-# it, and then clone it from github at build time (otherwise we don't actually get these contents on heroku)
-# After cloning, reset to a specific commit, so we don't end up with the wrong contents.
-RUN git clone -b main --single-branch \
-    https://github.com/pangeo-forge/dataflow-status-monitoring /opt/app \
-    && cd /opt/app/dataflow-status-monitoring \
-    && git reset --hard c72a594b2aea5db45d6295fadd801673bee9746f \
-    && cd -

diff --git a/Dockerfile.heroku b/Dockerfile.heroku
index 045c18ce..0afc7680 100644
--- a/Dockerfile.heroku
+++ b/Dockerfile.heroku
@@ -3,6 +3,17 @@ FROM pangeo/forge-orchestrator:latest
 COPY . /opt/app
 WORKDIR /opt/app

+# heroku can't fetch submodule contents from github:
+# https://devcenter.heroku.com/articles/github-integration#does-github-integration-work-with-git-submodules
+# so even though we have this in the repo (for development & testing convenience), we actually .dockerignore
+# it, and then clone it from github at build time (otherwise we don't actually get these contents on heroku)
+# After cloning, reset to a specific commit, so we don't end up with the wrong contents.
+RUN apt-get update && apt-get -y install git
+RUN git clone -b main --single-branch https://github.com/pangeo-forge/dataflow-status-monitoring \
+    && cd dataflow-status-monitoring \
+    && git reset --hard c72a594b2aea5db45d6295fadd801673bee9746f \
+    && cd -
+
 # the only deploy-time process which needs pangeo_forge_orchestrator installed is the review app's
 # `postdeploy/seed_review_app_data.py`, but this shouldn't interfere with anything else.
 RUN SETUPTOOLS_SCM_PRETEND_VERSION=0.0 pip install . --no-deps

From 18bca7347dfba3af474ce2eb8649a1dbe9dcc1d6 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 16:28:10 -0800
Subject: [PATCH 14/66] additional seed data for postdeploy

---
 postdeploy/seed_review_app_data.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/postdeploy/seed_review_app_data.py b/postdeploy/seed_review_app_data.py
index 56893892..c9836ea4 100644
--- a/postdeploy/seed_review_app_data.py
+++ b/postdeploy/seed_review_app_data.py
@@ -14,8 +14,18 @@
 test_staged_recipes = MODELS["feedstock"].table.from_orm(
     MODELS["feedstock"].creation(spec="pforgetest/test-staged-recipes")
 )
+gpcp_from_gcs = MODELS["feedstock"].table.from_orm(
+    MODELS["feedstock"].creation(spec="pforgetest/gpcp-from-gcs-feedstock")
+)
+default_bakery = MODELS["bakery"].table.from_orm(
+    MODELS["bakery"].creation(
+        region="foo",
+        name="pangeo-ldeo-nsf-earthcube",
+        description="bar",
+    )
+)

-to_commit = [test_staged_recipes]
+to_commit = [test_staged_recipes, gpcp_from_gcs, default_bakery]
 with Session(engine) as db_session:
     for model in to_commit:
         db_session.add(model)

From f2b8462cdba52842f1b99ce6cdc914637e68604e Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 16:30:05 -0800
Subject: [PATCH 15/66] specify dockerfile in docker-compose (for tests)

---
 docker-compose.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 7c8a5bf2..3ca4ae49 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,6 +8,7 @@ services:
   web:
     # For platform spec, see https://stackoverflow.com/a/70238851
     platform: linux/amd64
     build: .
+    dockerfile: Dockerfile.heroku
     ports:
       - '3000:8000'
     depends_on:

From 2912fae3cae9c2faa8313fdd6499dc3e28da9b0c Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 16:36:11 -0800
Subject: [PATCH 16/66] fix docker-compose build declaration

---
 docker-compose.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 3ca4ae49..a9591264 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,8 +7,9 @@ services:
   web:
     # For platform spec, see https://stackoverflow.com/a/70238851
     platform: linux/amd64
-    build: .
-    dockerfile: Dockerfile.heroku
+    build:
+      context: .
+      dockerfile: Dockerfile.heroku
     ports:
       - '3000:8000'
     depends_on:

From d45a669723c57df3a2f44eeec5478b757600eb8d Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 16:59:18 -0800
Subject: [PATCH 17/66] setup bakery config for dev-app-proxy

---
 ...e-pr-157.yaml => pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename bakeries/{pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml => pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml} (100%)

diff --git a/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml b/bakeries/pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml
similarity index 100%
rename from bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml
rename to bakeries/pangeo-ldeo-nsf-earthcube.dev-app-proxy.yaml

From 1fa20fcf8e6d14d420878f74d7a57f9fe80dc2c4 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Fri, 27 Jan 2023 17:52:46 -0800
Subject: [PATCH 18/66] branching logic for review app webhook url

---
 pangeo_forge_orchestrator/routers/github_app.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py
index ec2d5737..0fbc9c32 100644
--- a/pangeo_forge_orchestrator/routers/github_app.py
+++ b/pangeo_forge_orchestrator/routers/github_app.py
@@ -87,8 +87,15 @@ async def get_access_token(gh: GitHubAPI) -> str:


 async def get_app_webhook_url(gh: GitHubAPI) -> str:
-    response = await gh.getitem("/app/hook/config", jwt=get_jwt(), accept=ACCEPT)
-    return response["url"]
+    if heroku_app_name := os.environ.get("HEROKU_APP_NAME", None):
+        # This env var is only set on Heroku Review Apps, so if it's present, we know
+        # we need to generate the review app url here, because the GitHub App webhook
+        # url is a proxy url, and not the actual url for this review app instance.
+        return f"https://{heroku_app_name}.herokuapp.com/github/hooks/"
+    else:
+        # This is not a Review App, so we can query the GitHub App webhook url.
+        response = await gh.getitem("/app/hook/config", jwt=get_jwt(), accept=ACCEPT)
+        return response["url"]


 async def get_repo_id(repo_full_name: str, gh: GitHubAPI) -> str:

From fb25e6ff78114e78555d1fae1f3c70f7714a3567 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Wed, 1 Feb 2023 16:48:35 -0800
Subject: [PATCH 19/66] test-dataflow-integration workflow first commit

---
 .github/workflows/test-dataflow-integration.yaml | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 .github/workflows/test-dataflow-integration.yaml

diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml
new file mode 100644
index 00000000..dddf8b4e
--- /dev/null
+++ b/.github/workflows/test-dataflow-integration.yaml
@@ -0,0 +1,62 @@
+name: Test Dataflow Integration
+
+on:
+  deployment_status:
+  # TODO: add on 'schedule' against staging deployment?
+  pull_request:
+    branches: ['main']
+    types: [labeled]
+
+jobs:
+  test:
+    # run when:
+    #  - a PR is labeled 'test-dataflow'
+    #    (assuming it is also labeled 'build-review-app'
+    #    *and* the deployment for the head sha is a success)
+    #  - heroku marks a deployment with 'state' == 'success'
+    #    (assuming PR also has 'test-dataflow' label)
+    runs-on: ubuntu-latest
+    steps:
+      # conditional step if triggering event is a pull_request
+      - name: Maybe get deployment state from pull request
+        if: |
+          github.event_name == 'pull_request'
+          && github.event.label.name == 'test-dataflow'
+          && contains( github.event.pull_request.labels.*.name, 'build-review-app')
+        # if we get here, this is a pull request, so we need to know the statuses url
+        # for the deployment associated with the head sha. we use the **base** repo
+        # deployments url, and look for deployments associated with pr's head sha.
+        # (the head repo deployments url would cause errors, if the pr is from a fork.)
+        run: |
+          export DEPLOYMENTS_URL=\
+          ${{ github.event.pull_request.base.repo.deployments_url }}\
+          \?environment\=pforge-pr-${{ github.event.pull_request.number }}\
+          \&sha\=${{ github.event.pull_request.head.sha }}
+          curl -s $DEPLOYMENTS_URL \
+            | python3 -c "
+          import sys, json; print(json.load(sys.stdin)[0]['statuses_url'])" \
+            | xargs -I{} curl -s {} \
+            | python3 -c "
+          import sys, json; print('DEPLOYMENT_STATE=' + json.load(sys.stdin)[-1]['state'])" \
+            >> $GITHUB_ENV
+
+      # conditional step if triggering event is deployment_status
+      - name: Maybe set DEPLOYMENT_STATE var from deployment_status event
+        if: github.event_name == 'deployment_status'
+        # TODO: set env.DEPLOYMENT_STATE in run block below
+        # Question: in the case of this trigger, will the check run created be associated with the
+        # PR? or will this be handled via the top level of GitHub Actions in the repo? If the latter,
+        # then we need to manually create a check run at the PR head sha, in this case.
+ run: echo '${{ toJSON(github) }}' # for now, just figure out what's in the github context here + + - name: Run test + if: | + env.DEPLOYMENT_STATE == 'success' + && contains( github.event.pull_request.labels.*.name, 'test-dataflow') + # TODO: in this run block: + # - programatically make a /run comment on an existing PR in pforgetest + # - check to ensure a dataflow job was submitted within a plausible timeframe + # - wait for the job to complete (5-6 mins) + # - check to make sure the job was successful + run: | + echo foo From f5196f906330352b9b1e58a00af32b1d23273380 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 15:03:22 -0800 Subject: [PATCH 20/66] integration test workflow WIP cont --- .../workflows/test-dataflow-integration.yaml | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index dddf8b4e..8ea6aae5 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: # conditional step if triggering event is a pull_request - - name: Maybe get deployment state from pull request + - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from pull_request if: | github.event_name == 'pull_request' && github.event.label.name == 'test-dataflow' @@ -37,23 +37,42 @@ jobs: import sys, json; print(json.load(sys.stdin)[0]['statuses_url'])" \ | xargs -I{} curl -s {} \ | python3 -c " - import sys, json; print('DEPLOYMENT_STATE=' + json.load(sys.stdin)[-1]['state'])" \ + import sys, json; + d = json.load(sys.stdin)[-1]; + print('DEPLOYMENT_STATE=' + d['state'] + '\nREVIEW_APP_URL=' + d['environment_url']);" \ >> $GITHUB_ENV # conditional step if triggering event is deployment_status - - name: Maybe set DEPLOYMENT_STATE var from deployment_status event + - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from deployment_status if: github.event_name == 'deployment_status' - # TODO: set env.DEPLOYMENT_STATE in run block below - # Question: in the case of this trigger, will the check run created be associated with the - # PR? or will this be handled via the top level of GitHub Actions in the repo? If the latter, - # then we need to manually create a check run at the PR head sha, in this case. - run: echo '${{ toJSON(github) }}' # for now, just figure out what's in the github context here + run: | + echo 'REVIEW_APP_URL=${{ github.event.deployment_status.environment_url }} + DEPLOYMENT_STATE=${{ github.event.deployment_status.state }}' \ + >> $GITHUB_ENV - - name: Run test + - name: Is app up? + # NOTE: Heroku updates deployment as 'success' when build succeedes, not when release succeedes. + # So there is actually still a latency between setting this status, and when the review app is + # ready to receive payloads. Hmm... 
if: | env.DEPLOYMENT_STATE == 'success' && contains( github.event.pull_request.labels.*.name, 'test-dataflow') # TODO: in this run block: + # - check to see if app has been released (or if it's just been built) + # - if only built, then sleep until it's been released + run: | + curl -s env.REVIEW_APP_URL \ + | python3 -c " + import sys; + IS_UP = True if sys.stdin.read() == '{\"status\":\"ok\"}' else False; print(f'{IS_UP=}')" \ + >> $GITHUB_ENV + + - name: Run test + if: | + env.DEPLOYMENT_STATE == 'success' + && contains( github.event.pull_request.labels.*.name, 'test-dataflow') + && env.IS_UP == 'True' + # TODO: # - programatically make a /run comment on an existing PR in pforgetest # - check to ensure a dataflow job was submitted within a plausible timeframe # - wait for the job to complete (5-6 mins) From f4eae11bd0ecd039ebdf7e7727e3014ecc1f0b71 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 15:24:00 -0800 Subject: [PATCH 21/66] try to fix IS_UP step if block --- .github/workflows/test-dataflow-integration.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 8ea6aae5..f00cf195 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -44,7 +44,9 @@ jobs: # conditional step if triggering event is deployment_status - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from deployment_status - if: github.event_name == 'deployment_status' + if: | + github.event_name == 'deployment_status' + && contains( github.event.pull_request.labels.*.name, 'test-dataflow') run: | echo 'REVIEW_APP_URL=${{ github.event.deployment_status.environment_url }} DEPLOYMENT_STATE=${{ github.event.deployment_status.state }}' \ @@ -54,10 +56,7 @@ jobs: # NOTE: Heroku updates deployment as 'success' when build succeedes, not when release succeedes. # So there is actually still a latency between setting this status, and when the review app is # ready to receive payloads. Hmm... 
- if: | - env.DEPLOYMENT_STATE == 'success' - && contains( github.event.pull_request.labels.*.name, 'test-dataflow') - # TODO: in this run block: + if: ${{ env.DEPLOYMENT_STATE == 'success' }} # - check to see if app has been released (or if it's just been built) # - if only built, then sleep until it's been released run: | From 342568ac03e3ec3c6d4281aeaff56bf2f4b1fa8c Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:04:56 -0800 Subject: [PATCH 22/66] for deployment_status event, check if pr has label --- .../workflows/test-dataflow-integration.yaml | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index f00cf195..1f94e897 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -39,17 +39,33 @@ jobs: | python3 -c " import sys, json; d = json.load(sys.stdin)[-1]; - print('DEPLOYMENT_STATE=' + d['state'] + '\nREVIEW_APP_URL=' + d['environment_url']);" \ + print('TEST_DATAFLOW=True'); + print('DEPLOYMENT_STATE=' + d['state']); + print('REVIEW_APP_URL=' + d['environment_url']);" \ >> $GITHUB_ENV # conditional step if triggering event is deployment_status - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from deployment_status if: | github.event_name == 'deployment_status' - && contains( github.event.pull_request.labels.*.name, 'test-dataflow') + # if we're here, we know this is a deployment_status event, but we don't know whether or not + # the PR has the 'test-dataflow' label. (it's possible the PR *only* has the 'build-review-app' + # label, but not the 'test-dataflow' label, in which case we do not want to deploy a dataflow job. + # so before we do anything else, we need to make sure this PR is labeled 'test-dataflow'. + # note that the github deployment "environments" for our review apps are named according to the + # convention "pforge-pr-${NUMBER}". so our most direct path to get the PR number from the deployment + # status event is to parse the PR number out of this string. run: | - echo 'REVIEW_APP_URL=${{ github.event.deployment_status.environment_url }} - DEPLOYMENT_STATE=${{ github.event.deployment_status.state }}' \ + export ENVIRONMENT=${{ github.event.deployment_status.environment }} \ + && python3 -c " + import os; print(os.environ['ENVIRONMENT'].split('-')[-1])" \ + | xargs -I{} curl -s ${{ github.event.deployment_status.repository_url }}/pulls/{} \ + | python3 -c " + import json, sys; + labels = json.load(sys.stdin)['labels']; + print('TEST_DATAFLOW=' + (True if any([l['name'] == 'test-dataflow' for l in labels]) else False)); + print('REVIEW_APP_URL=' + ${{ github.event.deployment_status.environment_url }}); + print('DEPLOYMENT_STATE=' + ${{ github.event.deployment_status.state }});" \ >> $GITHUB_ENV - name: Is app up? 
@@ -69,8 +85,8 @@ jobs: - name: Run test if: | env.DEPLOYMENT_STATE == 'success' - && contains( github.event.pull_request.labels.*.name, 'test-dataflow') && env.IS_UP == 'True' + && env.TEST_DATAFLOW == 'True' # TODO: # - programatically make a /run comment on an existing PR in pforgetest # - check to ensure a dataflow job was submitted within a plausible timeframe From f4ead602797398ea7da9434f1e2038617a2dce8e Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:11:14 -0800 Subject: [PATCH 23/66] quote context variables --- .github/workflows/test-dataflow-integration.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 1f94e897..899c9f9a 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -64,8 +64,8 @@ jobs: import json, sys; labels = json.load(sys.stdin)['labels']; print('TEST_DATAFLOW=' + (True if any([l['name'] == 'test-dataflow' for l in labels]) else False)); - print('REVIEW_APP_URL=' + ${{ github.event.deployment_status.environment_url }}); - print('DEPLOYMENT_STATE=' + ${{ github.event.deployment_status.state }});" \ + print('REVIEW_APP_URL=' + '${{ github.event.deployment_status.environment_url }}'); + print('DEPLOYMENT_STATE=' + '${{ github.event.deployment_status.state }}');" \ >> $GITHUB_ENV - name: Is app up? From 7ea643dd1b9985fbc42920ab63d1b0dd596cdf62 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:17:49 -0800 Subject: [PATCH 24/66] fix str to bool concat issue --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 899c9f9a..2e13cd0f 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -63,7 +63,7 @@ jobs: | python3 -c " import json, sys; labels = json.load(sys.stdin)['labels']; - print('TEST_DATAFLOW=' + (True if any([l['name'] == 'test-dataflow' for l in labels]) else False)); + print('TEST_DATAFLOW=' + str(True if any([l['name'] == 'test-dataflow' for l in labels]) else False)); print('REVIEW_APP_URL=' + '${{ github.event.deployment_status.environment_url }}'); print('DEPLOYMENT_STATE=' + '${{ github.event.deployment_status.state }}');" \ >> $GITHUB_ENV From 591c49c6a1afd02b49fa47d6877a91663d33528f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 17:09:29 -0800 Subject: [PATCH 25/66] poll review app until it's ready --- .../workflows/test-dataflow-integration.yaml | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 2e13cd0f..8ddb1235 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -69,17 +69,30 @@ jobs: >> $GITHUB_ENV - name: Is app up? - # NOTE: Heroku updates deployment as 'success' when build succeedes, not when release succeedes. - # So there is actually still a latency between setting this status, and when the review app is - # ready to receive payloads. Hmm... 
if: ${{ env.DEPLOYMENT_STATE == 'success' }} - # - check to see if app has been released (or if it's just been built) - # - if only built, then sleep until it's been released + # Heroku updates deployment as 'success' when build succeedes, not when *release* succeedes. + # So there is actually still a latency between when this status is set, and when the review app + # is ready to receive requests. In general, the review apps take about 3 minutes to release. + # So here we wait 2 minutes, then start checking if the app is up, repeating every 30 seconds + # until it's either up, or if > 10 mins have elapsed, something's gone wrong, so we bail out. run: | - curl -s env.REVIEW_APP_URL \ - | python3 -c " - import sys; - IS_UP = True if sys.stdin.read() == '{\"status\":\"ok\"}' else False; print(f'{IS_UP=}')" \ + python3 -c " + import sys, time; + from urllib.request import urlopen; + start = time.time(); + time.sleep(60 * 2); + while True: + elapsed = time.time() - start; + if elapsed > 60 * 10: + # releases shouldn't take > 10 mins; something's gone wrong, so exit. + sys.exit(1) + contents = urlopen(${{ env.REVIEW_APP_URL }}).read().decode() + if contents == '{"status":"ok"}': + # if we get this response from the review app, it's up and ready to go. + print('IS_UP=True') + break + else: + time.sleep(30)" \ >> $GITHUB_ENV - name: Run test From 8473e62e2b11899db1ee7e5e1077265c6f7b5e49 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 17:13:56 -0800 Subject: [PATCH 26/66] quote context variable (again) --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 8ddb1235..846790f9 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -86,7 +86,7 @@ jobs: if elapsed > 60 * 10: # releases shouldn't take > 10 mins; something's gone wrong, so exit. sys.exit(1) - contents = urlopen(${{ env.REVIEW_APP_URL }}).read().decode() + contents = urlopen('${{ env.REVIEW_APP_URL }}').read().decode() if contents == '{"status":"ok"}': # if we get this response from the review app, it's up and ready to go. print('IS_UP=True') From 028dd543a919034bd0ea19a08d9a83189affd50a Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 2 Feb 2023 17:30:29 -0800 Subject: [PATCH 27/66] escape json quotes --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 846790f9..ea6e33bb 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -87,7 +87,7 @@ jobs: # releases shouldn't take > 10 mins; something's gone wrong, so exit. sys.exit(1) contents = urlopen('${{ env.REVIEW_APP_URL }}').read().decode() - if contents == '{"status":"ok"}': + if contents == '{\"status\":\"ok\"}': # if we get this response from the review app, it's up and ready to go. 
print('IS_UP=True') break From d03b93339705462db40b2ca11a15f10a1ca92d62 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 3 Feb 2023 14:21:01 -0800 Subject: [PATCH 28/66] actual pytest test WIP first commit --- .../workflows/test-dataflow-integration.yaml | 10 +++- tests.integration/test_dataflow.py | 49 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tests.integration/test_dataflow.py diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index ea6e33bb..e66d9a25 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -87,7 +87,7 @@ jobs: # releases shouldn't take > 10 mins; something's gone wrong, so exit. sys.exit(1) contents = urlopen('${{ env.REVIEW_APP_URL }}').read().decode() - if contents == '{\"status\":\"ok\"}': + if contents == '{\"status\":\"ok"}': # if we get this response from the review app, it's up and ready to go. print('IS_UP=True') break @@ -105,5 +105,13 @@ jobs: # - check to ensure a dataflow job was submitted within a plausible timeframe # - wait for the job to complete (5-6 mins) # - check to make sure the job was successful + + # we need to checkout pangeo-forge-orchestrator: + # - install it (to get `get_jwt` function, etc) + # - get + decrypt dev-app-proxy creds + # - call github API as described in https://github.com/pangeo-forge/pangeo-forge-orchestrator/tree/main/docs#github-app-manual-api-calls + # during the test (decrypt them here with sops -d -i ... && pytest -vx ...) + # - get the tests dir + uses: actions/checkout@v3 run: | echo foo diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py new file mode 100644 index 00000000..e4d26609 --- /dev/null +++ b/tests.integration/test_dataflow.py @@ -0,0 +1,49 @@ +import os + +import pytest + + +@pytest.fixture +def app_url(): + """Url on the public internet at which the app to test against is currently running.""" + return os.environ["REVIEW_APP_URL"] + + +@pytest.fixture(params=[]) +def content_files(request): + """The content files to add. Parametrization of the ``test_dataflow`` test happens here.""" + ... + yield ... + + +@pytest.fixture +def staged_recipes_pr(app_url, content_files): + """Makes a PR to ``pforgetest/test-staged-recipes`` and labels it ``f"fwd:{app_url}{route}"``, + where ``{route}`` is optionally the path at which the app running at ``app_url`` receives + GitHub Webhooks. The label ``f"fwd:{app_url}{route}"`` informs the ``dev-app-proxy`` GitHub App + where to forward webhooks originating from the PR. After the PR is created, its identifying + information is yielded to the test function using this fixture. When control is returned to this + fixture, the PR and its associated branch are closed & cleaned-up. + """ + # create a new branch on pforgetest/test-staged-recipes with a descriptive name. + # (in the typical contribution process, recipes are contributed from forks. the deviation from + # that process here may introduce some sublte differences with production. for now, we are + # accepting that as the cost for doing this more simply; i.e., all within a single repo.) + ... + + # populate that branch + ... + + # open a pr against pforgetest/test-staged-recipes:main + pr = ... + + yield pr + + # close pr and cleanup branch + ... + + +def test_dataflow(staged_recipes_pr): + + ... 
+ # From 951b2866fabfade6483eef8ce42ced21d26f2f81 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Mon, 6 Feb 2023 16:00:41 -0800 Subject: [PATCH 29/66] test dataflow integration WIP cont --- tests.integration/test_dataflow.py | 114 ++++++++++++++++++++++++++--- 1 file changed, 103 insertions(+), 11 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index e4d26609..183fdf41 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,23 +1,71 @@ import os +from pathlib import Path import pytest +import pytest_asyncio +import yaml # type: ignore +from gidgethub.aiohttp import GitHubAPI + +from pangeo_forge_orchestrator.http import HttpSession +from pangeo_forge_orchestrator.routers.github_app import get_access_token + + +@pytest_asyncio.fixture(scope="session") +async def gh() -> GitHubAPI: + """A global gidgethub session to use throughout the integration tests.""" + + http_session = HttpSession() + yield GitHubAPI(http_session) + await http_session.stop() + + +@pytest_asyncio.fixture +async def gh_kws(gh: GitHubAPI) -> dict: + # this entire test relies on the assumption that it is being called from the root of the + # pangeo-forge-orchestrator repo, and therefore has access to the `dev-app-proxy` github app + # creds. if these creds are not decrypted before this test is run, strange non-self-describing + # errors may occur, so before we run the test, let's just make sure the creds are decrypted. + # NOTE: the path to these credentials will need to change after traitlets refactor goes in. + with open(Path(__file__).parent.parent / "secrets/config.dev-app-proxy.yaml") as f: + if "sops" in yaml.safe_load(f): + raise ValueError( + "GitHub App `dev-app-proxy` credentials are encrypted. " + "Decrypt these credentials before running this test." + ) + # okay, the credentials are decrypted, so let's move along with the test. + token = await get_access_token(gh) + return dict(oauth_token=token, accept="application/vnd.github+json") @pytest.fixture -def app_url(): +def app_url() -> str: """Url on the public internet at which the app to test against is currently running.""" return os.environ["REVIEW_APP_URL"] -@pytest.fixture(params=[]) -def content_files(request): - """The content files to add. Parametrization of the ``test_dataflow`` test happens here.""" - ... - yield ... +@pytest.fixture +def gh_workflow_run_id() -> str: + """Identified the GitHub Workflow run which called this test.""" + return os.environ["GH_WORKFLOW_RUN_ID"] @pytest.fixture -def staged_recipes_pr(app_url, content_files): +def source_pr() -> dict[str, str]: + """A PR to replicate for this test.""" + return dict( + repo_full_name=os.environ["SOURCE_REPO_FULL_NAME"], + pr_number=os.environ["SOURCE_REPO_PR_NUMBER"], + ) + + +@pytest_asyncio.fixture(scope="session") +async def staged_recipes_pr( + gh: GitHubAPI, + gh_kws: dict, + gh_workflow_run_id: str, + source_pr: dict[str, str], + app_url: str, +): """Makes a PR to ``pforgetest/test-staged-recipes`` and labels it ``f"fwd:{app_url}{route}"``, where ``{route}`` is optionally the path at which the app running at ``app_url`` receives GitHub Webhooks. The label ``f"fwd:{app_url}{route}"`` informs the ``dev-app-proxy`` GitHub App @@ -29,13 +77,57 @@ def staged_recipes_pr(app_url, content_files): # (in the typical contribution process, recipes are contributed from forks. 
the deviation from # that process here may introduce some sublte differences with production. for now, we are # accepting that as the cost for doing this more simply; i.e., all within a single repo.) - ... - # populate that branch - ... + if "staged-recipes" in source_pr["repo_full_name"]: + base = "pforgetest/test-staged-recipes" + elif source_pr["repo_full_name"].endswith("-feedstock"): + # TODO: add a repo in `pforgetest` which can accept prs from any feedstock. + # this would essentially be just a blank repo containing an empty `feedstock/` directory. + raise NotImplementedError + + main = await gh.getitem(f"/repos/{base}/branches/main", **gh_kws) + working_branch = await gh.post( # noqa: F841 + f"/repos/{base}/git/refs", + data=dict( + ref=f"refs/heads/{gh_workflow_run_id}", + sha=main["commit"]["sha"], + ), + **gh_kws, + ) + + # populate that branch with content files from the source pr + src_files = await gh.getitem( + f"{source_pr['repo_full_name']}/pulls/{source_pr['pr_number']}/files", + **gh_kws, + ) + for f in src_files: + content = await gh.getitem(f["contents_url"], **gh_kws) + await gh.put( + f"/repos/{base}/contents/{f['filename']}", + data=dict( + message=f"Adding {f['filename']}", + content=content["content"], + branch=gh_workflow_run_id, + ), + **gh_kws, + ) # open a pr against pforgetest/test-staged-recipes:main - pr = ... + pr = await gh.post( + f"/repos/{base}/pulls", + data=dict( + title=f"[CI] Automated PR for workflow run {gh_workflow_run_id}", + head=gh_workflow_run_id, + body=( + ":robot: Created by https://github.com/pangeo-forge/pangeo-forge-orchestrator" + f"/actions/runs/{gh_workflow_run_id}", + ), + base="main", + ), + **gh_kws, + ) + + # label the pr so the dev-app-proxy knows where to forward webhooks originating from this pr yield pr From 37e182ddc57c427b6d0a8c359612a848eea47104 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Feb 2023 00:01:18 +0000 Subject: [PATCH 30/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests.integration/test_dataflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 183fdf41..49290416 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -136,6 +136,5 @@ async def staged_recipes_pr( def test_dataflow(staged_recipes_pr): - ... # From 519bf8f5e2c39cbf315121612ed341acedbb4094 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Mon, 6 Feb 2023 16:46:41 -0800 Subject: [PATCH 31/66] pr label fixturing first pass --- tests.integration/test_dataflow.py | 58 +++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 49290416..86028f0a 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,4 +1,5 @@ import os +import random from pathlib import Path import pytest @@ -58,13 +59,53 @@ def source_pr() -> dict[str, str]: ) +@pytest.fixture +def base(source_pr): + if "staged-recipes" in source_pr["repo_full_name"]: + return "pforgetest/test-staged-recipes" + elif source_pr["repo_full_name"].endswith("-feedstock"): + # TODO: add a repo in `pforgetest` which can accept prs from any feedstock. + # this would essentially be just a blank repo containing an empty `feedstock/` directory. 
+        raise NotImplementedError
+
+
+@pytest_asyncio.fixture
+async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str):
+    label_name_fmt = "fwd:{app_url}"
+    if "smee" not in app_url:
+        # smee proxy urls do not take the route path; heroku review apps do.
+        label_name_fmt += "/github/hooks/"
+
+    exists = False
+    async for label in gh.getiter(f"repos/{base}/labels", **gh_kws):
+        if label["name"] == label_name_fmt.format(app_url=app_url):
+            exists = True
+            break
+    if not exists:
+        label = gh.post(
+            f"/repos/{base}/labels",
+            data=dict(
+                name=f"fwd:{app_url}/github/hooks/",
+                color=f"{random.randint(0, 0xFFFFFF):06x}",
+                description="Tells dev-app-proxy GitHub App to forward webhooks to specified url.",
+            ),
+            **gh_kws,
+        )
+    yield label["name"]
+    # TODO: delete label after every test? it could certainly be reused multiple times if not.
+    # if we do delete the label here, then the check to see if it exists would only hit if the label
+    # had been manually created outside a test session, or if the test runner happened to have
+    # errored out on the prior test attempt (before the label had been deleted).
+
+
 @pytest_asyncio.fixture(scope="session")
 async def staged_recipes_pr(
     gh: GitHubAPI,
     gh_kws: dict,
     gh_workflow_run_id: str,
     source_pr: dict[str, str],
-    app_url: str,
+    base: str,
+    pr_label: str,
 ):
     """Makes a PR to ``pforgetest/test-staged-recipes`` and labels it ``f"fwd:{app_url}{route}"``,
     where ``{route}`` is optionally the path at which the app running at ``app_url`` receives
     GitHub Webhooks. The label ``f"fwd:{app_url}{route}"`` informs the ``dev-app-proxy`` GitHub App
     where to forward webhooks originating from the PR. After the PR is created, its identifying
     information is yielded to the test function using this fixture. When control is returned to
     this fixture, the PR and its associated branch are closed & cleaned-up.
     """
-    # create a new branch on pforgetest/test-staged-recipes with a descriptive name.
-    # (in the typical contribution process, recipes are contributed from forks. the deviation from
-    # that process here may introduce some sublte differences with production. for now, we are
+    # create a new branch on the test repo with a descriptive name.
+    # (in the typical contribution process, contributions may likely be from forks. the deviation
+    # from that process here may introduce some subtle differences with production. for now, we are
     # accepting that as the cost for doing this more simply; i.e., all within a single repo.)
-
-    if "staged-recipes" in source_pr["repo_full_name"]:
-        base = "pforgetest/test-staged-recipes"
-    elif source_pr["repo_full_name"].endswith("-feedstock"):
-        # TODO: add a repo in `pforgetest` which can accept prs from any feedstock.
-        # this would essentially be just a blank repo containing an empty `feedstock/` directory.
- raise NotImplementedError - main = await gh.getitem(f"/repos/{base}/branches/main", **gh_kws) working_branch = await gh.post( # noqa: F841 f"/repos/{base}/git/refs", @@ -128,6 +161,7 @@ async def staged_recipes_pr( ) # label the pr so the dev-app-proxy knows where to forward webhooks originating from this pr + gh.put(f"/repos/{base}/issues/{pr['number']}/labels", data=dict(labels=[pr_label]), **gh_kws) yield pr From f81e94e4eb7f1d6cebcea0828ac3199135ed03d1 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:38:48 -0800 Subject: [PATCH 32/66] fixtures successfully make real pr --- tests.integration/test_dataflow.py | 59 ++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 86028f0a..865c1c61 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,31 +1,50 @@ import os import random +import subprocess from pathlib import Path +import aiohttp import pytest import pytest_asyncio import yaml # type: ignore from gidgethub.aiohttp import GitHubAPI -from pangeo_forge_orchestrator.http import HttpSession from pangeo_forge_orchestrator.routers.github_app import get_access_token +PANGEO_FORGE_DEPLOYMENT = "dev-app-proxy" +os.environ["PANGEO_FORGE_DEPLOYMENT"] = PANGEO_FORGE_DEPLOYMENT -@pytest_asyncio.fixture(scope="session") + +@pytest.fixture(scope="session", autouse=True) +def decrypt_encrypt_secrets(): + repo_root = Path(__file__).parent.parent + + # FIXME: these paths will change (and i believe the bakery_secrets path will + # no longer be necessary) following completion of the traitlets refactor + dev_app_proxy_secrets = repo_root / "secrets" / "config.dev-app-proxy.yaml" + bakery_secrets = repo_root / "secrets" / "bakery-args.pangeo-ldeo-nsf-earthcube.yaml" + all_secrets = [dev_app_proxy_secrets, bakery_secrets] + + for path in all_secrets: + # decrypt secrets on test setup + subprocess.run(f"sops -d -i {path}".split()) + + yield + + # re-encrypt (restore) secrets on test teardown + subprocess.run("git restore secrets".split()) + + +@pytest_asyncio.fixture async def gh() -> GitHubAPI: """A global gidgethub session to use throughout the integration tests.""" - http_session = HttpSession() - yield GitHubAPI(http_session) - await http_session.stop() + async with aiohttp.ClientSession() as session: + yield GitHubAPI(session, PANGEO_FORGE_DEPLOYMENT) @pytest_asyncio.fixture async def gh_kws(gh: GitHubAPI) -> dict: - # this entire test relies on the assumption that it is being called from the root of the - # pangeo-forge-orchestrator repo, and therefore has access to the `dev-app-proxy` github app - # creds. if these creds are not decrypted before this test is run, strange non-self-describing - # errors may occur, so before we run the test, let's just make sure the creds are decrypted. # NOTE: the path to these credentials will need to change after traitlets refactor goes in. with open(Path(__file__).parent.parent / "secrets/config.dev-app-proxy.yaml") as f: if "sops" in yaml.safe_load(f): @@ -33,7 +52,6 @@ async def gh_kws(gh: GitHubAPI) -> dict: "GitHub App `dev-app-proxy` credentials are encrypted. " "Decrypt these credentials before running this test." ) - # okay, the credentials are decrypted, so let's move along with the test. 
token = await get_access_token(gh) return dict(oauth_token=token, accept="application/vnd.github+json") @@ -82,7 +100,7 @@ async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str): exists = True break if not exists: - label = gh.post( + label = await gh.post( f"/repos/{base}/labels", data=dict( name=f"fwd:{app_url}/github/hooks/", @@ -98,7 +116,7 @@ async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str): # errored out on the prior test attempt (before the label had been deleted). -@pytest_asyncio.fixture(scope="session") +@pytest_asyncio.fixture async def staged_recipes_pr( gh: GitHubAPI, gh_kws: dict, @@ -122,7 +140,7 @@ async def staged_recipes_pr( working_branch = await gh.post( # noqa: F841 f"/repos/{base}/git/refs", data=dict( - ref=f"refs/heads/{gh_workflow_run_id}", + ref=f"refs/heads/actions/runs/{gh_workflow_run_id}", sha=main["commit"]["sha"], ), **gh_kws, @@ -130,7 +148,7 @@ async def staged_recipes_pr( # populate that branch with content files from the source pr src_files = await gh.getitem( - f"{source_pr['repo_full_name']}/pulls/{source_pr['pr_number']}/files", + f"repos/{source_pr['repo_full_name']}/pulls/{source_pr['pr_number']}/files", **gh_kws, ) for f in src_files: @@ -140,7 +158,7 @@ async def staged_recipes_pr( data=dict( message=f"Adding {f['filename']}", content=content["content"], - branch=gh_workflow_run_id, + branch=f"actions/runs/{gh_workflow_run_id}", ), **gh_kws, ) @@ -150,10 +168,11 @@ async def staged_recipes_pr( f"/repos/{base}/pulls", data=dict( title=f"[CI] Automated PR for workflow run {gh_workflow_run_id}", - head=gh_workflow_run_id, + head=f"actions/runs/{gh_workflow_run_id}", body=( - ":robot: Created by https://github.com/pangeo-forge/pangeo-forge-orchestrator" - f"/actions/runs/{gh_workflow_run_id}", + ":robot: Created for test run https://github.com/pangeo-forge/" + f"pangeo-forge-orchestrator/actions/runs/{gh_workflow_run_id}\n" + f":memo: Which is testing {pr_label.replace('fwd:', 'https://')}" ), base="main", ), @@ -161,7 +180,9 @@ async def staged_recipes_pr( ) # label the pr so the dev-app-proxy knows where to forward webhooks originating from this pr - gh.put(f"/repos/{base}/issues/{pr['number']}/labels", data=dict(labels=[pr_label]), **gh_kws) + await gh.put( + f"/repos/{base}/issues/{pr['number']}/labels", data=dict(labels=[pr_label]), **gh_kws + ) yield pr From e23c827421ec0a8dc21e8efc5a33039de112df4c Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 8 Feb 2023 13:28:01 -0800 Subject: [PATCH 33/66] make gh_token SecretStr to prevent leaking in logs --- tests.integration/test_dataflow.py | 50 ++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 865c1c61..0d112de6 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -6,8 +6,8 @@ import aiohttp import pytest import pytest_asyncio -import yaml # type: ignore from gidgethub.aiohttp import GitHubAPI +from pydantic import SecretStr from pangeo_forge_orchestrator.routers.github_app import get_access_token @@ -44,16 +44,16 @@ async def gh() -> GitHubAPI: @pytest_asyncio.fixture -async def gh_kws(gh: GitHubAPI) -> dict: - # NOTE: the path to these credentials will need to change after traitlets refactor goes in. 
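An aside on the `SecretStr` wrapping this patch introduces (a minimal sketch with an assumed placeholder token, not a real credential): pytest echoes fixture values and local variables in failed-test output, so a plain `str` token could leak into public CI logs, whereas `SecretStr` masks its value until `.get_secret_value()` is called explicitly.

```python
from pydantic import SecretStr

token = SecretStr("ghs_example_not_a_real_token")  # placeholder, not a real token
print(token)        # **********
print(repr(token))  # SecretStr('**********')
# the raw value is only exposed on explicit request, e.g. when building an auth header:
assert token.get_secret_value() == "ghs_example_not_a_real_token"
```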
- with open(Path(__file__).parent.parent / "secrets/config.dev-app-proxy.yaml") as f: - if "sops" in yaml.safe_load(f): - raise ValueError( - "GitHub App `dev-app-proxy` credentials are encrypted. " - "Decrypt these credentials before running this test." - ) +async def gh_token(gh: GitHubAPI) -> SecretStr: token = await get_access_token(gh) - return dict(oauth_token=token, accept="application/vnd.github+json") + # wrap in SecretStr to avoid leaking in failed test logs, + # see https://github.com/pytest-dev/pytest/issues/8613 + return SecretStr(token) + + +@pytest.fixture +def gh_kws() -> dict: + return {"accept": "application/vnd.github+json"} @pytest.fixture @@ -64,7 +64,7 @@ def app_url() -> str: @pytest.fixture def gh_workflow_run_id() -> str: - """Identified the GitHub Workflow run which called this test.""" + """Identifies the GitHub Workflow run which called this test.""" return os.environ["GH_WORKFLOW_RUN_ID"] @@ -88,7 +88,7 @@ def base(source_pr): @pytest_asyncio.fixture -async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str): +async def pr_label(gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, app_url: str): label_name_fmt = "fwd:{app_url}" if "smee" not in app_url: # smee proxy urls do not take the route path; heroku review apps do. @@ -107,6 +107,7 @@ async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str): color=f"{random.randint(0, 0xFFFFFF):06x}", description="Tells dev-app-proxy GitHub App to forward webhooks to specified url.", ), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) yield label["name"] @@ -119,6 +120,7 @@ async def pr_label(gh: GitHubAPI, gh_kws: dict, base: str, app_url: str): @pytest_asyncio.fixture async def staged_recipes_pr( gh: GitHubAPI, + gh_token: SecretStr, gh_kws: dict, gh_workflow_run_id: str, source_pr: dict[str, str], @@ -137,12 +139,13 @@ async def staged_recipes_pr( # from that process here may introduce some sublte differences with production. for now, we are # accepting that as the cost for doing this more simply; i.e., all within a single repo.) main = await gh.getitem(f"/repos/{base}/branches/main", **gh_kws) - working_branch = await gh.post( # noqa: F841 + working_branch = await gh.post( f"/repos/{base}/git/refs", data=dict( ref=f"refs/heads/actions/runs/{gh_workflow_run_id}", sha=main["commit"]["sha"], ), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) @@ -160,6 +163,7 @@ async def staged_recipes_pr( content=content["content"], branch=f"actions/runs/{gh_workflow_run_id}", ), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) @@ -176,18 +180,32 @@ async def staged_recipes_pr( ), base="main", ), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) # label the pr so the dev-app-proxy knows where to forward webhooks originating from this pr await gh.put( - f"/repos/{base}/issues/{pr['number']}/labels", data=dict(labels=[pr_label]), **gh_kws + f"/repos/{base}/issues/{pr['number']}/labels", + data=dict(labels=[pr_label]), + oauth_token=gh_token.get_secret_value(), + **gh_kws, ) yield pr - # close pr and cleanup branch - ... 
+ # close pr and delete branch + await gh.patch( + pr["url"], + data=dict(state="closed"), + # oauth_token=gh_token.get_secret_value(), + **gh_kws, + ) + await gh.delete( + working_branch["url"], + # oauth_token=gh_token.get_secret_value(), + **gh_kws, + ) def test_dataflow(staged_recipes_pr): From f390fffde63dd6cfbb7b91c3e1b89a72bae1f11e Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 8 Feb 2023 14:26:43 -0800 Subject: [PATCH 34/66] add authentication for test cleanup --- tests.integration/test_dataflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 0d112de6..05309117 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -198,12 +198,12 @@ async def staged_recipes_pr( await gh.patch( pr["url"], data=dict(state="closed"), - # oauth_token=gh_token.get_secret_value(), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) await gh.delete( working_branch["url"], - # oauth_token=gh_token.get_secret_value(), + oauth_token=gh_token.get_secret_value(), **gh_kws, ) From 163915314e55c7bc6cc98e506b6b07e7c373c4f3 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 8 Feb 2023 15:44:18 -0800 Subject: [PATCH 35/66] label pr in between adding files --- tests.integration/test_dataflow.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 05309117..8a8a34c1 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,6 +1,7 @@ import os import random import subprocess +import time from pathlib import Path import aiohttp @@ -154,7 +155,8 @@ async def staged_recipes_pr( f"repos/{source_pr['repo_full_name']}/pulls/{source_pr['pr_number']}/files", **gh_kws, ) - for f in src_files: + + async def add_file(f): content = await gh.getitem(f["contents_url"], **gh_kws) await gh.put( f"/repos/{base}/contents/{f['filename']}", @@ -167,6 +169,10 @@ async def staged_recipes_pr( **gh_kws, ) + # add first source file to working branch. see commend above where `add_file` is + # called a second time, below, for why both files are not added at the same time. + await add_file(src_files[0]) + # open a pr against pforgetest/test-staged-recipes:main pr = await gh.post( f"/repos/{base}/pulls", @@ -192,6 +198,11 @@ async def staged_recipes_pr( **gh_kws, ) + # add the second source file (after labeling, so that the `synchronize` task will be forwarded) + # for explanation of why files are added one at a time (rather than at the same time) see: + # https://github.com/pangeo-forge/pangeo-forge-orchestrator/pull/226#issuecomment-1423337307 + await add_file(src_files[1]) + yield pr # close pr and delete branch @@ -209,5 +220,5 @@ async def staged_recipes_pr( def test_dataflow(staged_recipes_pr): - ... 
+ time.sleep(10) # From f0b43075a7cd2cbefb7b5ee15fb41192701bd413 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 8 Feb 2023 16:06:44 -0800 Subject: [PATCH 36/66] make /run comment on test pr --- tests.integration/test_dataflow.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 8a8a34c1..1e999f5b 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -219,6 +219,19 @@ async def add_file(f): ) -def test_dataflow(staged_recipes_pr): +@pytest.mark.asyncio +async def test_dataflow( + gh: GitHubAPI, + gh_token: SecretStr, + gh_kws: dict, + base: str, + staged_recipes_pr: dict, +): time.sleep(10) - # + + await gh.post( + f"/repos/{base}/issues/{staged_recipes_pr['number']}/comments", + data=dict(body="/run gpcp-from-gcs"), + oauth_token=gh_token.get_secret_value(), + **gh_kws, + ) From 75a6d022eeadb14e72ba6a5f7e052a916f0fc82d Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 8 Feb 2023 16:07:19 -0800 Subject: [PATCH 37/66] in job submission calledprocesserror, recipe_run.message might be none --- pangeo_forge_orchestrator/routers/github_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index 0fbc9c32..ae68f290 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -740,7 +740,7 @@ async def run( # Add the traceback for this deployment failure to the recipe run, otherwise it could # easily get buried in the server logs. TODO: Consider: is there anything of security # significance in the call stack captured in the trace? 
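An aside on the one-line guard this patch adds: `recipe_run.message` is `None` until something writes to it, and `json.loads(None)` raises `TypeError`, so the parse needs an empty-object fallback. A minimal sketch of the failure mode and the guard:

```python
import json

message = None  # recipe_run.message before any updates have been recorded
# json.loads(None) raises TypeError, so fall back to an empty JSON object:
parsed = json.loads(message or "{}")
trace = "Traceback (most recent call last): ..."  # hypothetical deployment failure trace
merged = json.dumps(parsed | {"trace": trace})  # dict union requires Python 3.9+
```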
- message = json.loads(recipe_run.message) + message = json.loads(recipe_run.message or "{}") recipe_run.message = json.dumps(message | {"trace": trace}) db_session.add(recipe_run) db_session.commit() From 9cbd07fc6c66778fff07e6e629bea83214873834 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 11:13:48 -0800 Subject: [PATCH 38/66] sleep for 60s after triggering dataflow job --- tests.integration/test_dataflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 1e999f5b..e2257dd9 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -235,3 +235,5 @@ async def test_dataflow( oauth_token=gh_token.get_secret_value(), **gh_kws, ) + + time.sleep(60) From f83fd1f40b243494133879f0830d7b2dcac8686f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 11:28:09 -0800 Subject: [PATCH 39/66] run test from github workflow --- .../workflows/test-dataflow-integration.yaml | 21 +++++++++-------- tests.integration/test_dataflow.py | 23 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index e66d9a25..f343f20b 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -95,23 +95,24 @@ jobs: time.sleep(30)" \ >> $GITHUB_ENV + - name: Checkout the repo + uses: actions/checkout@v3 + - name: Run test if: | env.DEPLOYMENT_STATE == 'success' && env.IS_UP == 'True' && env.TEST_DATAFLOW == 'True' - # TODO: + # So far here, we: # - programatically make a /run comment on an existing PR in pforgetest # - check to ensure a dataflow job was submitted within a plausible timeframe + # Remaining TODO: + # - parametrize SOURCE_REPO_FULL_NAME and SOURCE_REPO_PR_NUMBER # - wait for the job to complete (5-6 mins) # - check to make sure the job was successful - - # we need to checkout pangeo-forge-orchestrator: - # - install it (to get `get_jwt` function, etc) - # - get + decrypt dev-app-proxy creds - # - call github API as described in https://github.com/pangeo-forge/pangeo-forge-orchestrator/tree/main/docs#github-app-manual-api-calls - # during the test (decrypt them here with sops -d -i ... && pytest -vx ...) 
- # - get the tests dir - uses: actions/checkout@v3 run: | - echo foo + GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ + SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ + SOURCE_REPO_PR_NUMBER=22 \ + REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ + pytest -vx tests.integration/test_dataflow.py diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index e2257dd9..e1e2da62 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -3,6 +3,7 @@ import subprocess import time from pathlib import Path +from urllib.parse import urlparse import aiohttp import pytest @@ -58,9 +59,9 @@ def gh_kws() -> dict: @pytest.fixture -def app_url() -> str: +def app_netloc() -> str: """Url on the public internet at which the app to test against is currently running.""" - return os.environ["REVIEW_APP_URL"] + return urlparse(os.environ["REVIEW_APP_URL"]).netloc @pytest.fixture @@ -89,22 +90,22 @@ def base(source_pr): @pytest_asyncio.fixture -async def pr_label(gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, app_url: str): - label_name_fmt = "fwd:{app_url}" - if "smee" not in app_url: +async def pr_label(gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, app_netloc: str): + label_name_fmt = "fwd:{app_netloc}" + if "smee" not in app_netloc: # smee proxy urls do not take the route path; heroku review apps do. label_name_fmt += "/github/hooks/" exists = False async for label in gh.getiter(f"repos/{base}/labels", **gh_kws): - if label["name"] == label_name_fmt.format(app_url=app_url): + if label["name"] == label_name_fmt.format(app_netloc=app_netloc): exists = True break if not exists: label = await gh.post( f"/repos/{base}/labels", data=dict( - name=f"fwd:{app_url}/github/hooks/", + name=f"fwd:{app_netloc}/github/hooks/", color=f"{random.randint(0, 0xFFFFFF):06x}", description="Tells dev-app-proxy GitHub App to forward webhooks to specified url.", ), @@ -128,10 +129,10 @@ async def staged_recipes_pr( base: str, pr_label: str, ): - """Makes a PR to ``pforgetest/test-staged-recipes`` and labels it ``f"fwd:{app_url}{route}"``, - where ``{route}`` is optionally the path at which the app running at ``app_url`` receives - GitHub Webhooks. The label ``f"fwd:{app_url}{route}"`` informs the ``dev-app-proxy`` GitHub App - where to forward webhooks originating from the PR. After the PR is created, its identifying + """Makes a PR to ``pforgetest/test-staged-recipes`` with labels ``f"fwd:{app_netloc}{route}"``, + where ``{route}`` is optionally the path at which the app running at ``app_netloc`` receives + GitHub Webhooks. The label ``f"fwd:{app_netloc}{route}"`` informs the ``dev-app-proxy`` GitHub + App where to forward webhooks originating from the PR. After the PR is created, its identifying information is yielded to the test function using this fixture. When control is returned to this fixture, the PR and its associated branch are closed & cleaned-up. 
""" From 349c8a6cbffd79e33d98f7e091d11b88f4a324d5 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 11:45:22 -0800 Subject: [PATCH 40/66] fix quote escaping --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index f343f20b..2197b974 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -87,7 +87,7 @@ jobs: # releases shouldn't take > 10 mins; something's gone wrong, so exit. sys.exit(1) contents = urlopen('${{ env.REVIEW_APP_URL }}').read().decode() - if contents == '{\"status\":\"ok"}': + if contents == '{\"status\":\"ok\"}': # if we get this response from the review app, it's up and ready to go. print('IS_UP=True') break From 7c46072088126b2391a49e11051b8297040d8616 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 11:59:49 -0800 Subject: [PATCH 41/66] install dependencies in test workflow --- .github/workflows/test-dataflow-integration.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 2197b974..5c396238 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -98,6 +98,10 @@ jobs: - name: Checkout the repo uses: actions/checkout@v3 + - name: Install deps + run: | + python3 -m pip install '.[dev]' + - name: Run test if: | env.DEPLOYMENT_STATE == 'success' From f468bd1826b165b75a8aa1a2e951f663a74a51b8 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 12:11:40 -0800 Subject: [PATCH 42/66] specify DATABASE_URL --- .github/workflows/test-dataflow-integration.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 5c396238..196fc0ac 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -114,7 +114,12 @@ jobs: # - parametrize SOURCE_REPO_FULL_NAME and SOURCE_REPO_PR_NUMBER # - wait for the job to complete (5-6 mins) # - check to make sure the job was successful + # FIXME: + # - DATABASE_URL is actually not needed for the test itself. It is included because + # currently pangeo_forge_orchestrator modules cannot be imported without it. This + # will probably be fixed in traitlets refactor and then DATABASE_URL can be dropped. 
run: | + DATABASE_URL=sqlite:///`pwd`/pytest-database.sqlite \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ SOURCE_REPO_PR_NUMBER=22 \ From 8527b13cc7cfce53bb0a770ae148086bbed6a651 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 16:20:21 -0800 Subject: [PATCH 43/66] install sops in workflow --- .github/workflows/test-dataflow-integration.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 196fc0ac..18cc280b 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -98,10 +98,15 @@ jobs: - name: Checkout the repo uses: actions/checkout@v3 - - name: Install deps + - name: Install orchestrator + deps run: | python3 -m pip install '.[dev]' + - name: Install sops binary + uses: mdgreenwald/mozilla-sops-action@v1.4.1 + with: + version: '3.7.3' + - name: Run test if: | env.DEPLOYMENT_STATE == 'success' From 55c2557916b9ef32262c0201df96699e81694b2f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 9 Feb 2023 20:37:49 -0800 Subject: [PATCH 44/66] rewrite test w/out importing pangeo_forge_orchestrator --- tests.integration/test_dataflow.py | 66 +++++++++++++++++------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index e1e2da62..ed779ab0 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,56 +1,64 @@ import os import random -import subprocess import time -from pathlib import Path from urllib.parse import urlparse import aiohttp +import jwt import pytest import pytest_asyncio from gidgethub.aiohttp import GitHubAPI -from pydantic import SecretStr +from gidgethub.apps import get_installation_access_token +from pydantic import BaseModel, SecretStr -from pangeo_forge_orchestrator.routers.github_app import get_access_token -PANGEO_FORGE_DEPLOYMENT = "dev-app-proxy" -os.environ["PANGEO_FORGE_DEPLOYMENT"] = PANGEO_FORGE_DEPLOYMENT +class GitHubApp(BaseModel): + name: str + id: int + private_key: SecretStr -@pytest.fixture(scope="session", autouse=True) -def decrypt_encrypt_secrets(): - repo_root = Path(__file__).parent.parent - - # FIXME: these paths will change (and i believe the bakery_secrets path will - # no longer be necessary) following completion of the traitlets refactor - dev_app_proxy_secrets = repo_root / "secrets" / "config.dev-app-proxy.yaml" - bakery_secrets = repo_root / "secrets" / "bakery-args.pangeo-ldeo-nsf-earthcube.yaml" - all_secrets = [dev_app_proxy_secrets, bakery_secrets] - - for path in all_secrets: - # decrypt secrets on test setup - subprocess.run(f"sops -d -i {path}".split()) - - yield - - # re-encrypt (restore) secrets on test teardown - subprocess.run("git restore secrets".split()) +@pytest.fixture(scope="session") +def github_app() -> GitHubApp: + return GitHubApp( + name="dev-app-proxy", + id=238613, + private_key=os.environ["DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY"], + ) @pytest_asyncio.fixture -async def gh() -> GitHubAPI: +async def gh(github_app: GitHubApp) -> GitHubAPI: """A global gidgethub session to use throughout the integration tests.""" async with aiohttp.ClientSession() as session: - yield GitHubAPI(session, PANGEO_FORGE_DEPLOYMENT) + yield GitHubAPI(session, 
github_app.name) @pytest_asyncio.fixture -async def gh_token(gh: GitHubAPI) -> SecretStr: - token = await get_access_token(gh) +async def gh_token(github_app: GitHubApp, gh: GitHubAPI, gh_kws: dict) -> SecretStr: + + payload = { + "iat": int(time.time()), + "exp": int(time.time()) + (10 * 60), + "iss": github_app.id, + } + gh_jwt = jwt.encode(payload, github_app.private_key.get_secret_value(), algorithm="RS256") + + async for installation in gh.getiter("/app/installations", jwt=gh_jwt, **gh_kws): + # dev-app-proxy is only installed in one org (i.e., pforgetest), so + # the first iteration will give us the installation_id we're after + installation_id = installation["id"] + break + token_response = await get_installation_access_token( + gh, + installation_id=installation_id, + app_id=github_app.id, + private_key=github_app.private_key.get_secret_value(), + ) # wrap in SecretStr to avoid leaking in failed test logs, # see https://github.com/pytest-dev/pytest/issues/8613 - return SecretStr(token) + return SecretStr(token_response["token"]) @pytest.fixture From fca1c4de04da6051913a7331405c1c2cddc2a162 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 09:05:48 -0800 Subject: [PATCH 45/66] remove orchestrator dependency from dataflow test --- .github/workflows/test-dataflow-integration.yaml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 18cc280b..daa7aa3e 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -98,14 +98,9 @@ jobs: - name: Checkout the repo uses: actions/checkout@v3 - - name: Install orchestrator + deps + - name: Install deps run: | - python3 -m pip install '.[dev]' - - - name: Install sops binary - uses: mdgreenwald/mozilla-sops-action@v1.4.1 - with: - version: '3.7.3' + python3 -m pip install aiohttp jwt pydantic pytest pytest-asyncio gidgethub - name: Run test if: | @@ -119,12 +114,8 @@ jobs: # - parametrize SOURCE_REPO_FULL_NAME and SOURCE_REPO_PR_NUMBER # - wait for the job to complete (5-6 mins) # - check to make sure the job was successful - # FIXME: - # - DATABASE_URL is actually not needed for the test itself. It is included because - # currently pangeo_forge_orchestrator modules cannot be imported without it. This - # will probably be fixed in traitlets refactor and then DATABASE_URL can be dropped. 
run: | - DATABASE_URL=sqlite:///`pwd`/pytest-database.sqlite \ + DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY=${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }} \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ SOURCE_REPO_PR_NUMBER=22 \ From edc4d92581f564d23443d57f298380eda52efaec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Feb 2023 17:06:08 +0000 Subject: [PATCH 46/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests.integration/test_dataflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index ed779ab0..88abbac5 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -37,7 +37,6 @@ async def gh(github_app: GitHubApp) -> GitHubAPI: @pytest_asyncio.fixture async def gh_token(github_app: GitHubApp, gh: GitHubAPI, gh_kws: dict) -> SecretStr: - payload = { "iat": int(time.time()), "exp": int(time.time()) + (10 * 60), From 418fd2e45ba37ff32ca30cd020b0c064b31d7768 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 10:16:15 -0800 Subject: [PATCH 47/66] replace \n with \n when reading key from env --- tests.integration/test_dataflow.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 88abbac5..a554269d 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -23,7 +23,18 @@ def github_app() -> GitHubApp: return GitHubApp( name="dev-app-proxy", id=238613, - private_key=os.environ["DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY"], + # the private key is passed to the env as a `\n`-delimited, single line string from github + # repository secrets. when passed to the env, single backslash `\n`s become double `\\n`s, + # so that needs to be reversed here. this is just one of many possible ways to manage + # multiline private keys in the env. and for our case, i believe the simplest option; + # see also: https://github.com/dwyl/learn-environment-variables/issues/17. + private_key=os.environ["DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY"].replace("\\n", "\n"), + # NOTE: ☝️ this ☝️ credential **must match** the latest version stored in the SOPS-encrypted + # private key for the `dev-app-proxy` app stored in pangeo-forge-orchestrator. When that key + # rotated, this corresponding credential in github repository secrets must also be updated. + # we are duplicating this credential in two places because, for ci testing, it's much simpler + # to source this from github repository secrets than it would be to SOPS-decrypt from disk. + # the cost of that simplicity, is this duplication. 
) From 3e9542cb31504cdfd5b4dc7da460db406365b551 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 10:25:39 -0800 Subject: [PATCH 48/66] quote rsa key in workflow --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index daa7aa3e..c37cc7eb 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -115,7 +115,7 @@ jobs: # - wait for the job to complete (5-6 mins) # - check to make sure the job was successful run: | - DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY=${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }} \ + DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}' \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ SOURCE_REPO_PR_NUMBER=22 \ From 449b89e8aeb9478a61a32d52be1c61752900f06f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 10:35:20 -0800 Subject: [PATCH 49/66] PyJWT for test, not jwt --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index c37cc7eb..2b451422 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -100,7 +100,7 @@ jobs: - name: Install deps run: | - python3 -m pip install aiohttp jwt pydantic pytest pytest-asyncio gidgethub + python3 -m pip install aiohttp PyJWT pydantic pytest pytest-asyncio gidgethub - name: Run test if: | From 936d7ef90a18d67a014c6436dc589f5a9e9a77e0 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 11:35:26 -0800 Subject: [PATCH 50/66] integration test sequence diagram first draft --- docs/README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/docs/README.md b/docs/README.md index 258fb5ed..dd62fb42 100644 --- a/docs/README.md +++ b/docs/README.md @@ -798,6 +798,60 @@ consider adding an alternate pathway to test against the live GitHub API and/or `pangeo-forge-runner`. This comes with its own challenges, of course, including fixturization & managing rate limits, in the case of the GitHub API. 
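The docs section added below centers on the `fwd:{url}` label convention. As a toy illustration (inferred from the convention itself, not taken from the `dev-app-proxy` source), the label name simply encodes the forwarding target, recoverable by swapping the `fwd:` prefix for `https://` — the same substitution the automated test PR bodies perform:

```python
from typing import Optional


def forward_url_from_label(label_name: str) -> Optional[str]:
    """Hypothetical helper: recover a webhook-forwarding url from a PR label name."""
    if not label_name.startswith("fwd:"):
        return None  # not a forwarding label
    return "https://" + label_name[len("fwd:") :]


assert (
    forward_url_from_label("fwd:pforge-pr-1.herokuapp.com/github/hooks/")
    == "https://pforge-pr-1.herokuapp.com/github/hooks/"
)
```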
+## Integration testing + +### Dataflow integration testing + +```mermaid + +sequenceDiagram + autonumber + actor Developer + participant orchestrator PR + participant integration test + participant Heroku API + participant Heroku Review App + participant pforgetest GitHub org + participant dev app proxy + participant Google Dataflow + + + Developer->>orchestrator PR: adds 'build-review-app' label + orchestrator PR->>Heroku API: requests review app + Heroku API-->>Heroku API: builds review app + Heroku API-->>orchestrator PR: sets deployment_status == success + Heroku API-->>Heroku Review App: begins release (~3 min) + + Developer->>orchestrator PR: adds 'test-dataflow' label + + + loop + orchestrator PR->>Heroku Review App: polls release status + Heroku Review App-->>orchestrator PR: responds with status + end + + orchestrator PR-->orchestrator PR: release status verified as 'ok' + + orchestrator PR->>integration test: calls test + + integration test->>pforgetest GitHub org: makes automated recipe PR + integration test->>pforgetest GitHub org: labels PR 'fwd:{review app url}' + + pforgetest GitHub org->>dev app proxy: sends webhook + + dev app proxy-->>Heroku Review App: forwards webhook + + Heroku Review App-->Heroku Review App: syncs PR to database + + integration test->>pforgetest GitHub org: adds `/run {recipe_id}` comment to recipe PR + + pforgetest GitHub org->>dev app proxy: sends webhook + + dev app proxy-->>Heroku Review App: forwards webhook + + Heroku Review App->>Google Dataflow: submits job +``` + # GitHub App: manual API calls Situations may arise in which you want to call the GitHup API directly, authenticated as a From cd8a050262495945f94d73f69e397551459d81cb Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 15:14:13 -0800 Subject: [PATCH 51/66] attempted fixtures refactor WIP --- tests.integration/test_dataflow.py | 125 ++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 13 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index a554269d..90d1adf4 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,7 +1,9 @@ +import json import os import random +import subprocess import time -from urllib.parse import urlparse +from urllib.parse import urljoin, urlparse import aiohttp import jwt @@ -77,9 +79,21 @@ def gh_kws() -> dict: @pytest.fixture -def app_netloc() -> str: - """Url on the public internet at which the app to test against is currently running.""" - return urlparse(os.environ["REVIEW_APP_URL"]).netloc +def app_url() -> str: + """The review app url as provided by Heroku.""" + return os.environ["REVIEW_APP_URL"] + + +@pytest.fixture +def app_netloc(app_url) -> str: + """Netloc of review app as parsed from app_url fixture.""" + return urlparse(app_url).netloc + + +@pytest.fixture +def app_recipe_runs_route(app_url) -> str: + """Route on review app under test at which recipe runs can be retrieved.""" + return urljoin(app_url, "/recipe_runs/") @pytest.fixture @@ -98,7 +112,7 @@ def source_pr() -> dict[str, str]: @pytest.fixture -def base(source_pr): +def base(source_pr: dict[str, str]): if "staged-recipes" in source_pr["repo_full_name"]: return "pforgetest/test-staged-recipes" elif source_pr["repo_full_name"].endswith("-feedstock"): @@ -138,7 +152,7 @@ async def pr_label(gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, @pytest_asyncio.fixture -async def staged_recipes_pr( +async def recipe_pr( gh: 
GitHubAPI, gh_token: SecretStr, gh_kws: dict, @@ -238,21 +252,106 @@ async def add_file(f): ) -@pytest.mark.asyncio -async def test_dataflow( +@pytest_asyncio.fixture +async def recipe_run_id(recipe_pr: dict, app_recipe_runs_route: str): + # at the start of this test, the recipes_pr fixture has already made a pr on github, but we + # don't know exactly how long it take for that pr to be synchronized to the review app, so we + # run a loop to check for when the synchronization is complete. + + # (when heroku re-builds a review app that has previously been built, the database attached to + # that review app persists between builds. the database is only reset if the review app is + # deleted, not simply rebuilt. therefore, even though each invocation of this test creates + # just one recipe_run, there can easily be many recipe runs in the heroku review app database. + # as such, we parse which specific recipe_run we're currently testing by comparing head_shas.) + time.sleep(10) + start = time.time() + print("\nQuerying review app database for recipe run id...") + while True: + elapsed = time.time() - start + async with aiohttp.ClientSession() as session: + get_runs = await session.get(app_recipe_runs_route) + runs = await get_runs.json() + print(f"{len(runs) = }") + print(f"{elapsed = }") + if any([r["head_sha"] == recipe_pr["head"]["sha"] for r in runs]): + print("inside any block") + run_id = [r for r in runs if r["head_sha"] == recipe_pr["head"]["sha"]][0]["id"] + break + elif elapsed > 60: + # synchronization should only take a few seconds, so if more than 30 + # seconds has elapsed, something has gone wrong and we should bail out. + pytest.fail(f"Time {elapsed = } on synchronization.") + else: + # if no head_shas match, the sync task may + # still be running, so wait 2s then retry. + time.sleep(5) + yield run_id + + +@pytest_asyncio.fixture +async def dataflow_job_id( + recipe_run_id: int, + app_recipe_runs_route: str, gh: GitHubAPI, gh_token: SecretStr, gh_kws: dict, base: str, - staged_recipes_pr: dict, + recipe_pr: dict, ): - time.sleep(10) - + # now we know the pr is synced, it's time to dispatch the `/run` command await gh.post( - f"/repos/{base}/issues/{staged_recipes_pr['number']}/comments", + f"/repos/{base}/issues/{recipe_pr['number']}/comments", + # FIXME: parametrize the recipe_id (currently hardcoded as gpcp-from-gcs). + # this is necessary to test against other recipes. data=dict(body="/run gpcp-from-gcs"), oauth_token=gh_token.get_secret_value(), **gh_kws, ) + # start polling the review app database to see if the job has been deployed to dataflow. + # if the job was deployed to dataflow, a job_id field will exist in the recipe_run message. + start = time.time() + while True: + elapsed = time.time() - start + async with aiohttp.ClientSession() as session: + get_run = await session.get(urljoin(app_recipe_runs_route, str(recipe_run_id))) + run = await get_run.json() + message = json.loads(run["message"] or "{}") + if "job_id" in message: + job_id = message["job_id"] + break + elif elapsed > 60 * 5: + # job submission is taking longer than 5 minutes, something must be wrong, so bail. 
+ pytest.fail(f"Time {elapsed = } on job submission exceedes 5 minutes.") + else: + # if there is no job_id in the message, and less than 5 minutes has elapsed in this + # loop, the job submission might still be in process, so wait 30 seconds and retry + time.sleep(30) + yield job_id + - time.sleep(60) +@pytest.mark.asyncio +async def test_dataflow(dataflow_job_id: str): + show_job = f"gcloud dataflow jobs show {dataflow_job_id} --format=json".split() + # at this point, the job has been submitted and we know the job_id, so time to start polling + # dataflow to see if its completed. + start = time.time() + while True: + elapsed = time.time() - start + print(f"Time {elapsed = }") + if elapsed > 60 * 12: + pytest.fail(f"Time {elapsed = } exceedes 12 minutes.") + + # check job state + state_proc = subprocess.run(show_job, capture_output=True) + assert state_proc.returncode == 0 + state = json.loads(state_proc.stdout)["state"] + print(f"Current {state = }") + if state == "Done": + # on Dataflow, "Done" means success + break + elif state == "Running": + # still running, let's give it another 30s then check again + time.sleep(30) + else: + # consider any other state a failure + pytest.fail(f"{state = } is neither 'Done' nor 'Running'") From 37a9954f91b1012023e530abcf136aae7c804c5f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 10 Feb 2023 16:49:22 -0800 Subject: [PATCH 52/66] ensure pr fixture gives correct sha, add logging --- tests.integration/test_dataflow.py | 48 ++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 90d1adf4..a66b1a57 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -1,3 +1,4 @@ +import asyncio import json import os import random @@ -236,7 +237,13 @@ async def add_file(f): # https://github.com/pangeo-forge/pangeo-forge-orchestrator/pull/226#issuecomment-1423337307 await add_file(src_files[1]) - yield pr + # wait a moment to make sure new file is set on github, then get the pr + # in its current state (otherwise head_sha will not reflect latests commit) + await asyncio.sleep(3) + completed_pr = await gh.getitem(f"/repos/{base}/pulls/{pr['number']}", **gh_kws) + + print(f"\nYielding {completed_pr['head']['sha'] = } from recipes_pr fixture...") + yield completed_pr # close pr and delete branch await gh.patch( @@ -263,28 +270,26 @@ async def recipe_run_id(recipe_pr: dict, app_recipe_runs_route: str): # deleted, not simply rebuilt. therefore, even though each invocation of this test creates # just one recipe_run, there can easily be many recipe runs in the heroku review app database. # as such, we parse which specific recipe_run we're currently testing by comparing head_shas.) 
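A minimal sketch of the `head_sha` matching described in the comment above, which singles out this invocation's recipe run among leftovers from earlier review app builds (payload shape assumed from the `/recipe_runs/` responses this test consumes):

```python
runs = [  # hypothetical /recipe_runs/ response
    {"id": 7, "head_sha": "aaa111"},  # left over from a previous test invocation
    {"id": 8, "head_sha": "bbb222"},  # created by this invocation's PR
]
pr_head_sha = "bbb222"  # i.e., recipe_pr["head"]["sha"]
matches = [r for r in runs if r["head_sha"] == pr_head_sha]
assert matches and matches[0]["id"] == 8
```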
- time.sleep(10) + await asyncio.sleep(10) start = time.time() - print("\nQuerying review app database for recipe run id...") + print("Querying review app database for recipe run id...") while True: elapsed = time.time() - start async with aiohttp.ClientSession() as session: get_runs = await session.get(app_recipe_runs_route) runs = await get_runs.json() - print(f"{len(runs) = }") - print(f"{elapsed = }") if any([r["head_sha"] == recipe_pr["head"]["sha"] for r in runs]): - print("inside any block") run_id = [r for r in runs if r["head_sha"] == recipe_pr["head"]["sha"]][0]["id"] + print(f"Found matching recipe run in review app database with recipe_{run_id = }...") break - elif elapsed > 60: + elif elapsed > 30: # synchronization should only take a few seconds, so if more than 30 # seconds has elapsed, something has gone wrong and we should bail out. pytest.fail(f"Time {elapsed = } on synchronization.") else: # if no head_shas match, the sync task may # still be running, so wait 2s then retry. - time.sleep(5) + await asyncio.sleep(5) yield run_id @@ -299,16 +304,19 @@ async def dataflow_job_id( recipe_pr: dict, ): # now we know the pr is synced, it's time to dispatch the `/run` command + comment_body = "/run gpcp-from-gcs" + print(f"Making comment on test PR with {comment_body = }") await gh.post( f"/repos/{base}/issues/{recipe_pr['number']}/comments", # FIXME: parametrize the recipe_id (currently hardcoded as gpcp-from-gcs). # this is necessary to test against other recipes. - data=dict(body="/run gpcp-from-gcs"), + data=dict(body=comment_body), oauth_token=gh_token.get_secret_value(), **gh_kws, ) # start polling the review app database to see if the job has been deployed to dataflow. # if the job was deployed to dataflow, a job_id field will exist in the recipe_run message. + print("Polling review app for dataflow job submission status...") start = time.time() while True: elapsed = time.time() - start @@ -318,28 +326,38 @@ async def dataflow_job_id( message = json.loads(run["message"] or "{}") if "job_id" in message: job_id = message["job_id"] + print(f"Confirmed dataflow job submitted with {job_id = }") break elif elapsed > 60 * 5: # job submission is taking longer than 5 minutes, something must be wrong, so bail. - pytest.fail(f"Time {elapsed = } on job submission exceedes 5 minutes.") + pytest.fail(f"Time {elapsed = } on job submission.") else: # if there is no job_id in the message, and less than 5 minutes has elapsed in this # loop, the job submission might still be in process, so wait 30 seconds and retry - time.sleep(30) + await asyncio.sleep(30) yield job_id @pytest.mark.asyncio async def test_dataflow(dataflow_job_id: str): - show_job = f"gcloud dataflow jobs show {dataflow_job_id} --format=json".split() + # NOTE: much of this test is redundant with dataflow integration test + # https://github.com/pangeo-forge/pangeo-forge-runner/ + # blob/c7c5e88c006ce5f5ea636d061423981bb9d23734/tests/integration/test_dataflow_integration.py + + # 6 minutes seems like an average runtime for these jobs, but being optimistic + # let's start by waiting 5 minutes + start = time.time() + utc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(start)) + print(f"Waiting for 5 mins, starting at {utc_time = }") + time.sleep(60 * 5) # at this point, the job has been submitted and we know the job_id, so time to start polling # dataflow to see if its completed. 
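A short sketch of how the polling loop below interprets the job states reported by `gcloud dataflow jobs show` (per the loop's own comments: only "Done" counts as success, "Running" means keep waiting, and any other state fails the test):

```python
def classify_state(state: str) -> str:
    # state names as reported by `gcloud dataflow jobs show --format=json`
    if state == "Done":
        return "success"  # on Dataflow, "Done" means success
    if state == "Running":
        return "keep polling"
    return "failure"  # e.g. "Failed" or "Cancelled"


assert classify_state("Done") == "success"
```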
- start = time.time() + show_job = f"gcloud dataflow jobs show {dataflow_job_id} --format=json".split() while True: elapsed = time.time() - start print(f"Time {elapsed = }") if elapsed > 60 * 12: - pytest.fail(f"Time {elapsed = } exceedes 12 minutes.") + pytest.fail(f"Time {elapsed = } on running job.") # check job state state_proc = subprocess.run(show_job, capture_output=True) @@ -351,7 +369,7 @@ async def test_dataflow(dataflow_job_id: str): break elif state == "Running": # still running, let's give it another 30s then check again - time.sleep(30) + await asyncio.sleep(30) else: # consider any other state a failure pytest.fail(f"{state = } is neither 'Done' nor 'Running'") From 0e06182a2c6e2ef4c6997dba83404468218e6082 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Mon, 13 Feb 2023 14:57:45 -0800 Subject: [PATCH 53/66] add gcp dataflow readonly creds to integration test --- .github/workflows/test-dataflow-integration.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 2b451422..8af7e9d3 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -102,6 +102,13 @@ jobs: run: | python3 -m pip install aiohttp PyJWT pydantic pytest pytest-asyncio gidgethub + - name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + # the creds to deploy jobs to dataflow are packaged with the review app itself, but + # this test needs its own read only creds so that it can poll dataflow for job status + credentials_json: '${{ secrets.GCP_DATAFLOW_READONLY_SERVICE_KEY }}' + - name: Run test if: | env.DEPLOYMENT_STATE == 'success' From 17deec1262199cb572b611365b6c81766123ec68 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:30:43 -0800 Subject: [PATCH 54/66] test job status notification comment --- .../workflows/test-dataflow-integration.yaml | 2 +- tests.integration/test_dataflow.py | 50 ++++++++++++++++++- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 8af7e9d3..142fbf53 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -127,4 +127,4 @@ jobs: SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ SOURCE_REPO_PR_NUMBER=22 \ REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ - pytest -vx tests.integration/test_dataflow.py + pytest -vxs tests.integration/test_dataflow.py diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index a66b1a57..e95c3bb7 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -338,8 +338,8 @@ async def dataflow_job_id( yield job_id -@pytest.mark.asyncio -async def test_dataflow(dataflow_job_id: str): +@pytest_asyncio.fixture +async def dataflow_job_state(dataflow_job_id: str): # NOTE: much of this test is redundant with dataflow integration test # https://github.com/pangeo-forge/pangeo-forge-runner/ # blob/c7c5e88c006ce5f5ea636d061423981bb9d23734/tests/integration/test_dataflow_integration.py @@ -373,3 +373,49 @@ async def test_dataflow(dataflow_job_id: str): else: # consider any other state a failure pytest.fail(f"{state = } is neither 'Done' nor 'Running'") + # if we get here without failing out, the yielded state 
should be 'Done' + yield state + + +@pytest_asyncio.fixture +async def job_status_notification_comment_body( + gh: GitHubAPI, + gh_kws: dict, + base: str, + recipe_pr: dict, + dataflow_job_state: str, +): + # this value is not actually used below, but we include it as a fixture + # here to preserve the desired inheritance path of fixtures in this module + assert dataflow_job_state == "Done" + + start = time.time() + while True: + elapsed = time.time() - start + if elapsed > 60 * 5: + pytest.fail(f"Time {elapsed = } waiting for job success notification comment.") + + comments = await gh.getitem( + f"/repos/{base}/issues/{recipe_pr['number']}/comments", + **gh_kws, + ) + if comments: + last_comment_body: str = comments[-1]["body"] + if not last_comment_body.startswith("/run"): + break + + else: + await asyncio.sleep(15) + + yield last_comment_body + + +@pytest.mark.asyncio +async def test_end_to_end_integration( + job_status_notification_comment_body: str, + recipe_pr: dict, +): + assert job_status_notification_comment_body.startswith( + # TODO: parametrize recipe_id (vs. hardcoded gpcp-from-gcs) + f":tada: The test run of `gpcp-from-gcs` at {recipe_pr['head']['sha']} succeeded!" + ) From 040cf646343fc0de34859e4a0771299c8ba5cf7b Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:44:25 -0800 Subject: [PATCH 55/66] variablize recipe_id --- .github/workflows/test-dataflow-integration.yaml | 1 + tests.integration/test_dataflow.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 142fbf53..ec26c58b 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -126,5 +126,6 @@ jobs: GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ SOURCE_REPO_PR_NUMBER=22 \ + RECIPE_ID=gpcp-from-gcs \ REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ pytest -vxs tests.integration/test_dataflow.py diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index e95c3bb7..edbfb5ca 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -112,6 +112,12 @@ def source_pr() -> dict[str, str]: ) +@pytest.fixture +def recipe_id() -> str: + """The recipe_id of the recipe defined in the PR to run during this test.""" + return os.environ["RECIPE_ID"] + + @pytest.fixture def base(source_pr: dict[str, str]): if "staged-recipes" in source_pr["repo_full_name"]: @@ -302,14 +308,13 @@ async def dataflow_job_id( gh_kws: dict, base: str, recipe_pr: dict, + recipe_id: str, ): # now we know the pr is synced, it's time to dispatch the `/run` command - comment_body = "/run gpcp-from-gcs" + comment_body = f"/run {recipe_id}" print(f"Making comment on test PR with {comment_body = }") await gh.post( f"/repos/{base}/issues/{recipe_pr['number']}/comments", - # FIXME: parametrize the recipe_id (currently hardcoded as gpcp-from-gcs). - # this is necessary to test against other recipes. data=dict(body=comment_body), oauth_token=gh_token.get_secret_value(), **gh_kws, @@ -414,8 +419,8 @@ async def job_status_notification_comment_body( async def test_end_to_end_integration( job_status_notification_comment_body: str, recipe_pr: dict, + recipe_id: str, ): assert job_status_notification_comment_body.startswith( - # TODO: parametrize recipe_id (vs. 
hardcoded gpcp-from-gcs) - f":tada: The test run of `gpcp-from-gcs` at {recipe_pr['head']['sha']} succeeded!" + f":tada: The test run of `{recipe_id}` at {recipe_pr['head']['sha']} succeeded!" ) From ed945ff0fae19044897d00e6070ecc8aca74383b Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:19:15 -0800 Subject: [PATCH 56/66] consolidate source recipe into single var/fixture --- .../workflows/test-dataflow-integration.yaml | 4 +--- tests.integration/test_dataflow.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index ec26c58b..e3bddb24 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -124,8 +124,6 @@ jobs: run: | DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}' \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ - SOURCE_REPO_FULL_NAME=pforgetest/test-staged-recipes \ - SOURCE_REPO_PR_NUMBER=22 \ - RECIPE_ID=gpcp-from-gcs \ + SOURCE_RECIPE=pforgetest/test-staged-recipes:22:gpcp-from-gcs \ REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ pytest -vxs tests.integration/test_dataflow.py diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index edbfb5ca..00b0f7d0 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -104,18 +104,23 @@ def gh_workflow_run_id() -> str: @pytest.fixture -def source_pr() -> dict[str, str]: +def source_recipe() -> tuple[str, str, str]: + repo_full_name, pr_number, recipe_id = os.environ["SOURCE_RECIPE"].split(":") + return repo_full_name, pr_number, recipe_id + + +@pytest.fixture +def source_pr(source_recipe) -> dict[str, str]: """A PR to replicate for this test.""" - return dict( - repo_full_name=os.environ["SOURCE_REPO_FULL_NAME"], - pr_number=os.environ["SOURCE_REPO_PR_NUMBER"], - ) + repo_full_name, pr_number, _ = source_recipe + return {"repo_full_name": repo_full_name, "pr_number": pr_number} @pytest.fixture -def recipe_id() -> str: +def recipe_id(source_recipe) -> str: """The recipe_id of the recipe defined in the PR to run during this test.""" - return os.environ["RECIPE_ID"] + _, _, recipe_id = source_recipe + return recipe_id @pytest.fixture From e6d9e429dcddd14ae9dba8a95f338b2a664cc495 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:55:29 -0800 Subject: [PATCH 57/66] always assume reference pr is on test-staged-recipes --- .../workflows/test-dataflow-integration.yaml | 2 +- tests.integration/test_dataflow.py | 36 +++++++++---------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index e3bddb24..5727a19a 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -124,6 +124,6 @@ jobs: run: | DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}' \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ - SOURCE_RECIPE=pforgetest/test-staged-recipes:22:gpcp-from-gcs \ + PR_NUMBER_AND_RECIPE_ID=22::gpcp-from-gcs \ REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ pytest -vxs tests.integration/test_dataflow.py diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 00b0f7d0..0194471f 100644 --- 
a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -104,33 +104,29 @@ def gh_workflow_run_id() -> str: @pytest.fixture -def source_recipe() -> tuple[str, str, str]: - repo_full_name, pr_number, recipe_id = os.environ["SOURCE_RECIPE"].split(":") - return repo_full_name, pr_number, recipe_id +def base(): + """The base repo against which the reference (i.e. source) PR has been made.""" + return "pforgetest/test-staged-recipes" @pytest.fixture -def source_pr(source_recipe) -> dict[str, str]: - """A PR to replicate for this test.""" - repo_full_name, pr_number, _ = source_recipe - return {"repo_full_name": repo_full_name, "pr_number": pr_number} +def pr_number_and_recipe_id() -> tuple[str, str]: + pr_number, recipe_id = os.environ["PR_NUMBER_AND_RECIPE_ID"].split("::") + return pr_number, recipe_id @pytest.fixture -def recipe_id(source_recipe) -> str: - """The recipe_id of the recipe defined in the PR to run during this test.""" - _, _, recipe_id = source_recipe - return recipe_id +def source_pr_number(pr_number_and_recipe_id) -> dict[str, str]: + """The number of a PR on pforgetest/test-staged-recipes to replicate for this test.""" + pr_number, _ = pr_number_and_recipe_id + return pr_number @pytest.fixture -def base(source_pr: dict[str, str]): - if "staged-recipes" in source_pr["repo_full_name"]: - return "pforgetest/test-staged-recipes" - elif source_pr["repo_full_name"].endswith("-feedstock"): - # TODO: add a repo in `pforgetest` which can accept prs from any feedstock. - # this would essentially be just a blank repo containing an empty `feedstock/` directory. - raise NotImplementedError +def recipe_id(pr_number_and_recipe_id) -> str: + """The recipe_id of the recipe defined in the PR to run during this test.""" + _, recipe_id = pr_number_and_recipe_id + return recipe_id @pytest_asyncio.fixture @@ -169,7 +165,7 @@ async def recipe_pr( gh_token: SecretStr, gh_kws: dict, gh_workflow_run_id: str, - source_pr: dict[str, str], + source_pr_number: str, base: str, pr_label: str, ): @@ -197,7 +193,7 @@ async def recipe_pr( # populate that branch with content files from the source pr src_files = await gh.getitem( - f"repos/{source_pr['repo_full_name']}/pulls/{source_pr['pr_number']}/files", + f"repos/{base}/pulls/{source_pr_number}/files", **gh_kws, ) From 15b98b4144ebaa5e6e9dfd741170ff04d1961744 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 14:20:39 -0800 Subject: [PATCH 58/66] generate prs matrix --- .../workflows/test-dataflow-integration.yaml | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 5727a19a..e0328637 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -8,6 +8,23 @@ on: types: [labeled] jobs: + matrix-generate-prs: + # Generates the matrix of reference prs to test against. Compare: + # - https://blog.aspect.dev/github-actions-dynamic-matrix + # - https://github.com/aspect-build/bazel-lib/blob/ + # 0c8ef86684d5a3335bb5e911a51d64e5fab39f9b/.github/workflows/ci.yaml + runs-on: ubuntu-latest + steps: + - id: default + run: echo "pr=22::gpcp-from-gcs" >> $GITHUB_OUTPUT + # - id: also-test + # if: contains( toJSON(github.event.pull_request.labels.*.name), 'also-test') + # run: | + # echo "pr=..." 
>> $GITHUB_OUTPUT + outputs: + # Will look like '["22::gpcp-from-gcs", etc...]' + prs: ${{ toJSON(steps.*.outputs.pr) }} + test: # run when: # - a PR is labeled 'test-dataflow' @@ -16,6 +33,15 @@ jobs: # - heroku marks a deployment with 'state' == 'success' # (assuming PR also has 'test-dataflow' label) runs-on: ubuntu-latest + + needs: + - matrix-generate-prs + + strategy: + fail-fast: false + matrix: + prs: ${{ fromJSON(needs.matrix-generate-prs.outputs.prs) }} + steps: # conditional step if triggering event is a pull_request - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from pull_request @@ -124,6 +150,6 @@ jobs: run: | DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY='${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}' \ GH_WORKFLOW_RUN_ID=${{ github.run_id }} \ - PR_NUMBER_AND_RECIPE_ID=22::gpcp-from-gcs \ + PR_NUMBER_AND_RECIPE_ID=${{ matrix.prs }} \ REVIEW_APP_URL=${{ env.REVIEW_APP_URL }} \ pytest -vxs tests.integration/test_dataflow.py From f97ed76f2d996d574981d5bdd1eb730526560074 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 14:58:41 -0800 Subject: [PATCH 59/66] add also-test block to matrix generator --- .github/workflows/test-dataflow-integration.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index e0328637..98b35929 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -17,10 +17,14 @@ jobs: steps: - id: default run: echo "pr=22::gpcp-from-gcs" >> $GITHUB_OUTPUT - # - id: also-test - # if: contains( toJSON(github.event.pull_request.labels.*.name), 'also-test') - # run: | - # echo "pr=..." 
>> $GITHUB_OUTPUT + - id: also-test + if: contains( join(github.event.pull_request.labels.*.name), 'also-test') + run: | + echo '${{ toJSON(github.event.pull_request.labels.*.name) }}' \ + | python3 -c " + import json, sys; + print('\n'.join([f'pr={l}' for l in json.load(sys.stdin) if l.startswith('also-test')])) + " >> $GITHUB_OUTPUT outputs: # Will look like '["22::gpcp-from-gcs", etc...]' prs: ${{ toJSON(steps.*.outputs.pr) }} From 05ad3a8300caacbc2ecd3fd7aacf22a3df66db67 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 16:52:48 -0800 Subject: [PATCH 60/66] for pr event, get also-test via http request --- .../workflows/test-dataflow-integration.yaml | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 98b35929..78921f78 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -17,13 +17,36 @@ jobs: steps: - id: default run: echo "pr=22::gpcp-from-gcs" >> $GITHUB_OUTPUT - - id: also-test - if: contains( join(github.event.pull_request.labels.*.name), 'also-test') + + - id: also-test-from-deployment-status + if: | + github.event_name == 'deployment_status' run: | - echo '${{ toJSON(github.event.pull_request.labels.*.name) }}' \ + export ENVIRONMENT=${{ github.event.deployment_status.environment }} \ + && python3 -c " + import os; print(os.environ['ENVIRONMENT'].split('-')[-1])" \ + | xargs -I{} curl -s ${{ github.event.deployment_status.repository_url }}/pulls/{} \ | python3 -c " import json, sys; - print('\n'.join([f'pr={l}' for l in json.load(sys.stdin) if l.startswith('also-test')])) + labels = json.load(sys.stdin)['labels']; + also_test = [l.split(":")[-1] for l in labels if l.startswith('also-test')] + if also_test: + for label in also_test: + print(f'pr={label}') + " >> $GITHUB_OUTPUT + + - id: also-test-from-pull-request + if: | + github.event_name == 'pull_request' + && contains( join(github.event.pull_request.labels.*.name), 'also-test') + run: | + python3 -c " + import json; + labels = json.loads('${{ toJSON(github.event.pull_request.labels.*.name) }}') + also_test = [l.split(":")[-1] for l in labels if l.startswith('also-test')] + if also_test: + for label in also_test: + print(f'pr={label}') " >> $GITHUB_OUTPUT outputs: # Will look like '["22::gpcp-from-gcs", etc...]' From 8d24d3c3449ca59dcfd35ca93866496ba98ef15d Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 17:06:45 -0800 Subject: [PATCH 61/66] single quotes in python command --- .github/workflows/test-dataflow-integration.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 78921f78..26b7451b 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -29,7 +29,7 @@ jobs: | python3 -c " import json, sys; labels = json.load(sys.stdin)['labels']; - also_test = [l.split(":")[-1] for l in labels if l.startswith('also-test')] + also_test = [l.split(':')[-1] for l in labels if l.startswith('also-test')] if also_test: for label in also_test: print(f'pr={label}') @@ -43,7 +43,7 @@ jobs: python3 -c " import json; labels = json.loads('${{ toJSON(github.event.pull_request.labels.*.name) }}') - also_test = 
[l.split(":")[-1] for l in labels if l.startswith('also-test')] + also_test = [l.split(':')[-1] for l in labels if l.startswith('also-test')] if also_test: for label in also_test: print(f'pr={label}') From 5f52bcd2e058560f80c2f8a7bebd163dc845eb81 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 17:15:51 -0800 Subject: [PATCH 62/66] label 'name' field, got me again --- .github/workflows/test-dataflow-integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 26b7451b..9f8de9d8 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -29,7 +29,7 @@ jobs: | python3 -c " import json, sys; labels = json.load(sys.stdin)['labels']; - also_test = [l.split(':')[-1] for l in labels if l.startswith('also-test')] + also_test = [l['name'].split(':')[-1] for l in labels if l['name'].startswith('also-test')] if also_test: for label in also_test: print(f'pr={label}') From 9b2f5485010fb553e3465987920912f48316a834 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 14 Feb 2023 17:23:28 -0800 Subject: [PATCH 63/66] split labels on 'also-test:' string --- .github/workflows/test-dataflow-integration.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-dataflow-integration.yaml b/.github/workflows/test-dataflow-integration.yaml index 9f8de9d8..fc3e92bc 100644 --- a/.github/workflows/test-dataflow-integration.yaml +++ b/.github/workflows/test-dataflow-integration.yaml @@ -29,7 +29,9 @@ jobs: | python3 -c " import json, sys; labels = json.load(sys.stdin)['labels']; - also_test = [l['name'].split(':')[-1] for l in labels if l['name'].startswith('also-test')] + also_test = [ + l['name'].split('also-test:')[-1] for l in labels if l['name'].startswith('also-test') + ] if also_test: for label in also_test: print(f'pr={label}') @@ -43,7 +45,7 @@ jobs: python3 -c " import json; labels = json.loads('${{ toJSON(github.event.pull_request.labels.*.name) }}') - also_test = [l.split(':')[-1] for l in labels if l.startswith('also-test')] + also_test = [l.split('also-test:')[-1] for l in labels if l.startswith('also-test')] if also_test: for label in also_test: print(f'pr={label}') From 5896a3cdd299e94bfe0433e80e2694400f216994 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 15 Feb 2023 14:55:13 -0800 Subject: [PATCH 64/66] use job-specific identifier for ref name --- tests.integration/test_dataflow.py | 33 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 0194471f..3431bbca 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -110,23 +110,20 @@ def base(): @pytest.fixture -def pr_number_and_recipe_id() -> tuple[str, str]: - pr_number, recipe_id = os.environ["PR_NUMBER_AND_RECIPE_ID"].split("::") - return pr_number, recipe_id +def pr_number_and_recipe_id() -> str: + return os.environ["PR_NUMBER_AND_RECIPE_ID"] @pytest.fixture -def source_pr_number(pr_number_and_recipe_id) -> dict[str, str]: +def source_pr_number(pr_number_and_recipe_id: str) -> str: """The number of a PR on pforgetest/test-staged-recipes to replicate for this test.""" - pr_number, _ = 
pr_number_and_recipe_id - return pr_number + return pr_number_and_recipe_id.split("::")[0] @pytest.fixture -def recipe_id(pr_number_and_recipe_id) -> str: +def recipe_id(pr_number_and_recipe_id: str) -> str: """The recipe_id of the recipe defined in the PR to run during this test.""" - _, recipe_id = pr_number_and_recipe_id - return recipe_id + return pr_number_and_recipe_id.split("::")[-1] @pytest_asyncio.fixture @@ -168,6 +165,7 @@ async def recipe_pr( source_pr_number: str, base: str, pr_label: str, + pr_number_and_recipe_id: str, ): """Makes a PR to ``pforgetest/test-staged-recipes`` with labels ``f"fwd:{app_netloc}{route}"``, where ``{route}`` is optionally the path at which the app running at ``app_netloc`` receives @@ -181,10 +179,18 @@ async def recipe_pr( # from that process here may introduce some sublte differences with production. for now, we are # accepting that as the cost for doing this more simply; i.e., all within a single repo.) main = await gh.getitem(f"/repos/{base}/branches/main", **gh_kws) + + jobs = await gh.getitem( + f"/repos/pangeo-forge/pangeo-forge-orchestrator/actions/runs/{gh_workflow_run_id}/jobs" + ) + this_job = [j for j in jobs["jobs"] if pr_number_and_recipe_id in j["name"]].pop(0) + # example working branch name would be runs/4179677701/jobs/7239892586, as parsed from html url + # 'https://github.com/pangeo-forge/pangeo-forge-orchestrator/actions/runs/4179677701/jobs/7239892586' + working_branch_name = this_job["html_url"].split("/actions/")[-1] working_branch = await gh.post( f"/repos/{base}/git/refs", data=dict( - ref=f"refs/heads/actions/runs/{gh_workflow_run_id}", + ref=f"refs/heads/{working_branch_name}", sha=main["commit"]["sha"], ), oauth_token=gh_token.get_secret_value(), @@ -204,7 +210,7 @@ async def add_file(f): data=dict( message=f"Adding {f['filename']}", content=content["content"], - branch=f"actions/runs/{gh_workflow_run_id}", + branch=working_branch_name, ), oauth_token=gh_token.get_secret_value(), **gh_kws, @@ -219,10 +225,9 @@ async def add_file(f): f"/repos/{base}/pulls", data=dict( title=f"[CI] Automated PR for workflow run {gh_workflow_run_id}", - head=f"actions/runs/{gh_workflow_run_id}", + head=working_branch_name, body=( - ":robot: Created for test run https://github.com/pangeo-forge/" - f"pangeo-forge-orchestrator/actions/runs/{gh_workflow_run_id}\n" + f":robot: Created by test run job {this_job['html_url']}\n" f":memo: Which is testing {pr_label.replace('fwd:', 'https://')}" ), base="main", From 29512a1e579000ee02dd671291154c45baef2dc1 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 15 Feb 2023 15:03:29 -0800 Subject: [PATCH 65/66] use working branch name in pr title --- tests.integration/test_dataflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.integration/test_dataflow.py b/tests.integration/test_dataflow.py index 3431bbca..d125e626 100644 --- a/tests.integration/test_dataflow.py +++ b/tests.integration/test_dataflow.py @@ -224,7 +224,7 @@ async def add_file(f): pr = await gh.post( f"/repos/{base}/pulls", data=dict( - title=f"[CI] Automated PR for workflow run {gh_workflow_run_id}", + title=f"[CI] Automated PR for {working_branch_name}", head=working_branch_name, body=( f":robot: Created by test run job {this_job['html_url']}\n" From 5bdeaa389882368ca30d5c71e1e380d9271ac553 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Wed, 15 Feb 2023 18:18:49 -0800 Subject: [PATCH 66/66] integration test 
docs --- docs/README.md | 73 ++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/docs/README.md b/docs/README.md index dd62fb42..fd3d316d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -386,59 +386,16 @@ Both staging and prod are set up with [automatic certificate management](https:/ ## `review` -For each PR to `pangeo-forge-recipes`, Heroku creates a Review App which will hang around for two days. -This is a live version of the app running against an ephemeral database. It can be used for manual -checks or further integration testing. +To build a Heroku Review App for a PR, simply add the `build-review-app` label to the PR. This will +trigger a build of the review app which, once deployed, will be served at the ephemeral address +https://pforge-pr-{PR_NUMBER}.herokuapp.com/. > We use Heroku's Review Apps -> [injected-environment-variables](https://devcenter.heroku.com/articles/github-integration-review-apps#injected-environment-variables) -> feature to dynamically set the `PANGEO_FORGE_DEPLOYMENT` env var for review apps to the value of the injected env var `HEROKU_APP_NAME`, which follows the pattern `pforge-pr-${PR number}`. Take a look at both `heroku.yml` and `scripts.deploy/release.sh` to see where this happens. - -> We also use Heroku's Review Apps > [postdeploy script](https://devcenter.heroku.com/articles/github-integration-review-apps#the-postdeploy-script) > feature to automatically seed each review app database with the > https://github.com/pforgetest/test-staged-recipes feedstock. > To see where this happens (and/or seed additional test data), see `postdeploy/seed_review_app_data.py`. -To ensure a successful build of a Review App for your PR: - -1. Generate a `secrets/pforge-pr-${PR number}.yaml` secrets config for your review app, by: - 1. Running: - ```console - $ python3 scripts.develop/generate_api_key.py pforge-pr-${PR number} - ``` - 2. Then running: - ```console - $ GITHUB_PAT=${Your GitHub PAT} python3 scripts.develop/new_github_app.py ${GitHub Username} review ${PR number} - ``` - and following the link to complete the in-browser OAuth flow. -2. Encrypting the secrets config with: - ```console - $ sops -e -i secrets/pforge-pr-${PR number}.yaml - ``` -3. Rename the review app bakery config as follows (where `SOME_OTHER_NUMBER` is a prior review app's PR number): - ```console - $ mv bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-SOME_OTHER_NUMBER.yaml bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-${PR number}.yaml - ``` -4. Push these changes (the secrets and bakeries config for your review app) to your PR -5. From https://github.com/organizations/pforgetest/settings/apps, manually install the - `pforge-pr-${PR number}` app on all repos in the `pforgetest` org. (Optionally, suspend - all other installations from `pforgetest`, so that multiple apps are not active at one time.) -6. Update the Review App's webhook url (decrypting your review app creds first): - ```console - $ sops -d -i secrets/pforge-pr-${PR number}.yaml - $ python3 scripts/update_hook_url.py review http://pforge-pr-${PR number}.herokuapp.com - ``` - -> **Tip**: The first review app build often fails, perhaps because the PR was opened before all -> of the above steps were complete. If the build fails, navigate to -> [the Heroku Pipeline dashboard](https://dashboard.heroku.com/pipelines/17cc0239-494f-4a68-aa75-3da7c466709c) -> and manually delete the Review App. Then, manually click -> "create review app". 
Typically, this build will succeed if everything has been configured
-> correctly. If it fails, there may be a deeper issue in play. _**Important**_: A failed Review
-> App build _will not be fixed_ by simply re-releasing. It must be deleted and then rebuilt from
-> scratch.
-
 ## `staging`

 Changes merged to `main` will deploy the
@@ -800,7 +757,29 @@ rate limits, in the case of the GitHub API.

 ## Integration testing

-### Dataflow integration testing
+### Dataflow
+
+To run a Dataflow integration test from a PR, add the following two labels to the PR:
+
+- `build-review-app`: This builds a Heroku Review App for the PR.
+- `test-dataflow`: This deploys a job to Dataflow from the Heroku Review App for the PR.
+
+By default, the Dataflow integration test creates an automated PR on the `pforgetest/test-staged-recipes`
+repo by duplicating https://github.com/pforgetest/test-staged-recipes/pull/22, and then triggers a test
+run of the `gpcp-from-gcs` recipe in that PR by commenting `/run gpcp-from-gcs` on the auto-generated PR.
+
+To also test against additional recipes as part of this integration test, create and add a third label to
+the PR, following the format:
+
+- `also-test:{TEST_STAGED_RECIPES_PR_NUMBER}::{RECIPE_ID}`: Here, `TEST_STAGED_RECIPES_PR_NUMBER` is the
+  number of an actual PR on the `pforgetest/test-staged-recipes` repository which you would like to
+  duplicate for the test, and `RECIPE_ID` is the name of a recipe referenced in the `meta.yaml` of that PR.
+  You may need to first create a reference PR on `pforgetest/test-staged-recipes` containing the desired
+  files.
+
+Once the integration test is underway, you will find the automated PR on `pforgetest/test-staged-recipes`.
+During test session teardown, the auto-generated PR is closed and the branch used to create it is
+deleted automatically.
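+
+As a rough illustration, the `matrix-generate-prs` job in
+`.github/workflows/test-dataflow-integration.yaml` turns these labels into matrix entries using
+logic equivalent to the following sketch (the label values shown here are hypothetical examples;
+the actual implementation runs as inline `python3 -c` commands in the workflow):
+
+```python
+# labels as they might appear on an orchestrator PR (hypothetical example values)
+labels = ["build-review-app", "test-dataflow", "also-test:23::noaa-oisst"]
+
+# each `also-test:` label contributes one `{PR_NUMBER}::{RECIPE_ID}` matrix entry,
+# in addition to the default entry `22::gpcp-from-gcs`
+also_test = [l.split("also-test:")[-1] for l in labels if l.startswith("also-test")]
+for entry in also_test:
+    print(f"pr={entry}")  # written to $GITHUB_OUTPUT, collected via steps.*.outputs.pr
+```
+
+Each resulting entry is passed to the test job as the `PR_NUMBER_AND_RECIPE_ID` environment
+variable, which the fixtures in `tests.integration/test_dataflow.py` split on `::`.

 ```mermaid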