feat: update pr.yml to open veda-config pr and publish collections #77
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This GitHub Actions workflow automates the process of
# publishing dataset collections to a staging environment.
# It is triggered by a pull request to the main branch
# that modifies any files within the ingestion-data/staging/dataset-config/ directory.
# The workflow includes steps to
# - publish the datasets,
# - generate dataset MDX files for the published collections,
# - continuously update the status of the workflow in a PR comment
# Workflow name, triggers and the permissions granted to GITHUB_TOKEN.
name: Publish collection to staging

on:
  pull_request:
    branches:
      - main
    # `closed` has to be listed explicitly. When `types` is omitted only
    # opened, synchronize and reopened start a run, so a job gated on
    # `github.event.action == 'closed'` could never be selected.
    types:
      - opened
      - synchronize
      - reopened
      - closed
    paths:
      # Run the workflow only if files inside this path are updated
      - ingestion-data/staging/dataset-config/*
  # NOTE(review): every job in this workflow is gated (directly or through
  # `needs`) on a pull_request event, so runs started by this trigger skip
  # all jobs. Confirm the push trigger is still wanted.
  push:
    branches:
      - main

# pull-requests: write is needed to create and edit the PR status comment.
permissions:
  pull-requests: write
  contents: read
jobs: | |
# Publishes every dataset config added by the pull request to the staging
# ingestion API and reports progress in a single PR comment.
# Runs only for pull_request events whose action is `opened` or `synchronize`.
publish-new-datasets:
  if: ${{ github.event_name == 'pull_request' && (github.event.action == 'synchronize' || github.event.action == 'opened') }}
  runs-on: ubuntu-latest
  environment: staging
  outputs:
    # Comma-separated paths of the config files that were published
    publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
  steps:
    - uses: actions/checkout@v4

    # Initializes the PR comment
    # Edits existing or creates new comment
    # Why? - Cleanliness!
    - name: Initialize PR comment with workflow start
      id: init-comment
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
      run: |
        WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
        body="### Workflow Status
        **Starting workflow...** [View action run]($WORKFLOW_URL)"

        # Look for a status comment left by an earlier run of this workflow.
        # --paginate keeps the lookup reliable on PRs with many comments.
        COMMENT_ID=$(gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" --paginate \
          --jq '.[] | select(.body | contains("### Workflow Status")) | .id' | head -n 1)

        if [ -z "$COMMENT_ID" ]; then
          # No existing comment, create a new one
          COMMENT_ID=$(gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" -f body="$body" --jq '.id')
        else
          # Comment exists, overwrite the existing comment
          gh api "repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -X PATCH -f body="$body"
        fi
        echo "COMMENT_ID=$COMMENT_ID" >> "$GITHUB_OUTPUT"

    # Find only the newly added files
    # Only .json files
    # The files are exposed as step outputs, which can be used in subsequent steps
    # NOTE(review): this third-party action is referenced by a mutable tag;
    # consider pinning it to a full commit SHA.
    - name: Get newly added files
      id: changed-files
      uses: tj-actions/changed-files@v45
      with:
        files: |
          **.json

    - name: List all newly added files
      env:
        ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
      run: |
        for file in ${ADDED_FILES}; do
          echo "$file was added"
        done

    # Uses service client creds to get token
    # No username/password needed
    - name: Get auth token
      id: get-token
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so special characters cannot alter the command.
        COGNITO_DOMAIN: ${{ vars.STAGING_COGNITO_DOMAIN }}
        CLIENT_ID: ${{ vars.STAGING_CLIENT_ID }}
        CLIENT_SECRET: ${{ secrets.STAGING_CLIENT_SECRET }}
      run: |
        response=$(curl -sS -X POST \
          "${COGNITO_DOMAIN}/oauth2/token" \
          -H "Content-Type: application/x-www-form-urlencoded" \
          -d "grant_type=client_credentials" \
          -d "client_id=${CLIENT_ID}" \
          -d "client_secret=${CLIENT_SECRET}"
        )
        # `// empty` turns a missing token into an empty string rather than
        # the literal text "null", which would slip past later -z checks.
        access_token=$(echo "$response" | jq -r '.access_token // empty')
        if [ -z "$access_token" ]; then
          echo "Failed to obtain an access token from the token endpoint"
          exit 1
        fi
        # Keep the token out of the logs of this and later steps
        echo "::add-mask::$access_token"
        echo "ACCESS_TOKEN=$access_token" >> "$GITHUB_OUTPUT"

    # Makes request to /dataset/publish endpoint
    # Outputs only files that were successfully published
    # Used by other steps
    # If none of the requests are successful, workflow fails
    # Updates the PR comment with status of collection publication
    - name: Publish all newly added collections to staging
      id: publish-collections
      env:
        ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
        WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
        COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
      run: |
        if [ -z "$WORKFLOWS_URL" ]; then
          echo "WORKFLOWS_URL is not set"
          exit 1
        fi
        if [ -z "$AUTH_TOKEN" ]; then
          echo "AUTH_TOKEN is not set"
          exit 1
        fi

        publish_url="${WORKFLOWS_URL%/}/dataset/publish"

        # ADDED_FILES arrives as one whitespace-separated string. Split it
        # into a real array; expanding the string with [@] would yield a
        # single element holding every path.
        read -r -a added_files <<< "$ADDED_FILES"

        # Track successful publications
        all_failed=true
        success_collections=()
        status_message='### Collection Publication Status
        '

        for file in "${added_files[@]}"; do
          echo "$file"
          if [ ! -f "$file" ]; then
            echo "File $file does not exist"
            exit 1
          fi

          dataset_config=$(jq '.' "$file")
          collection_id=$(jq -r '.collection' "$file")

          # The response body goes to response.txt; only the HTTP status
          # code is captured.
          status_code=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
            -H "Content-Type: application/json" \
            -H "Authorization: Bearer $AUTH_TOKEN" \
            -d "$dataset_config"
          )

          # Update status message based on response code
          if [ "$status_code" -eq 200 ] || [ "$status_code" -eq 201 ]; then
            echo "$collection_id successfully published ✅"
            status_message+="- **$collection_id**: Successfully published ✅
        "
            success_collections+=("$file")
            all_failed=false
          else
            echo "$collection_id failed to publish ❌"
            status_message+="- **$collection_id**: Failed to publish. Error code $status_code. ❌
        "
          fi
        done

        # Update the PR comment before deciding the step result, so the
        # per-collection status is visible even when every request failed.
        CURRENT_BODY=$(gh api "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" --jq '.body')
        UPDATED_BODY="$CURRENT_BODY
        $status_message"
        gh api -X PATCH "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -f body="$UPDATED_BODY"

        # Exit workflow if all the requests fail
        if [ "$all_failed" = true ]; then
          echo "All collections failed to publish."
          exit 1
        fi

        # Output only successful collections to be used in subsequent steps
        echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> "$GITHUB_OUTPUT"

    # NOTE(review): no later step in this job invokes Python. Confirm that
    # the two steps below are still needed here.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'

    - uses: actions/cache@v4
      with:
        path: ${{ env.pythonLocation }}
        key: ${{ env.pythonLocation }}-pip-${{ hashFiles('requirements.txt') }}

    # If the workflow fails at any point, the PR comment will be updated
    - name: Update PR comment on overall workflow failure
      if: failure()
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
      run: |
        # If the comment was never created there is nothing to update
        if [ -z "$COMMENT_ID" ]; then
          echo "No PR status comment available; skipping update"
          exit 0
        fi
        WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
        CURRENT_BODY=$(gh api "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" --jq '.body')
        UPDATED_BODY="$CURRENT_BODY
        ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
        gh api -X PATCH "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -f body="$UPDATED_BODY"
# Generates a dataset MDX file for every published collection and prepares a
# branch in the veda-config repository. Progress is appended to the PR status
# comment. Runs after publish-new-datasets and consumes its
# `publishedCollections` output (comma-separated config file paths).
create-mdx-files:
  runs-on: ubuntu-latest
  environment: staging
  needs: publish-new-datasets
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # The status comment is created in the publish-new-datasets job. The
    # `steps` context only covers the current job, so the comment id is
    # looked up again here through its "### Workflow Status" marker.
    - name: Find PR status comment
      id: find-comment
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
      run: |
        COMMENT_ID=$(gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" --paginate \
          --jq '.[] | select(.body | contains("### Workflow Status")) | .id' | head -n 1)
        echo "COMMENT_ID=$COMMENT_ID" >> "$GITHUB_OUTPUT"

    - name: Use output from dataset-publication-and-configuration
      run: |
        echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

    # Creates a slim dataset mdx file for each collection based on the dataset config json
    - name: Create dataset mdx for given collections
      env:
        PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
      run: |
        echo "$PUBLISHED_COLLECTION_FILES"
        pip install -r ./scripts/requirements.txt
        # The job output is a comma-joined string. Split it into an array so
        # the generator is called once per file.
        IFS=',' read -r -a published_files <<< "$PUBLISHED_COLLECTION_FILES"
        for file in "${published_files[@]}"; do
          python3 ./scripts/generate-mdx.py "$file"
        done

    - name: List files in workspace
      run: |
        echo "Listing all files to verify .mdx file generation:"
        ls -la
        ls ./ingestion-data/dataset-mdx/

    - name: Set up Git
      run: |
        git config --global user.name "github-actions[bot]"
        git config --global user.email "github-actions[bot]@users.noreply.github.com"

    - name: Debug Variables
      run: |
        echo "CONFIG_REPO_ORG: ${{ vars.VEDA_CONFIG_REPO_ORG }}"
        echo "CONFIG_REPO_NAME: ${{ vars.VEDA_CONFIG_REPO_NAME }}"

    # The veda-config repository is checked out into ./datasets
    - name: Checkout veda-config repo
      uses: actions/checkout@v4
      with:
        repository: ${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}
        token: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
        path: datasets

    - name: Copy MDX file to veda-config
      run: |
        echo "Copying .mdx file to veda-config repository"
        ls ./ingestion-data/dataset-mdx/
        git status
        # cp ./ingestion-data/dataset-mdx/* datasets/
        ls -la datasets

    # Creates a PR in veda-config with the following changes:
    # 1. the mdx files for all published collections
    # 2. updates the stac/raster urls in .env file
    # This step needs a GH_TOKEN that has permissions to create a PR in veda-config
    # NOTE(review): the commit, push and PR creation commands are currently
    # commented out, so this step only prepares a local branch.
    - name: Create PR with changes
      id: create-pr
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        VEDA_CONFIG_REPO_ACCESS_TOKEN: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
        COMMENT_ID: ${{ steps.find-comment.outputs.COMMENT_ID }}
        # Comes from the upstream job through `needs`; steps of another job
        # are not reachable via the `steps` context.
        PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
      run: |
        # Deterministic branch name derived from the list of published files
        hash=$(echo -n "$PUBLISHED_COLLECTION_FILES" | md5sum | cut -d ' ' -f 1)
        NEW_BRANCH="add-dataset-$hash"

        # The veda-config checkout above uses `path: datasets`
        cd datasets
        git fetch origin
        # Start from a clean branch if an earlier run already pushed one
        if git ls-remote --exit-code --heads origin "$NEW_BRANCH"; then
          git push origin --delete "$NEW_BRANCH"
        fi
        git checkout -b "$NEW_BRANCH"
        # Update the env vars to staging based on env vars
        #sed -i "s|${{ vars.ENV_FROM }}|${{ vars.ENV_TO }}|g" .env
        # cp -r ../datasets/* datasets/
        git status
        # git add .
        # git commit -m "Add dataset(s)"
        # git push origin $NEW_BRANCH
        # PR_URL=$(GITHUB_TOKEN=$VEDA_CONFIG_REPO_ACCESS_TOKEN gh pr create -H $NEW_BRANCH -B develop --title 'Add dataset [Automated workflow]' --body-file <(echo "Add datasets (Automatically created by Github action)"))
        # echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT
        # echo "PR creation succeeded"

    # Updates the comment with a link to the above PR
    - name: Update PR comment with PR creation result
      if: success()
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        COMMENT_ID: ${{ steps.find-comment.outputs.COMMENT_ID }}
        PR_URL: ${{ steps.create-pr.outputs.PR_URL }}
      run: |
        # Do not announce a PR when the create-pr step produced no URL
        if [ -z "$PR_URL" ]; then
          echo "No PR URL was produced by the create-pr step; leaving the PR comment unchanged"
          exit 0
        fi
        if [ -z "$COMMENT_ID" ]; then
          echo "No PR status comment found; nothing to update"
          exit 0
        fi
        CURRENT_BODY=$(gh api "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" --jq '.body')
        UPDATED_BODY="$CURRENT_BODY
        **A PR has been created with the dataset configuration: 🗺️ [PR link]($PR_URL)**"
        gh api -X PATCH "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -f body="$UPDATED_BODY"

    - name: Update PR comment on PR creation failure
      if: failure() && steps.create-pr.outcome == 'failure'
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        COMMENT_ID: ${{ steps.find-comment.outputs.COMMENT_ID }}
      run: |
        if [ -z "$COMMENT_ID" ]; then
          echo "No PR status comment found; nothing to update"
          exit 0
        fi
        CURRENT_BODY=$(gh api "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" --jq '.body')
        UPDATED_BODY="$CURRENT_BODY
        **Failed ❌ to create a PR with the dataset configuration. 😔 **"
        gh api -X PATCH "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -f body="$UPDATED_BODY"

    # If the workflow fails at any point, the PR comment will be updated
    - name: Update PR comment on overall workflow failure
      if: failure() && steps.create-pr.outcome != 'failure'
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        COMMENT_ID: ${{ steps.find-comment.outputs.COMMENT_ID }}
      run: |
        WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
        # Output WORKFLOW_URL to logs for verification
        echo "Workflow URL: $WORKFLOW_URL"
        if [ -z "$COMMENT_ID" ]; then
          echo "No PR status comment found; nothing to update"
          exit 0
        fi
        CURRENT_BODY=$(gh api "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" --jq '.body')
        UPDATED_BODY="$CURRENT_BODY
        ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
        gh api -X PATCH "/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -f body="$UPDATED_BODY"
        echo "Updated Comment Body: $UPDATED_BODY"
# Placeholder for production publication: checks out the repository and logs
# a message. Selected only for a pull_request event whose action is `closed`
# and whose pull request was merged.
# NOTE(review): this requires the `closed` activity type to be enabled on the
# workflow's pull_request trigger.
publish-to-prod-on-pr-merge:
  runs-on: ubuntu-latest
  if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Publish to production on PR merge
      run: |
        echo "NO-OP. This step runs when a PR is merged."