feat: update pr.yml to open veda-config pr and publish collections #77

Workflow file for this run

.github/workflows/pr.yml at 3eb2059

	# This GitHub Actions workflow automates the process of
	# publishing dataset collections to a staging environment.
	# It is triggered by a pull request to the main branch
	# that modifies any files within the ingestion-data/staging/dataset-config/ directory.
	# The workflow includes steps to
	# - publish the datasets,
	# - generate an MDX file for each published collection,
	# - continuously update the status of the workflow in the PR comment.

	name: Publish collection to staging

	on:
	  pull_request:
	    branches: ['main']
	    # `closed` is not among the default pull_request activity types
	    # (opened, synchronize, reopened). It is listed explicitly so that the
	    # publish-to-prod-on-pr-merge job, which checks for action == 'closed',
	    # can actually be triggered when a PR is merged.
	    types: [opened, synchronize, reopened, closed]
	    paths:
	      # Run the workflow only if files inside this path are updated
	      - ingestion-data/staging/dataset-config/*

	  # NOTE(review): every job in this workflow is gated on pull_request events
	  # (directly through `if`, or indirectly through `needs`), so push-triggered
	  # runs currently skip all jobs. Confirm whether this trigger is still wanted.
	  push:
	    branches:
	      - main

	# Token scope: comment on PRs, read repository contents
	permissions:
	  pull-requests: write
	  contents: read

	jobs:
	# Publishes the newly added dataset configs to staging; runs only for pull_request events whose action is `opened` or `synchronize`
	publish-new-datasets:
	if: ${{ github.event_name == 'pull_request' && (github.event.action == 'synchronize' \|\| github.event.action == 'opened') }}
	runs-on: ubuntu-latest
	environment: staging
	outputs:
	# Comma-separated paths of the config files that were published successfully (set by the publish-collections step)
	publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
	steps:
	- uses: actions/checkout@v4

	# Initializes the PR comment
	# Edits existing or creates new comment
	# Why? - Keeps a single status comment per PR instead of adding a new one on every run
	# The comment is recognised by the "### Workflow Status" heading in its body
	- name: Initialize PR comment with workflow start
	id: init-comment
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	run: \|
	WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	body="### Workflow Status
	Starting workflow... [View action run]($WORKFLOW_URL)"

	# Get the PR number
	PR_NUMBER=${{ github.event.pull_request.number }}

	# Fetch existing comments (only those whose body contains the status heading)
	COMMENTS=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments --jq '.[] \| select(.body \| contains("### Workflow Status")) \| {id: .id, body: .body}')

	# Check if a comment already exists (first match wins; empty when there is none)
	COMMENT_ID=$(echo "$COMMENTS" \| jq -r '.id' \| head -n 1)

	if [ -z "$COMMENT_ID" ]; then
	# No existing comment, create a new one
	COMMENT_ID=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments -f body="$body" --jq '.id')
	else
	# Comment exists, overwrite the existing comment
	gh api repos/${{ github.repository }}/issues/comments/$COMMENT_ID -X PATCH -f body="$body"
	fi

	# Expose the comment ID as a step output so later steps of this job can update the same comment
	echo "COMMENT_ID=$COMMENT_ID" >> $GITHUB_OUTPUT

	# Find only the newly added files
	# Only .json files
	# The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
	# Later steps of this job read only the `added_files` output of this action
	# NOTE(review): this third-party action is referenced by a mutable tag; consider pinning it to a full commit SHA
	- name: Get newly added files
	id: changed-files
	uses: tj-actions/changed-files@v45
	with:
	files: \|
	**.json

	# Logs every file reported as added by the changed-files step (debugging aid, no outputs)
	- name: List all newly added files
	env:
	ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
	run: \|
	# ADDED_FILES is deliberately left unquoted so the shell splits it into one path per iteration
	for file in ${ADDED_FILES}; do
	echo "$file was added"
	done

	# Uses service client creds to get token
	# No username/password needed
	- name: Get auth token
	id: get-token
	run: \|
	echo "Vars: $vars"
	response=$(curl -X POST \
	${{ vars.STAGING_COGNITO_DOMAIN }}/oauth2/token \
	-H "Content-Type: application/x-www-form-urlencoded" \
	-d "grant_type=client_credentials" \
	-d "client_id=${{ vars.STAGING_CLIENT_ID }}" \
	-d "client_secret=${{ secrets.STAGING_CLIENT_SECRET }}"
	)

	access_token=$(echo "$response" \| jq -r '.access_token')
	echo "ACCESS_TOKEN=$access_token" >> $GITHUB_OUTPUT

	# Makes request to /dataset/publish endpoint
	# Outputs only files that were successfully published
	# Used by other steps
	# If none of the requests are successful, workflow fails
	# Updates the PR comment with status of collection publication
	- name: Publish all newly added collections to staging
	id: publish-collections
	env:
	ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
	WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
	COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
	run: \|
	if [ -z "$WORKFLOWS_URL" ]; then
	echo "WORKFLOWS_URL is not set"
	exit 1
	fi

	if [ -z "$AUTH_TOKEN" ]; then
	echo "AUTH_TOKEN is not set"
	exit 1
	fi

	publish_url="${WORKFLOWS_URL%/}/dataset/publish"
	bearer_token=$AUTH_TOKEN

	# Track successful publications
	all_failed=true
	success_collections=()
	status_message='### Collection Publication Status
	'

	for file in "${ADDED_FILES[@]}"; do
	echo $file
	if [ -f "$file" ]; then
	dataset_config=$(jq '.' "$file")
	collection_id=$(jq -r '.collection' "$file")

	response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
	-H "Content-Type: application/json" \
	-H "Authorization: Bearer $AUTH_TOKEN" \
	-d "$dataset_config"
	)

	status_code=$(tail -n1 <<< "$response")

	# Update status message based on response code
	if [ "$status_code" -eq 200 ] \|\| [ "$status_code" -eq 201 ]; then
	echo "$collection_id successfully published ✅"
	status_message+="- $collection_id: Successfully published ✅
	"
	success_collections+=("$file")
	all_failed=false
	else
	echo "$collection_id failed to publish ❌"
	status_message+="- $collection_id: Failed to publish. Error code $status_code. ❌
	"
	fi
	else
	echo "File $file does not exist"
	exit 1
	fi
	done

	# Exit workflow if all the requests fail
	if [ "$all_failed" = true ]; then
	echo "All collections failed to publish."
	exit 1
	fi

	# Output only successful collections to be used in subsequent steps
	echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> $GITHUB_OUTPUT

	# Update PR comment
	CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
	UPDATED_BODY="$CURRENT_BODY

	$status_message"
	gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

	# Installs Python 3.9 and caches the interpreter directory, keyed on its location plus a hash of the requirements file
	- name: Set up Python
	uses: actions/setup-python@v5
	with:
	python-version: '3.9'
	# NOTE(review): the cache key hashes `requirements.txt` at the repository root, while the only pip install visible in this workflow reads ./scripts/requirements.txt (in the create-mdx-files job) — confirm which file is intended
	- uses: actions/cache@v4
	with:
	path: ${{ env.pythonLocation }}
	key: ${{ env.pythonLocation }}-pip-${{ hashFiles('requirements.txt') }}

	# If the workflow fails at any point, the PR comment will be updated
	# Runs only when an earlier step of this job failed; appends a failure line linking to this run
	- name: Update PR comment on overall workflow failure
	if: failure()
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
	run: \|
	WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	# Read the current comment text so the failure notice is appended rather than replacing it
	CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
	UPDATED_BODY="$CURRENT_BODY

	❌ The workflow run failed. [See logs here]($WORKFLOW_URL)"
	gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

	# Generates an MDX file per published dataset config and prepares a branch in a checkout of the veda-config repository
	create-mdx-files:
	runs-on: ubuntu-latest
	environment: staging
	# Runs after publish-new-datasets; its publishedCollections output is consumed by the steps below
	needs: publish-new-datasets
	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	# Logs the list of published config files received from the publish-new-datasets job
	- name: Use output from dataset-publication-and-configuration
	run: \|
	echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

	# Creates a slim dataset mdx file for each collection based on the dataset config json
	- name: Create dataset mdx for given collections
	env:
	PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
	run: \|
	echo $PUBLISHED_COLLECTION_FILES
	pip install -r ./scripts/requirements.txt
	for file in "${PUBLISHED_COLLECTION_FILES[@]}"
	do
	python3 ./scripts/generate-mdx.py "$file"
	done

	# Debugging aid: prints the workspace root and the contents of the dataset-mdx directory
	# NOTE(review): assumes generate-mdx.py writes its output to ./ingestion-data/dataset-mdx/ — confirm
	- name: List files in workspace
	run: \|
	echo "Listing all files to verify .mdx file generation:"
	ls -la
	ls ./ingestion-data/dataset-mdx/

	# Sets the global git identity to the github-actions bot for any commits made later in this job
	- name: Set up Git
	run: \|
	git config --global user.name "github-actions[bot]"
	git config --global user.email "github-actions[bot]@users.noreply.github.com"

	# Prints the repository variables that the veda-config checkout step uses to build the repository name
	- name: Debug Variables
	run: \|
	echo "CONFIG_REPO_ORG: ${{ vars.VEDA_CONFIG_REPO_ORG }}"
	echo "CONFIG_REPO_NAME: ${{ vars.VEDA_CONFIG_REPO_NAME }}"

	# Checks out the veda-config repository into ./datasets using a dedicated access token
	- name: Checkout veda-config repo
	uses: actions/checkout@v4
	with:
	repository: ${{ vars.VEDA_CONFIG_REPO_ORG }}/${{vars.VEDA_CONFIG_REPO_NAME }}
	token: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
	# Directory (relative to the workspace) that receives the checkout
	path: datasets

	# NOTE: the actual copy command is commented out below, so this step currently only lists files and prints git status
	- name: Copy MDX file to veda-config
	run: \|
	echo "Copying .mdx file to veda-config repository"
	ls ./ingestion-data/dataset-mdx/
	git status
	# cp ./ingestion-data/dataset-mdx/* datasets/
	ls -la datasets

	# Creates a PR in veda-config with the following changes:
	# 1. the mdx files for all published collections
	# 2. updates the stac/raster urls in .env file
	# This step needs a GH_TOKEN that has permissions to create a PR in veda-config
	- name: Create PR with changes
	id: create-pr
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	VEDA_CONFIG_REPO_ACCESS_TOKEN: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}
	COMMENT_ID: ${{ steps.publish-collections.outputs.COMMENT_ID }}
	PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
	run: \|
	files_string=$(IFS=$'\n'; echo "${PUBLISHED_COLLECTION_FILES[*]}")
	hash=$(echo -n "$files_string" \| md5sum \| cut -d ' ' -f 1)
	NEW_BRANCH="add-dataset-$hash"
	cd ${{ vars.VEDA_CONFIG_REPO_NAME }}
	git fetch origin
	if git ls-remote --exit-code --heads origin $NEW_BRANCH; then
	git push origin --delete $NEW_BRANCH
	fi
	git checkout -b $NEW_BRANCH

	# Update the env vars to staging based on env vars
	#sed -i "s\|${{ vars.ENV_FROM }}\|${{ vars.ENV_TO }}\|g" .env
	# cp -r ../datasets/* datasets/
	git status
	# git add .
	# git commit -m "Add dataset(s)"
	# git push origin $NEW_BRANCH
	# PR_URL=$(GITHUB_TOKEN=$VEDA_CONFIG_REPO_ACCESS_TOKEN gh pr create -H $NEW_BRANCH -B develop --title 'Add dataset [Automated workflow]' --body-file <(echo "Add datasets (Automatically created by Github action)"))

	# echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT
	# echo "PR creation succeeded"

	# Updates the comment with a link to the above PR
	- name: Update PR comment with PR creation result
	if: success()
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
	run: \|
	PR_URL=${{ steps.create-pr.outputs.PR_URL }}
	CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
	UPDATED_BODY="$CURRENT_BODY

	A PR has been created with the dataset configuration: 🗺️ [PR link]($PR_URL)"
	gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

	- name: Update PR comment on PR creation failure
	if: failure() && steps.create-pr.outcome == 'failure'
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
	run: \|
	CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
	UPDATED_BODY="$CURRENT_BODY

	Failed ❌ to create a PR with the dataset configuration. 😔 "
	gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

	# If the workflow fails at any point, the PR comment will be updated
	- name: Update PR comment on overall workflow failure
	if: failure() && steps.create-pr.outcome != 'failure'
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
	run: \|
	WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
	UPDATED_BODY="$CURRENT_BODY

	# Output WORKFLOW_URL to logs for verification
	echo "Workflow URL: $WORKFLOW_URL"

	❌ The workflow run failed. [See logs here]($WORKFLOW_URL)"
	gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

	echo "Updated Comment Body: $UPDATED_BODY"

	# Placeholder job for production publishing; its only step prints a NO-OP message
	# NOTE(review): `closed` is not one of the default pull_request activity types, so this condition can only match when the workflow trigger lists `closed` under `types` — confirm the trigger configuration
	publish-to-prod-on-pr-merge:
	if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
	runs-on: ubuntu-latest
	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Publish to production on PR merge
	run: echo "NO-OP. This step runs when a PR is merged."

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: update pr.yml to open veda-config pr and publish collections #77

Workflow file

feat: update pr.yml to open veda-config pr and publish collections #77

Jobs

Run details

Workflow file for this run