diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index cb3a0e0d..8e9baf87 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -30,6 +30,7 @@ jobs:
     environment: staging
     outputs:
       publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
+      commentId: ${{ steps.init-comment.outputs.COMMENT_ID }}
     steps:
       - uses: actions/checkout@v4
@@ -128,16 +129,17 @@ jobs:

           # Track successful publications
           all_failed=true
-          success_collections=()
+          declare -a success_collections=()
           status_message='### Collection Publication Status

 '
-          for file in "${ADDED_FILES[@]}"; do
+          for file in ${ADDED_FILES}; do
             echo $file
             if [ -f "$file" ]; then
               dataset_config=$(jq '.' "$file")
               collection_id=$(jq -r '.collection' "$file")
+              echo "Publishing $collection_id"
               response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
                 -H "Content-Type: application/json" \
                 -H "Authorization: Bearer $AUTH_TOKEN" \
@@ -171,7 +173,7 @@ jobs:
           fi

           # Output only successful collections to be used in subsequent steps
-          echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> $GITHUB_OUTPUT
+          echo "success_collections=${success_collections[*]}" >> $GITHUB_OUTPUT

           # Update PR comment
           CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
@@ -203,14 +205,15 @@ jobs:
           ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"

-  create-mdx-files:
+  create-mdx-files-and-open-pr:
     runs-on: ubuntu-latest
+    environment: staging
     needs: publish-new-datasets
     steps:
       - name: Checkout code
         uses: actions/checkout@v4

-      - name: Use output from dataset-publication-and-configuration
+      - name: Use output from publish-new-datasets
         run: |
           echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"
@@ -220,19 +223,125 @@ jobs:
           PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
         run: |
           echo $PUBLISHED_COLLECTION_FILES
+          collection_ids=""
           pip install -r ./scripts/requirements.txt
-          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-          do
-            python3 ./scripts/generate-mdx.py "$file"
+          for file in ${PUBLISHED_COLLECTION_FILES}; do
+            collection_id=$(python3 ./scripts/generate-mdx.py "$file")
+            collection_id=$(echo "$collection_id" | sed 's/^[[:space:]"]*//;s/[[:space:]"]*$//')
+            echo "Processed collection ID: $collection_id"
+            collection_ids="$collection_ids$collection_id,"
           done
+          # Remove trailing comma
+          collection_ids=${collection_ids%,}
+          echo "Final collection_ids: $collection_ids"
+          echo "collection_ids=${collection_ids}" >> $GITHUB_ENV

-  open-veda-config-pr:
-    runs-on: ubuntu-latest
-    needs: create-mdx-files
-    steps:
-      - name: Open veda-config PR
+      - name: Set up Variables
         run: |
-          echo "NO-OP. Placeholder for future job that will open a Pull Request in veda-config for a dashboard preview for the new/changed datasets."
+ echo "VEDA_CONFIG_REPO=${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}" >> $GITHUB_ENV + + - name: Clone veda-config repository + run: | + git clone https://github.com/${{ env.VEDA_CONFIG_REPO }}.git + ls + + - name: Copy untracked mdx files to veda-config + run: | + echo "Copying untracked .mdx files to veda-config repository" + ls ./ingestion-data/dataset-mdx/ + mkdir -p datasets + find ingestion-data/dataset-mdx/ -name '*.mdx' -exec cp {} veda-config/datasets/ \; + + - name: Create veda-config PR with changes + id: create-pr + env: + GITHUB_TOKEN: ${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }} + COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }} + PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }} + run: | + cd veda-config + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git remote set-url origin https://${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }}@github.com/${{ env.VEDA_CONFIG_REPO }} + + files_string=$(IFS=$'\n'; echo "${PUBLISHED_COLLECTION_FILES[*]}") + hash=$(echo -n "$files_string" | md5sum | cut -d ' ' -f 1) + NEW_BRANCH="add-dataset-$hash" + + git fetch origin + if git ls-remote --exit-code --heads origin $NEW_BRANCH; then + git push origin --delete $NEW_BRANCH + fi + git checkout -b $NEW_BRANCH + + git status + git add . + git commit -m "feat: add MDX files for dataset(s) [Automated workflow]" + git push origin $NEW_BRANCH + + # Convert the comma-separated list into bullet points + collection_bullet_points="" + IFS=',' read -ra IDs <<< "$collection_ids" + + # Extract the first collection ID + first_collection_id="${IDs[0]}" + for id in "${IDs[@]}"; do + collection_bullet_points+="- $id\n" + done + + pr_title="Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]" + body="### Add dataset(s) - $first_collection_id [Automated PR by ${{ github.actor }}]\n\n$collection_bullet_points" + + echo "$body" + PR_URL=$(GITHUB_TOKEN=${{ secrets.VEDA_CONFIG_REPO_ACCESS_TOKEN }} gh pr create -R ${{ env.VEDA_CONFIG_REPO }} -H $NEW_BRANCH -B develop --title "$pr_title" --body "$(echo -e "$body")") + + echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT + echo "PR creation succeeded!" + + # Updates the comment with a link to the above PR + - name: Update PR comment with PR creation result + if: success() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }} + run: | + PR_URL=${{ steps.create-pr.outputs.PR_URL }} + CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body') + UPDATED_BODY="$CURRENT_BODY + + **A PR has been created with the dataset configuration: πŸ—ΊοΈ [PR link]($PR_URL)**" + gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY" + + - name: Update PR comment on PR creation failure + if: failure() && steps.create-pr.outcome == 'failure' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }} + run: | + CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body') + UPDATED_BODY="$CURRENT_BODY + + **Failed ❌ to create a PR with the dataset configuration. 
πŸ˜” **" + gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY" + + # If the workflow fails at any point, the PR comment will be updated + - name: Update PR comment on overall workflow failure + if: failure() && steps.create-pr.outcome != 'failure' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMMENT_ID: ${{ needs.publish-new-datasets.outputs.commentId }} + run: | + WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body') + UPDATED_BODY="$CURRENT_BODY + + # Output WORKFLOW_URL to logs for verification + echo "Workflow URL: $WORKFLOW_URL" + + ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**" + gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY" + + echo "Updated Comment Body: $UPDATED_BODY" publish-to-prod-on-pr-merge: if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }} diff --git a/ingestion-data/staging/dataset-config/test.json b/ingestion-data/staging/dataset-config/test.json new file mode 100644 index 00000000..eafd5b72 --- /dev/null +++ b/ingestion-data/staging/dataset-config/test.json @@ -0,0 +1,31 @@ +{ + "collection": "hls-swir-falsecolor-composite-THIRD-TEST", + "title": "HLS SWIR FalseColor Composite", + "spatial_extent": { + "xmin": -156.75, + "ymin": 20.80, + "xmax": -156.55, + "ymax": 20.94 + }, + "temporal_extent": { + "startdate": "2023-08-08T00:00:00Z", + "enddate": "2023-08-08T23:59:59Z" + }, + "data_type": "cog", + "license": "CC0-1.0", + "description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.", + "is_periodic": false, + "time_density": "day", + "sample_files": [ + "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif", + "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif" + ], + "discovery_items": [ + { + "discovery": "s3", + "prefix": "maui-fire/", + "bucket": "veda-data-store-staging", + "filename_regex": "(.*)SWIR_falsecolor(.*).tif$" + } + ] +} \ No newline at end of file diff --git a/scripts/generate-mdx.py b/scripts/generate-mdx.py index 8ecc478d..e8726d53 100644 --- a/scripts/generate-mdx.py +++ b/scripts/generate-mdx.py @@ -125,3 +125,6 @@ def safe_open_w(path): ) with safe_open_w(output_filepath) as ofile: ofile.write(new_content) + + collection_id = input_data["collection"] + print(collection_id)