diff --git a/.github/workflows/after-validate-submission.yaml b/.github/workflows/after-validate-submission.yaml index f05b7f45..64e8704e 100644 --- a/.github/workflows/after-validate-submission.yaml +++ b/.github/workflows/after-validate-submission.yaml @@ -25,10 +25,18 @@ jobs: with: install-r: false use-public-rspm: true + - name: Get list of changed files + id: changed-files + uses: tj-actions/changed-files@v44 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: Print changed files + run: | + echo 'Changed files: ${{ steps.changed-files.outputs.added_files }}' - name: Install system dependencies run: | - sudo apt-get update - sudo apt-get install -y libcurl4-openssl-dev + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev - name: Cache R packages uses: actions/cache@v2 with: @@ -61,17 +69,32 @@ jobs: Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubVis")' - name: Install hubUtils run: | - Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubUtils")' + Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")' - name: Install hubEnsembles run: | Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubEnsembles")' - name: Install hubData run: | - Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubData")' + Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")' + - name: Check if RETRO files are present + id: check_files + run: | + # added_files is one space-separated line (changed-files default separator); put one path per line so grep counts files + echo 'retro='$(echo '${{ steps.changed-files.outputs.added_files }}' | tr ' ' '\n' | grep 'model-output' | grep -c '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV + echo 'conventional='$(echo '${{ steps.changed-files.outputs.added_files }}' | tr ' ' '\n' | grep -c -v '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV - name: Run R script run: | cd ./scripts Rscript ensemble.R + if: ${{ env.conventional != '0' }} + - name: Run retro_ensemble.R + run: | + cd ./scripts + # Split the list first so only the RETRO paths (not the whole line) reach the loop + for file in $(echo '${{ 
steps.changed-files.outputs.added_files }}' | tr ' ' '\n' | grep 'model-output' | grep '.*-RETRO.*\.\(csv\|parquet\)'); do + Rscript retro_ensemble.R "$file" + done + if: ${{ env.retro != '0' }} - name: Commit and push new files to root repository run: | @@ -79,7 +102,7 @@ jobs: git config user.email "41898282+github-actions[bot]@users.noreply.github.com" if [ -n "$(git status --porcelain)" ]; then git add model-output/hub-ensemble/* - git commit -m "Upload new files to root repo" + git commit -m "Upload new files to root repo by ${GITHUB_ACTOR} in #${{ github.event.pull_request.number }}" git push echo "Data for hub-ensemble uploaded to root repo"; else @@ -90,8 +113,8 @@ jobs: - name: Checkout and push to another repository uses: actions/checkout@v2 with: - #repository: kjsato/rsv-forecast-hub_data repository: HopkinsIDD/rsv-forecast-hub_data + #repository: kjsato/rsv-forecast-hub_data token: ${{ secrets.KJ3_PATC }} path: ./rsv-forecast-hub_data fetch-depth: 2 @@ -113,7 +136,7 @@ jobs: git pull origin main if [ -n "$(git status --porcelain)" ]; then git add . 
- git commit -m "Upload new files to RSV data repo" + git commit -m "Upload new files to RSV data repo by ${GITHUB_ACTOR} in #${{ github.event.pull_request.number }}" git push echo "RSV data uploaded"; else diff --git a/.github/workflows/copy_data.yaml b/.github/workflows/copy_data.yaml index 0413872d..0ac8070a 100644 --- a/.github/workflows/copy_data.yaml +++ b/.github/workflows/copy_data.yaml @@ -15,8 +15,8 @@ jobs: - name: Checkout and push to another repository uses: actions/checkout@v2 with: - #repository: kjsato/rsv-forecast-hub_data repository: HopkinsIDD/rsv-forecast-hub_data + #repository: kjsato/rsv-forecast-hub_data token: ${{ secrets.KJ3_PATC }} path: ./rsv-forecast-hub_data fetch-depth: 2 diff --git a/.github/workflows/copy_hub-config.yaml b/.github/workflows/copy_hub-config.yaml index e9ba0172..86d1c6d2 100644 --- a/.github/workflows/copy_hub-config.yaml +++ b/.github/workflows/copy_hub-config.yaml @@ -21,8 +21,8 @@ jobs: - name: Checkout target repo uses: actions/checkout@v2 with: - #repository: kjsato/rsv-forecast-hub_data repository: HopkinsIDD/rsv-forecast-hub_data + #repository: kjsato/rsv-forecast-hub_data token: ${{ secrets.KJ3_PATC }} path: ./rsv-forecast-hub_data fetch-depth: 2 diff --git a/hub-config/admin.json b/hub-config/admin.json index 07679e3c..3d2bbddd 100644 --- a/hub-config/admin.json +++ b/hub-config/admin.json @@ -1,12 +1,16 @@ { - "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json", - "name": "Simple Forecast Hub", - "maintainer": "Consortium of Infectious Disease Modeling Hubs", + "schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/admin-schema.json", + "name": "US RSV Forecast Hub", + "maintainer": "Johns Hopkins University, Infectious Disease Dynamics Group", "contact": { "name": "K. 
Sato", "email": "ksato8@jh.edu" }, - "repository_url": "https://github.com/Infectious-Disease-Modeling-Hubs/hubTemplate", + "repository": { + "host": "github", + "owner": "kjsato", + "name": "rsv-forecast-hub" + }, "file_format": ["csv", "parquet"], "timezone": "US/Eastern" } diff --git a/hub-config/tasks.json b/hub-config/tasks.json index 054a7d72..3c002913 100644 --- a/hub-config/tasks.json +++ b/hub-config/tasks.json @@ -1,7 +1,8 @@ { - "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json", + "schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/tasks-schema.json", "rounds": [ { + "round_name": "retrospective round", "round_id_from_variable": true, "round_id": "origin_date", "model_tasks": [ @@ -10,6 +11,7 @@ "origin_date": { "required": null, "optional": [ + "2023-10-18", "2023-10-22", "2023-10-29", "2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03", "2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31", "2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28", @@ -147,6 +149,7 @@ "origin_date": { "required": null, "optional": [ + "2023-10-18", "2023-10-22", "2023-10-29", "2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03", "2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31", "2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28", @@ -230,13 +233,15 @@ }, "output_type": { "sample":{ - "output_type_id":{ - "required": null, - "optional":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] + "output_type_id_params":{ + "is_required": true, + "type": "integer", + "min_samples_per_task": 100, + "max_samples_per_task": 100 
}, "value":{ - "type":"double", - "minimum":0 + "type":"integer", + "minimum":0 } } }, @@ -271,7 +276,7 @@ "submissions_due": { "relative_to": "origin_date", "start": -6, - "end": 100 + "end": 283 } } ] diff --git a/model-output/CEPH-Rtrend_rsv/2023-10-22-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-10-22-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..b0c33c05 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-10-22-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-11-12-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-11-12-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..299980d7 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-11-12-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-11-19-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-11-19-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..e1178794 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-11-19-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-12-03-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-12-03-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..fb6e954f Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-12-03-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-12-10-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-12-10-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..9051a062 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-12-10-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-12-17-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-12-17-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..9ed66ec4 Binary files /dev/null and 
b/model-output/CEPH-Rtrend_rsv/2023-12-17-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-12-24-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-12-24-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..223ba691 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-12-24-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2023-12-31-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2023-12-31-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..b134ae26 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2023-12-31-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-01-07-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-01-07-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..8780e6ff Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-01-07-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-01-14-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-01-14-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..206931c8 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-01-14-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-01-21-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-01-21-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..8da3ab98 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-01-21-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-01-28-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-01-28-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..02468e74 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-01-28-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git 
a/model-output/CEPH-Rtrend_rsv/2024-02-04-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-02-04-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..00b30a5a Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-02-04-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-02-11-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-02-11-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..c319e067 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-02-11-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-02-18-CEPH-Rtrend_rsv-RETRO.parquet b/model-output/CEPH-Rtrend_rsv/2024-02-18-CEPH-Rtrend_rsv-RETRO.parquet new file mode 100644 index 00000000..83e667de Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-02-18-CEPH-Rtrend_rsv-RETRO.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-05-05-CEPH-Rtrend_rsv.parquet b/model-output/CEPH-Rtrend_rsv/2024-05-05-CEPH-Rtrend_rsv.parquet new file mode 100644 index 00000000..46d175cd Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-05-05-CEPH-Rtrend_rsv.parquet differ diff --git a/model-output/CEPH-Rtrend_rsv/2024-05-12-CEPH-Rtrend_rsv.parquet b/model-output/CEPH-Rtrend_rsv/2024-05-12-CEPH-Rtrend_rsv.parquet new file mode 100644 index 00000000..7175c6e2 Binary files /dev/null and b/model-output/CEPH-Rtrend_rsv/2024-05-12-CEPH-Rtrend_rsv.parquet differ diff --git a/model-output/PSI-PROF/2024-05-05-PSI-PROF.parquet b/model-output/PSI-PROF/2024-05-05-PSI-PROF.parquet new file mode 100644 index 00000000..6221d04f Binary files /dev/null and b/model-output/PSI-PROF/2024-05-05-PSI-PROF.parquet differ diff --git a/model-output/PSI-PROF/2024-05-12-PSI-PROF.parquet b/model-output/PSI-PROF/2024-05-12-PSI-PROF.parquet new file mode 100644 index 00000000..ddbc0203 Binary files /dev/null and b/model-output/PSI-PROF/2024-05-12-PSI-PROF.parquet differ diff --git 
a/model-output/UGA_flucast-INFLAenza/2024-05-05-UGA_flucast-INFLAenza.parquet b/model-output/UGA_flucast-INFLAenza/2024-05-05-UGA_flucast-INFLAenza.parquet new file mode 100644 index 00000000..8ad80cc1 Binary files /dev/null and b/model-output/UGA_flucast-INFLAenza/2024-05-05-UGA_flucast-INFLAenza.parquet differ diff --git a/model-output/UGA_flucast-INFLAenza/2024-05-12-UGA_flucast-INFLAenza.parquet b/model-output/UGA_flucast-INFLAenza/2024-05-12-UGA_flucast-INFLAenza.parquet new file mode 100644 index 00000000..363bbd62 Binary files /dev/null and b/model-output/UGA_flucast-INFLAenza/2024-05-12-UGA_flucast-INFLAenza.parquet differ diff --git a/model-output/hub-baseline/2024-05-12-hub-baseline.parquet b/model-output/hub-baseline/2024-05-12-hub-baseline.parquet new file mode 100644 index 00000000..584f0928 Binary files /dev/null and b/model-output/hub-baseline/2024-05-12-hub-baseline.parquet differ diff --git a/model-output/hub-ensemble/2023-10-22-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-10-22-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..f63cd146 Binary files /dev/null and b/model-output/hub-ensemble/2023-10-22-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-11-12-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-11-12-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..41033266 Binary files /dev/null and b/model-output/hub-ensemble/2023-11-12-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-11-19-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-11-19-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..e02a946e Binary files /dev/null and b/model-output/hub-ensemble/2023-11-19-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-12-03-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-12-03-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..033d0bef Binary files /dev/null and 
b/model-output/hub-ensemble/2023-12-03-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-12-10-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-12-10-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..c65d3a8e Binary files /dev/null and b/model-output/hub-ensemble/2023-12-10-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-12-17-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-12-17-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..e9a36c8f Binary files /dev/null and b/model-output/hub-ensemble/2023-12-17-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-12-24-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-12-24-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..370255bd Binary files /dev/null and b/model-output/hub-ensemble/2023-12-24-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2023-12-31-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2023-12-31-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..a699db5f Binary files /dev/null and b/model-output/hub-ensemble/2023-12-31-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-01-07-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-01-07-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..1f85c0f4 Binary files /dev/null and b/model-output/hub-ensemble/2024-01-07-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-01-14-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-01-14-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..ecd47931 Binary files /dev/null and b/model-output/hub-ensemble/2024-01-14-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-01-21-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-01-21-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..1833eee0 
Binary files /dev/null and b/model-output/hub-ensemble/2024-01-21-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-01-28-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-01-28-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..e2d26b37 Binary files /dev/null and b/model-output/hub-ensemble/2024-01-28-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-02-04-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-02-04-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..cbce7bc3 Binary files /dev/null and b/model-output/hub-ensemble/2024-02-04-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-02-11-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-02-11-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..33e24f46 Binary files /dev/null and b/model-output/hub-ensemble/2024-02-11-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-02-18-hub-ensemble-RETRO.parquet b/model-output/hub-ensemble/2024-02-18-hub-ensemble-RETRO.parquet new file mode 100644 index 00000000..28553bcc Binary files /dev/null and b/model-output/hub-ensemble/2024-02-18-hub-ensemble-RETRO.parquet differ diff --git a/model-output/hub-ensemble/2024-04-28-hub-ensemble.parquet b/model-output/hub-ensemble/2024-04-28-hub-ensemble.parquet index 6c2ee328..95539c60 100644 Binary files a/model-output/hub-ensemble/2024-04-28-hub-ensemble.parquet and b/model-output/hub-ensemble/2024-04-28-hub-ensemble.parquet differ diff --git a/model-output/hub-ensemble/2024-05-05-hub-ensemble.parquet b/model-output/hub-ensemble/2024-05-05-hub-ensemble.parquet new file mode 100644 index 00000000..dcd2d696 Binary files /dev/null and b/model-output/hub-ensemble/2024-05-05-hub-ensemble.parquet differ diff --git a/model-output/hub-ensemble/2024-05-12-hub-ensemble.parquet b/model-output/hub-ensemble/2024-05-12-hub-ensemble.parquet new file mode 100644 index 
00000000..c1378a5c Binary files /dev/null and b/model-output/hub-ensemble/2024-05-12-hub-ensemble.parquet differ diff --git a/scripts/ensemble.R b/scripts/ensemble.R index 6af19f44..83f777a9 100644 --- a/scripts/ensemble.R +++ b/scripts/ensemble.R @@ -30,7 +30,7 @@ dates_archive <- unlist(jsonlite::read_json(file.path(dir_path, "hub-config/task dates_archive <- dates_archive[as.Date(dates_archive) <= Sys.Date()] curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE)) -#curr_origin_date <- as.Date("2024-03-17") +#curr_origin_date <- as.Date("2024-03-24") ## ----prep_ens, include=FALSE-------------------------------------------------- diff --git a/scripts/retro_ensemble.R b/scripts/retro_ensemble.R new file mode 100644 index 00000000..251fc21d --- /dev/null +++ b/scripts/retro_ensemble.R @@ -0,0 +1,79 @@ +## retro_ensemble.R customized for rsv-forecast-hub +## Builds model-output/hub-ensemble/<date>-hub-ensemble-RETRO.parquet from the RETRO submissions sharing the date in the given file name + +# Get the file_path from the command line arguments +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 1) stop("retro_ensemble.R needs the path of a RETRO model-output file") +retro_file_path <- args[1] + +local_path <- paste0(dirname(here::here())) +dir_path <- file.path(local_path, "rsv-forecast-hub/") +data_path <- file.path(local_path, "rsv-forecast-hub/") +print(local_path) +#dir_path <- local_path +#data_path <- local_path +print(dir_path) + +library(hubUtils) +library(hubData) +library(hubEnsembles) +library(dplyr) +library(purrr) +library(jsonlite) + +hub_path <- dir_path +print(hub_path) +hub_con <- connect_hub(hub_path) + +loc_data <- readr::read_csv(file.path(dir_path, "auxiliary-data/location_census/locations.csv")) + +output_path <- file.path(dir_path, "model-output") + +# Extract date from file name +curr_origin_date <- as.Date(gsub("^(\\d{4}-\\d{2}-\\d{2}).*", "\\1", basename(retro_file_path))) + +# Get all RETRO files for that origin date (file names start with the date) +file_paths <- list.files(output_path, pattern = "-RETRO\\.parquet$|-RETRO\\.csv$", full.names = TRUE, recursive = TRUE) +file_paths <- file_paths[startsWith(basename(file_paths), format(curr_origin_date))] 
+# Skip hub-ensemble output of an earlier run so it is not fed back in as a model +file_paths <- file_paths[basename(dirname(file_paths)) != "hub-ensemble"] +if (length(file_paths) == 0) stop("No RETRO model-output files found for ", format(curr_origin_date)) +print(file_paths) + +# read the files, and concatenate all the data frames with adding the team name in "model_id" column +projection_data_all <- file_paths %>% + map_df(~{ + # pick the reader from the file extension (plain if/else, since ifelse() is meant for vectors of values) + read_fun <- if (grepl("\\.parquet$", .x)) arrow::read_parquet else readr::read_csv + + # read data (no extra arguments: readr::read_csv rejects stringsAsFactors) + data <- read_fun(.x) + + # check if 'origin_date' column exists + if (!"origin_date" %in% names(data)) { + print(paste("File", .x, "does not contain 'origin_date' column")) + } + + # append the team name in "model_id" + data$model_id <- basename(dirname(.x)) + + # return data + data + }) +head(projection_data_all) + +# Prepare data +projection_data_all <- dplyr::mutate(projection_data_all, + target_date = as.Date(origin_date) + (horizon * 7) - 1) +projection_data_all <- as_model_out_tbl(projection_data_all) + +round <- projection_data_all %>% + dplyr::filter(origin_date == as.Date(curr_origin_date)) %>% + dplyr::collect() + +# Generate ensemble +round_ens <- hubEnsembles::simple_ensemble(round) + +# Save ensemble +dir.create(file.path(dir_path, "model-output", "hub-ensemble"), showWarnings = FALSE, recursive = TRUE) +arrow::write_parquet(round_ens, file.path(dir_path, "model-output", "hub-ensemble", paste0(curr_origin_date, "-hub-ensemble-RETRO.parquet")))