Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
kjsato authored Jul 12, 2024
2 parents 1c66b16 + 344e4e9 commit c06c875
Show file tree
Hide file tree
Showing 47 changed files with 125 additions and 21 deletions.
35 changes: 28 additions & 7 deletions .github/workflows/after-validate-submission.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,18 @@ jobs:
with:
install-r: false
use-public-rspm: true
- name: Get list of changed files
id: changed-files
uses: tj-actions/changed-files@v44
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Print changed files
run: |
echo 'Changed files: ${{ steps.changed-files.outputs.added_files }}'
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libcurl4-openssl-dev
sudo apt-get update
sudo apt-get install -y libcurl4-openssl-dev
- name: Cache R packages
uses: actions/cache@v2
with:
Expand Down Expand Up @@ -61,25 +69,38 @@ jobs:
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubVis")'
- name: Install hubUtils
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubUtils")'
Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")'
- name: Install hubEnsembles
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubEnsembles")'
- name: Install hubData
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubData")'
Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")'
- name: Check if RETRO files are present
id: check_files
run: |
echo 'retro='$(echo '${{ steps.changed-files.outputs.added_files }}' | grep 'model-output' | grep -c '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV
echo 'conventional='$(echo '${{ steps.changed-files.outputs.added_files }}' | grep -c -v '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV
- name: Run R script
run: |
cd ./scripts
Rscript ensemble.R
if: ${{ env.conventional != '0' }}
- name: Run retro_ensemble.R
run: |
cd ./scripts
for file in $(echo '${{ steps.changed-files.outputs.added_files }}' | grep 'model-output' | grep '.*-RETRO.*\.\(csv\|parquet\)'); do
Rscript retro_ensemble.R $file
done
if: ${{ env.retro != '0' }}

- name: Commit and push new files to root repository
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
if [ -n "$(git status --porcelain)" ]; then
git add model-output/hub-ensemble/*
git commit -m "Upload new files to root repo"
git commit -m "Upload new files to RSV data repo by ${GITHUB_ACTOR} in #${{ github.event.pull_request.number }}"
git push
echo "Data for hub-ensemble uploaded to root repo";
else
Expand All @@ -90,8 +111,8 @@ jobs:
- name: Checkout and push to another repository
uses: actions/checkout@v2
with:
#repository: kjsato/rsv-forecast-hub_data
repository: HopkinsIDD/rsv-forecast-hub_data
#repository: kjsato/rsv-forecast-hub_data
token: ${{ secrets.KJ3_PATC }}
path: ./rsv-forecast-hub_data
fetch-depth: 2
Expand All @@ -113,7 +134,7 @@ jobs:
git pull origin main
if [ -n "$(git status --porcelain)" ]; then
git add .
git commit -m "Upload new files to RSV data repo"
git commit -m "Upload new files to RSV data repo by ${GITHUB_ACTOR} in #${{ github.event.pull_request.number }}"
git push
echo "RSV data uploaded";
else
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/copy_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ jobs:
- name: Checkout and push to another repository
uses: actions/checkout@v2
with:
#repository: kjsato/rsv-forecast-hub_data
repository: HopkinsIDD/rsv-forecast-hub_data
#repository: kjsato/rsv-forecast-hub_data
token: ${{ secrets.KJ3_PATC }}
path: ./rsv-forecast-hub_data
fetch-depth: 2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/copy_hub-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ jobs:
- name: Checkout target repo
uses: actions/checkout@v2
with:
#repository: kjsato/rsv-forecast-hub_data
repository: HopkinsIDD/rsv-forecast-hub_data
#repository: kjsato/rsv-forecast-hub_data
token: ${{ secrets.KJ3_PATC }}
path: ./rsv-forecast-hub_data
fetch-depth: 2
Expand Down
12 changes: 8 additions & 4 deletions hub-config/admin.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
{
"schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json",
"name": "Simple Forecast Hub",
"maintainer": "Consortium of Infectious Disease Modeling Hubs",
"schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/admin-schema.json",
"name": "US RSV Forecast Hub",
"maintainer": "Johns Hopkins University, Infectious Disease Dynamics Group",
"contact": {
"name": "K. Sato",
"email": "[email protected]"
},
"repository_url": "https://github.com/Infectious-Disease-Modeling-Hubs/hubTemplate",
"repository": {
"host": "github",
"owner": "kjsato",
"name": "rsv-forecast-hub"
},
"file_format": ["csv", "parquet"],
"timezone": "US/Eastern"
}
19 changes: 12 additions & 7 deletions hub-config/tasks.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json",
"schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/tasks-schema.json",
"rounds": [
{
"round_name": "retrospective round",
"round_id_from_variable": true,
"round_id": "origin_date",
"model_tasks": [
Expand All @@ -10,6 +11,7 @@
"origin_date": {
"required": null,
"optional": [
"2023-10-18", "2023-10-22", "2023-10-29",
"2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03",
"2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
"2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
Expand Down Expand Up @@ -147,6 +149,7 @@
"origin_date": {
"required": null,
"optional": [
"2023-10-18", "2023-10-22", "2023-10-29",
"2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03",
"2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
"2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
Expand Down Expand Up @@ -230,13 +233,15 @@
},
"output_type": {
"sample":{
"output_type_id":{
"required": null,
"optional":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
"output_type_id_params":{
"is_required": true,
"type": "integer",
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value":{
"type":"double",
"minimum":0
"type":"integer",
"minimum":0
}
}
},
Expand Down Expand Up @@ -271,7 +276,7 @@
"submissions_due": {
"relative_to": "origin_date",
"start": -6,
"end": 100
"end": 283
}
}
]
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added model-output/PSI-PROF/2024-05-05-PSI-PROF.parquet
Binary file not shown.
Binary file added model-output/PSI-PROF/2024-05-12-PSI-PROF.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified model-output/hub-ensemble/2024-04-28-hub-ensemble.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion scripts/ensemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dates_archive <- unlist(jsonlite::read_json(file.path(dir_path, "hub-config/task
dates_archive <- dates_archive[as.Date(dates_archive) <= Sys.Date()]

curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE))
#curr_origin_date <- as.Date("2024-03-17")
#curr_origin_date <- as.Date("2024-03-24")

## ----prep_ens, include=FALSE--------------------------------------------------

Expand Down
74 changes: 74 additions & 0 deletions scripts/retro_ensemble.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
## retro_ensemble.R customized for rsv-forecast-hub
##
## Builds a retrospective ("RETRO") hub ensemble for one origin date.
##
## Usage: Rscript retro_ensemble.R <path-to-RETRO-submission-file>
##
## The origin date is the leading YYYY-MM-DD of the file name given on the
## command line (only the base name of the path is used). Every
## "*-RETRO.parquet" / "*-RETRO.csv" file under model-output/ whose file name
## carries that date is read and combined, the combined table is passed to
## hubEnsembles::simple_ensemble(), and the result is written to
## model-output/hub-ensemble/<origin_date>-hub-ensemble-RETRO.parquet.

# Get the file_path from the command line arguments
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || is.na(args[1]) || !nzchar(args[1])) {
  stop(
    "Usage: Rscript retro_ensemble.R <path-to-RETRO-submission-file>",
    call. = FALSE
  )
}
retro_file_path <- args[1]

# The hub is expected in a directory named "rsv-forecast-hub" that sits next
# to (or is) the project root reported by here::here().
local_path <- dirname(here::here())
dir_path <- file.path(local_path, "rsv-forecast-hub/")
# NOTE(review): data_path is not referenced again in this script.
data_path <- file.path(local_path, "rsv-forecast-hub/")
print(local_path)
#dir_path <- local_path
#data_path <- local_path
print(dir_path)

library(hubUtils)
library(hubData)
library(hubEnsembles)
library(dplyr)
library(purrr)
library(jsonlite)

hub_path <- dir_path
print(hub_path)
# NOTE(review): hub_con is not referenced again in this script; the call is
# kept so the script still fails early when the hub cannot be connected to.
hub_con <- connect_hub(hub_path)

# NOTE(review): loc_data is not referenced again in this script; the read is
# kept so a missing locations file is still reported.
loc_data <- readr::read_csv(
  file.path(dir_path, "auxiliary-data/location_census/locations.csv")
)

output_path <- file.path(dir_path, "model-output")
ensemble_model_id <- "hub-ensemble"

# Extract date from file name. A file name without a valid leading date is
# rejected here instead of failing later inside as.Date()/grepl().
retro_file_name <- basename(retro_file_path)
date_prefix <- regmatches(
  retro_file_name,
  regexpr("^[0-9]{4}-[0-9]{2}-[0-9]{2}", retro_file_name)
)
curr_origin_date <- if (length(date_prefix) == 1L) {
  as.Date(date_prefix, format = "%Y-%m-%d")
} else {
  as.Date(NA)
}
if (is.na(curr_origin_date)) {
  stop(
    "Cannot extract a YYYY-MM-DD origin date from file name: ",
    retro_file_name,
    call. = FALSE
  )
}
origin_date_label <- format(curr_origin_date)

# Get all RETRO files for this origin date
# (maybe needed consideration for the case of a combo without RETRO files)
file_paths <- list.files(
  output_path,
  pattern = "-RETRO\\.parquet$|-RETRO\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
has_origin_date <- grepl(origin_date_label, basename(file_paths), fixed = TRUE)
# The ensemble written at the end of this script also matches the RETRO
# pattern; exclude it so a re-run does not feed the previous ensemble back in
# as if it were a component model.
is_ensemble_output <- basename(dirname(file_paths)) == ensemble_model_id
file_paths <- file_paths[has_origin_date & !is_ensemble_output]
print(file_paths)

if (length(file_paths) == 0L) {
  stop(
    "No RETRO model-output files found for origin date ", origin_date_label,
    " under ", output_path,
    call. = FALSE
  )
}

# Read one submission file and tag it with its team name.
#
# path: path to a ".parquet" or ".csv" model-output file.
# Returns the file contents with a "model_id" column set to the name of the
# directory that contains the file.
read_model_output <- function(path) {
  # func selection according to the input file format
  data <- if (grepl("\\.parquet$", path)) {
    arrow::read_parquet(path)
  } else {
    # readr::read_csv() has no stringsAsFactors argument (it never creates
    # factors), so no extra arguments are passed.
    readr::read_csv(path)
  }

  # check if 'origin_date' column exists
  if (!"origin_date" %in% names(data)) {
    warning(
      "File ", path, " does not contain 'origin_date' column",
      call. = FALSE
    )
  }

  # append the team name in "model_id"
  data$model_id <- basename(dirname(path))

  data
}

# read the files, and concatenate all the data frames
# NOTE(review): bind_rows() requires compatible column types across files;
# confirm CSV and parquet submissions agree (e.g. output_type_id).
projection_data_all <- dplyr::bind_rows(lapply(file_paths, read_model_output))
print(head(projection_data_all))

if (!"origin_date" %in% names(projection_data_all)) {
  stop(
    "None of the RETRO files for ", origin_date_label,
    " contains an 'origin_date' column",
    call. = FALSE
  )
}

# Prepare data
projection_data_all <- dplyr::mutate(
  projection_data_all,
  target_date = as.Date(origin_date) + (horizon * 7) - 1
)
projection_data_all <- as_model_out_tbl(projection_data_all)

# Keep only rows of the requested round. Named round_data (not `round`) so
# base::round() is not shadowed.
round_data <- dplyr::filter(
  projection_data_all,
  as.Date(origin_date) == curr_origin_date
)
if (nrow(round_data) == 0L) {
  stop(
    "RETRO files for ", origin_date_label,
    " contain no rows with a matching origin_date",
    call. = FALSE
  )
}

# Generate ensemble
round_ens <- hubEnsembles::simple_ensemble(round_data)

# Save ensemble
ensemble_dir <- file.path(output_path, ensemble_model_id)
dir.create(ensemble_dir, showWarnings = FALSE, recursive = TRUE)
arrow::write_parquet(
  round_ens,
  file.path(
    ensemble_dir,
    paste0(origin_date_label, "-", ensemble_model_id, "-RETRO.parquet")
  )
)

0 comments on commit c06c875

Please sign in to comment.