Skip to content

Commit

Permalink
Merge pull request #12 from theiagen/smw-dashboard-dev
Browse files (browse the repository at this point in the history)
New fixes and improvements
  • Loading branch information
kapsakcj authored Mar 17, 2023
2 parents 8c81dfb + 6a48300 commit 79bcf99
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 15 deletions.
20 changes: 9 additions & 11 deletions google_workflows/standard-dashboard.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,21 +27,21 @@ Usage: ./standard_dashboard.sh
[ -p | --terra-project ] the project hosting the terra workspace ("cdc-terra-la-phl")
[ -w | --terra-workspace ] the terra workspace ("CDC-COVID-LA-Dashboard-Test")
[ -q | --big-query-table-name ] the name of the big query table to upload to ("sars_cov_2_dashboard.workflow_la_state_gisaid_specimens_test")
[ -m | --metadata-parameters ] (optional) any additional metadata cleanser parameter (enclose in quotes). available options: "--puertorico"
[ -m | --metadata-parameters ] apply Puerto Rico-specific changes. available options: true or false
[ -i | --input-tar-file ] the tar file given to the script by the Google Trigger
Happy dashboarding!
EOF
}

# use getopt to parse the input arguments
PARSED_ARGUMENTS=$(getopt -n "standard-dashboard" -o "hvd:j:s:b:o:t:g:r:p:w:q:m::i:" -l "version,help,dashboard-gcp-uri:,dashboard-newline-json:,dashboard-schema:,gisaid-backup-dir:,output-dir:,trigger-bucket:,terra-gcp-uri:,terra-table-root-entity:,terra-project:,terra-workspace:,big-query-table-name:,metadata-parameters::,input-tar-file:" -a -- "$@")
PARSED_ARGUMENTS=$(getopt -n "standard-dashboard" -o "hvd:j:s:b:o:t:g:r:p:w:q:m:i:" -l "version,help,dashboard-gcp-uri:,dashboard-newline-json:,dashboard-schema:,gisaid-backup-dir:,output-dir:,trigger-bucket:,terra-gcp-uri:,terra-table-root-entity:,terra-project:,terra-workspace:,big-query-table-name:,metadata-parameters:,input-tar-file:" -a -- "$@")

eval set -- "$PARSED_ARGUMENTS"

while true; do
case "$1" in
-v|--version)
-v|--version)
echo $VERSION; exit 0;;
-h|--help)
showHelp; exit 0;;
Expand All @@ -68,14 +68,11 @@ while true; do
-q|--big-query-table-name)
big_query_table_name=$2; shift 2;;
-m|--metadata-parameters)
case "$2" in
"") metadata_cleanser_parameters=''; shift 2;;
*) metadata_cleanser_parameters=$2; shift 2;;
esac ;;
-i|--input-tar-file)
input_tar_file=$2; shift 2;;
puerto_rico=$2; shift 2;;
-i|--input-tar-file)
input_tar_file=$2; shift 2;;
--) shift; break ;;
*) echo "Unexpected option: $1 -- this should not happen."; exit 1;;
*) echo "Unexpected option: $1 -- this should not happen."; exit 1;;
esac
done

Expand All @@ -96,6 +93,7 @@ make_directory() {
date_tag=$(date +"%Y-%m-%d-%Hh-%Mm-%Ss")

# Create output subdirectories if they do not yet exist:
make_directory ${gisaid_backup_dir}/
make_directory ${output_dir}/automation_logs
make_directory ${output_dir}/gisaid_processing
make_directory ${output_dir}/backup_jsons
Expand Down Expand Up @@ -148,7 +146,7 @@ if [[ "$file" == *"gisaid_auspice_input"*"tar" ]]; then
\n
# Capture, reformat, and prune GISAID metadata
\n
python3 /scripts/gisaid_metadata_cleanser.py ${gisaid_dir}/*.metadata.tsv ${gisaid_dir}/gisaid_metadata_${date_tag}.tsv ${terra_table_root_entity} ${metadata_cleanser_parameters}
python3 /scripts/gisaid_metadata_cleanser.py ${gisaid_dir}/*.metadata.tsv ${gisaid_dir}/gisaid_metadata_${date_tag}.tsv ${terra_table_root_entity} ${puerto_rico}
\n
\n
# Import formatted data table into Terra
Expand Down
7 changes: 3 additions & 4 deletions scripts/gisaid_metadata_cleanser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@ def get_opts():
p.add_argument('tsv_meta_file', help='tsv metadata file input')
p.add_argument('out_file', help='Output file: required, must be a string.')
p.add_argument('table_name', help='Terra table name: required, must be a string; do not include entity: or _id.')
p.add_argument('--puertorico', action='store_true', help='Perform Puerto Rico-specific actions')
p.add_argument('puertorico', help='Perform Puerto Rico-specific actions')
args = p.parse_args()
return args
arguments = get_opts()
Expand All @@ -18,7 +18,6 @@ def get_opts():
meta_tsv1 = arguments.tsv_meta_file
meta_df1 = pd.read_csv(meta_tsv1, delimiter='\t', dtype={'strain': str, 'age': str})


table_name = "entity:" + arguments.table_name + "_id"

# input_headers = meta_df1.columns.values
Expand All @@ -28,11 +27,11 @@ def get_opts():
meta_df1.rename(columns={'strain': table_name, 'gisaid_epi_isl': 'gisaid_accession', 'Nextstrain_clade': 'nextclade_clade', 'vendor': 'sequencing_lab', 'location': 'county', 'GISAID_clade': 'gisaid_clade', 'pangolin_lineage': 'pango_lineage', 'date': 'collection_date'}, inplace=True)

# perform PR specific actions:
if arguments.puertorico:
if arguments.puertorico == "true":
# drop pangolin lineage column
meta_df1.drop('pango_lineage', axis='columns', inplace=True)
# remove any samples uploaded by PR
meta_df1[~meta_df1[table_name].str.contains("PR-CVL")]
meta_df1 = meta_df1[~meta_df1[table_name].str.contains("PR-CVL")]

# drop extraneous cols
drop_list = []
Expand Down

0 comments on commit 79bcf99

Please sign in to comment.