diff --git a/google_workflows/standard-dashboard.sh b/google_workflows/standard-dashboard.sh
index 6865f91..14b7605 100755
--- a/google_workflows/standard-dashboard.sh
+++ b/google_workflows/standard-dashboard.sh
@@ -27,7 +27,7 @@ Usage: ./standard_dashboard.sh
   [ -p | --terra-project ] the project hosting the terra workspace ("cdc-terra-la-phl")
   [ -w | --terra-workspace ] the terra workspace ("CDC-COVID-LA-Dashboard-Test")
   [ -q | --big-query-table-name ] the name of the big query table to upload to ("sars_cov_2_dashboard.workflow_la_state_gisaid_specimens_test")
-  [ -m | --metadata-parameters ] (optional) any additional metadata cleanser parameter (enclose in quotes). available options: "--puertorico"
+  [ -m | --metadata-parameters ] apply Puerto Rico-specific changes. available options: true or false
   [ -i | --input-tar-file ] the tar file given to the script by the Google Trigger

Happy dashboarding!
@@ -35,13 +35,13 @@ EOF
 }

 # use getopt to parse the input arguments
-PARSED_ARGUMENTS=$(getopt -n "standard-dashboard" -o "hvd:j:s:b:o:t:g:r:p:w:q:m::i:" -l "version,help,dashboard-gcp-uri:,dashboard-newline-json:,dashboard-schema:,gisaid-backup-dir:,output-dir:,trigger-bucket:,terra-gcp-uri:,terra-table-root-entity:,terra-project:,terra-workspace:,big-query-table-name:,metadata-parameters::,input-tar-file:" -a -- "$@")
+PARSED_ARGUMENTS=$(getopt -n "standard-dashboard" -o "hvd:j:s:b:o:t:g:r:p:w:q:m:i:" -l "version,help,dashboard-gcp-uri:,dashboard-newline-json:,dashboard-schema:,gisaid-backup-dir:,output-dir:,trigger-bucket:,terra-gcp-uri:,terra-table-root-entity:,terra-project:,terra-workspace:,big-query-table-name:,metadata-parameters:,input-tar-file:" -a -- "$@")

 eval set -- "$PARSED_ARGUMENTS"

 while true; do
   case "$1" in
-    -v|--version)
+    -v|--version)
       echo $VERSION; exit 0;;
     -h|--help)
       showHelp; exit 0;;
@@ -68,14 +68,11 @@ while true; do
     -q|--big-query-table-name)
      big_query_table_name=$2; shift 2;;
     -m|--metadata-parameters)
-      case "$2" in
-        "") metadata_cleanser_parameters=''; shift 2;;
-        *) metadata_cleanser_parameters=$2; shift 2;;
-      esac ;;
-    -i|--input-tar-file)
-      input_tar_file=$2; shift 2;;
+      puerto_rico=$2; shift 2;;
+    -i|--input-tar-file)
+      input_tar_file=$2; shift 2;;
     --) shift; break ;;
-    *) echo "Unexpected option: $1 -- this should not happen."; exit 1;;
+    *) echo "Unexpected option: $1 -- this should not happen."; exit 1;;
   esac
 done
@@ -96,6 +93,7 @@ make_directory() {
 date_tag=$(date +"%Y-%m-%d-%Hh-%Mm-%Ss")

 # Create output subdirectories if they do not yet exist:
+make_directory ${gisaid_backup_dir}/
 make_directory ${output_dir}/automation_logs
 make_directory ${output_dir}/gisaid_processing
 make_directory ${output_dir}/backup_jsons
@@ -148,7 +146,7 @@ if [[ "$file" == *"gisaid_auspice_input"*"tar" ]]; then
     \n
     # Capture, reformat, and prune GISAID metadata
     \n
-    python3 /scripts/gisaid_metadata_cleanser.py ${gisaid_dir}/*.metadata.tsv ${gisaid_dir}/gisaid_metadata_${date_tag}.tsv ${terra_table_root_entity} ${metadata_cleanser_parameters}
+    python3 /scripts/gisaid_metadata_cleanser.py ${gisaid_dir}/*.metadata.tsv ${gisaid_dir}/gisaid_metadata_${date_tag}.tsv ${terra_table_root_entity} ${puerto_rico}
     \n
     \n
     # Import formatted data table into Terra
diff --git a/scripts/gisaid_metadata_cleanser.py b/scripts/gisaid_metadata_cleanser.py
index 7bc72ec..7267338 100755
--- a/scripts/gisaid_metadata_cleanser.py
+++ b/scripts/gisaid_metadata_cleanser.py
@@ -9,7 +9,7 @@ def get_opts():
     p.add_argument('tsv_meta_file', help='tsv metadata file input')
     p.add_argument('out_file', help='Output file: required, must be a string.')
     p.add_argument('table_name', help='Terra table name: required, must be a string; do not include entity: or _id.')
-    p.add_argument('--puertorico', action='store_true', help='Perform Puerto Rico-specific actions')
+    p.add_argument('puertorico', help='Perform Puerto Rico-specific actions')
     args = p.parse_args()
     return args
 arguments = get_opts()
@@ -18,7 +18,6 @@ def get_opts():
 meta_tsv1 = arguments.tsv_meta_file
 meta_df1 = pd.read_csv(meta_tsv1, delimiter='\t', dtype={'strain': str, 'age': str})
-
 table_name = "entity:" + arguments.table_name + "_id"
 #
 input_headers = meta_df1.columns.values
@@ -28,11 +27,11 @@ def get_opts():
 meta_df1.rename(columns={'strain': table_name, 'gisaid_epi_isl': 'gisaid_accession', 'Nextstrain_clade': 'nextclade_clade', 'vendor': 'sequencing_lab', 'location': 'county', 'GISAID_clade': 'gisaid_clade', 'pangolin_lineage': 'pango_lineage', 'date': 'collection_date'}, inplace=True)

 # perform PR specific actions:
-if arguments.puertorico:
+if arguments.puertorico == "true":
     # drop pangolin lineage column
     meta_df1.drop('pango_lineage', axis='columns', inplace=True)
     # remove any samples uploaded by PR
-    meta_df1[~meta_df1[table_name].str.contains("PR-CVL")]
+    meta_df1 = meta_df1[~meta_df1[table_name].str.contains("PR-CVL")]

 # drop extraneous cols
 drop_list = []
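
Taken together, the Python-side changes swap the optional --puertorico flag for a required positional argument that is compared against the literal string "true", and they fix a silent no-op: the old meta_df1[~meta_df1[table_name].str.contains("PR-CVL")] built the filtered frame but never assigned it back, so the PR-CVL rows were never actually dropped. A minimal sketch of the new behavior, using hypothetical sample data (the table_name value and strain IDs below are illustrative, not from the repo; only pandas is assumed):

import pandas as pd

# Hypothetical stand-ins for the cleanser's inputs; the real script reads a
# GISAID metadata TSV and builds table_name from its table_name argument.
table_name = "entity:la_gisaid_specimens_id"
meta_df1 = pd.DataFrame({
    table_name: ["USA/LA-ABC-001/2022", "PR-CVL-0001/2022"],
    "pango_lineage": ["BA.2", "BA.1"],
})

puertorico = "true"  # the wrapper script now passes ${puerto_rico} through verbatim

if puertorico == "true":
    # drop pangolin lineage column
    meta_df1.drop("pango_lineage", axis="columns", inplace=True)
    # remove any samples uploaded by PR; assigning the result back is the fix,
    # since boolean indexing returns a new frame instead of filtering in place
    meta_df1 = meta_df1[~meta_df1[table_name].str.contains("PR-CVL")]

print(meta_df1)  # only the non-PR-CVL row remains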
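
On the shell side, tightening the getopt spec from m:: / metadata-parameters:: to m: / metadata-parameters: means -m now always consumes a value rather than taking an optional one. Because ${puerto_rico} is expanded unquoted and argparse treats the new puertorico positional as required, a run that omits -m would leave the cleanser one argument short and argparse would exit with a missing-argument error; callers presumably need to pass an explicit true or false (e.g. -m false) on every invocation.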