From c363edb7f408ea3ee27983cc7b8b835ef4a214dd Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 3 May 2024 17:15:42 +0000 Subject: [PATCH 01/41] new branch so tbp-parser is compatible --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- tasks/task_versioning.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 7917529d6..1b8cb69a0 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -26,7 +26,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.2" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.4" Int disk_size = 100 Int memory = 4 Int cpu = 1 diff --git a/tasks/task_versioning.wdl b/tasks/task_versioning.wdl index e30dea0ec..7794982e7 100644 --- a/tasks/task_versioning.wdl +++ b/tasks/task_versioning.wdl @@ -9,7 +9,7 @@ task version_capture { volatile: true } command { - PHB_Version="PHB v2.0.1" + PHB_Version="PHB v2.0.1: branch smw-tb-2024-05-03-dev" ~{default='' 'export TZ=' + timezone} date +"%Y-%m-%d" > TODAY echo "$PHB_Version" > PHB_VERSION From 6ea5ee87255b10b15291ff6d461bbf73358f0287 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 3 May 2024 20:00:07 +0000 Subject: [PATCH 02/41] use tbprofiler 6.2.0 --- .../mycobacterium/task_tbprofiler.wdl | 76 ++++++++----------- .../standalone_modules/wf_tbprofiler_tngs.wdl | 2 +- .../theiaprok/wf_theiaprok_illumina_pe.wdl | 2 +- workflows/theiaprok/wf_theiaprok_ont.wdl | 2 +- workflows/utilities/wf_merlin_magic.wdl | 4 +- 5 files changed, 38 insertions(+), 48 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 5af081c33..d19cd03cd 100644 --- 
a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -5,7 +5,7 @@ task tbprofiler { File read1 File? read2 String samplename - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2" + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.2.0" Int disk_size = 100 Int memory = 16 String mapper = "bwa" @@ -17,8 +17,9 @@ task tbprofiler { Int cov_frac_threshold = 1 Int cpu = 8 Boolean ont_data = false - File? tbprofiler_custom_db + String? tbprofiler_custom_db Boolean tbprofiler_run_custom_db = false + Boolean tbprofiler_run_cdph_db = false } command <<< # Print and save date @@ -28,13 +29,13 @@ task tbprofiler { tb-profiler version > VERSION && sed -i -e 's/TBProfiler version //' VERSION && sed -n -i '$p' VERSION # check if file is non existant or non empty - if [ -z "~{read2}" ] || [ ! -s "~{read2}" ] ; then + if [ -z "~{read2}" ] || [ ! -s "~{read2}" ]; then INPUT_READS="-1 ~{read1}" else INPUT_READS="-1 ~{read1} -2 ~{read2}" fi - if [ "~{ont_data}" = true ]; then + if ~{ont_data}; then mode="--platform nanopore" export ont_data="true" else @@ -42,19 +43,15 @@ task tbprofiler { fi # check if new database file is provided and not empty - if [ "~{tbprofiler_run_custom_db}" = true ] ; then - echo "Found new database file ~{tbprofiler_custom_db}" - prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//') - echo "New database will be created with prefix $prefix" + if ~{tbprofiler_run_custom_db}; then + tb-profiler load_library ~{tbprofiler_custom_db} - echo "Inflating the new database..." 
- tar xfv ~{tbprofiler_custom_db} - - tb-profiler load_library ./"$prefix"/"$prefix" - - TBDB="--db $prefix" + TBDB="--db ~{tbprofiler_custom_db}" + elif ~{tbprofiler_run_cdph_db}; then + tb-profiler update_tbdb --branch CaliforniaDPH + TBDB="--db CaliforniaDPH" else - TBDB="" + TBDB="" 6tt6 fi # Run tb-profiler on the input reads with samplename prefix @@ -65,12 +62,10 @@ task tbprofiler { --mapper ~{mapper} \ --caller ~{variant_caller} \ --calling_params "~{variant_calling_params}" \ - --min_depth ~{min_depth} \ + --depth ~{min_depth} \ --af ~{min_af} \ - --reporting_af ~{min_af_pred} \ - --coverage_fraction_threshold ~{cov_frac_threshold} \ --csv --txt \ - $TBDB + ${TBDB} # Collate results tb-profiler collate --prefix ~{samplename} @@ -92,35 +87,28 @@ task tbprofiler { tsv_reader=csv.reader(tsv_file, delimiter="\t") tsv_data=list(tsv_reader) tsv_dict=dict(zip(tsv_data[0], tsv_data[1])) - with open ("MAIN_LINEAGE", 'wt') as Main_Lineage: - main_lin=tsv_dict['main_lineage'] - Main_Lineage.write(main_lin) - with open ("SUB_LINEAGE", 'wt') as Sub_Lineage: - sub_lin=tsv_dict['sub_lineage'] - Sub_Lineage.write(sub_lin) - with open ("DR_TYPE", 'wt') as DR_Type: - dr_type=tsv_dict['DR_type'] - DR_Type.write(dr_type) - with open ("NUM_DR_VARIANTS", 'wt') as Num_DR_Variants: - num_dr_vars=tsv_dict['num_dr_variants'] - Num_DR_Variants.write(num_dr_vars) - with open ("NUM_OTHER_VARIANTS", 'wt') as Num_Other_Variants: - num_other_vars=tsv_dict['num_other_variants'] - Num_Other_Variants.write(num_other_vars) - with open ("RESISTANCE_GENES", 'wt') as Resistance_Genes: - res_genes_list=['rifampicin', 'isoniazid', 'pyrazinamide', 'ethambutol', 'streptomycin', 'fluoroquinolones', 'moxifloxacin', 'ofloxacin', 'levofloxacin', 'ciprofloxacin', 'aminoglycosides', 'amikacin', 'kanamycin', 'capreomycin', 'ethionamide', 'para-aminosalicylic_acid', 'cycloserine', 'linezolid', 'bedaquiline', 'clofazimine', 'delamanid'] + with open ("MAIN_LINEAGE", 'wt') as main_lineage: + 
main_lineage.write(tsv_dict['main_lineage']) + with open ("SUB_LINEAGE", 'wt') as sublineage: + sublineage.write(tsv_dict['sub_lineage']) + with open ("DR_TYPE", 'wt') as dr_type: + dr_type.write(tsv_dict['drtype']) + with open ("NUM_DR_VARIANTS", 'wt') as num_dr_variants: + num_dr_variants.write(tsv_dict['num_dr_variants']) + with open ("NUM_OTHER_VARIANTS", 'wt') as num_other_variants: + num_other_variants.write(tsv_dict['num_other_variants']) + with open ("RESISTANCE_GENES", 'wt') as resistance_genes: + res_genes_list=['rifampicin', 'isoniazid', 'ethambutol', 'pyrazinamide', 'moxifloxacin', 'levofloxacin', 'bedaquiline', 'delamanid', 'pretomanid', 'linezolid', 'streptomycin', 'amikacin', 'kanamycin', 'capreomycin', 'clofazimine', 'ethionamide', 'para-aminosalicylic_acid', 'cycloserine'] res_genes=[] for i in res_genes_list: if tsv_dict[i] != '-': res_genes.append(tsv_dict[i]) res_genes_string=';'.join(res_genes) - Resistance_Genes.write(res_genes_string) - with open ("MEDIAN_COVERAGE", 'wt') as Median_Coverage: - median_coverage=tsv_dict['median_coverage'] - Median_Coverage.write(median_coverage) - with open ("PCT_READS_MAPPED", 'wt') as Pct_Reads_Mapped: - pct_reads_mapped=tsv_dict['pct_reads_mapped'] - Pct_Reads_Mapped.write(pct_reads_mapped) + resistance_genes.write(res_genes_string) + with open ("MEDIAN_DEPTH", 'wt') as median_depth: + median_depth.write(tsv_dict['target_median_depth']) + with open ("PCT_READS_MAPPED", 'wt') as pct_reads_mapped: + pct_reads_mapped.write(tsv_dict['pct_reads_mapped']) CODE >>> output { @@ -137,7 +125,7 @@ task tbprofiler { String tbprofiler_num_dr_variants = read_string("NUM_DR_VARIANTS") String tbprofiler_num_other_variants = read_string("NUM_OTHER_VARIANTS") String tbprofiler_resistance_genes = read_string("RESISTANCE_GENES") - Int tbprofiler_median_coverage = read_int("MEDIAN_COVERAGE") + Int tbprofiler_median_depth = read_int("MEDIAN_DEPTH") Float tbprofiler_pct_reads_mapped = read_float("PCT_READS_MAPPED") } runtime { 
diff --git a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl index 85f75a665..2757a5e0f 100644 --- a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl +++ b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl @@ -71,7 +71,7 @@ workflow tbprofiler_tngs { String tbprofiler_num_dr_variants = tbprofiler.tbprofiler_num_dr_variants String tbprofiler_num_other_variants = tbprofiler.tbprofiler_num_other_variants String tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int tbprofiler_median_coverage = tbprofiler.tbprofiler_median_coverage + Int tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped # tbp_parser outputs File tbp_parser_looker_report_csv = tbp_parser.tbp_parser_looker_report_csv diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 3a91cd9df..3c1af3ced 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -870,7 +870,7 @@ workflow theiaprok_illumina_pe { String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = merlin_magic.tbprofiler_median_coverage + Int? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index e6fc18624..4f83ffd17 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -760,7 +760,7 @@ workflow theiaprok_ont { String? 
tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = merlin_magic.tbprofiler_median_coverage + Int? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index ce904eca8..76c79fb38 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -70,6 +70,7 @@ workflow merlin_magic { String? tbprofiler_variant_caller String? tbprofiler_variant_calling_params Boolean tbprofiler_run_custom_db = false + Boolean tbprofiler_run_cdph_db = false File? tbprofiler_custom_db Boolean tbprofiler_additional_outputs = false String tbp_parser_output_seq_method_type = "WGS" @@ -259,6 +260,7 @@ workflow merlin_magic { samplename = samplename, tbprofiler_run_custom_db = tbprofiler_run_custom_db, tbprofiler_custom_db = tbprofiler_custom_db, + tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, ont_data = ont_data, mapper = tbprofiler_mapper, variant_caller = tbprofiler_variant_caller, @@ -600,7 +602,7 @@ workflow merlin_magic { String? tbprofiler_sub_lineage = tbprofiler.tbprofiler_sub_lineage String? tbprofiler_dr_type = tbprofiler.tbprofiler_dr_type String? tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = tbprofiler.tbprofiler_median_coverage + Int? tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped String? tbp_parser_version = tbp_parser.tbp_parser_version String? 
tbp_parser_docker = tbp_parser.tbp_parser_docker From 4f2165fdeef8d79b8170a1f7180910a161318539 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 3 May 2024 20:05:27 +0000 Subject: [PATCH 03/41] update tbp-parser accordingly --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 1b8cb69a0..8deebe2d9 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -26,7 +26,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.4" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.0" Int disk_size = 100 Int memory = 4 Int cpu = 1 From 947d095988d5d12be5c4f376050bcc3a192669b7 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 3 May 2024 21:17:44 +0000 Subject: [PATCH 04/41] int -> float --- workflows/standalone_modules/wf_tbprofiler_tngs.wdl | 2 +- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 +- workflows/theiaprok/wf_theiaprok_ont.wdl | 2 +- workflows/utilities/wf_merlin_magic.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl index 2757a5e0f..2d84d91ed 100644 --- a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl +++ b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl @@ -71,7 +71,7 @@ workflow tbprofiler_tngs { String tbprofiler_num_dr_variants = tbprofiler.tbprofiler_num_dr_variants String tbprofiler_num_other_variants = tbprofiler.tbprofiler_num_other_variants String tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth + Float 
tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped # tbp_parser outputs File tbp_parser_looker_report_csv = tbp_parser.tbp_parser_looker_report_csv diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 3c1af3ced..098cb30fc 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -870,7 +870,7 @@ workflow theiaprok_illumina_pe { String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth + Float? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 4f83ffd17..a929597c0 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -760,7 +760,7 @@ workflow theiaprok_ont { String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth + Float? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? 
tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 76c79fb38..cd876a94a 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -602,7 +602,7 @@ workflow merlin_magic { String? tbprofiler_sub_lineage = tbprofiler.tbprofiler_sub_lineage String? tbprofiler_dr_type = tbprofiler.tbprofiler_dr_type String? tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int? tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth + Float? tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped String? tbp_parser_version = tbp_parser.tbp_parser_version String? tbp_parser_docker = tbp_parser.tbp_parser_docker From a090f5ded589c46b881336a4eb08827018668e25 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Sat, 4 May 2024 13:18:53 +0000 Subject: [PATCH 05/41] change to float again and update version so it says whov2 --- tasks/task_versioning.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/task_versioning.wdl b/tasks/task_versioning.wdl index 7794982e7..586bf35c3 100644 --- a/tasks/task_versioning.wdl +++ b/tasks/task_versioning.wdl @@ -9,7 +9,7 @@ task version_capture { volatile: true } command { - PHB_Version="PHB v2.0.1: branch smw-tb-2024-05-03-dev" + PHB_Version="PHB v2.0.1: branch smw-tb-2024-05-03-whov2-dev" ~{default='' 'export TZ=' + timezone} date +"%Y-%m-%d" > TODAY echo "$PHB_Version" > PHB_VERSION From 702185f03fbbd7d4afdd9a93c88eafd306846f95 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Sat, 4 May 2024 13:19:36 +0000 Subject: [PATCH 06/41] change to float --- tasks/species_typing/mycobacterium/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 
d19cd03cd..855060657 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -125,7 +125,7 @@ task tbprofiler { String tbprofiler_num_dr_variants = read_string("NUM_DR_VARIANTS") String tbprofiler_num_other_variants = read_string("NUM_OTHER_VARIANTS") String tbprofiler_resistance_genes = read_string("RESISTANCE_GENES") - Int tbprofiler_median_depth = read_int("MEDIAN_DEPTH") + Float tbprofiler_median_depth = read_float("MEDIAN_DEPTH") Float tbprofiler_pct_reads_mapped = read_float("PCT_READS_MAPPED") } runtime { From 4145854a8c05728fa3323f3583d84bf8a5e4f392 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 6 May 2024 13:57:58 +0000 Subject: [PATCH 07/41] remove typos ahh bad keyboard! --- tasks/species_typing/mycobacterium/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 855060657..11ec3b31b 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -51,7 +51,7 @@ task tbprofiler { tb-profiler update_tbdb --branch CaliforniaDPH TBDB="--db CaliforniaDPH" else - TBDB="" 6tt6 + TBDB="" fi # Run tb-profiler on the input reads with samplename prefix From 2e861f4b041cd67521de05e90bab2b840bfdf6d7 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 6 May 2024 14:40:50 +0000 Subject: [PATCH 08/41] clean up clean up everybody everywhere --- .../mycobacterium/task_tbprofiler.wdl | 35 +++++++------------ workflows/utilities/wf_merlin_magic.wdl | 6 ---- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 11ec3b31b..190061f2d 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ 
b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -13,13 +13,10 @@ task tbprofiler { String? variant_calling_params Int min_depth = 10 Float min_af = 0.1 - Float min_af_pred = 0.1 - Int cov_frac_threshold = 1 Int cpu = 8 Boolean ont_data = false String? tbprofiler_custom_db Boolean tbprofiler_run_custom_db = false - Boolean tbprofiler_run_cdph_db = false } command <<< # Print and save date @@ -34,29 +31,20 @@ task tbprofiler { else INPUT_READS="-1 ~{read1} -2 ~{read2}" fi - - if ~{ont_data}; then - mode="--platform nanopore" - export ont_data="true" - else - export ont_data="false" - fi # check if new database file is provided and not empty if ~{tbprofiler_run_custom_db}; then tb-profiler load_library ~{tbprofiler_custom_db} - - TBDB="--db ~{tbprofiler_custom_db}" - elif ~{tbprofiler_run_cdph_db}; then - tb-profiler update_tbdb --branch CaliforniaDPH - TBDB="--db CaliforniaDPH" - else - TBDB="" + if [ ! -s ~{tbprofiler_custom_db} ]; then + echo "Custom database file is empty" + TBDB="" + else + TBDB="--db ~{tbprofiler_custom_db}" + fi fi # Run tb-profiler on the input reads with samplename prefix tb-profiler profile \ - ${mode} \ ${INPUT_READS} \ --prefix ~{samplename} \ --mapper ~{mapper} \ @@ -64,15 +52,14 @@ task tbprofiler { --calling_params "~{variant_calling_params}" \ --depth ~{min_depth} \ --af ~{min_af} \ + --threads ~{cpu} \ --csv --txt \ + ~{true="--platform nanopore" false="" ont_data} \ ${TBDB} # Collate results tb-profiler collate --prefix ~{samplename} - # touch optional output files because wdl - touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE - # merge all vcf files if multiple are present bcftools index ./vcf/*bcf bcftools index ./vcf/*gz @@ -87,16 +74,19 @@ task tbprofiler { tsv_reader=csv.reader(tsv_file, delimiter="\t") tsv_data=list(tsv_reader) tsv_dict=dict(zip(tsv_data[0], tsv_data[1])) + with open ("MAIN_LINEAGE", 'wt') as main_lineage: main_lineage.write(tsv_dict['main_lineage']) with open ("SUB_LINEAGE", 
'wt') as sublineage: sublineage.write(tsv_dict['sub_lineage']) + with open ("DR_TYPE", 'wt') as dr_type: dr_type.write(tsv_dict['drtype']) with open ("NUM_DR_VARIANTS", 'wt') as num_dr_variants: num_dr_variants.write(tsv_dict['num_dr_variants']) with open ("NUM_OTHER_VARIANTS", 'wt') as num_other_variants: num_other_variants.write(tsv_dict['num_other_variants']) + with open ("RESISTANCE_GENES", 'wt') as resistance_genes: res_genes_list=['rifampicin', 'isoniazid', 'ethambutol', 'pyrazinamide', 'moxifloxacin', 'levofloxacin', 'bedaquiline', 'delamanid', 'pretomanid', 'linezolid', 'streptomycin', 'amikacin', 'kanamycin', 'capreomycin', 'clofazimine', 'ethionamide', 'para-aminosalicylic_acid', 'cycloserine'] res_genes=[] @@ -105,6 +95,7 @@ task tbprofiler { res_genes.append(tsv_dict[i]) res_genes_string=';'.join(res_genes) resistance_genes.write(res_genes_string) + with open ("MEDIAN_DEPTH", 'wt') as median_depth: median_depth.write(tsv_dict['target_median_depth']) with open ("PCT_READS_MAPPED", 'wt') as pct_reads_mapped: @@ -117,7 +108,7 @@ task tbprofiler { File tbprofiler_output_json = "./results/~{samplename}.results.json" File tbprofiler_output_bam = "./bam/~{samplename}.bam" File tbprofiler_output_bai = "./bam/~{samplename}.bam.bai" - File tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf" + File? tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf" String version = read_string("VERSION") String tbprofiler_main_lineage = read_string("MAIN_LINEAGE") String tbprofiler_sub_lineage = read_string("SUB_LINEAGE") diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index cd876a94a..64e50c2ab 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -65,12 +65,9 @@ workflow merlin_magic { String? tbprofiler_mapper Int? tbprofiler_min_depth Float? tbprofiler_min_af - Float? tbprofiler_min_af_pred - Int? tbprofiler_cov_frac_threshold String? 
tbprofiler_variant_caller String? tbprofiler_variant_calling_params Boolean tbprofiler_run_custom_db = false - Boolean tbprofiler_run_cdph_db = false File? tbprofiler_custom_db Boolean tbprofiler_additional_outputs = false String tbp_parser_output_seq_method_type = "WGS" @@ -260,15 +257,12 @@ workflow merlin_magic { samplename = samplename, tbprofiler_run_custom_db = tbprofiler_run_custom_db, tbprofiler_custom_db = tbprofiler_custom_db, - tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, ont_data = ont_data, mapper = tbprofiler_mapper, variant_caller = tbprofiler_variant_caller, variant_calling_params = tbprofiler_variant_calling_params, min_depth = tbprofiler_min_depth, min_af = tbprofiler_min_af, - min_af_pred = tbprofiler_min_af_pred, - cov_frac_threshold = tbprofiler_cov_frac_threshold } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { From 811fcc9f17c753fe7dbc83e8d16a82eedb3b39c6 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 7 May 2024 18:23:26 +0000 Subject: [PATCH 09/41] enable californiaDPH tbprofiler database --- .../mycobacterium/task_tbprofiler.wdl | 14 ++++++++++++-- workflows/utilities/wf_merlin_magic.wdl | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 190061f2d..6610d7284 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -15,7 +15,8 @@ task tbprofiler { Float min_af = 0.1 Int cpu = 8 Boolean ont_data = false - String? tbprofiler_custom_db + File? 
tbprofiler_custom_db + Boolean tbprofiler_run_cdph_db = false Boolean tbprofiler_run_custom_db = false } command <<< @@ -39,8 +40,17 @@ task tbprofiler { echo "Custom database file is empty" TBDB="" else - TBDB="--db ~{tbprofiler_custom_db}" + echo "Found new database file ~{tbprofiler_custom_db}" + prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//') + tar xfv ~{tbprofiler_custom_db} + + tb-profiler load_library ./"$prefix"/"$prefix" + + TBDB="--db $prefix" fi + elif ~{tbprofiler_run_cdph_db}; then + tb-profiler update_tbdb --branch CaliforniaDPH + TBDB="--db CaliforniaDPH" fi # Run tb-profiler on the input reads with samplename prefix diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 64e50c2ab..dd4c428e5 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -68,6 +68,7 @@ workflow merlin_magic { String? tbprofiler_variant_caller String? tbprofiler_variant_calling_params Boolean tbprofiler_run_custom_db = false + Boolean? tbprofiler_run_cdph_db File? 
tbprofiler_custom_db Boolean tbprofiler_additional_outputs = false String tbp_parser_output_seq_method_type = "WGS" @@ -257,6 +258,7 @@ workflow merlin_magic { samplename = samplename, tbprofiler_run_custom_db = tbprofiler_run_custom_db, tbprofiler_custom_db = tbprofiler_custom_db, + tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, ont_data = ont_data, mapper = tbprofiler_mapper, variant_caller = tbprofiler_variant_caller, From 763606bfdd0ccc11415065417ec79cf647a4db7b Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 29 May 2024 17:30:38 +0000 Subject: [PATCH 10/41] enable ability to alter tbprofiler docker --- workflows/utilities/wf_merlin_magic.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index dd4c428e5..d7b307717 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -62,6 +62,7 @@ workflow merlin_magic { Boolean call_shigeifinder_reads_input = false Boolean assembly_only = false Boolean theiaeuk = false + String? tbprofiler_docker_image String? tbprofiler_mapper Int? tbprofiler_min_depth Float? 
tbprofiler_min_af @@ -265,6 +266,7 @@ workflow merlin_magic { variant_calling_params = tbprofiler_variant_calling_params, min_depth = tbprofiler_min_depth, min_af = tbprofiler_min_af, + docker = tbprofiler_docker_image } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { From 3fbe354abfff9b69991929d3c5c721c58cc06dda Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 5 Jul 2024 20:15:00 +0000 Subject: [PATCH 11/41] update docker --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 8deebe2d9..ea280f9ea 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -26,7 +26,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.0" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.9" Int disk_size = 100 Int memory = 4 Int cpu = 1 From f1c75cf2aa4578a8142038f484a6c84368f4482c Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 5 Jul 2024 20:19:18 +0000 Subject: [PATCH 12/41] update to latest docker --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index ea280f9ea..4ce677c65 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -26,7 +26,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? 
expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.9" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.10" Int disk_size = 100 Int memory = 4 Int cpu = 1 From 6e505a1030093a273edf007714224a8ed3857ec2 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 5 Jul 2024 20:22:10 +0000 Subject: [PATCH 13/41] add cycloserine parameter --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 4ce677c65..da5f749df 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -17,6 +17,8 @@ task tbp_parser { Boolean tbp_parser_debug = false + Boolean add_cycloserine_lims = false + Boolean tngs_data = false Float? rrs_frequency # default 0.1 Int? rrs_read_support # default 10 @@ -53,7 +55,8 @@ task tbp_parser { ~{"--etha237_frequency " + etha237_frequency} \ --output_prefix ~{samplename} \ ~{true="--debug" false="--verbose" tbp_parser_debug} \ - ~{true="--tngs" false="" tngs_data} + ~{true="--tngs" false="" tngs_data} \ + ~{true="--add_cs_lims" false="" add_cycloserine_lims} # set default genome percent coverage and average depth to 0 to prevent failures echo 0.0 > GENOME_PC From 9e4cbc897e73a1eba1702686890964711b4e2664 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 17 Sep 2024 15:47:27 +0000 Subject: [PATCH 14/41] additional params --- tasks/species_typing/mycobacterium/task_tbprofiler.wdl | 2 ++ workflows/utilities/wf_merlin_magic.wdl | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 6610d7284..f33fb9770 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ 
b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -11,6 +11,7 @@ task tbprofiler { String mapper = "bwa" String variant_caller = "freebayes" String? variant_calling_params + String? additional_parameters Int min_depth = 10 Float min_af = 0.1 Int cpu = 8 @@ -65,6 +66,7 @@ task tbprofiler { --threads ~{cpu} \ --csv --txt \ ~{true="--platform nanopore" false="" ont_data} \ + ~{additional_parameters} \ ${TBDB} # Collate results diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index d7b307717..61e462bec 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -68,6 +68,7 @@ workflow merlin_magic { Float? tbprofiler_min_af String? tbprofiler_variant_caller String? tbprofiler_variant_calling_params + String? tbprofiler_additional_parameters Boolean tbprofiler_run_custom_db = false Boolean? tbprofiler_run_cdph_db File? tbprofiler_custom_db @@ -266,7 +267,8 @@ workflow merlin_magic { variant_calling_params = tbprofiler_variant_calling_params, min_depth = tbprofiler_min_depth, min_af = tbprofiler_min_af, - docker = tbprofiler_docker_image + docker = tbprofiler_docker_image, + additional_parameters = tbprofiler_additional_parameters } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { From 072dc9886b2ee5c9c5d8757c4ba19082c46a3c88 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 26 Sep 2024 19:59:05 +0000 Subject: [PATCH 15/41] expose input var --- workflows/utilities/wf_merlin_magic.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 61e462bec..64704a93a 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -75,6 +75,7 @@ workflow merlin_magic { Boolean tbprofiler_additional_outputs = false String tbp_parser_output_seq_method_type = "WGS" String? tbp_parser_operator + Boolean? tbp_parser_add_cs_lims Int? 
tbp_parser_min_depth Int? tbp_parser_coverage_threshold Boolean? tbp_parser_debug @@ -281,6 +282,7 @@ workflow merlin_magic { operator = tbp_parser_operator, min_depth = tbp_parser_min_depth, coverage_threshold = tbp_parser_coverage_threshold, + add_cycloserine_lims = tbp_parser_add_cs_lims, tbp_parser_debug = tbp_parser_debug, docker = tbp_parser_docker_image } From b8b0e7d6afda50601f9287b8d7d63a0461810a94 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 8 Nov 2024 16:21:11 +0000 Subject: [PATCH 16/41] update tbprofiler & tbp-parser defaults --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 4 ++-- tasks/species_typing/mycobacterium/task_tbprofiler.wdl | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index da5f749df..0a4a346cb 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -15,7 +15,7 @@ task tbp_parser { Float? min_frequency # default 0.1 Int? min_read_support # default 10 - Boolean tbp_parser_debug = false + Boolean tbp_parser_debug = true Boolean add_cycloserine_lims = false @@ -28,7 +28,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.5.10" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.0.0" Int disk_size = 100 Int memory = 4 Int cpu = 1 diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index f33fb9770..cc040a8dc 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -5,7 +5,7 @@ task tbprofiler { File read1 File? 
read2 String samplename - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.2.0" + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" Int disk_size = 100 Int memory = 16 String mapper = "bwa" @@ -21,9 +21,6 @@ task tbprofiler { Boolean tbprofiler_run_custom_db = false } command <<< - # Print and save date - date | tee DATE - # Print and save version tb-profiler version > VERSION && sed -i -e 's/TBProfiler version //' VERSION && sed -n -i '$p' VERSION @@ -36,7 +33,6 @@ task tbprofiler { # check if new database file is provided and not empty if ~{tbprofiler_run_custom_db}; then - tb-profiler load_library ~{tbprofiler_custom_db} if [ ! -s ~{tbprofiler_custom_db} ]; then echo "Custom database file is empty" TBDB="" From 3ccb50353be5203b4cd80457ab2d8adbefb1b6d5 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 8 Nov 2024 16:42:02 +0000 Subject: [PATCH 17/41] add updates --- .../mycobacterium/task_tbp_parser.wdl | 19 ++++---- .../mycobacterium/task_tbprofiler.wdl | 19 +++++--- workflows/utilities/wf_merlin_magic.wdl | 43 ++++++++++++------- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 719fba343..0acae01b7 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -9,19 +9,18 @@ task tbp_parser { String? sequencing_method String? operator + Int? min_depth # default 10 - Int? coverage_threshold # default 100 (--min_percent_coverage) - File? coverage_regions_bed Float? min_frequency # default 0.1 Int? min_read_support # default 10 + + Int? coverage_threshold # default 100 (--min_percent_coverage) + File? coverage_regions_bed - Boolean tbp_parser_debug = true - Boolean add_cycloserine_lims = false - - Boolean add_cycloserine_lims = false - + Boolean tbp_parser_debug = true Boolean tngs_data = false + Float? 
rrs_frequency # default 0.1 Int? rrs_read_support # default 10 Float? rrl_frequency # default 0.1 @@ -30,7 +29,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.0.0" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.2" Int disk_size = 100 Int memory = 4 Int cpu = 1 @@ -44,10 +43,10 @@ task tbp_parser { ~{"--sequencing_method " + sequencing_method} \ ~{"--operator " + operator} \ ~{"--min_depth " + min_depth} \ - ~{"--min_percent_coverage " + coverage_threshold} \ - ~{"--coverage_regions " + coverage_regions_bed} \ ~{"--min_frequency " + min_frequency} \ ~{"--min_read_support " + min_read_support} \ + ~{"--min_percent_coverage " + coverage_threshold} \ + ~{"--coverage_regions " + coverage_regions_bed} \ ~{"--tngs_expert_regions " + expert_rule_regions_bed} \ ~{"--rrs_frequency " + rrs_frequency} \ ~{"--rrs_read_support " + rrs_read_support} \ diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index cc040a8dc..0989cc12a 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -5,20 +5,25 @@ task tbprofiler { File read1 File? read2 String samplename - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" - Int disk_size = 100 - Int memory = 16 + Boolean ont_data = false + String mapper = "bwa" - String variant_caller = "freebayes" + String variant_caller = "gatk" String? variant_calling_params - String? additional_parameters + + String? additional_parameters # for tbprofiler + Int min_depth = 10 Float min_af = 0.1 - Int cpu = 8 - Boolean ont_data = false + File? 
tbprofiler_custom_db Boolean tbprofiler_run_cdph_db = false Boolean tbprofiler_run_custom_db = false + + Int cpu = 8 + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" + Int disk_size = 100 + Int memory = 16 } command <<< # Print and save version diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 4dd81e686..ef67046bd 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -197,18 +197,17 @@ workflow merlin_magic { Int srst2_gene_max_mismatch = 2000 # tbprofiler options Boolean tbprofiler_run_custom_db = false + Boolean tbprofiler_run_cdph_db = false File? tbprofiler_custom_db - Int? tbprofiler_cov_frac_threshold Float? tbprofiler_min_af - Float? tbprofiler_min_af_pred Int? tbprofiler_min_depth String? tbprofiler_mapper String? tbprofiler_variant_caller String? tbprofiler_variant_calling_params + String? tbprofiler_additional_parameters # tbp-parser options String tbp_parser_output_seq_method_type = "WGS" String? tbp_parser_operator - Boolean? tbp_parser_add_cs_lims Int? tbp_parser_min_depth Int? tbp_parser_min_frequency Int? tbp_parser_min_read_support @@ -216,6 +215,14 @@ workflow merlin_magic { File? tbp_parser_coverage_regions_bed Boolean? tbp_parser_debug Boolean? tbp_parser_add_cs_lims + Boolean? tbp_parser_tngs_data + Float? tbp_parser_rrs_frequency + Int? tbp_parser_rrs_read_support + Float? tbp_parser_rrl_frequency + Int? tbp_parser_rrl_read_support + Float? tbp_parser_rpob449_frequency + Float? tbp_parser_etha237_frequency + File? tbp_parser_expert_rule_regions_bed # virulencefinder options Float? virulencefinder_coverage_threshold Float? 
virulencefinder_identity_threshold @@ -449,18 +456,16 @@ workflow merlin_magic { read2 = select_first([clockwork_decon_reads.clockwork_cleaned_read2, read2, "gs://theiagen-public-files/terra/theiaprok-files/no-read2.txt"]), samplename = samplename, ont_data = ont_data, - tbprofiler_run_custom_db = tbprofiler_run_custom_db, - tbprofiler_custom_db = tbprofiler_custom_db, - tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, - cov_frac_threshold = tbprofiler_cov_frac_threshold, - min_af = tbprofiler_min_af, - min_af_pred = tbprofiler_min_af_pred, - min_depth = tbprofiler_min_depth, mapper = tbprofiler_mapper, variant_caller = tbprofiler_variant_caller, variant_calling_params = tbprofiler_variant_calling_params, - docker = tbprofiler_docker_image, - additional_parameters = tbprofiler_additional_parameters + additional_parameters = tbprofiler_additional_parameters, + min_depth = tbprofiler_min_depth, + min_af = tbprofiler_min_af, + tbprofiler_custom_db = tbprofiler_custom_db, + tbprofiler_run_custom_db = tbprofiler_run_custom_db, + tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, + docker = tbprofiler_docker_image } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { @@ -471,13 +476,21 @@ workflow merlin_magic { samplename = samplename, sequencing_method = tbp_parser_output_seq_method_type, operator = tbp_parser_operator, - coverage_threshold = tbp_parser_coverage_threshold, - coverage_regions_bed = tbp_parser_coverage_regions_bed, min_depth = tbp_parser_min_depth, min_frequency = tbp_parser_min_frequency, min_read_support = tbp_parser_min_read_support, - tbp_parser_debug = tbp_parser_debug, + coverage_threshold = tbp_parser_coverage_threshold, + coverage_regions_bed = tbp_parser_coverage_regions_bed, add_cycloserine_lims = tbp_parser_add_cs_lims, + tbp_parser_debug = tbp_parser_debug, + tngs_data = tbp_parser_tngs_data, + rrs_frequency = tbp_parser_rrs_frequency, + rrs_read_support = tbp_parser_rrs_read_support, + rrl_frequency = 
tbp_parser_rrl_frequency, + rrl_read_support = tbp_parser_rrl_read_support, + rpob449_frequency = tbp_parser_rpob449_frequency, + etha237_frequency = tbp_parser_etha237_frequency, + expert_rule_regions_bed = tbp_parser_expert_rule_regions_bed, docker = tbp_parser_docker_image } } From 604ecadb7702b7a93107d359dd7361605702dc4a Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 8 Nov 2024 16:42:41 +0000 Subject: [PATCH 18/41] update version --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 0acae01b7..03ef369fd 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -29,7 +29,7 @@ task tbp_parser { Float? etha237_frequency # default 0.1 File? expert_rule_regions_bed - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.2" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.0.0" Int disk_size = 100 Int memory = 4 Int cpu = 1 From bc9dc693ce96b90b25e5d27db24564408babd564 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 8 Nov 2024 16:45:58 +0000 Subject: [PATCH 19/41] organize --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 6 +++--- tasks/species_typing/mycobacterium/task_tbprofiler.wdl | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 03ef369fd..38ca566ca 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -28,11 +28,11 @@ task tbp_parser { Float? rpob449_frequency # default 0.1 Float? etha237_frequency # default 0.1 File? 
expert_rule_regions_bed - + + Int cpu = 1 + Int disk_size = 100 String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.0.0" - Int disk_size = 100 Int memory = 4 - Int cpu = 1 } command <<< # get version diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 0989cc12a..b48d5eb89 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -20,9 +20,9 @@ task tbprofiler { Boolean tbprofiler_run_cdph_db = false Boolean tbprofiler_run_custom_db = false - Int cpu = 8 - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" + Int cpu = 8 Int disk_size = 100 + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" Int memory = 16 } command <<< From ab6334ae4d4c29df517ecc451659c6c3504739bc Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 15:34:17 +0000 Subject: [PATCH 20/41] update md5sums --- .../workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 8 ++++---- .../workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 71f5bd4a2..53edd0870 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -561,7 +561,7 @@ - path: miniwdl_run/wdl/tasks/gene_typing/drug_resistance/task_resfinder.wdl md5sum: 27528633723303b462d095b642649453 - path: miniwdl_run/wdl/tasks/gene_typing/variant_detection/task_snippy_variants.wdl - md5sum: 3b9e04569d7e856dcc649b7726b306b7 + md5sum: 440a620a10ccdafe612f0b33ef05f86d - path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_bbduk.wdl md5sum: aec6ef024d6dff31723f44290f6b9cf5 - path: 
miniwdl_run/wdl/tasks/quality_control/advanced_metrics/task_busco.wdl @@ -617,7 +617,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 3b37e6bf7f4773e12afe1fa15920acd9 + md5sum: 484f7b78f12607a737fe30f4c5f3d697 - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -631,9 +631,9 @@ - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl md5sum: 4d69a6539b68503af9f3f1c2787ff920 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 3cb5c86b15e931b0c0b98ed784386438 + md5sum: 9e37d7d263df7a247764b99ab4e8e946 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea5cff6eff8c2c42046cf2eae6f16b6f + md5sum: ea00ab995a3121d2bf23241e172f1dfc - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 88584182b..813c3c619 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -526,7 +526,7 @@ - path: miniwdl_run/wdl/tasks/gene_typing/drug_resistance/task_resfinder.wdl md5sum: 27528633723303b462d095b642649453 - path: miniwdl_run/wdl/tasks/gene_typing/variant_detection/task_snippy_variants.wdl - md5sum: 3b9e04569d7e856dcc649b7726b306b7 + md5sum: 440a620a10ccdafe612f0b33ef05f86d - path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_bbduk.wdl md5sum: aec6ef024d6dff31723f44290f6b9cf5 - path: miniwdl_run/wdl/tasks/quality_control/advanced_metrics/task_busco.wdl @@ -580,7 +580,7 @@ - path: 
miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 3b37e6bf7f4773e12afe1fa15920acd9 + md5sum: 484f7b78f12607a737fe30f4c5f3d697 - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -596,7 +596,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: fdb66b59ac886501a4ae90a25cefd633 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea5cff6eff8c2c42046cf2eae6f16b6f + md5sum: ea00ab995a3121d2bf23241e172f1dfc - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: d11bfe33fdd96eab28892be5a01c1c7d - path: miniwdl_run/workflow.log From e4cc386500dbea2328a264b5595883ac7f48eb57 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 16:08:41 +0000 Subject: [PATCH 21/41] more md5sums so sad --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 4 ++-- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index bea0530f0..680edcbc6 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -627,9 +627,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl md5sum: 64caaaff5910ac0036e2659434500962 - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl - md5sum: 850ad97598aca5c28eb36e6a5c13c2fc + md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 9e37d7d263df7a247764b99ab4e8e946 + md5sum: a6387407ef51b7493b0897869375e7df - path: 
miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: ea00ab995a3121d2bf23241e172f1dfc - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index bb9f50f4c..a8542c2a3 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -590,7 +590,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl md5sum: 64caaaff5910ac0036e2659434500962 - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl - md5sum: 850ad97598aca5c28eb36e6a5c13c2fc + md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 4111a758490174325ae8ea52a95319e9 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl From fa19395b6f0c74ae03fd5e19296a61eae9d24ca0 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 17:56:00 +0000 Subject: [PATCH 22/41] fix genome_percent_coverage --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 38ca566ca..80e09baf7 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -64,7 +64,7 @@ task tbp_parser { echo 0.0 > AVG_DEPTH # get genome percent coverage for the entire reference genome length over min_depth - genome=$(samtools depth -J ~{tbprofiler_bam} | awk -F "\t" '{if ($3 >= ~{min_depth}) print;}' | wc -l ) + genome=$(samtools depth -J ~{tbprofiler_bam} | awk -F "\t" -v min_depth=~{min_depth} '{if ($3 >= min_depth) print;}' | wc -l ) python3 -c "print ( ($genome / 4411532 ) * 100 )" | tee GENOME_PC # get genome average depth From 
78d24fe7634ac7ea411d4ec0c69edcedbfaafb36 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 21:07:33 +0000 Subject: [PATCH 23/41] rename tbprofiler_additional_outputs to call_tbp_parser --- .../genomic_characterization/theiaprok.md | 21 +++++++++++++------ workflows/utilities/wf_merlin_magic.wdl | 4 ++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index 6664df6df..e9eec0520 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -301,6 +301,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **call_poppunk** | Boolean | If "true", runs PopPUNK for GPSC cluster designation for S. pneumoniae | TRUE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **call_shigeifinder_reads_input** | Boolean | If set to "true", the ShigEiFinder task will run again but using read files as input instead of the assembly file. Input is shown but not used for TheiaProk_FASTA. | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **call_stxtyper** | Boolean | If set to "true", the StxTyper task will run on all samples regardless of the `gambit_predicted_taxon` output. Useful if you suspect a non-E.coli or non-Shigella sample contains stx genes. 
| FALSE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **call_tbp_parser** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **cauris_cladetyper_docker_image** | String | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **cladetyper_kmer_size** | Int | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **cladetyper_ref_clade1** | File | *Provide an empty file if running TheiaProk on the command-line | | Do not modify, Optional | FASTA, ONT, PE, SE | @@ -407,27 +408,35 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **stxtyper_enable_debug** | Boolean | When enabled, additional messages are printed and files in `$TMPDIR` are not removed after running | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **stxtyper_memory** | Int | Amount of memory (in GB) to allocate to the task | 4 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **staphopia_sccmec_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/biocontainers/staphopia-sccmec:1.0.0--hdfd78af_0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_add_cs_lims** | Boolean | Set to true add cycloserine results to the LIMS report | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_regions_bed** | File | A bed file that lists the regions to be considered for QC | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_threshold** | Int | The minimum coverage for a region to pass QC in tbp_parser | 100 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_debug** | 
Boolean | Activate the debug mode on tbp_parser; increases logging outputs | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_frequency** | Int | The minimum frequency for a mutation to pass QC | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_read_support** | Int | The minimum read support for a mutation to pass QC | 10 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_operator** | String | Fills the "operator" field in the tbp_parser output files | Operator not provided | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_output_seq_method_type** | String | Fills out the "seq_method" field in the tbp_parser output files | Sequencing method not provided | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_additional_outputs** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | 
Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_cov_frac_threshold** | Int | A cutoff used to calculate the fraction of the region covered by ≤ this value | 1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rpob449_frequency** | Float | Minimum frequency for a mutation at protein position 449 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrl_frequency** | Float | Minimum frequency for a mutation in rrl to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrl_read_support** | Int | Minimum read support for a mutation in rrl to pass QC in tbp-parser | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrs_frequency** | Float | Minimum frequency for a mutation in rrs to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrs_read_support** | Int | Minimum read support for a mutation in rrs to pass QC in tbp-parser | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_tngs_data** | Boolean | Set to true to enable tNGS-specific parameters and runs in tbp-parser | FALSE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **call_tbp_parser** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_additional_parameters** | String | Add additional parameters to the tbprofiler command | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_custom_db** | File | TBProfiler uses by default the TBDB database; if you have a custom database you wish to use, you must provide a custom database in this field and set tbprofiler_run_custom_db to true | | Optional | FASTA, ONT, PE, SE | | merlin_magic | 
**tbprofiler_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_mapper** | String | The mapping tool used in TBProfiler to align the reads to the reference genome; see TBProfiler’s original documentation for available options. | bwa | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_min_af** | Float | The minimum allele frequency to call a variant | 0.1 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_min_af_pred** | Float | The minimum allele frequency to use a variant for resistance prediction | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_min_depth** | Int | The minimum depth for a variant to be called. | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_run_cdph_db** | Boolean | TBProfiler uses by default the TBDB database; set this value to "true" to use the WHO v2 database with customizations for CDPH | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_run_custom_db** | Boolean | TBProfiler uses by default the TBDB database; if you have a custom database you wish to use, you must set this value to true and provide a custom database in the tbprofiler_custom_db field | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_variant_caller** | String | Select a different variant caller for TBProfiler to use by writing it in this block; see TBProfiler’s original documentation for available options. 
| freebayes | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_variant_calling_params** | String | Enter additional variant calling parameters in this free text input to customize how the variant caller works in TBProfiler | None | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_variant_caller** | String | Select a different variant caller for TBProfiler to use by writing it in this block; see TBProfiler’s original documentation for available options. | GATK | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_variant_calling_params** | String | Enter additional variant calling parameters in this free text input to customize how the variant caller works in TBProfiler | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **theiaeuk** | Boolean | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **virulencefinder_coverage_threshold** | Float | The threshold for minimum coverage | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **virulencefinder_database** | String | The specific database to use | virulence_ecoli | Optional | FASTA, ONT, PE, SE | diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index ef67046bd..1e19184bd 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -57,7 +57,7 @@ workflow merlin_magic { # activating tool logic Boolean call_poppunk = true Boolean call_shigeifinder_reads_input = false - Boolean tbprofiler_additional_outputs = false # set to true to run tbp-parser + Boolean call_tbp_parser = false # docker options String? abricate_abaum_docker_image String? 
abricate_vibrio_docker_image @@ -467,7 +467,7 @@ workflow merlin_magic { tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, docker = tbprofiler_docker_image } - if (tbprofiler_additional_outputs) { + if (call_tbp_parser) { call tbp_parser_task.tbp_parser { input: tbprofiler_json = tbprofiler.tbprofiler_output_json, From 749362a202db69512a10ae6e1ca55dfa200a2e89 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 21:09:19 +0000 Subject: [PATCH 24/41] clarify output --- docs/workflows/genomic_characterization/theiaprok.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index e9eec0520..c1ee1574d 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -2030,7 +2030,7 @@ The TheiaProk workflows automatically activate taxa-specific sub-workflows after | tbp_parser_version | String | Optional output. 
The version of tbp-parser | ONT, PE | | tbprofiler_dr_type | String | Drug resistance type predicted by TB-Profiler (sensitive, Pre-MDR, MDR, Pre-XDR, XDR) | ONT, PE, SE | | tbprofiler_main_lineage | String | Lineage(s) predicted by TBProfiler | ONT, PE, SE | -| tbprofiler_median_coverage | Int | The median coverage of the H37Rv TB reference genome | ONT, PE | +| tbprofiler_median_depth | Int | The median depth of the H37Rv TB reference genome covered by the sample | ONT, PE | | tbprofiler_output_bai | File | Index BAM file generated by mapping sequencing reads to reference genome by TBProfiler | ONT, PE, SE | | tbprofiler_output_bam | File | BAM alignment file produced by TBProfiler | ONT, PE, SE | | tbprofiler_output_file | File | CSV report from TBProfiler | ONT, PE, SE | From 563f6d2c5a1539c847b483f1f8c54b56d376017d Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 12 Nov 2024 21:19:01 +0000 Subject: [PATCH 25/41] md5sum --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 680edcbc6..2154de230 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -631,7 +631,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: a6387407ef51b7493b0897869375e7df - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea00ab995a3121d2bf23241e172f1dfc + md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 
a8542c2a3..40e6c749a 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -594,7 +594,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 4111a758490174325ae8ea52a95319e9 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea00ab995a3121d2bf23241e172f1dfc + md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: a7ef5a7a38dd60ff2edf699ae6808ebb - path: miniwdl_run/workflow.log From b8d7d96e316a3ff5ff94ec27b0bf578d2ff3dcc6 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 18 Nov 2024 15:20:33 +0000 Subject: [PATCH 26/41] reorder trimmomatic base crop stuff --- tasks/quality_control/read_filtering/task_trimmomatic.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/quality_control/read_filtering/task_trimmomatic.wdl b/tasks/quality_control/read_filtering/task_trimmomatic.wdl index e8a246497..42f62559d 100644 --- a/tasks/quality_control/read_filtering/task_trimmomatic.wdl +++ b/tasks/quality_control/read_filtering/task_trimmomatic.wdl @@ -40,9 +40,9 @@ task trimmomatic_pe { -threads ~{cpu} \ ~{read1} ~{read2} \ -baseout ~{samplename}.fastq.gz \ + "${CROPPING_VAR}" \ SLIDINGWINDOW:~{trimmomatic_window_size}:~{trimmomatic_quality_trim_score} \ - MINLEN:~{trimmomatic_min_length} &> ~{samplename}.trim.stats.txt \ - "${CROPPING_VAR}" + MINLEN:~{trimmomatic_min_length} &> ~{samplename}.trim.stats.txt >>> output { From ad0b3d95cfaccd1636a722c6b7dcb9810669ab7d Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 18 Nov 2024 21:33:41 +0000 Subject: [PATCH 27/41] add lane concatenation to theiaproks --- .dockstore.yml | 7 +- .../file_handling/task_cat_lanes.wdl | 69 +++++++++++++++++++ .../theiaprok/wf_theiaprok_illumina_pe.wdl | 43 +++++++++--- .../theiaprok/wf_theiaprok_illumina_se.wdl | 27 ++++++-- .../wf_concatenate_illumina_lanes.wdl | 42 
+++++++++++ 5 files changed, 175 insertions(+), 13 deletions(-) create mode 100644 tasks/utilities/file_handling/task_cat_lanes.wdl create mode 100644 workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl diff --git a/.dockstore.yml b/.dockstore.yml index 5306d30ed..bab57761f 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -282,4 +282,9 @@ workflows: subclass: WDL primaryDescriptorPath: /workflows/phylogenetics/wf_snippy_streamline_fasta.wdl testParameterFiles: - - /tests/inputs/empty.json \ No newline at end of file + - /tests/inputs/empty.json + - name: Concatenate_Illumina_Lanes_PHB + subclass: WDL + primaryDescriptorPath: /workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl + testParameterFiles: + - /tests/inputs/empty.json \ No newline at end of file diff --git a/tasks/utilities/file_handling/task_cat_lanes.wdl b/tasks/utilities/file_handling/task_cat_lanes.wdl new file mode 100644 index 000000000..8636a951c --- /dev/null +++ b/tasks/utilities/file_handling/task_cat_lanes.wdl @@ -0,0 +1,69 @@ +version 1.0 + +task cat_lanes { + input { + String samplename + + File read1_lane1 + File read1_lane2 + File? read1_lane3 + File? read1_lane4 + + File? read2_lane1 + File? read2_lane2 + File? read2_lane3 + File? 
read2_lane4 + + Int cpu = 2 + Int disk_size = 50 + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.2" + Int memory = 4 + } + meta { + volatile: true + } + command <<< + # exit task if anything throws an error (important for proper gzip format) + set -euo pipefail + + # move reads into single directory + mkdir -v reads + mv -v ~{read1_lane1} \ + ~{read2_lane1} \ + ~{read1_lane2} \ + ~{read2_lane2} \ + ~{read1_lane3} \ + ~{read2_lane3} \ + ~{read1_lane4} \ + ~{read2_lane4} \ + reads/ + + # check for valid gzipped format (this task assumes FASTQ files are gzipped - they should be coming from ILMN instruments) + gzip -t reads/*.gz + + # run concatenate script and send STDOUT/ERR to STDOUT + # reminder: script will skip over samples that only have R1 file present + # reminder: script REQUIRES standard illumina file endings like: _L001_R1_001.fastq.gz and _L002_R2_001.fastq.gz + # see script here: https://github.com/theiagen/utilities/blob/main/scripts/concatenate-across-lanes.sh + concatenate-across-lanes.sh reads/ + + # ensure newly merged FASTQs are valid gzipped format + gzip -t reads/*merged*.gz + + # determine output filenames for outputs + mv -v reads/*_merged_R1.fastq.gz reads/~{samplename}_merged_R1.fastq.gz + mv -v reads/*_merged_R2.fastq.gz reads/~{samplename}_merged_R2.fastq.gz + >>> + output { + File read1_concatenated = "reads/~{samplename}_merged_R1.fastq.gz" + File? 
read2_concatenated = "reads/~{samplename}_merged_R2.fastq.gz" + } + runtime { + docker: "~{docker}" + memory: memory + " GB" + cpu: cpu + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" + preemptible: 1 + } +} \ No newline at end of file diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 28cf1d149..88f0ef066 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools +import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow import "../utilities/wf_read_QC_trim_pe.wdl" as read_qc @@ -30,6 +31,15 @@ workflow theiaprok_illumina_pe { String seq_method = "ILLUMINA" File read1 File read2 + + # optional additional lanes + File? read1_lane2 + File? read1_lane3 + File? read1_lane4 + File? read2_lane2 + File? read2_lane3 + File? read2_lane4 + Int? genome_length # export taxon table parameters String? 
run_id @@ -68,10 +78,24 @@ workflow theiaprok_illumina_pe { call versioning.version_capture { input: } + if (defined(read1_lane2)) { + call concatenate_lanes_workflow.concatenate_illumina_lanes { + input: + samplename = samplename, + read1_lane1 = read1, + read1_lane2 = select_first([read1_lane2]), + read1_lane3 = read1_lane3, + read1_lane4 = read1_lane4, + read2_lane1 = read2, + read2_lane2 = read2_lane2, + read2_lane3 = read2_lane3, + read2_lane4 = read2_lane4 + } + } call screen.check_reads as raw_check_reads { input: - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), min_reads = min_reads, min_basepairs = min_basepairs, min_genome_length = min_genome_length, @@ -85,8 +109,8 @@ workflow theiaprok_illumina_pe { call read_qc.read_QC_trim_pe as read_QC_trim { input: samplename = samplename, - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), trim_min_length = trim_min_length, trim_quality_min_score = trim_quality_min_score, trim_window_size = trim_window_size, @@ -121,8 +145,8 @@ workflow theiaprok_illumina_pe { } call cg_pipeline.cg_pipeline as cg_pipeline_raw { input: - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), samplename = samplename, genome_length = select_first([genome_length, quast.genome_length]) } @@ -257,8 +281,8 @@ workflow theiaprok_illumina_pe { sample_taxon = gambit.gambit_predicted_taxon, taxon_tables = taxon_tables, samplename = samplename, - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, 
read2]), read1_clean = read_QC_trim.read1_clean, read2_clean = read_QC_trim.read2_clean, run_id = run_id, @@ -608,6 +632,9 @@ workflow theiaprok_illumina_pe { String theiaprok_illumina_pe_analysis_date = version_capture.date # Read Metadata String seq_platform = seq_method + # Concatenated Illumina Reads + File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated + File? read2_concatenated = concatenate_illumina_lanes.read2_concatenated # Sample Screening String read_screen_raw = raw_check_reads.read_screen String? read_screen_clean = clean_check_reads.read_screen diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index e743ecbce..9005766a4 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools +import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow import "../utilities/wf_read_QC_trim_se.wdl" as read_qc @@ -29,6 +30,12 @@ workflow theiaprok_illumina_se { String samplename String seq_method = "ILLUMINA" File read1 + + # optional additional lanes + File? read1_lane2 + File? read1_lane3 + File? read1_lane4 + Int? genome_length # export taxon table parameters String? 
run_id @@ -68,9 +75,19 @@ workflow theiaprok_illumina_se { call versioning.version_capture { input: } + if (defined(read1_lane2)) { + call concatenate_lanes_workflow.concatenate_illumina_lanes { + input: + samplename = samplename, + read1_lane1 = read1, + read1_lane2 = select_first([read1_lane2]), + read1_lane3 = read1_lane3, + read1_lane4 = read1_lane4 + } + } call screen.check_reads_se as raw_check_reads { input: - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), min_reads = min_reads, min_basepairs = min_basepairs, min_genome_length = min_genome_length, @@ -84,7 +101,7 @@ workflow theiaprok_illumina_se { call read_qc.read_QC_trim_se as read_QC_trim { input: samplename = samplename, - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), trim_min_length = trim_min_length, trim_quality_min_score = trim_quality_min_score, trim_window_size = trim_window_size, @@ -116,7 +133,7 @@ workflow theiaprok_illumina_se { } call cg_pipeline.cg_pipeline as cg_pipeline_raw { input: - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), samplename = samplename, genome_length = select_first([genome_length, quast.genome_length]) } @@ -240,7 +257,7 @@ workflow theiaprok_illumina_se { sample_taxon = gambit.gambit_predicted_taxon, taxon_tables = taxon_tables, samplename = samplename, - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), read1_clean = read_QC_trim.read1_clean, run_id = run_id, collection_date = collection_date, @@ -566,6 +583,8 @@ workflow theiaprok_illumina_se { String theiaprok_illumina_se_analysis_date = version_capture.date # Read Metadata String seq_platform = seq_method + # Concatenated Illumina Reads + File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated # Sample Screening String read_screen_raw = raw_check_reads.read_screen String? 
read_screen_clean = clean_check_reads.read_screen diff --git a/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl b/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl new file mode 100644 index 000000000..f2a5a9ad9 --- /dev/null +++ b/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl @@ -0,0 +1,42 @@ +version 1.0 + +import "../../../tasks/utilities/file_handling/task_cat_lanes.wdl" as concatenate_lanes +import "../../../tasks/task_versioning.wdl" as versioning + +workflow concatenate_illumina_lanes { + input { + String samplename + + File read1_lane1 + File read1_lane2 + File? read1_lane3 + File? read1_lane4 + + File? read2_lane1 + File? read2_lane2 + File? read2_lane3 + File? read2_lane4 + } + call concatenate_lanes.cat_lanes { + input: + samplename = samplename, + read1_lane1 = read1_lane1, + read2_lane1 = read2_lane1, + read1_lane2 = read1_lane2, + read2_lane2 = read2_lane2, + read1_lane3 = read1_lane3, + read2_lane3 = read2_lane3, + read1_lane4 = read1_lane4, + read2_lane4 = read2_lane4 + } + call versioning.version_capture { + input: + } + output { + String concatenate_illumina_lanes_version = version_capture.phb_version + String concatenate_illumina_lanes_analysis_date = version_capture.date + + File read1_concatenated = cat_lanes.read1_concatenated + File? 
read2_concatenated = cat_lanes.read2_concatenated + } +} \ No newline at end of file From 0713cdd9001b08f0680f8c89dbf56bb8d9d5fe98 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 19 Nov 2024 15:47:28 +0000 Subject: [PATCH 28/41] rewrite so it works --- .../file_handling/task_cat_lanes.wdl | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/tasks/utilities/file_handling/task_cat_lanes.wdl b/tasks/utilities/file_handling/task_cat_lanes.wdl index 8636a951c..555a9c533 100644 --- a/tasks/utilities/file_handling/task_cat_lanes.wdl +++ b/tasks/utilities/file_handling/task_cat_lanes.wdl @@ -25,38 +25,23 @@ task cat_lanes { command <<< # exit task if anything throws an error (important for proper gzip format) set -euo pipefail + + exists() { [[ -f $1 ]]; } - # move reads into single directory - mkdir -v reads - mv -v ~{read1_lane1} \ - ~{read2_lane1} \ - ~{read1_lane2} \ - ~{read2_lane2} \ - ~{read1_lane3} \ - ~{read2_lane3} \ - ~{read1_lane4} \ - ~{read2_lane4} \ - reads/ + set -euo pipefail - # check for valid gzipped format (this task assumes FASTQ files are gzipped - they should be coming from ILMN instruments) - gzip -t reads/*.gz + cat ~{read1_lane1} ~{read1_lane2} ~{read1_lane3} ~{read1_lane4} > "~{samplename}_merged_R1.fastq.gz" - # run concatenate script and send STDOUT/ERR to STDOUT - # reminder: script will skip over samples that only have R1 file present - # reminder: script REQUIRES standard illumina file endings like: _L001_R1_001.fastq.gz and _L002_R2_001.fastq.gz - # see script here: https://github.com/theiagen/utilities/blob/main/scripts/concatenate-across-lanes.sh - concatenate-across-lanes.sh reads/ + if exists ~{read2_lane1} ; then + cat ~{read2_lane1} ~{read2_lane2} ~{read2_lane3} ~{read2_lane4} > "~{samplename}_merged_R2.fastq.gz" + fi # ensure newly merged FASTQs are valid gzipped format - gzip -t reads/*merged*.gz - - # determine output filenames for outputs - mv -v reads/*_merged_R1.fastq.gz 
reads/~{samplename}_merged_R1.fastq.gz - mv -v reads/*_merged_R2.fastq.gz reads/~{samplename}_merged_R2.fastq.gz + gzip -t *merged*.gz >>> output { - File read1_concatenated = "reads/~{samplename}_merged_R1.fastq.gz" - File? read2_concatenated = "reads/~{samplename}_merged_R2.fastq.gz" + File read1_concatenated = "~{samplename}_merged_R1.fastq.gz" + File? read2_concatenated = "~{samplename}_merged_R2.fastq.gz" } runtime { docker: "~{docker}" From 36ff472616d8b0575a53109f1903133471b69163 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 19 Nov 2024 18:00:48 +0000 Subject: [PATCH 29/41] update documentation for concatenate_lanes --- .../genomic_characterization/theiaprok.md | 23 +++++++-- .../standalone/concatenate_illumina_lanes.md | 47 +++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 docs/workflows/standalone/concatenate_illumina_lanes.md diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index c1ee1574d..02f9277a4 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes, some optional features incompatible | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes, some optional features incompatible | Sample-level | ## TheiaProk Workflows @@ -78,6 +78,12 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | *workflow name | **originating_lab** | String | Will be 
used in the "originating_lab" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **perform_characterization** | Boolean | Set to "false" if you want to only generate an assembly and relevant QC metrics and skip all characterization tasks | TRUE | Optional | FASTA, ONT, PE, SE | | *workflow name | **qc_check_table** | File | TSV value with taxons for rows and QC values for columns; internal cells represent user-determined QC thresholds; if provided, turns on the QC Check task.
Click on the variable name for an example QC Check table | | Optional | FASTA, ONT, PE, SE | +| *workflow name | **read1_lane2** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read1_lane3** | File | Additional lane file; only used when read1_lane2 is also provided, in which case the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read1_lane4** | File | Additional lane file; only used when read1_lane2 is also provided, in which case the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read2_lane2** | File | Additional lane file; only used when read1_lane2 is also provided, in which case the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE | +| *workflow name | **read2_lane3** | File | Additional lane file; only used when read1_lane2 is also provided, in which case the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE | +| *workflow name | **read2_lane4** | File | Additional lane file; only used when read1_lane2 is also provided, in which case the Concatenate_Illumina_Lanes subworkflow will concatenate the sample's files from all provided lanes before doing any subsequent analysis | | Optional | PE | | *workflow name | **run_id** | String | Will be used in the "run_id" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **seq_method** | String | Will be used in the "seq_id" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **skip_mash** | Boolean | If true, skips estimation of genome size and coverage in read screening steps. As a result, providing true also prevents screening using these parameters. 
| TRUE | Optional | ONT, SE | @@ -603,6 +609,17 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | --- | --- | | Task | [task_versioning.wdl](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/task_versioning.wdl) | +??? task "`concatenate_illumina_lanes`: Concatenate Multi-Lane Illumina FASTQs ==_for Illumina only_==" + + The `concatenate_illumina_lanes` task concatenates Illumina FASTQ files from multiple lanes into a single file. This task only runs if the `read1_lane2` input file has been provided. All read1 lanes are concatenated together and are used in subsequent tasks, as are the read2 lanes. These concatenated files are also provided as output. + + !!! techdetails "Concatenate Illumina Lanes Technical Details" + The `concatenate_illumina_lanes` task is run twice, once for raw reads and once for clean reads. The task is the same for both PE and SE workflows. + + | | Links | + | --- | --- | + | Task | [wf_concatenate_illumina_lanes.wdl](https://github.com/theiagen/public_health_bioinformatics/blob/main/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl) + ??? task "`screen`: Total Raw Read Quantification and Genome Size Estimation" The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses [`fastq-scan`](https://github.com/rpetit3/fastq-scan) and bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. 
Samples that do not meet these criteria will not be processed further by the workflow: @@ -705,12 +722,12 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al 1. **Species Groups**: - MIDAS clusters bacterial genomes based on 96.5% sequence identity, forming over 5,950 species groups from 31,007 genomes. These groups align with the gold-standard species definition (95% ANI), ensuring highly accurate species identification. - 2. **Genomic Data Structure**: + 1. **Genomic Data Structure**: - **Marker Genes**: Contains 15 universal single-copy genes used to estimate species abundance. - **Representative Genome**: Each species group has a selected representative genome, which minimizes genetic variation and aids in accurate SNP identification. - **Pan-genome**: The database includes clusters of non-redundant genes, with options for multi-level clustering (e.g., 99%, 95%, 90% identity), enabling MIDAS to identify gene content within strains at various clustering thresholds. - 3. **Taxonomic Annotation**: + 1. **Taxonomic Annotation**: - Genomes are annotated based on consensus Latin names. Discrepancies in name assignments may occur due to factors like unclassified genomes or genus-level ambiguities. 
--- diff --git a/docs/workflows/standalone/concatenate_illumina_lanes.md b/docs/workflows/standalone/concatenate_illumina_lanes.md new file mode 100644 index 000000000..282844fa4 --- /dev/null +++ b/docs/workflows/standalone/concatenate_illumina_lanes.md @@ -0,0 +1,47 @@ +# Concatenate Illumina Lanes + +## Quick Facts + +| **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | +|---|---|---|---|---| +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB 2.3.0 | Yes | Sample-level | + +## Concatenate_Illumina_Lanes_PHB + +Some Illumina machines produce multi-lane FASTQ files for a single sample. This workflow concatenates the multiple lanes into a single FASTQ file per read type (forward or reverse). + +### Inputs + +| **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | +|---|---|---|---|---|---| +| concatenate_illumina_lanes | **read1_lane1** | File | The first lane for the forward reads | | Required | +| concatenate_illumina_lanes | **read1_lane2** | File | The second lane for the forward reads | | Required | +| concatenate_illumina_lanes | **samplename** | String | The name of the sample, used to name the output files | | Required | +| cat_lanes | **cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | +| cat_lanes | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 50 | Optional | +| cat_lanes | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.2" | Optional | +| cat_lanes | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | +| concatenate_illumina_lanes | **read1_lane3** | File | The third lane for the forward reads | | Optional | +| concatenate_illumina_lanes | **read1_lane4** | File | The fourth lane 
for the forward reads | | Optional | +| concatenate_illumina_lanes | **read2_lane1** | File | The first lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane2** | File | The second lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane3** | File | The third lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane4** | File | The fourth lane for the reverse reads | | Optional | +| version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | +| version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | + +### Workflow Tasks + +This workflow concatenates the Illumina lanes for forward and (if provided) reverse reads. The output files are named as follows: + +- Forward reads: `<samplename>_merged_R1.fastq.gz` +- Reverse reads: `<samplename>_merged_R2.fastq.gz` + +### Outputs + +| **Variable** | **Type** | **Description** | +|---|---|---| +| concatenate_illumina_lanes_analysis_date | String | Date of analysis | +| concatenate_illumina_lanes_version | String | Version of PHB used for the analysis | +| read1_concatenated | File | Concatenated forward reads | +| read2_concatenated | File | Concatenated reverse reads (only produced if reverse reads were provided) | From eae459a4822a998fab2ee112d421f227e71605fa Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 21 Nov 2024 20:23:49 +0000 Subject: [PATCH 30/41] bump version and update docs --- .../genomic_characterization/theiaprok.md | 4 +-- .../workflows_alphabetically.md | 3 ++- docs/workflows_overview/workflows_kingdom.md | 3 ++- docs/workflows_overview/workflows_type.md | 27 ++++++++++--------- mkdocs.yml | 5 +++- .../mycobacterium/task_tbp_parser.wdl | 2 +- .../mycobacterium/task_tbprofiler.wdl | 2 +- 7 files changed, 25 insertions(+), 21 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiaprok.md
b/docs/workflows/genomic_characterization/theiaprok.md index 02f9277a4..39f05a187 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -417,7 +417,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **tbp_parser_add_cs_lims** | Boolean | Set to true add cycloserine results to the LIMS report | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_regions_bed** | File | A bed file that lists the regions to be considered for QC | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_threshold** | Int | The minimum coverage for a region to pass QC in tbp_parser | 100 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_debug** | Boolean | Activate the debug mode on tbp_parser; increases logging outputs | FALSE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_debug** | Boolean | Activate the debug mode on tbp_parser; increases logging outputs | TRUE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | FASTA, ONT, PE, SE | @@ -432,8 +432,6 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **tbp_parser_rrs_frequency** | Float | Minimum frequency for a mutation in rrs to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_rrs_read_support** | Int | Minimum read support for 
a mutation in rrs to pass QC in tbp-parser | 10 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_tngs_data** | Boolean | Set to true to enable tNGS-specific parameters and runs in tbp-parser | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **call_tbp_parser** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_additional_parameters** | String | Add additional parameters to the tbprofiler command | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_custom_db** | File | TBProfiler uses by default the TBDB database; if you have a custom database you wish to use, you must provide a custom database in this field and set tbprofiler_run_custom_db to true | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_mapper** | String | The mapping tool used in TBProfiler to align the reads to the reference genome; see TBProfiler’s original documentation for available options. 
| bwa | Optional | FASTA, ONT, PE, SE | diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index 3543d3cb9..abc5549d9 100644 --- a/docs/workflows_overview/workflows_alphabetically.md +++ b/docs/workflows_overview/workflows_alphabetically.md @@ -11,10 +11,11 @@ title: Alphabetical Workflows | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | -| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | 
[BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index c77c7bc3d..eb78b0e4f 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -15,6 +15,7 @@ title: Workflows by Kingdom | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | [BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this 
workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | @@ -75,7 +76,7 @@ title: Workflows by Kingdom | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), 
[Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. | Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 53623d7ee..97452d073 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -25,13 +25,13 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | 
**Dockstore** | |---|---|---|---|---|---|---| -| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | +| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.3.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome 
assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**TheiaCoV Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), 
[TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | | [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), 
[TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | -| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | @@ -41,17 +41,17 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| 
[**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | 
[Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | 
[Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | @@ -72,9 +72,9 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, 
Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | @@ -97,13 +97,14 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | 
[**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | [Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. 
| Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | diff --git a/mkdocs.yml b/mkdocs.yml index cc90e4e3d..db891b442 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,7 @@ nav: - Zip_Column_Content: workflows/data_export/zip_column_content.md - Standalone: - Cauris_CladeTyper: workflows/standalone/cauris_cladetyper.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - GAMBIT_Query: workflows/standalone/gambit_query.md - Kraken2: workflows/standalone/kraken2.md - NCBI-AMRFinderPlus: workflows/standalone/ncbi_amrfinderplus.md @@ -65,7 +66,8 @@ nav: - Any Taxa: - Assembly_Fetch: workflows/data_import/assembly_fetch.md - BaseSpace_Fetch: workflows/data_import/basespace_fetch.md - - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - Create_Terra_Table: workflows/data_import/create_terra_table.md - Kraken2: workflows/standalone/kraken2.md - NCBI-Scrub: workflows/standalone/ncbi_scrub.md @@ -123,6 +125,7 @@ nav: - BaseSpace_Fetch: workflows/data_import/basespace_fetch.md - Cauris_CladeTyper: workflows/standalone/cauris_cladetyper.md - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - Core_Gene_SNP: workflows/phylogenetic_construction/core_gene_snp.md - Create_Terra_Table: workflows/data_import/create_terra_table.md - CZGenEpi_Prep: workflows/phylogenetic_construction/czgenepi_prep.md diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 80e09baf7..19c3d9cb9 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ 
b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -31,7 +31,7 @@ task tbp_parser { Int cpu = 1 Int disk_size = 100 - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.0.0" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0" Int memory = 4 } command <<< diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index b48d5eb89..bc81ce419 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -22,7 +22,7 @@ task tbprofiler { Int cpu = 8 Int disk_size = 100 - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.0" + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.1" Int memory = 16 } command <<< From 682e722afb8a31549d92cd1912fe0374d26cd3ca Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 21 Nov 2024 20:37:20 +0000 Subject: [PATCH 31/41] update last known changes --- .../genomic_characterization/theiaprok.md | 8 ++--- docs/workflows/standalone/tbprofiler_tngs.md | 2 +- .../workflows_alphabetically.md | 27 ++++++++------- docs/workflows_overview/workflows_kingdom.md | 34 +++++++++---------- docs/workflows_overview/workflows_type.md | 4 +-- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index 39f05a187..66c43bdf1 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -418,7 +418,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **tbp_parser_coverage_regions_bed** | File | A bed file that lists the regions to be considered for QC | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_threshold** | Int | The minimum coverage for a region to pass QC in 
tbp_parser | 100 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_debug** | Boolean | Activate the debug mode on tbp_parser; increases logging outputs | TRUE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 | Optional | FASTA, ONT, PE, SE | @@ -612,7 +612,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al The `concatenate_illumina_lanes` task concatenates Illumina FASTQ files from multiple lanes into a single file. This task only runs if the `read1_lane2` input file has been provided. All read1 lanes are concatenated together and are used in subsequent tasks, as are the read2 lanes. These concatenated files are also provided as output. !!! techdetails "Concatenate Illumina Lanes Technical Details" - The `concatenate_illumina_lanes` task is run twice, once for raw reads and once for clean reads. The task is the same for both PE and SE workflows. + The `concatenate_illumina_lanes` task is run before any downstream steps take place. | | Links | | --- | --- | @@ -720,12 +720,12 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al 1. 
**Species Groups**: - MIDAS clusters bacterial genomes based on 96.5% sequence identity, forming over 5,950 species groups from 31,007 genomes. These groups align with the gold-standard species definition (95% ANI), ensuring highly accurate species identification. - 1. **Genomic Data Structure**: + 2. **Genomic Data Structure**: - **Marker Genes**: Contains 15 universal single-copy genes used to estimate species abundance. - **Representative Genome**: Each species group has a selected representative genome, which minimizes genetic variation and aids in accurate SNP identification. - **Pan-genome**: The database includes clusters of non-redundant genes, with options for multi-level clustering (e.g., 99%, 95%, 90% identity), enabling MIDAS to identify gene content within strains at various clustering thresholds. - 1. **Taxonomic Annotation**: + 3. **Taxonomic Annotation**: - Genomes are annotated based on consensus Latin names. Discrepancies in name assignments may occur due to factors like unclassified genomes or genus-level ambiguities. --- diff --git a/docs/workflows/standalone/tbprofiler_tngs.md b/docs/workflows/standalone/tbprofiler_tngs.md index d0061fdd7..64a64b364 100644 --- a/docs/workflows/standalone/tbprofiler_tngs.md +++ b/docs/workflows/standalone/tbprofiler_tngs.md @@ -23,7 +23,7 @@ This workflow is still in experimental research stages. 
Documentation is minimal | tbp_parser | **coverage_threshold** | Int | The minimum percentage of a region to exceed the minimum depth for a region to pass QC in tbp_parser | 100 | Optional | | tbp_parser | **cpu** | Int | Number of CPUs to allocate to the task | 1 | Optional | | tbp_parser | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | -| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | +| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0 | Optional | | tbp_parser | **etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | | tbp_parser | **expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | | tbp_parser | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index abc5549d9..e105c6018 100644 --- a/docs/workflows_overview/workflows_alphabetically.md +++ b/docs/workflows_overview/workflows_alphabetically.md @@ -22,33 +22,34 @@ title: Alphabetical Workflows | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | 
SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | 
[kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | | 
[**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | [Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/samples_to_ref_tree.md)| Use Nextclade to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Samples_to_Ref_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Samples_to_Ref_Tree_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | 
[Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | 
[Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) 
| | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | 
[TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaCoV Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 |
[TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), 
[TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. | Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | -| [**Usher_PHB**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**Usher_PHB**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, 
SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.1.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index eb78b0e4f..9cdc7d1c5 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -17,12 +17,12 @@ title: Workflows by Kingdom | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this 
workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | 
[Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. 
| Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | @@ -42,13 +42,13 @@ title: Workflows by Kingdom | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | 
[NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | 
[TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | 
[TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), 
[TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | @@ -64,9 +64,9 @@ title: Workflows by Kingdom | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS 
data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | @@ -78,18 +78,18 @@ title: Workflows by Kingdom |---|---|---|---|---|---|---| | [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | -| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | +| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.3.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | 
[kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/samples_to_ref_tree.md)| Use Nextclade to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Samples_to_Ref_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Samples_to_Ref_Tree_PHB:main?tab=info) | -| 
[**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), 
[TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), 
[TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | | [**Usher_PHB**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | -| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 97452d073..b6c9ba8b2 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -29,7 +29,7 @@ title: Workflows by Type | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | | [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, 
Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | | [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | | [**TheiaProk Workflow 
Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | | [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | @@ -99,7 +99,7 @@ title: Workflows by Type | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | | 
[**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | From 35779a6f3a9ea9fd0a5156ae1c79e2c02a4fde7f Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 22 Nov 2024 15:51:15 +0000 Subject: [PATCH 32/41] fix md5sums --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 6 +++--- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 2154de230..4ef704cf3 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -464,7 +464,7 @@ - path: miniwdl_run/call-read_QC_trim/call-fastq_scan_raw/work/_miniwdl_inputs/0/SRR2838702_R1.fastq.gz - path: miniwdl_run/call-read_QC_trim/call-fastq_scan_raw/work/_miniwdl_inputs/0/SRR2838702_R2.fastq.gz - path: miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/command - md5sum: cc137a029d5143592b40edf01d53735f + md5sum: cc961dbda52c70200555ffb34e5ba62d - path: miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/inputs.json contains: ["read", "fastq", "test", "trimmomatic_min_length"] - path: miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/outputs.json @@ -615,7 +615,7 @@ - path: 
miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 484f7b78f12607a737fe30f4c5f3d697 + md5sum: a4d6d24a04a453227b4fa320ff79e45f - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -629,7 +629,7 @@ - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: a6387407ef51b7493b0897869375e7df + md5sum: ac49217c129add7c000eedf38acee8f3 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 40e6c749a..28362db8d 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -578,7 +578,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 484f7b78f12607a737fe30f4c5f3d697 + md5sum: a4d6d24a04a453227b4fa320ff79e45f - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -592,7 +592,7 @@ - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 4111a758490174325ae8ea52a95319e9 + md5sum: 5e735ae6cb60f86ec7983274f3baf9f8 - path: 
miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From 92eb348691de92b9ea71c18cc3d93f33552f88cf Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 25 Nov 2024 17:19:55 +0000 Subject: [PATCH 33/41] version --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 19c3d9cb9..6e893252a 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -31,7 +31,7 @@ task tbp_parser { Int cpu = 1 Int disk_size = 100 - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1" Int memory = 4 } command <<< From ef96f079f6fad02c99eabb9db40bc199fa6d77f8 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 25 Nov 2024 17:24:44 +0000 Subject: [PATCH 34/41] update doc dockers --- docs/workflows/genomic_characterization/theiaprok.md | 2 +- docs/workflows/standalone/tbprofiler_tngs.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index 66c43bdf1..ee112ce53 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -418,7 +418,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **tbp_parser_coverage_regions_bed** | File | A bed file that lists the regions to be considered for QC | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_threshold** | Int | The minimum coverage for a region to pass QC in tbp_parser | 100 | Optional | FASTA, ONT, PE, SE | | 
merlin_magic | **tbp_parser_debug** | Boolean | Activate the debug mode on tbp_parser; increases logging outputs | TRUE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 | Optional | FASTA, ONT, PE, SE | diff --git a/docs/workflows/standalone/tbprofiler_tngs.md b/docs/workflows/standalone/tbprofiler_tngs.md index 64a64b364..874183471 100644 --- a/docs/workflows/standalone/tbprofiler_tngs.md +++ b/docs/workflows/standalone/tbprofiler_tngs.md @@ -23,7 +23,7 @@ This workflow is still in experimental research stages. 
Documentation is minimal | tbp_parser | **coverage_threshold** | Int | The minimum percentage of a region to exceed the minimum depth for a region to pass QC in tbp_parser | 100 | Optional | | tbp_parser | **cpu** | Int | Number of CPUs to allocate to the task | 1 | Optional | | tbp_parser | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | -| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.0 | Optional | +| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1 | Optional | | tbp_parser | **etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | | tbp_parser | **expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | | tbp_parser | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | From 725cd595b8439c8f1568e144a662ce2d1b142dcd Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 11 Dec 2024 18:46:26 +0000 Subject: [PATCH 35/41] bump version --- tasks/species_typing/mycobacterium/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 6e893252a..60baa0b99 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -31,7 +31,7 @@ task tbp_parser { Int cpu = 1 Int disk_size = 100 - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.1" Int memory = 4 } command <<< From 363c10ed2e9359e4ff1a96accd5ba13387eab442 Mon Sep 17 00:00:00 2001 From: Sage 
Wright Date: Wed, 11 Dec 2024 19:03:49 +0000 Subject: [PATCH 36/41] alphabetize things and fix description --- docs/workflows_overview/workflows_alphabetically.md | 2 +- docs/workflows_overview/workflows_kingdom.md | 2 +- docs/workflows_overview/workflows_type.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index 432cc1e1f..a797f2251 100644 --- a/docs/workflows_overview/workflows_alphabetically.md +++ b/docs/workflows_overview/workflows_alphabetically.md @@ -19,7 +19,7 @@ title: Alphabetical Workflows | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | | Yes | v2.3.0 | [*Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), 
[Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index 0396de2f3..bc446c45a 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -17,6 +17,7 @@ title: Workflows by Kingdom | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Set-level | Yes | v2.3.0 | 
[Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | @@ -25,7 +26,6 @@ title: Workflows by Kingdom | [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. 
| Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | Set-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 2b8c7d56a..289febf71 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -98,7 +98,7 @@ title: Workflows by Type |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's 
AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | From 46d10051c2ef7e02b285145ddb440a1e4ccb229c Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 11 Dec 2024 19:06:22 +0000 Subject: [PATCH 37/41] move to appropriate type --- docs/workflows/public_data_sharing/fetch_srr_accession.md | 2 +- docs/workflows_overview/workflows_type.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/workflows/public_data_sharing/fetch_srr_accession.md b/docs/workflows/public_data_sharing/fetch_srr_accession.md index aa18c6438..df432d1ca 100644 --- a/docs/workflows/public_data_sharing/fetch_srr_accession.md +++ b/docs/workflows/public_data_sharing/fetch_srr_accession.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | +| [Data Import](../../workflows_overview/workflows_type.md/#data-import) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## Fetch SRR Accession diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 289febf71..2c77d443a 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -15,6 +15,7 @@ title: Workflows by Type | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | 
[Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | [BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | @@ -98,7 +99,6 @@ title: Workflows by Type |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | From f50f62c17edf0d9b03c87783c61da6462854799f Mon Sep 17 00:00:00 2001 
From: Sage Wright Date: Wed, 11 Dec 2024 19:07:54 +0000 Subject: [PATCH 38/41] fix disk type --- tasks/utilities/file_handling/task_cat_lanes.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/utilities/file_handling/task_cat_lanes.wdl b/tasks/utilities/file_handling/task_cat_lanes.wdl index 555a9c533..60a2b7d28 100644 --- a/tasks/utilities/file_handling/task_cat_lanes.wdl +++ b/tasks/utilities/file_handling/task_cat_lanes.wdl @@ -47,7 +47,7 @@ task cat_lanes { docker: "~{docker}" memory: memory + " GB" cpu: cpu - disks: "local-disk " + disk_size + " HDD" + disks: "local-disk " + disk_size + " SSD" disk: disk_size + " GB" preemptible: 1 } From 5bc14bd51172274727634c5493433d8f4de3a709 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 11 Dec 2024 19:19:57 +0000 Subject: [PATCH 39/41] remove files from test --- .../test_wf_theiaprok_illumina_pe.yml | 75 ------------------- .../test_wf_theiaprok_illumina_se.yml | 70 ----------------- 2 files changed, 145 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 5c6ad15b3..58f0b6fb4 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -70,81 +70,6 @@ md5sum: 3cfdda0096f0689c9829ed27bdef6b1a - path: miniwdl_run/call-busco/work/_miniwdl_inputs/0/test_contigs.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-busco/work/busco_downloads/file_versions.tsv - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/101957at2157.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102178at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102360at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98221at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98657at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99236at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99734at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99842at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1009041at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1024388at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1036075at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1043239at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/961486at2.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/981870at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/984717at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/list_of_reference_markers.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxid-lineage.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxids-busco_dataset_name.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/supermatrix.aln.bacteria_odb10.2019-12-16.faa - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree.bacteria_odb10.2019-12-16.nwk - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree_metadata.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/fragmented_busco_sequences/108145at2157.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/fragmented_busco_sequences/108145at2157.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/single_copy_busco_sequences/84219at2157.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/single_copy_busco_sequences/84219at2157.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/full_table.tsv 
- - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/101957at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102178at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102360at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99734at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99842at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1827334at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1211060at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1009041at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1024388at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1036075at2.out 
- - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/961486at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/981870at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/984717at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/placement_files/marker_genes.fasta - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/logs/busco.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_err.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_out.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_err.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_out.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_err.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_out.log - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.faa - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.fna - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 39a78ebe1..06caae04d 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -70,76 +70,6 @@ md5sum: 3cfdda0096f0689c9829ed27bdef6b1a - path: miniwdl_run/call-busco/work/_miniwdl_inputs/0/test_contigs.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-busco/work/busco_downloads/file_versions.tsv - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/101957at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102178at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102360at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98221at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98657at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99236at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99734at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99842at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1009041at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1024388at2.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1036075at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1043239at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/961486at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/981870at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/984717at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/list_of_reference_markers.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxid-lineage.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxids-busco_dataset_name.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/supermatrix.aln.bacteria_odb10.2019-12-16.faa - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree.bacteria_odb10.2019-12-16.nwk - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree_metadata.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/101957at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102178at2157.out - - path: 
miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102360at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99734at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99842at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1505038at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1009041at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1024388at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1036075at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/961486at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/981870at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/984717at2.out - - path: 
miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/placement_files/marker_genes.fasta - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/logs/busco.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_err.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_out.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_err.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_out.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_err.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_out.log - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.faa - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.fna - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa From 04414e27f7a4a7de7a934a11f7496b9d6187394e Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 12 Dec 2024 18:18:26 +0000 Subject: [PATCH 40/41] finish updating versions everywhere --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- docs/workflows/genomic_characterization/theiacov.md | 2 +- docs/workflows/genomic_characterization/theiaeuk.md | 2 +- docs/workflows/genomic_characterization/theiameta.md | 2 +- docs/workflows/genomic_characterization/vadr_update.md | 2 +- docs/workflows/phylogenetic_construction/augur.md | 2 +- docs/workflows/phylogenetic_construction/snippy_streamline.md | 2 +- .../phylogenetic_construction/snippy_streamline_fasta.md | 2 +- docs/workflows/public_data_sharing/mercury_prep_n_batch.md | 2 +- docs/workflows/public_data_sharing/terra_2_ncbi.md | 2 +- docs/workflows/standalone/kraken2.md | 2 +- docs/workflows/standalone/tbprofiler_tngs.md | 2 +- 
docs/workflows_overview/workflows_kingdom.md | 2 +- docs/workflows_overview/workflows_type.md | 2 +- 14 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3aebe21d9..96f294e54 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -45,7 +45,8 @@ This PR uses an element that could cause duplicate runs to have different result - [ ] The workflow/task has been tested and results, including file contents, are as anticipated - [ ] The CI/CD has been adjusted and tests are passing (Theiagen developers) - [ ] Code changes follow the [style guide](https://theiagen.notion.site/Style-Guide-WDL-Workflow-Development-51b66a47dde54c798f35d673fff80249) -- [ ] Documentation and/or workflow diagrams have been updated if applicable (Theiagen developers only) +- [ ] Documentation and/or workflow diagrams have been updated if applicable + - [ ] You have updated the latest version for any affected workflows in the respective workflow documentation page and for every entry in the three `workflows_overview` tables. 
## 🎯 Reviewer Checklist diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index 3e2ad8956..480bfbf04 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes, some optional features incompatible | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes, some optional features incompatible | Sample-level | ## TheiaCoV Workflows diff --git a/docs/workflows/genomic_characterization/theiaeuk.md b/docs/workflows/genomic_characterization/theiaeuk.md index cc9cba9c1..2bfc2e6cf 100644 --- a/docs/workflows/genomic_characterization/theiaeuk.md +++ b/docs/workflows/genomic_characterization/theiaeuk.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibliity** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Mycotics](../../workflows_overview/workflows_kingdom.md/#mycotics) | PHB v2.2.0 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Mycotics](../../workflows_overview/workflows_kingdom.md/#mycotics) | PHB v2.3.0 | Yes | Sample-level | ## TheiaEuk Workflows diff --git a/docs/workflows/genomic_characterization/theiameta.md b/docs/workflows/genomic_characterization/theiameta.md index d6b55e80a..fad3c359a 100644 --- 
a/docs/workflows/genomic_characterization/theiameta.md +++ b/docs/workflows/genomic_characterization/theiameta.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.2.0 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## TheiaMeta Workflows diff --git a/docs/workflows/genomic_characterization/vadr_update.md b/docs/workflows/genomic_characterization/vadr_update.md index ceaa45fa8..b3d706d72 100644 --- a/docs/workflows/genomic_characterization/vadr_update.md +++ b/docs/workflows/genomic_characterization/vadr_update.md @@ -5,7 +5,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v1.2.1 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes | Sample-level | ## Vadr_Update_PHB diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md index c9d144997..9e5f67637 100644 --- a/docs/workflows/phylogenetic_construction/augur.md +++ b/docs/workflows/phylogenetic_construction/augur.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | 
|---|---|---|---|---| -| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.1.0 | Yes | Sample-level, Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes | Sample-level, Set-level | ## Augur Workflows diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline.md b/docs/workflows/phylogenetic_construction/snippy_streamline.md index aa04198b3..facc3e1c4 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes; some optional features incompatible | Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes; some optional features incompatible | Set-level | ## Snippy_Streamline_PHB diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md index 890674b3f..118c66588 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Phylogenetic 
Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes; some optional features incompatible | Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes; some optional features incompatible | Set-level | ## Snippy_Streamline_FASTA_PHB diff --git a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md index 4fcc48d36..56e169e82 100644 --- a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md +++ b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes | Set-level | +| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes | Set-level | ## Mercury_Prep_N_Batch_PHB diff --git a/docs/workflows/public_data_sharing/terra_2_ncbi.md b/docs/workflows/public_data_sharing/terra_2_ncbi.md index 0fa48e50e..54e17aa9d 100644 --- a/docs/workflows/public_data_sharing/terra_2_ncbi.md +++ b/docs/workflows/public_data_sharing/terra_2_ncbi.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Bacteria](../../workflows_overview/workflows_kingdom.md#bacteria), 
[Mycotics](../../workflows_overview/workflows_kingdom.md#mycotics) [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.1.0 | No | Set-level | +| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Bacteria](../../workflows_overview/workflows_kingdom.md#bacteria), [Mycotics](../../workflows_overview/workflows_kingdom.md#mycotics), [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | No | Set-level | ## Terra_2_NCBI_PHB diff --git a/docs/workflows/standalone/kraken2.md b/docs/workflows/standalone/kraken2.md index df36e56a1..95c86c216 100644 --- a/docs/workflows/standalone/kraken2.md +++ b/docs/workflows/standalone/kraken2.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.0.0 | Yes | Sample-level | +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## Kraken2 Workflows diff --git a/docs/workflows/standalone/tbprofiler_tngs.md b/docs/workflows/standalone/tbprofiler_tngs.md index 874183471..96f29e0bf 100644 --- a/docs/workflows/standalone/tbprofiler_tngs.md +++ b/docs/workflows/standalone/tbprofiler_tngs.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.0.0 | Yes | Sample-level | +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes | 
Sample-level | ## TBProfiler_tNGS_PHB diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index bc446c45a..9d8ffc719 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -17,7 +17,7 @@ title: Workflows by Kingdom | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | | [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Set-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | 
[Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 2c77d443a..e43a3f7c0 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -15,7 +15,7 @@ title: Workflows by Type | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level 
| Yes | v2.0.0 | [BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | From 75c2594d1c8e51e0265be16802042ebbc4b4422e Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 12 Dec 2024 18:21:56 +0000 Subject: [PATCH 41/41] add sample-level to fetch_srr_accession thing --- docs/workflows_overview/workflows_alphabetically.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index a797f2251..cc90bf439 100644 --- a/docs/workflows_overview/workflows_alphabetically.md 
+++ b/docs/workflows_overview/workflows_alphabetically.md @@ -19,7 +19,7 @@ title: Alphabetical Workflows | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. | Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | | Yes | v2.3.0 | [*Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across 
multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) |