From d55822d6ab848a82caa3dd015dc552f6608179da Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Sat, 27 Jul 2024 09:58:49 -0700 Subject: [PATCH 1/2] Fix staging This fixes staging for the map_file. It also stages the files in parallel to speed things up a bit. --- mbin_nmdc.wdl | 46 ++++++++++++++++++++++++++-------------------- version.txt | 2 +- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/mbin_nmdc.wdl b/mbin_nmdc.wdl index 582f334..3670fe0 100755 --- a/mbin_nmdc.wdl +++ b/mbin_nmdc.wdl @@ -45,7 +45,8 @@ workflow nmdc_mags { supfam_file=supfam_file, product_names_file=product_names_file, gene_phylogeny_file=gene_phylogeny_file, - lineage_file=lineage_file + lineage_file=lineage_file, + map_file=map_file } call mbin_nmdc { @@ -60,7 +61,7 @@ workflow nmdc_mags { gtdbtk_env = gtdbtk_db, checkm_env = checkm_db, eukcc2_env = eukcc2_db, - map_file = map_file, + map_file = stage.map_file, mbin_container = container } call package { @@ -140,7 +141,7 @@ task mbin_nmdc { Int? pthreads String gtdbtk_env String checkm_env - String? eukcc2_env + String? eukcc2_env String mbin_container } @@ -192,7 +193,7 @@ task mbin_nmdc { runtime{ docker : mbin_container memory : "120 G" - time : "2:00:00" + time : "2:00:00" cpu : threads } @@ -207,7 +208,7 @@ task mbin_nmdc { File mbin_version = "mbin_nmdc_versions.log" File bacsum = "gtdbtk-output/gtdbtk.bac120.summary.tsv" File arcsum = "gtdbtk-output/gtdbtk.ar122.summary.tsv" - File eukcc_csv = "eukcc_output/eukcc.csv.final" + File eukcc_csv = "eukcc_output/eukcc.csv.final" Array[File] hqmq_bin_fasta_files = glob("hqmq-metabat-bins/*fa") Array[File] lq_bin_fasta_files = glob("filtered-metabat-bins/*fa") } @@ -232,6 +233,7 @@ task stage { String product_names_file String gene_phylogeny_file String lineage_file + String? map_file String contigs_out="contigs.fasta" String bam_out="pairedMapped_sorted.bam" String gff_out="functional_annotation.gff" @@ -247,6 +249,7 @@ task stage { String products_out="products.tsv" String gene_phylogeny_out="gene_phylogeny.tsv" String lineage_out="lineage.tsv" + String map_out="map_file.tsv" } command<<< @@ -262,23 +265,25 @@ task stage { fi } - stage ~{contig_file} ~{contigs_out} - stage ~{sam_file} ~{bam_out} - stage ~{gff_file} ~{gff_out} - stage ~{proteins_file} ~{proteins_out} - stage ~{cog_file} ~{cog_out} - stage ~{ec_file} ~{ec_out} - stage ~{ko_file} ~{ko_out} - stage ~{pfam_file} ~{pfam_out} - stage ~{tigrfam_file} ~{tigrfam_out} - stage ~{cath_funfam_file} ~{cath_funfam_out} - stage ~{smart_file} ~{smart_out} - stage ~{supfam_file} ~{supfam_out} - stage ~{product_names_file} ~{products_out} - stage ~{gene_phylogeny_file} ~{gene_phylogeny_out} + stage ~{contig_file} ~{contigs_out} & + stage ~{sam_file} ~{bam_out} & + stage ~{gff_file} ~{gff_out} & + stage ~{proteins_file} ~{proteins_out} & + stage ~{cog_file} ~{cog_out} & + stage ~{ec_file} ~{ec_out} & + stage ~{ko_file} ~{ko_out} & + stage ~{pfam_file} ~{pfam_out} & + stage ~{tigrfam_file} ~{tigrfam_out} & + stage ~{cath_funfam_file} ~{cath_funfam_out} & + stage ~{smart_file} ~{smart_out} & + stage ~{supfam_file} ~{supfam_out} & + stage ~{product_names_file} ~{products_out} & + stage ~{gene_phylogeny_file} ~{gene_phylogeny_out} & stage ~{lineage_file} ~{lineage_out} + ~{"stage " + map_file + " " + map_out} + wait - date --iso-8601=seconds > start.txt + date --iso-8601=seconds > start.txt >>> @@ -298,6 +303,7 @@ task stage { File product_names = "products.tsv" File gene_phylogeny = "gene_phylogeny.tsv" File lineage_tsv = "lineage.tsv" + File? map_file = map_out String start = read_string("start.txt") } runtime { diff --git a/version.txt b/version.txt index 3252f64..dd2cf2f 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v1.3.5 +v1.3.6 From f60c480bdfcffd86d397f665c9481564e07d301e Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Sat, 27 Jul 2024 10:03:32 -0700 Subject: [PATCH 2/2] Fix multiple map_file This was failing linting. --- mbin_nmdc.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mbin_nmdc.wdl b/mbin_nmdc.wdl index 3670fe0..d88950c 100755 --- a/mbin_nmdc.wdl +++ b/mbin_nmdc.wdl @@ -61,7 +61,7 @@ workflow nmdc_mags { gtdbtk_env = gtdbtk_db, checkm_env = checkm_db, eukcc2_env = eukcc2_db, - map_file = stage.map_file, + map_file = stage.map_tsv, mbin_container = container } call package { @@ -303,7 +303,7 @@ task stage { File product_names = "products.tsv" File gene_phylogeny = "gene_phylogeny.tsv" File lineage_tsv = "lineage.tsv" - File? map_file = map_out + File? map_tsv = map_out String start = read_string("start.txt") } runtime {