From 39ece663024fe0b3a95ed1bbf8babf816400e0dc Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Fri, 8 Nov 2024 20:39:12 +0000
Subject: [PATCH 1/7] Add iqtree model extraction to augur_tree task and update
 workflow output

---
 .../augur/task_augur_tree.wdl                     | 15 +++++++++++++++
 workflows/phylogenetics/wf_augur.wdl              |  1 +
 2 files changed, 16 insertions(+)

diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
index f16c73618..a8b4f30ea 100644
--- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
+++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
@@ -19,6 +19,9 @@ task augur_tree {
     # capture version information
     augur version > VERSION
 
+    # Get alignment basename
+    FASTA_BASENAME=$(basename ~{aligned_fasta} .fasta)
+
     AUGUR_RECURSION_LIMIT=10000 augur tree \
       --alignment "~{aligned_fasta}" \
       --output "~{build_name}_~{method}.nwk" \
@@ -28,10 +31,22 @@ task augur_tree {
       ~{"--tree-builder-args " + tree_builder_args} \
       ~{true="--override-default-args" false="" override_default_args} \
       --nthreads auto
+
+    # If iqtree, get the model used
+    if [ "~{method}" == "iqtree" ]; then
+      if [ "~{substitution_model}" == "auto" ]; then
+        MODEL=$(grep "Best-fit model:" ${FASTA_BASENAME}-delim.fasta.log | sed 's|Best-fit model: ||g;s|chosen.*||' | tr -d '\n\r')
+      else
+        MODEL="~{substitution_model}"
+      fi
+      echo "$MODEL" > FINAL_MODEL.txt
+    fi
   >>>
+
   output {
     File aligned_tree  = "~{build_name}_~{method}.nwk"
     String augur_version = read_string("VERSION")
+    String? iqtree_model_used = read_string("FINAL_MODEL.txt")
   }
   runtime {
     docker: docker
diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl
index 3398d430f..e77c20fb5 100644
--- a/workflows/phylogenetics/wf_augur.wdl
+++ b/workflows/phylogenetics/wf_augur.wdl
@@ -203,6 +203,7 @@ workflow augur {
     File? auspice_input_json = augur_export.auspice_json
     File? time_tree = augur_refine.refined_tree
     File distance_tree = augur_tree.aligned_tree
+    String? iqtree_model_used = augur_tree.iqtree_model_used
     File aligned_fastas = select_first([augur_align.aligned_fasta, alignment_fasta])
     File combined_assemblies = filter_sequences_by_length.filtered_fasta
     File? metadata_merged = tsv_join.out_tsv

From d181a1747fe503346f8d7463e1b78d828a08c162 Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Fri, 8 Nov 2024 21:17:07 +0000
Subject: [PATCH 2/7] Refactor augur_tree task to correctly derive FASTA
 basename and directory for iqtree model extraction

---
 tasks/phylogenetic_inference/augur/task_augur_tree.wdl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
index a8b4f30ea..be6566595 100644
--- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
+++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
@@ -19,9 +19,6 @@ task augur_tree {
     # capture version information
     augur version > VERSION
 
-    # Get alignment basename
-    FASTA_BASENAME=$(basename ~{aligned_fasta} .fasta)
-
     AUGUR_RECURSION_LIMIT=10000 augur tree \
       --alignment "~{aligned_fasta}" \
       --output "~{build_name}_~{method}.nwk" \
@@ -35,7 +32,9 @@ task augur_tree {
     # If iqtree, get the model used
     if [ "~{method}" == "iqtree" ]; then
       if [ "~{substitution_model}" == "auto" ]; then
-        MODEL=$(grep "Best-fit model:" ${FASTA_BASENAME}-delim.fasta.log | sed 's|Best-fit model: ||g;s|chosen.*||' | tr -d '\n\r')
+        FASTA_BASENAME=$(basename ~{aligned_fasta} .fasta)
+        FASTA_DIR=$(dirname ~{aligned_fasta})
+        MODEL=$(grep "Best-fit model:" ${FASTA_DIR}/${FASTA_BASENAME}-delim.iqtree.log | sed 's|Best-fit model: ||g;s|chosen.*||' | tr -d '\n\r')
       else
         MODEL="~{substitution_model}"
       fi

From 2ac0c0d358ae92014d64e1d0224105e0da38f2a7 Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Tue, 12 Nov 2024 14:42:38 +0000
Subject: [PATCH 3/7] Add iqtree model used field to augur workflow
 documentation

---
 docs/workflows/phylogenetic_construction/augur.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md
index 7ccf78d56..7e885d945 100644
--- a/docs/workflows/phylogenetic_construction/augur.md
+++ b/docs/workflows/phylogenetic_construction/augur.md
@@ -290,6 +290,7 @@ The Nextstrain team hosts documentation surrounding the Augur workflow → Auspi
 | auspice_input_json | File | JSON file used as input to Auspice |
 | combined_assemblies | File | Concatenated FASTA file containing all samples |
 | distance_tree | File | The distance tree created in Newick (.nwk) format |
+| iqtree_model_used | String | The iqtree model used during augur tree |
 | keep_list | File | A list of samples included in the phylogenetic tree |
 | metadata_merged | File | Tab-delimited text file of the merged augur_metadata input files from all samples |
 | snp_matrix | File | The SNP distance matrix for all samples used in the phylogenetic tree |

From a9a4e65a54a1880c6ca6992cdf966a4674f3d29a Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Tue, 12 Nov 2024 14:53:32 +0000
Subject: [PATCH 4/7] Write empty FINAL_MODEL.txt for non-iqtree methods in augur_tree task

---
 tasks/phylogenetic_inference/augur/task_augur_tree.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
index be6566595..af50fb9f9 100644
--- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
+++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
@@ -39,6 +39,8 @@ task augur_tree {
         MODEL="~{substitution_model}"
       fi
       echo "$MODEL" > FINAL_MODEL.txt
+    else
+      echo "" > FINAL_MODEL.txt
     fi
   >>>
 

From 064d6778c01bf7f00ebf18a87a2cb6738fabdb07 Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Tue, 12 Nov 2024 15:23:17 +0000
Subject: [PATCH 5/7] Ensure iqtree_model_used is a non-nullable String in
 augur_tree task and workflow

---
 tasks/phylogenetic_inference/augur/task_augur_tree.wdl | 2 +-
 workflows/phylogenetics/wf_augur.wdl                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
index af50fb9f9..22bd469e7 100644
--- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
+++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl
@@ -47,7 +47,7 @@ task augur_tree {
   output {
     File aligned_tree  = "~{build_name}_~{method}.nwk"
     String augur_version = read_string("VERSION")
-    String? iqtree_model_used = read_string("FINAL_MODEL.txt")
+    String iqtree_model_used = read_string("FINAL_MODEL.txt")
   }
   runtime {
     docker: docker
diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl
index e77c20fb5..3a6b64ba9 100644
--- a/workflows/phylogenetics/wf_augur.wdl
+++ b/workflows/phylogenetics/wf_augur.wdl
@@ -203,7 +203,7 @@ workflow augur {
     File? auspice_input_json = augur_export.auspice_json
     File? time_tree = augur_refine.refined_tree
     File distance_tree = augur_tree.aligned_tree
-    String? iqtree_model_used = augur_tree.iqtree_model_used
+    String iqtree_model_used = augur_tree.iqtree_model_used
     File aligned_fastas = select_first([augur_align.aligned_fasta, alignment_fasta])
     File combined_assemblies = filter_sequences_by_length.filtered_fasta
     File? metadata_merged = tsv_join.out_tsv

From 8e8f1f2a77a13bd13df6fa1835359ce8536573b1 Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Thu, 14 Nov 2024 16:03:48 +0000
Subject: [PATCH 6/7] Update augur workflow documentation to include model
 options for substitution model and rename iqtree model variable for clarity

---
 docs/workflows/phylogenetic_construction/augur.md | 2 +-
 workflows/phylogenetics/wf_augur.wdl              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md
index 7e885d945..24c53d8f7 100644
--- a/docs/workflows/phylogenetic_construction/augur.md
+++ b/docs/workflows/phylogenetic_construction/augur.md
@@ -220,7 +220,7 @@ This workflow runs on the set level. Please note that for every task, runtime pa
 | augur_tree | **exclude_sites** | File | File of one-based sites to exclude for raw tree building (BED format in .bed files, DRM format in tab-delimited files, or one position per line) |  | Optional |
 | augur_tree | **method** | String | Which method to use to build the tree; options: "fasttree", "raxml", "iqtree" | iqtree | Optional |
 | augur_tree | **override_default_args** | Boolean | If true, override default tree builder arguments instead of augmenting them | FALSE | Optional |
-| augur_tree | **substitution_model** | String | The substitution model to use; only available for iqtree. Specify "auto" to run ModelTest; options: "GTR" | GTR | Optional |
+| augur_tree | **substitution_model** | String | The substitution model to use; only available for iqtree. Specify "auto" to run ModelTest; model options are listed in the [IQ-TREE substitution models documentation](http://www.iqtree.org/doc/Substitution-Models) | GTR | Optional |
 | augur_tree | **tree_builder_args** | String | Additional tree builder arguments either augmenting or overriding the default arguments. FastTree defaults: "-nt -nosupport". RAxML defaults: "-f d -m GTRCAT -c 25 -p 235813". IQ-TREE defaults: "-ninit 2 -n 2 -me 0.05 -nt AUTO -redo" |  | Optional |
 | sc2_defaults | **nextstrain_ncov_repo_commit** | String | The version of the <https://github.com/nextstrain/ncov/> from which to draw default values for SARS-CoV-2. | `23d1243127e8838a61b7e5c1a72bc419bf8c5a0d` | Optional |
 | organism_parameters | **gene_locations_bed_file** | File | Use to provide locations of interest where average coverage will be calculated | Defaults are organism-specific. Please find default values for some organisms here: <https://github.com/theiagen/public_health_bioinformatics/blob/main/workflows/utilities/wf_organism_parameters.wdl>. For an organism without set defaults, an empty file is provided, "gs://theiagen-public-files/terra/theiacov-files/empty.bed", but will not be as useful as an organism specific gene locations bed file. | Optional |
diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl
index 3a6b64ba9..bb003b705 100644
--- a/workflows/phylogenetics/wf_augur.wdl
+++ b/workflows/phylogenetics/wf_augur.wdl
@@ -203,7 +203,7 @@ workflow augur {
     File? auspice_input_json = augur_export.auspice_json
     File? time_tree = augur_refine.refined_tree
     File distance_tree = augur_tree.aligned_tree
-    String iqtree_model_used = augur_tree.iqtree_model_used
+    String augur_iqtree_model_used = augur_tree.iqtree_model_used
     File aligned_fastas = select_first([augur_align.aligned_fasta, alignment_fasta])
     File combined_assemblies = filter_sequences_by_length.filtered_fasta
     File? metadata_merged = tsv_join.out_tsv

From 548ca2696b04a90dd273bc944a6ff50c8d68ace5 Mon Sep 17 00:00:00 2001
From: Michal-Babins <michal.babinski@theiagen.com>
Date: Thu, 14 Nov 2024 19:07:26 +0000
Subject: [PATCH 7/7] Rename iqtree_model_used to augur_iqtree_model_used in
 augur workflow documentation

---
 docs/workflows/phylogenetic_construction/augur.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md
index 24c53d8f7..d8eb10f9f 100644
--- a/docs/workflows/phylogenetic_construction/augur.md
+++ b/docs/workflows/phylogenetic_construction/augur.md
@@ -284,13 +284,13 @@ The Nextstrain team hosts documentation surrounding the Augur workflow → Auspi
 | **Variable** | **Type** | **Description** |
 | --- | --- | --- |
 | aligned_fastas | File | A FASTA file of the aligned genomes |
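+| augur_iqtree_model_used | String | The IQ-TREE substitution model used by augur tree: the best-fit model reported in the IQ-TREE log when substitution_model is "auto", otherwise the substitution_model provided; empty if a tree-building method other than iqtree was used |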
 | augur_phb_analysis_date | String | The date the analysis was run |
 | augur_phb_version | String | The version of the Public Health Bioinformatics (PHB) repository used |
 | augur_version | String | Version of Augur used |
 | auspice_input_json | File | JSON file used as input to Auspice |
 | combined_assemblies | File | Concatenated FASTA file containing all samples |
 | distance_tree | File | The distance tree created in Newick (.nwk) format |
-| iqtree_model_used | String | The iqtree model used during augur tree |
 | keep_list | File | A list of samples included in the phylogenetic tree |
 | metadata_merged | File | Tab-delimited text file of the merged augur_metadata input files from all samples |
 | snp_matrix | File | The SNP distance matrix for all samples used in the phylogenetic tree |