-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
changes to dorado basecall task: added logic for selecting model to b…
…e used at runtime; improved logging of dorado STDERR to a file; parsed explicit model name from STDERR file or accept user input string; added dorado_log task output file
- Loading branch information
Showing
1 changed file
with
29 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,54 +17,66 @@ task basecall { | |
# Capture Dorado version and log it | ||
dorado --version > DORADO_VERSION 2>&1 | ||
echo "Captured Dorado version:" $(cat DORADO_VERSION) | ||
|
||
# Define the model to use, substituting "sup" with the full model name if given | ||
resolved_model="~{dorado_model}" | ||
if [ "$resolved_model" = "sup" ]; then | ||
resolved_model="[email protected]" | ||
|
||
# Choose the model argument that is handed to the dorado basecaller command:
#   - "fast", "hac" or "sup" are passed through unchanged.
#   - anything else is treated as an explicit model name (for example
#     "dna_r10.4.1_e8.2_400bps_sup@v5.0.0") and is prefixed with the hardcoded
#     directory in the container filesystem that has the dorado models.
if [ "~{dorado_model}" == "fast" ] || [ "~{dorado_model}" == "hac" ] || [ "~{dorado_model}" == "sup" ]; then
  dorado_model_variable="~{dorado_model}"
else
  dorado_model_variable="/dorado_models/~{dorado_model}"
fi
# Single debug line for both branches; the message text is the same as before.
echo "DEBUG: dorado_model_variable is set to: $dorado_model_variable"
|
||
# Log the resolved model name | ||
echo "Using Dorado model: $resolved_model" | ||
echo "$resolved_model" > "DORADO_MODEL" | ||
|
||
# Define a log file path to capture output | ||
log_file="dorado_basecall.log" | ||
|
||
# Create a unique output directory for each scatter job | ||
base_name=$(basename "~{input_file}" .pod5) | ||
sam_output="output/sam_${base_name}/" | ||
mkdir -p "$sam_output" | ||
|
||
echo "### Starting basecalling for ~{input_file} ###" | tee -a "$log_file" | ||
echo "### Starting basecalling for ~{input_file} ###" | tee -a dorado_basecall.log | ||
|
||
# Set SAM file path with unique naming based on POD5 basename | ||
sam_file="$sam_output/${base_name}.sam" | ||
|
||
echo "Processing ~{input_file}, expected output: $sam_file" | tee -a "$log_file" | ||
echo "Processing ~{input_file}, expected output: $sam_file" | tee -a dorado_basecall.log | ||
|
||
# Run Dorado basecaller and log output | ||
# This part "2> >(tee -a dorado_basecall.log >&2)" is used to redirect STDERR to the screen AND to append the STDERR to the dorado_basecall.log file. | ||
# Useful for troubleshooting in Terra and for parsing for important information. | ||
dorado basecaller \ | ||
"~{dorado_model}" \ | ||
"${dorado_model_variable}" \ | ||
"~{input_file}" \ | ||
--kit-name ~{kit_name} \ | ||
--emit-sam \ | ||
--no-trim \ | ||
--output-dir "$sam_output" \ | ||
--verbose | tee -a "$log_file" || { echo "ERROR: Dorado basecaller failed for ~{input_file}"; exit 1; } | ||
--verbose 2> >(tee -a dorado_basecall.log >&2) || { echo "ERROR: Dorado basecaller failed for ~{input_file}"; exit 1; } | ||
|
||
# Record the model name in the DORADO_MODEL file (and echo it to STDOUT via tee).
echo "DEBUG: Parsing model name from dorado log or capturing string from user input..."
# Each comparison needs its own [ ... ] test. Joining them with || inside a single
# pair of brackets is split by the shell into separate commands, so the condition
# is never true and the log-parsing branch is never reached.
if [ "~{dorado_model}" == "fast" ] || [ "~{dorado_model}" == "hac" ] || [ "~{dorado_model}" == "sup" ]; then
  echo "DEBUG: User provided either fast, hac, or sup as input for dorado_model variable, parsing log for explicit model name now..."
  # Take the first log line containing "downloading", keep the text between
  # "downloading " and " with", and strip the trailing newline.
  grep -m 1 'downloading' dorado_basecall.log | sed -e 's/.*downloading //' -e 's/ with.*//' | tr -d '\n' | tee DORADO_MODEL
else
  # The user provided an explicit model name: output that string, no parsing involved.
  echo "~{dorado_model}" | tee DORADO_MODEL
fi
|
||
# Rename the generated SAM file to the unique name based on input_file.
# Only the first *.sam path reported by find under $sam_output is used.
generated_sam=$(find "$sam_output" -name "*.sam" | head -n 1)
# Stop with a clear message when no SAM file was found, instead of calling mv
# with an empty source path.
if [ -z "$generated_sam" ]; then
  echo "ERROR: No SAM file found in $sam_output for ~{input_file}" >&2
  exit 1
fi
mv "$generated_sam" "$sam_file"
|
||
echo "Basecalling completed for ~{input_file}. SAM file renamed to: $sam_file" | tee -a "$log_file" | ||
echo "Basecalling completed for ~{input_file}. SAM file renamed to: $sam_file" | tee -a "dorado_basecall.log" | ||
>>> | ||
|
||
output { | ||
Array[File] sam_files = glob("output/sam_*/*.sam") | ||
String dorado_docker = docker | ||
String dorado_version = read_string("DORADO_VERSION") | ||
String dorado_model_used = read_string("DORADO_MODEL") | ||
# keeping this dorado_log just for debugging purposes, not a wf output | ||
File dorado_log = "dorado_basecall.log" | ||
} | ||
|
||
runtime { | ||
|