Skip to content

Commit

Permalink
improving validation subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
DOH-JDJ0303 committed May 1, 2024
1 parent e040a71 commit a756e8c
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 46 deletions.
2 changes: 1 addition & 1 deletion bin/validate.sh → bin/val_gather.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# validate.sh v1.0
# val_gather.sh v1.0
# Author: Jared Johnson, [email protected]

#----- INPUTS -----#
Expand Down
5 changes: 5 additions & 0 deletions bin/contig-pairs.sh → bin/val_pair.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
#!/bin/bash

# val_pair.sh v1.0
# Author: Jared Johnson, [email protected]

#----- INPUTS -----#
F1=$1
F2=$2
PREFIX=$3

#
cat $F1 | awk '{print $1}' > f1.fa
cat $F2 | awk '{print $1}' > f2.fa

Expand Down
8 changes: 4 additions & 4 deletions bin/val_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ library(tidyr)
# calculate global metrics
global_metrics <- function(results_file, pairs_file, metric){
if((file.exists(results_file) && file.exists(pairs_file))){
results <- read_csv(results_file) %>%
.$Result
n_missing_extra <- read_csv(pairs_file, col_names = F) %>%
df <- read_csv(results_file)
results <- df[,ncol(df)] %>% unlist()
n_missing_extra <- read_tsv(pairs_file, col_names = F) %>%
rename(seq1=1,
seq2=2) %>%
filter(seq1 == "null" | seq2 == "null") %>%
Expand All @@ -29,7 +29,7 @@ global_metrics <- function(results_file, pairs_file, metric){
}else(return(data.frame("metric" = metric)))
}

acc <- global_metrics("accuracy_null_results.csv","accuracy_null_pairs.csv", "Accuracy")
acc <- global_metrics("accuracy_results.csv","accuracy_pairs.csv", "Accuracy")
inter <- global_metrics("precision_inter_results.csv","precision_inter_pairs.csv", "Inter-Assay Reproducility")
intra <- global_metrics("precision_intra_results.csv","precision_intra_pairs.csv", "Intra-Assay Reproducility")

Expand Down
31 changes: 30 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -242,5 +242,34 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Config for the PAIR process (modules/local/val_pair.nf): publishes the
// contig-pair listings (*.pairs.txt) it produces into <outdir>/validation.
withName: 'PAIR' {
ext.args = ""
// NOTE(review): empty ext.when closure — presumably "always run"; confirm intent.
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/validation" },
pattern: "*.pairs.txt"
]
}
// Config for the GATHER process (modules/local/val_gather.nf): publishes the
// per-metric CSV outputs into <outdir>/validation.
withName: 'GATHER' {
ext.args = ""
// NOTE(review): empty ext.when closure — presumably "always run"; confirm intent.
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/validation" },
pattern: "*.csv"
]
}
// Config for the REPORT process (modules/local/val_report.nf): publishes only
// the final validation-report.csv into <outdir>/validation.
withName: 'REPORT' {
ext.args = ""
// NOTE(review): empty ext.when closure — presumably "always run"; confirm intent.
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/validation" },
pattern: "validation-report.csv"
]
}
}
16 changes: 15 additions & 1 deletion modules/local/irma.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process IRMA {
path module_template

output:
tuple val(meta), path('results/*.fasta'), emit: consensus
tuple val(meta), path('results/*.fa'), emit: consensus
tuple val(meta), path('results/*.bam'), emit: bam
tuple val(meta), path('results/logs/'), emit: logs
tuple val(meta), path('results/figures/'), emit: figures
Expand All @@ -26,13 +26,27 @@ process IRMA {
"""
# determine IRMA path
irma_path=\$(which IRMA)
# create module
mod=\$(shuf -er -n20 {A..Z} {a..z} {0..9} | tr -d '\n')
mv ${module_template} \${irma_path}_RES/modules/\${mod}
# combine references into single file
cat ${refs} > \${irma_path}_RES/modules/\${mod}/reference/consensus.fasta
# run IRMA
IRMA \${mod} ${reads[0]} ${reads[1]} results
# update fasta names and headers
for f in \$(ls results/*.fasta)
do
file=\${f##*/}
ref_id=\${file%.fasta}
PREFIX="${prefix}_\${ref_id}"
cat \${f} | sed "s/>.*/>\${PREFIX}/g" > results/\${PREFIX}.fa
done
# clean up
rm -r \${irma_path}_RES/modules/\${mod}
"""
Expand Down
4 changes: 3 additions & 1 deletion modules/local/ivar_consensus.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process IVAR_CONSENSUS {

script:
def args = task.ext.args ?: ''
prefix = "${meta.id}-${ref_id}"
prefix = "${meta.id}_${ref_id}"

"""
# setup for pipe
Expand All @@ -32,6 +32,8 @@ process IVAR_CONSENSUS {
-t ${params.ivar_t} \\
-q ${params.ivar_q} \\
${args}
sed -i 's/>.*/>${prefix}/g' ${prefix}.fa
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 2 additions & 2 deletions modules/local/val_metrics.nf → modules/local/val_gather.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process METRICS {
process GATHER {
label 'process_low'

conda "bioconda::mafft=7.520"
Expand All @@ -20,7 +20,7 @@ process METRICS {
def args = task.ext.args ?: ''
"""
mafft --auto ${fasta} > ${fasta.baseName}.aln
validate.sh ${fasta.baseName}.aln "${metric}"
val_gather.sh ${fasta.baseName}.aln "${metric}"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
24 changes: 0 additions & 24 deletions modules/local/val_join.nf

This file was deleted.

2 changes: 1 addition & 1 deletion modules/local/val_pair.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ process PAIR {
cat ${seqs2} > seq2.fa
# create contig pairs
contig-pairs.sh seq1.fa seq2.fa "${id}"
val_pair.sh seq1.fa seq2.fa "${id}"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/assemble.nf
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ workflow ASSEMBLE {
.out
.consensus
.transpose()
.map{ meta, consensus -> [meta, consensus.getSimpleName(), consensus] }
.map{ meta, consensus -> [meta, consensus.getSimpleName().replace(meta.id+'_', ''), consensus] }
.set{ ch_consensus }
}

Expand Down
101 changes: 91 additions & 10 deletions subworkflows/local/validate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
//

include { PAIR } from '../../modules/local/val_pair'
include { METRICS } from '../../modules/local/val_metrics'
include { JOIN } from '../../modules/local/val_join'
include { GATHER } from '../../modules/local/val_gather'
include { REPORT } from '../../modules/local/val_report'


Expand Down Expand Up @@ -64,26 +63,108 @@ workflow VALIDATE {

ch_accuracy.concat(ch_inter_group).concat(ch_intra_group).set{ ch_datasets }

/*
=============================================================================================================================
DETERMINE PAIRWISE COMPARISONS
=============================================================================================================================
*/
PAIR (
ch_datasets
)

METRICS (
/*
=============================================================================================================================
GATHER METRICS
=============================================================================================================================
*/
GATHER (
PAIR.out.fasta.transpose()
)

METRICS
/*
=============================================================================================================================
JOIN ALL METRICS/PAIRS INTO SINGLE CHANNEL
=============================================================================================================================
*/
// Accuracy
GATHER
.out
.result
.groupTuple(by: [0,1])
.join(PAIR.out.pairs.groupTuple(by: [0,1]), by: [0,1])
.filter{ metric, type, results -> metric == "accuracy" }
.map{ metric, type, results -> results }
.splitText()
.filter(line -> line != "Sample,Truth,TP,FP,FN,Result\n")
.collectFile(name: "accuracy_results.csv")
.set{ ch_acc_res }

PAIR
.out
.pairs
.filter{ metric, type, pairs -> metric == "accuracy" }
.map{ metric, type, pairs -> pairs }
.splitText()
.collectFile(name: "accuracy_pairs.csv")
.set{ ch_acc_pair }

// Inter-assay Precision
GATHER
.out
.result
.filter{ metric, type, results -> metric == "precision" && type == "inter" }
.map{ metric, type, results -> results }
.splitText()
.filter(line -> line != "Sample1,Sample2,TP,PP,Result\n")
.collectFile(name: "precision_inter_results.csv")
.set{ ch_prec_inter_res }


PAIR
.out
.pairs
.filter{ metric, type, pairs -> metric == "precision" && type == "inter" }
.map{ metric, type, pairs -> pairs }
.splitText()
.collectFile(name: "precision_inter_pairs.csv")
.set{ ch_prec_inter_pair }

// Intra-assay Precision
GATHER
.out
.result
.filter{ metric, type, results -> metric == "precision" && type == "intra" }
.map{ metric, type, results -> results }
.splitText()
.filter(line -> line != "Sample1,Sample2,TP,PP,Result\n")
.collectFile(name: "precision_intra_results.csv")
.set{ ch_prec_intra_res }

PAIR
.out
.pairs
.filter{ metric, type, pairs -> metric == "precision" && type == "intra" }
.map{ metric, type, pairs -> pairs }
.splitText()
.collectFile(name: "precision_intra_pairs.csv")
.set{ ch_prec_intra_pair }

// Combine all
ch_acc_res
.concat(ch_acc_pair)
.concat(ch_prec_inter_res)
.concat(ch_prec_inter_pair)
.concat(ch_prec_intra_res)
.concat(ch_prec_intra_pair)
.flatten()
.collect()
.set{ ch_results }
JOIN (
ch_results
)

/*
=============================================================================================================================
GENERATE REPORT
=============================================================================================================================
*/
REPORT (
JOIN.out.results.flatten().collect()
ch_results
)

//emit:
Expand Down

0 comments on commit a756e8c

Please sign in to comment.