Skip to content

Commit

Permalink
cleaning up and adding version info
Browse files Browse the repository at this point in the history
  • Loading branch information
DOH-JDJ0303 committed Mar 29, 2024
1 parent 48cf569 commit 7d15347
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 47 deletions.
9 changes: 9 additions & 0 deletions bin/consensus.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
#!/bin/bash
version="1.0"

# consensus.sh
# Author: Jared Johnson, [email protected]

set -o pipefail

# get version info
if [ "$1" == "version" ]; then echo "${version}" && exit 0; fi

# input
name=$1
Expand Down
8 changes: 8 additions & 0 deletions bin/input-qc.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#!/bin/bash
version="1.0"

# input-qc.sh
# Author: Jared Johnson, [email protected]

set -o pipefail

# get version info
if [ "$1" == "version" ]; then echo "${version}" && exit 0; fi

# input
fasta=$1
prefix=$2
Expand Down
14 changes: 12 additions & 2 deletions bin/summary.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env Rscript
version <- "1.0"

#---- LIBRARIES ----#
library(tidyverse)
# summary.R
# Author: Jared Johnson, [email protected]

#---- ARGUMENTS ----#
# Keep only the user-supplied arguments. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
args <- commandArgs(trailingOnly = TRUE)
Expand All @@ -11,6 +12,15 @@ fastani_ava_file <- args[3]
fastani_seeds_file <- args[4]
seeds_file <- args[5]

#---- VERSION ----#
# `summary.R version` prints the script version and exits before the libraries
# below are loaded.
# NOTE(review): clusters_file is presumably args[1]; its assignment sits in the
# collapsed part of this hunk - confirm.
# identical() is used instead of `==` so that an NA value (e.g. no arguments
# supplied) yields FALSE rather than aborting the `if` with
# "missing value where TRUE/FALSE needed".
if (identical(clusters_file, "version")) {
  # writeLines() terminates the output with a newline.
  writeLines(version)
  quit(status = 0)
}

#---- LIBRARIES ----#
library(tidyverse)

#---- FUNCTIONS ----#
basename_fa <- function(path){
result <- basename(path) %>%
Expand Down
5 changes: 5 additions & 0 deletions modules/local/consensus.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,10 @@ process CONSENSUS {
# collect consensus size info
length=\$(cat ${prefix}.fa | grep -v '>' | tr -d '\n\t ' | wc -c)
echo "${prefix},\${length}" > ${prefix}_length.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
consensus: \$(consensus.sh version)
END_VERSIONS
"""
}
5 changes: 5 additions & 0 deletions modules/local/input-qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,10 @@ process INPUT_QC {
input-qc.sh \${seqs} ${prefix} "${expected_length}" "${params.len_threshold}"
# set sequence count
seq_count=\$(cat ${prefix}-qc-summary.csv | cut -f 5 -d ',' | grep -v 'filter4' | tr -d '\t\r\n ')
cat <<-END_VERSIONS > versions.yml
"${task.process}":
input-qc: \$(input-qc.sh version)
END_VERSIONS
"""
}
5 changes: 5 additions & 0 deletions modules/local/summary.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@ process SUMMARY {
cat ${clusters} | grep -v 'seq,taxa,segment,cluster' > clusters-no-header.csv
# run script
summary.R clusters-no-header.csv ${lengths} ${ani_ava} ${ani_seeds} ${seeds}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
summary: \$(summary.R version)
END_VERSIONS
"""
}
117 changes: 72 additions & 45 deletions workflows/epitome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ include { FASTANI_AVA } from '../modules/local/fastani'
include { FASTANI_SEEDS } from '../modules/local/fastani'
include { SUMMARY } from '../modules/local/summary'


//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
Expand Down Expand Up @@ -72,34 +71,54 @@ workflow EPITOME {

ch_versions = Channel.empty()

Channel.fromPath(params.input)
/*
=============================================================================================================================
LOAD SAMPLESHEET
=============================================================================================================================
*/

Channel
.fromPath(params.input)
.splitCsv(header:true)
.map{ tuple(it.taxa, it.segment, file(it.assembly, checkIfExists: true), it.length) }
.set{ manifest }
.set{ manifest }

/*
=============================================================================================================================
QUALITY FILTER SEQUENCES
=============================================================================================================================
*/

// MODULE: Filter low quality sequences
// MODULE: Filter low quality sequences & remove duplicates
INPUT_QC(
manifest
)

//
/*
=============================================================================================================================
CLUSTER SEQUENCES
=============================================================================================================================
*/
// MODULE: Run Mash
//
MASH (
INPUT_QC.out.assemblies
)
ch_versions = ch_versions.mix(MASH.out.versions.first())

// MODULE: CLUSTER
MASH.out.dist.filter{ taxa, segment, dist, count -> count.toInteger() <= 2000 }.set{ small_datasets }
MASH.out.dist.filter{ taxa, segment, dist, count -> count.toInteger() > 2000 }.set{ large_datasets }
// MODULE: Cluster sequences with cutree
// Small datasets
CLUSTER (
small_datasets
MASH.out.dist.filter{ taxa, segment, dist, count -> count.toInteger() <= 2000 }
)
// Record version info from the small-dataset clustering step. The process is invoked as
// CLUSTER in this workflow; `CLUSTERS` is not a name used anywhere else here.
// NOTE(review): assumes CLUSTER declares a `versions` output - confirm in its module file.
ch_versions = ch_versions.mix(CLUSTER.out.versions.first())

// Large datasets - requires much more memory!
CLUSTER_LARGE (
large_datasets
MASH.out.dist.filter{ taxa, segment, dist, count -> count.toInteger() > 2000 }
)
// Record version info from the large-dataset clustering step. The process is invoked as
// CLUSTER_LARGE in this workflow; `CLUSTERS_LARGE` is not a name used anywhere else here.
// NOTE(review): assumes CLUSTER_LARGE declares a `versions` output - confirm in its module file.
ch_versions = ch_versions.mix(CLUSTER_LARGE.out.versions.first())

// Combine small and large dataset cluster results and add clean sequence paths
CLUSTER
.out
.results
Expand All @@ -111,52 +130,78 @@ workflow EPITOME {
.map{ taxa, segment, cluster, contigs, seqs, count -> [ taxa, segment, cluster, contigs, seqs, contigs.size() ] }
.set{ clusters }

// MODULE: SEQTK_SUBSEQ
// MODULE: Split clusters into multi-fasta files
SEQTK_SUBSEQ(
clusters
)
ch_versions = ch_versions.mix(SEQTK_SUBSEQ.out.versions.first())

/*
=============================================================================================================================
ALIGN SEQUENCE CLUSTERS
=============================================================================================================================
*/

// MODULE: MAFFT
// MODULE: Align clustered sequences with mafft - only performed on clusters containing more than one sequence
MAFFT(
SEQTK_SUBSEQ
.out
.sequences
.filter{ taxa, segment, cluster, seqs, count -> count > 1 }
.map{ taxa, segment, cluster, seqs, count -> [ taxa, segment, cluster, seqs ] }
)
// recombine with singletons
ch_versions = ch_versions.mix(MAFFT.out.versions.first())

// recombine with singletons (i.e., clusters containing 1 sequence)
SEQTK_SUBSEQ
.out
.sequences
.filter{ taxa, segment, cluster, seqs, count -> count == 1 }
.map{ taxa, segment, cluster, seqs, count -> [ taxa, segment, cluster, seqs ] }
.concat(MAFFT.out.fa)
.set{ alignments }


/*
=============================================================================================================================
CREATE CONSENSUS
=============================================================================================================================
*/
// MODULE: Create consensus sequences
CONSENSUS(
alignments
)
ch_versions = ch_versions.mix(CONSENSUS.out.versions.first())

// MODULE: Run blastn
/*
=============================================================================================================================
GATHER DATA ON CONSENSUS SEQUENCES
=============================================================================================================================
*/
// MODULE: Determine average nucleotide identity between consensus sequences
FASTANI_AVA (
CONSENSUS.out.fa.groupTuple(by: [0,1]).map{ taxa, segment, cluster, assembly, length -> [ taxa, segment, assembly, length.min() ] }
)

ch_versions = ch_versions.mix(FASTANI_AVA.out.versions.first())
// Classify consensus sequences based on supplied seed sequences - if supplied
if(params.seeds){
// Load the seed sheet (CSV with header; the `ref` and `assembly` columns are read here).
// checkIfExists makes a missing seed assembly fail at this point, matching how the
// main samplesheet's assemblies are checked when `manifest` is built.
Channel
    .fromPath(params.seeds)
    .splitCsv(header:true)
    .map{ tuple(it.ref, file(it.assembly, checkIfExists: true)) }
    .set{ seeds }
// MODULE: Run blastn
// MODULE: Determine average nucleotide identity between the consensus sequences and seed sequences
FASTANI_SEEDS (
CONSENSUS.out.fa.map{ taxa, segment, cluster, assembly, length -> assembly }.collect(),
seeds.map{ ref, assembly -> assembly }.collect()
)
)
ch_versions = ch_versions.mix(FASTANI_SEEDS.out.versions.first())
}


/*
=============================================================================================================================
SUMMARIZE RESULTS
=============================================================================================================================
*/
// MODULE: Create summary
SUMMARY(
CLUSTER.out.results.concat(CLUSTER_LARGE.out.results).splitText().collectFile(name: "all-clusters.csv"),
Expand All @@ -165,34 +210,16 @@ workflow EPITOME {
params.seeds ? FASTANI_SEEDS.out.ani : [],
params.seeds ? file(params.seeds) : []
)
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
ch_versions = ch_versions.mix(SUMMARY.out.versions.first())

/*
//
// MODULE: MultiQC
//
workflow_summary = WorkflowRefmaker.paramsSummaryMultiqc(workflow, summary_params)
ch_workflow_summary = Channel.value(workflow_summary)
methods_description = WorkflowRefmaker.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
ch_methods_description = Channel.value(methods_description)
ch_multiqc_files = Channel.empty()
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
MULTIQC (
ch_multiqc_files.collect(),
ch_multiqc_config.toList(),
ch_multiqc_custom_config.toList(),
ch_multiqc_logo.toList()
)
multiqc_report = MULTIQC.out.report.toList()
=============================================================================================================================
NEXTFLOW DEFAULTS
=============================================================================================================================
*/
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
}

/*
Expand Down

0 comments on commit 7d15347

Please sign in to comment.