Skip to content

Commit

Permalink
Merge pull request #2 from Kincekara/dev
Browse files Browse the repository at this point in the history
v 1.1.0
  • Loading branch information
Kincekara authored Sep 7, 2023
2 parents 511ccb6 + 2955502 commit ff4155d
Show file tree
Hide file tree
Showing 10 changed files with 141 additions and 62 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ The following programs and tools are used in the C-BIRD pipeline.

| Tools | Version | Comments |
| --- | --- | --- |
| [FastP](https://github.com/OpenGene/fastp) | 0.23.2 | QC, adapter removal, quality filtering and trimming |
| [BBTools](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/) | 38.98 | phiX removal & optional normalization |
| [Kraken2](https://github.com/DerrickWood/kraken2) | 2.1.2 | Taxonomic profiling & contamination check |
| [FastP](https://github.com/OpenGene/fastp) | 0.23.4 | QC, adapter removal, quality filtering and trimming |
| [BBTools](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/) | 39.01 | phiX removal & optional normalization |
| [Kraken2](https://github.com/DerrickWood/kraken2) | 2.1.3 | Taxonomic profiling & contamination check |
| [Bracken](https://github.com/jenniferlu717/Bracken) | 2.8 | Abundance estimation |
| [SPAdes](https://github.com/ablab/spades) | 3.15.5 | *De novo* assembly |
| [Mash](https://github.com/marbl/Mash) | 2.3 | Bacterial identification |
| [QUAST](https://github.com/ablab/quast) | 5.0.2 | Genome assembly evaluation |
| [BUSCO](https://gitlab.com/ezlab/busco/-/tree/master) | 5.4.3 | Genomic data quality assessment |
| [QUAST](https://github.com/ablab/quast) | 5.2.0 | Genome assembly evaluation |
| [BUSCO](https://gitlab.com/ezlab/busco/-/tree/master) | 5.4.7 | Genomic data quality assessment |
| [mlst](https://github.com/tseemann/mlst) | 2.22.0 | MLST typing |
| [AMRFinderPlus](https://github.com/ncbi/amr) | 3.10.40 | AMR gene identification |
| [BLAST+](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html)| 2.13.0 | Target gene search |
| [AMRFinderPlus](https://github.com/ncbi/amr) | 3.11.18 | AMR gene identification |
| [BLAST+](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html)| 2.14.0 | Target gene search |
| [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder/src/master/) | 2.1.6 | Plasmid detection |
| Cbird-Util | 0.8 | Individual summary report generation |

Expand Down
172 changes: 125 additions & 47 deletions tasks/task_amrfinderplus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@ task amrfinderplus_nuc {
input {
File assembly
String samplename
File amr_db
# Parameters
# --ident_min Minimum DNA %identity [0-1]; default is 0.9 (90%) or curated threshold if it exists
# --mincov Minimum DNA %coverage [0-1]; default is 0.5 (50%)
String? organism # make optional?
File amr_db
String bracken_organism
String? mash_organism
Float? minid
Float? mincov
Int cpu = 4
String docker = "kincekara/amrfinder:3.10.40"
String docker = "kincekara/amrfinder:3.11.18"
}
command <<<
# logging info
Expand All @@ -21,49 +19,129 @@ task amrfinderplus_nuc {

mkdir db
tar -C ./db/ -xzvf ~{amr_db}

### set $amrfinder_organism BASH variable based on gambit_predicted_taxon or user-defined input string
### final variable has strict syntax/spelling based on list from amrfinder --list_organisms
# there may be other Acinetobacter species to add later, like those in the A. baumannii-calcoaceticus species complex
if [[ "~{organism}" == *"Acinetobacter"*"baumannii"* ]]; then
amrfinder_organism="Acinetobacter_baumannii"
elif [[ "~{organism}" == *"Campylobacter"*"coli"* ]] || [[ "~{organism}" == *"Campylobacter"*"jejuni"* ]]; then
amrfinder_organism="Campylobacter"
elif [[ "~{organism}" == *"Clostridioides"*"difficile"* ]]; then
amrfinder_organism="Clostridioides_difficile"
elif [[ "~{organism}" == *"Enterococcus"*"faecalis"* ]]; then
amrfinder_organism="Enterococcus_faecalis"
elif [[ "~{organism}" == *"Enterococcus"*"faecium"* ]] || [[ "~{organism}" == *"Enterococcus"*"hirae"* ]]; then
amrfinder_organism="Enterococcus_faecium"
# should capture all Shigella and Escherichia species
elif [[ "~{organism}" == *"Escherichia"* ]] || [[ "~{organism}" == *"Shigella"* ]]; then

# select mash organism if avalible
if [[ "~{mash_organism}" != "" ]]; then
organism="~{mash_organism}"
else
organism="~{bracken_organism}"
fi
echo "organism is set to:" $organism

## curated organisms ##
# A. baumannii-calcoaceticus species complex
declare -a abcc=(
"Acinetobacter baumannii"
"Acinetobacter calcoaceticus"
"Acinetobacter lactucae"
"Acinetobacter nosocomialis"
"Acinetobacter pittii"
"Acinetobacter seifertii"
)
# Burkholderia cepacia species complex
declare -a bcc=(
"Burkholderia aenigmatica"
"Burkholderia ambifaria"
"Burkholderia anthina"
"Burkholderia arboris"
"Burkholderia catarinensis"
"Burkholderia cenocepacia"
"Burkholderia cepacia"
"Burkholderia cf. cepacia"
"Burkholderia contaminans"
"Burkholderia diffusa"
"Burkholderia dolosa"
"Burkholderia lata"
"Burkholderia latens"
"Burkholderia metallica"
"Burkholderia multivorans"
"Burkholderia orbicola"
"Burkholderia paludis"
"Burkholderia pseudomultivorans"
"Burkholderia puraquae"
"Burkholderia pyrrocinia"
"Burkholderia semiarida"
"Burkholderia seminalis"
"Burkholderia sola"
"Burkholderia stabilis"
"Burkholderia stagnalis"
"Burkholderia territorii"
"Burkholderia ubonensis"
"Burkholderia vietnamiensis"
)
# Burkholderia pseudomallei species complex
declare -a bpc=(
"Burkholderia humptydooensis"
"Burkholderia mallei"
"Burkholderia mayonis"
"Burkholderia oklahomensis"
"Burkholderia pseudomallei"
"Burkholderia savannae"
"Burkholderia singularis"
"Burkholderia thailandensis"
)
# other species
declare -a taxa=(
"Citrobacter freundii"
"Clostridioides difficile"
"Enterobacter asburiae"
"Enterobacter cloacae"
"Enterococcus faecalis"
"Klebsiella oxytoca"
"Neisseria meningitidis"
"Neisseria gonorrhoeae"
"Pseudomonas aeruginosa"
"Serratia marcescens"
"Staphylococcus aureus"
"Staphylococcus pseudintermedius"
"Streptococcus agalactiae"
"Streptococcus pyogenes"
"Vibrio cholerae"
)

# check organism in curated organism list
genus=$(echo $organism | cut -d " " -f1)
taxon=$(echo $organism | cut -d " " -f1,2)

if [[ "$genus" == "Acinetobacter" ]]; then
for i in "${abcc[@]}"; do
if [[ "$taxon" == "$i" ]]; then
amrfinder_organism="Acinetobacter_baumannii"
break
fi
done
elif [[ "$genus" == "Burkholderia" ]]; then
for i in "${bcc[@]}"; do
if [[ "$taxon" == "$i" ]]; then
amrfinder_organism="Burkholderia_cepacia"
break
fi
done
for i in "${bpc[@]}"; do
if [[ "$taxon" == "$i" ]]; then
amrfinder_organism="Burkholderia_pseudomallei"
break
fi
done
elif [[ "$genus" == "Shigella" ]] || [[ "$genus" == "Escherichia" ]]; then
amrfinder_organism="Escherichia"
# add other Klebsiella species later? Cannot use K. oxytoca as per amrfinderplus wiki
elif [[ "~{organism}" == *"Klebsiella"*"aerogenes"* ]] || [[ "~{organism}" == *"Klebsiella"*"pneumoniae"* ]]; then
amrfinder_organism="Klebsiella"
# because some people spell the species 'gonorrhea' differently
elif [[ "~{organism}" == *"Neisseria"*"gonorrhea"* ]] || [[ "~{organism}" == *"Neisseria"*"gonorrhoeae"* ]] || [[ "~{organism}" == *"Neisseria"*"meningitidis"* ]]; then
amrfinder_organism="Neisseria"
elif [[ "~{organism}" == *"Pseudomonas"*"aeruginosa"* ]]; then
amrfinder_organism="Pseudomonas_aeruginosa"
# pretty broad, could work on Salmonella bongori and other species
elif [[ "~{organism}" == *"Salmonella"* ]]; then
elif [[ "$genus" == "Salmonella" ]]; then
amrfinder_organism="Salmonella"
elif [[ "~{organism}" == *"Staphylococcus"*"aureus"* ]]; then
amrfinder_organism="Staphylococcus_aureus"
elif [[ "~{organism}" == *"Staphylococcus"*"pseudintermedius"* ]]; then
amrfinder_organism="Staphylococcus_pseudintermedius"
elif [[ "~{organism}" == *"Streptococcus"*"agalactiae"* ]]; then
amrfinder_organism="Streptococcus_agalactiae"
elif [[ "~{organism}" == *"Streptococcus"*"pneumoniae"* ]] || [[ "~{organism}" == *"Streptococcus"*"mitis"* ]]; then
elif [[ "$taxon" == "Campylobacter coli" ]] || [[ "$taxon" == "Campylobacter jejuni" ]]; then
amrfinder_organism="Campylobacter"
elif [[ "$taxon" == "Enterococcus faecium" ]] || [[ "$taxon" == "Enterococcus hirae" ]]; then
amrfinder_organism="Enterococcus_faecium"
elif [[ "$taxon" == "Klebsiella pneumoniae" ]] || [[ "$taxon" == "Klebsiella aerogenes" ]]; then
amrfinder_organism="Klebsiella_pneumoniae"
elif [[ "$taxon" == "Streptococcus pneumoniae" ]] || [[ "$taxon" == "Streptococcus mitis" ]]; then
amrfinder_organism="Streptococcus_pneumoniae"
elif [[ "~{organism}" == *"Streptococcus"*"pyogenes"* ]]; then
amrfinder_organism="Streptococcus_pyogenes"
elif [[ "~{organism}" == *"Vibrio"*"cholerae"* ]]; then
amrfinder_organism="Vibrio_cholerae"
else
echo "Either Bracken predicted taxon is not supported by NCBI-AMRFinderPlus or the user did not supply an organism as input."
echo "Skipping the use of amrfinder --organism optional parameter."
else
for i in "${taxa[@]}"; do
if [[ "$taxon" == "$i" ]]; then
amrfinder_organism=${taxon// /_}
break
fi
done
fi

# checking bash variable
Expand All @@ -75,7 +153,7 @@ task amrfinderplus_nuc {
# send STDOUT/ERR to log file for capturing database version
amrfinder --plus \
-d ./db/ \
--organism ${amrfinder_organism} \
--organism "${amrfinder_organism}" \
~{'--name ' + samplename} \
~{'--nucleotide ' + assembly} \
~{'-o ' + samplename + '_amrfinder_all.tsv'} \
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_bbtools.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ task assembly_prep {
Int min_depth = 5
Int read_threshold = 8000000
Int memory = 8
String docker = "kincekara/bbduk:38.98"
String docker = "kincekara/bbtools:39.01"
}

command <<<
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_blast.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ task tblastn {
Float evalue = 0.001
Int max_hsps = 1
Int percent_identity = 90
String docker = "staphb/blast:2.13.0"
String docker = "staphb/blast:2.14.0"
}

command <<<
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_busco.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ task busco {
File assembly
File busco_db
String samplename
String docker = "kincekara/busco:5.4.3"
String docker = "kincekara/busco:5.4.7"
Int? memory = 16
Int? cpu = 4
}
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_fastp.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ task fastp_pe {
File read1
File read2
File? adapters
String docker = "kincekara/fastp:0.23.2"
String docker = "kincekara/fastp:0.23.4"
String samplename
Int? leading = 1
Int? front_mean_quality = 10
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_quast.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ task quast {
input {
File assembly
String samplename
String docker= "staphb/quast:5.0.2"
String docker= "kincekara/quast:5.2.0-lite"
}
command <<<
# version
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_taxonomy.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ task profile {
File read2
File kraken2_db
String samplename
String docker = "kincekara/kraken-bracken:k2.1.2-b2.8"
String docker = "kincekara/kraken-bracken:k2.1.3-b2.8"
Int? bracken_read_len = 100
Int? bracken_threshold = 10
String? min_hit_groups = 3
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_version.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ task version_capture {
volatile: true
}
command {
cbird_version="C-BIRD v1.0.0"
cbird_version="C-BIRD v1.1.0"
~{default='' 'export TZ=' + timezone}
date +"%Y-%m-%d" > TODAY
echo "$cbird_version" > CBIRD_VERSION
Expand Down
3 changes: 2 additions & 1 deletion workflows/wf_c-bird.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ workflow cbird_workflow {
samplename = samplename,
assembly = assembly.scaffolds_trim,
amr_db = amrfinder_database,
organism = profile.bracken_taxon
bracken_organism = profile.bracken_taxon,
mash_organism = predict_taxon.taxon
}

call plasmid.plasmidfinder {
Expand Down

0 comments on commit ff4155d

Please sign in to comment.