Skip to content

Commit

Permalink
cleaning up
Browse files Browse the repository at this point in the history
  • Loading branch information
DOH-JDJ0303 committed Feb 7, 2024
1 parent 3b39bb7 commit c0cd004
Show file tree
Hide file tree
Showing 18 changed files with 289 additions and 226 deletions.
65 changes: 65 additions & 0 deletions assets/nextclade-template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"alignmentParams": {
"excessBandwidth": 100,
"terminalBandwidth": 300,
"allowedMismatches": 8,
"windowSize": 40,
"minSeedCover": 0.1,
"gapAlignmentSide": "left"
},
"attributes": {
"name": "NA",
"reference accession": "NA",
"reference name": "NA"
},
"compatibility": {
"cli": "3.0.0-alpha.0",
"web": "3.0.0-alpha.0"
},
"deprecated": false,
"enabled": true,
"experimental": true,
"files": {
"pathogenJson": "pathogen.json",
"reference": "reference.fasta"
},
"official": false,
"qc": {
"frameShifts": {
"enabled": true,
"scoreWeight": 20
},
"missingData": {
"enabled": true,
"missingDataThreshold": 20000,
"scoreBias": 1000
},
"mixedSites": {
"enabled": true,
"mixedSitesThreshold": 40
},
"privateMutations": {
"cutoff": 300,
"enabled": true,
"typical": 50,
"weightLabeledSubstitutions": 6,
"weightReversionSubstitutions": 6,
"weightUnlabeledSubstitutions": 1
},
"snpClusters": {
"clusterCutOff": 10,
"enabled": false,
"scoreWeight": 10,
"windowSize": 100
},
"stopCodons": {
"enabled": true,
"scoreWeight": 20
}
},
"schemaVersion": "3.0.0",
"version": {
"updatedAt": "2024-01-16T20:31:02Z",
"tag": "NA"
}
}
11 changes: 0 additions & 11 deletions bin/assembly-stats.sh

This file was deleted.

32 changes: 31 additions & 1 deletion bin/combine-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,37 @@ df <- do.call(bind_rows, lapply(files, FUN=read.csv))
# calculate depth of coverage, if any of the samples mapped
if("BASES_MAPPED" %in% colnames(df)){
df <- df %>%
mutate(ASSEMBLY_EST_DEPTH = BASES_MAPPED / ASSEMBLY_LENGTH)
mutate(ASSEMBLY_EST_DEPTH = round(BASES_MAPPED / ASSEMBLY_LENGTH, digits = 0))
}
# Order the columns of the combined summary.
# BASES_MAPPED may be missing from the combined table (see the guard above),
# in which case ASSEMBLY_EST_DEPTH is never created. Selecting with any_of()
# keeps the columns that are present, in this order, and skips absent ones
# instead of aborting the script with a "column doesn't exist" error.
column_order <- c(
  "ID",
  "REFERENCE",
  "ASSEMBLY_QC",
  "ASSEMBLY_LENGTH",
  "ASSEMBLY_EST_DEPTH",
  "ASSEMBLY_GEN_FRAC",
  "ASSEMBLY_SUBS",
  "ASSEMBLY_DEL",
  "ASSEMBLY_INS",
  "ASSEMBLY_FRAMESHIFTS",
  "ASSEMBLY_NON_ATCG",
  "READS_MAPPED",
  "BASES_MAPPED",
  "MEAN_MAPPED_READ_LENGTH",
  "MEAN_MAPPED_READ_QUALITY",
  "PERC_PAIRED_MAPPED_READS",
  "TOTAL_READS_CLEAN",
  "TOTAL_BASES_CLEAN",
  "READ1_MEAN_LENGTH_CLEAN",
  "READ2_MEAN_LENGTH_CLEAN",
  "Q30_RATE_CLEAN",
  "TOTAL_READS_RAW",
  "TOTAL_BASES_RAW",
  "READ1_MEAN_LENGTH_RAW",
  "READ2_MEAN_LENGTH_RAW",
  "Q30_RATE_RAW",
  "SPECIES_SUMMARY"
)
df <- df %>%
  select(any_of(column_order))

# save combined summary
write.csv(
  x = df,
  file = "combined-summary.csv",
  quote = FALSE,
  row.names = FALSE
)
2 changes: 1 addition & 1 deletion bin/fastp2tbl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ read1_mean_length_after=$(jq '.summary.after_filtering.read1_mean_length' ${fast
read2_mean_length_after=$(jq '.summary.after_filtering.read2_mean_length' ${fastp_json})
q30_rate_after=$(jq '.summary.after_filtering.q30_rate' ${fastp_json})

echo "total_reads_before_fastp,total_bases_before_fastp,read1_mean_length_before_fastp,read2_mean_length_before_fastp,q30_rate_before_fastp,total_reads_after_fastp,total_bases_after_fastp,read1_mean_length_after_fastp,read2_mean_length_after_fastp,q30_rate_after_fastp"
echo "total_reads_raw,total_bases_raw,read1_mean_length_raw,read2_mean_length_raw,q30_rate_raw,total_reads_clean,total_bases_clean,read1_mean_length_clean,read2_mean_length_clean,q30_rate_clean"
echo "${total_reads_before},${total_bases_before},${read1_mean_length_before},${read2_mean_length_before},${q30_rate_before},${total_reads_after},${total_bases_after},${read1_mean_length_after},${read2_mean_length_after},${q30_rate_after}"
30 changes: 20 additions & 10 deletions bin/summaryline.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ args <- commandArgs(trailingOnly=T)
fastp2tbl <- args[1]
sm_summary <- args[2]
samtoolstats2tbl <- args[3]
assembly_stats <- args[4]
nextclade <- args[4]
sample <- args[5]
ref <- args[6]
snvs <- args[7]


#----- Sample ID & Reference
df.summaryline <- data.frame(ID = sample, REFERENCE = ref)
Expand All @@ -34,13 +32,25 @@ if(file.exists(samtoolstats2tbl)){
df.summaryline <- cbind(df.summaryline, df.samtoolstats2tbl)
}else(cat("\nNo mapping stats provided\n"))

#----- Assembly Stats -----#
if(file.exists(assembly_stats)){
cat(snvs)
df.assembly_stats <- read.csv(assembly_stats) %>%
mutate(assembly_snvs = as.numeric(snvs),
assembly_nuc_id = (assembly_length - assembly_snvs) / assembly_length)
df.summaryline <- cbind(df.summaryline, df.assembly_stats)
#----- Nextclade -----#
# Read the Nextclade TSV, keep the assembly QC metrics under the summary's
# ASSEMBLY_* column names, and append them to the summary line.
if (file.exists(nextclade)) {
  # select() renames while it selects, so the columns come out in the order
  # listed here. ASSEMBLY_LENGTH is taken under its existing name, i.e. the
  # TSV is expected to already contain that column -- TODO confirm where it
  # is added.
  df.nextclade <- read_tsv(nextclade) %>%
    select(
      ASSEMBLY_QC = qc.overallStatus,
      ASSEMBLY_SUBS = totalSubstitutions,
      ASSEMBLY_DEL = totalDeletions,
      ASSEMBLY_INS = totalInsertions,
      ASSEMBLY_FRAMESHIFTS = totalFrameShifts,
      ASSEMBLY_NON_ATCG = totalNonACGTNs,
      ASSEMBLY_GEN_FRAC = coverage,
      ASSEMBLY_LENGTH
    )
  df.summaryline <- cbind(df.summaryline, df.nextclade)
} else {
  # Braces rather than parentheses: `else (cat(...))` makes cat()'s NULL
  # return value visible, so Rscript prints a stray "NULL" after the message.
  cat("\nNo assembly stats provided\n")
}

#---- Sourmash Species Summary ----#
Expand Down
57 changes: 32 additions & 25 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ process {
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/trmd_reads" }
path: { "${params.outdir}/${meta.id}/reads/" },
pattern: "none"
]
}
withName: 'SHOVILL' {
Expand All @@ -73,31 +74,46 @@ process {
path: { "${params.outdir}/${meta.id}/qc/" }
]
}
withName: 'SOURMASH_SKETCH' {
withName: 'SM_SKETCH_REF' {
ext.args = "dna --param-string 'scaled=1000,k=21,abund'"
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/taxonomy/" }
path: { "${params.outdir}/${meta.id}/taxonomy/" },
pattern: "none"
]
}
withName: 'SOURMASH_SEARCH' {
ext.args = ""
withName: 'SM_SKETCH_SAMPLE' {
ext.args = "dna --param-string 'scaled=1000,k=21,abund'"
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/taxonomy/" }
path: { "${params.outdir}/${meta.id}/taxonomy/" },
pattern: "none"
]
}
withName: 'SOURMASH_GATHER' {
withName: 'SM_GATHER_SELECT' {
ext.args = "--threshold-bp 500 -k 21"
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/taxonomy/" }
path: { "${params.outdir}/${meta.id}/taxonomy/" },
pattern: "*.csv.gz",
saveAs: { "${meta.id}.fast-ref.csv.gz" }
]
}
withName: 'SM_GATHER_SAMPLE' {
ext.args = "--threshold-bp 500 -k 21"
ext.when = { }
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/taxonomy/" },
pattern: "*.csv.gz",
saveAs: { "${meta.id}.all-taxa.csv.gz" }
]
}
withName: 'MINIMAP2_ALIGN' {
Expand All @@ -106,7 +122,9 @@ process {
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/taxonomy/" }
path: { "${params.outdir}/${meta.id}/taxonomy/" },
pattern: "*.paf",
saveAs: { "${meta.id}.acc-ref.paf" }
]
}
withName: 'SUMMARIZE_TAXA' {
Expand All @@ -123,7 +141,7 @@ process {
ext.when = { }
publishDir = [
[
path: { "${params.outdir}/${meta.id}/assembly/" },
path: { "${params.outdir}/${meta.id}/bam/" },
pattern: "*.bam",
mode: 'copy'
],
Expand All @@ -149,7 +167,7 @@ process {
publishDir = [
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/${meta.id}/assembly/" },
path: { "${params.outdir}/${meta.id}/reads/" },
pattern: "*.gz"
]
}
Expand All @@ -169,24 +187,13 @@ process {
]
]
}
withName: 'MAFFT' {
ext.args = ""
ext.when = { }
publishDir = [
[
path: { "${params.outdir}/${meta.id}/assembly/" },
pattern: "*.txt",
mode: 'copy'
]
]
}
withName: 'SNPSITES' {
withName: 'NEXTCLADE_RUN' {
ext.args = ""
ext.when = { }
publishDir = [
[
path: { "${params.outdir}/${meta.id}/assembly/" },
pattern: "*.vcf",
path: { "${params.outdir}/${meta.id}/qc/" },
pattern: { "${meta.id}-{ref_id}.tsv" },
mode: 'copy'
]
]
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"nextclade/run": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"samtools/fastq": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand Down
12 changes: 2 additions & 10 deletions modules/local/create-summaryline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ process SUMMARYLINE {
container "docker.io/jdj0303/waphl-viral-base:1.0.0"

input:
tuple val(meta), val(ref_id), path(samtoolstats2tbl), path(assembly_stats), path(vcf), path(fastp2tbl), path(sm_summary)
tuple val(meta), val(ref_id), path(samtoolstats2tbl), path(nextclade), path(fastp2tbl), path(sm_summary)

output:
tuple val(meta), path("*.summaryline.csv"), emit: summaryline
Expand All @@ -16,16 +16,8 @@ process SUMMARYLINE {
prefix = task.ext.prefix ?: "${meta.id}"
script: // This script is bundled with the pipeline, in nf-core/waphlviral/bin/
"""
# determine number of SNVs
if [ -f "${vcf}" ]
then
snvs=\$(cat ${vcf} | grep -v "#" | wc -l)
else
snvs="NA"
fi
# create summaryline
summaryline.R "${fastp2tbl}" "${sm_summary}" "${samtoolstats2tbl}" "${assembly_stats}" "${prefix}" "${ref_id}" "\$(echo \$snvs)"
summaryline.R "${fastp2tbl}" "${sm_summary}" "${samtoolstats2tbl}" "${nextclade}" "${prefix}" "${ref_id}"
# rename using prefix and reference
mv summaryline.csv "${prefix}-${ref_id}.summaryline.csv"
"""
Expand Down
4 changes: 0 additions & 4 deletions modules/local/ivar_consensus.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ process IVAR_CONSENSUS {

output:
tuple val(meta), val(ref_id), path('*.fa'), emit: consensus
tuple val(meta), val(ref_id), path('*.csv'), emit: stats
path "versions.yml", emit: versions

when:
Expand All @@ -34,9 +33,6 @@ process IVAR_CONSENSUS {
-q ${params.ivar_q} \\
${args}
# gather stats
assembly-stats.sh ${prefix}-${ref_id}.fa > ${prefix}-${ref_id}.assembly-stats.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(samtools 2>&1 | grep "Version" | cut -f 2 -d ' ')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: sourmash_gather
name: nextclade_run
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::sourmash=4.8.4
- bioconda::nextclade=2.12.0
Loading

0 comments on commit c0cd004

Please sign in to comment.