Skip to content

Commit

Permalink
Merge pull request #3 from DOH-HNH0303/jdj_dev
Browse files Browse the repository at this point in the history
summary now functional
  • Loading branch information
DOH-HNH0303 authored Aug 21, 2024
2 parents 997a652 + 65cef8c commit 006ac28
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 28 deletions.
16 changes: 6 additions & 10 deletions bin/plot.R
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#!/usr/bin/env Rscript


install.packages("argparser")
library(tidyverse)
library(broom)
library(ggplot2)
library("magrittr")
library(dplyr)
library(readr)
library(tidyr)
library(argparser)


p <- arg_parser("create visuals for Varcraft summary")
# Add command line arguments

Expand All @@ -27,9 +24,9 @@ step<-argv$step
df <- read_csv("summary.csv") %>%
mutate(y = 100*n/length)

cash <- read_tsv("results.txt",col_names = c("name", "year_a", "ps"))
#cash <- read_tsv("results.txt",col_names = c("name", "year_a", "ps"))

mash <- read_tsv("results.txt", col_names = c("sample", "mock", "true_ani")) %>%
mash <- read_tsv("mash.txt", col_names = c("sample", "mock", "true_ani")) %>%
mutate(x=100*(1-true_ani)) %>%
select(sample, mock, true_ani, x) #list column name to select
library(stringr)
Expand Down Expand Up @@ -66,7 +63,6 @@ mash <- mash %>%

df <- merge(df, mash, by=c("sample","ani","rep")) %>% unique()


p <- ggplot(df, aes(x=100*ani, y=true_ani))+
geom_point()+
labs(x="Predicted ANI (Mash)", y = "Actual ANI")+
Expand Down
63 changes: 63 additions & 0 deletions bin/summary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env Rscript

library(tidyverse)
library(broom)

# load dataframe
df <- read_csv("summary.csv") %>%
mutate(ani = 100*ani,
y = 100*n/length)

mash <- read_tsv("mash.txt", col_names = c("sample", "mock", "true_ani")) %>%
separate_wider_delim(cols = c("mock"), delim = '_', names = c("id","ani","rep")) %>%
mutate(true_ani = as.numeric(true_ani),
sample = str_remove_all(id, pattern = '.ani'),
ani = str_remove_all(ani, pattern = '.rep'),
ani = 100*as.numeric(str_replace_all(ani, pattern = '-', replacement = '.')),
rep = str_remove_all(rep, pattern = ".fasta")
) %>%
select(sample, ani, rep, true_ani)

df <- merge(df, mash, by=c("sample","ani","rep"))

p <- ggplot(df, aes(x=ani, y=true_ani))+
geom_point()+
labs(x="Predicted ANI (Mash)", y = "Actual ANI")+
theme_bw()
ggsave(plot = p, filename = "pred-vs-ani_act.jpg", dpi = 300, width = 5, height = 4)
#
p <- df %>%
mutate(diff = ani-true_ani) %>%
ggplot(aes(x=ani, y=diff))+
geom_point()+
labs(x="Predicted ANI", y = "Difference (Actual ANI - Predicted ANI)")+
theme_bw()
ggsave(plot = p, filename = "ani_diff.jpg", dpi = 300, width = 5, height = 4)

# https://douglas-watson.github.io/post/2018-09_exponential_curve_fitting/
fit <- nls(y ~ SSasymp(true_ani, yf, y0, log_alpha), data = df)
sink("summary.nls.txt")
summary(fit)
sink()

df.fitted <- augment(fit, newdata = data.frame(true_ani=seq(from=min(df$ani), to=max(df$ani), by = 0.0001)))
write.csv(x=df.fitted, file="df_fitted.csv", quote=F, row.names=F)

threshold <- df.fitted %>%
filter( abs(`.fitted`) == min(abs(`.fitted`))) %>%
filter( true_ani == max(true_ani)) %>%
.$true_ani %>%
as.character()
writeLines(threshold, "threshold.txt")
# output funtion from r script

# Threshold @ 0: 96.0936
cat(paste0("Threshold @ 0: ",threshold, sep = "\n"))

p <- ggplot(data = augment(fit), aes(x=as.numeric(true_ani), y=as.numeric(y)))+
geom_point()+
geom_line(data = df.fitted, aes(x=as.numeric(true_ani), y=as.numeric(.fitted)))+
geom_vline(xintercept = as.numeric(threshold), linetype="dashed", color = "red")+
theme_bw()+
labs(x="Sample-Reference % ANI", y = "% Ambiguous Bases (N) in Consensus")
ggsave(plot = p, filename = "nls.jpg", dpi = 300, width = 5, height = 4)
3 changes: 1 addition & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ process {
stageInMode = 'copy'
publishDir = [
[
path: { "${params.outdir}/" },
pattern: "none",
path: { "${params.outdir}/summary/" },
mode: 'copy'
],
]
Expand Down
20 changes: 8 additions & 12 deletions modules/local/summary.nf
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
process SUMMARY {
label 'process_medium'

container 'docker.io/hnh0303/varcraft-tool:1.0'
container 'docker.io/jdj0303/epitome-base:1.0'

input:
tuple val(sample), path(assemblies), path(ref), path(summary)
//path assemblies
//val sample
//path summary
path assemblies
path mash

output:
path "${sample}_summary.csv", emit: summary
path "df_fitted.csv", emit: df_fitted
path "threshold.txt", emit: threshold
path "*.jpg", emit: plots

when:
task.ext.when == null || task.ext.when

script:
"""
cat ${summary}
ls ${assemblies} | wc -l > assembly_count.txt
cat ${assemblies} > all.fa
echo ${sample}
summary.sh all.fa
mv summary.csv ${sample}_summary.csv
summary.sh ${assemblies}
summary.R
"""
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ params {
input = null
max_ani = 0.2
min_ani = 0
step = 0.1
step = 0.05
rep = 1

// References
Expand Down
5 changes: 2 additions & 3 deletions workflows/varcraft.nf
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,8 @@ workflow VARCRAFT {
IVAR_CONSENSUS.out.consensus.collect().view()
SUMMARY (

IVAR_CONSENSUS.out.as_sample//.collect()
.combine(manifest.map{ sample, assembly, fastq_1, fastq_2 -> [ sample, assembly ] })
.combine(MASH.out.mash_summary).collect().view()
IVAR_CONSENSUS.out.consensus.splitFasta().collectFile(name: 'all.fa'),
MASH.out.mash_summary.map{ sample, summary -> summary }.splitText().collectFile(name: 'mash.txt')


)
Expand Down

0 comments on commit 006ac28

Please sign in to comment.