Skip to content

Commit

Permalink
fixing validation calcs
Browse files Browse the repository at this point in the history
  • Loading branch information
DOH-JDJ0303 committed May 30, 2024
1 parent 4d31864 commit 8cf7c6c
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 8 deletions.
8 changes: 5 additions & 3 deletions bin/combine-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ col.list <- c("ID",
"TAXA",
"SEGMENT",
"REFERENCE",
"ASSEMBLY_VARIANT",
"ASSEMBLY_NC",
"ASSEMBLY_QC",
"ASSEMBLY_QC_REASON",
Expand Down Expand Up @@ -84,13 +85,14 @@ df <- df %>%
ASSEMBLY_QC_REASON = gsub(ASSEMBLY_QC_REASON, pattern = '; $', replacement = ''))
# summarize assembly variants
df <- df %>%
group_by(TAXA, SEGMENT) %>%
mutate(ASSEMBLY_VARIANT = paste0(row_number()," / ", n()))
group_by(ID, TAXA, SEGMENT) %>%
mutate(ASSEMBLY_VARIANT = paste0(row_number()," of ", n())) %>%
ungroup()

# order columns
df <- df %>%
drop_na(ID) %>%
select(col.list) %>%
select(-ASSEMBLY_NC)
# save combined summary
write.csv(x=df, file="combined-summary.csv", quote = F, row.names = F)
write.csv(x=df, file="combined-summary.csv", row.names = F)
3 changes: 2 additions & 1 deletion bin/ref-select_accurate.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ paf <- read_tsv(paf_file, col_names = F) %>%
LENGTH = 7,
START = 8,
END = 9,
ALIGN = 11) %>%
ALIGN = 11,
QUAL = 12) %>%
mutate(ASSEMBLY=str_remove_all(TARGET, pattern="(\\d+$)"),
CONTIG = str_extract(TARGET, "(\\d+$)")) %>%
select(TARGET, ASSEMBLY, CONTIG, LENGTH, START, END, ALIGN)
Expand Down
6 changes: 3 additions & 3 deletions bin/val_gather.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ S2=$(cat $ALN | grep '>' | sed -n 2p | tr -d '>\n')
# Second sequence (S2) in the alignment is treated as the "sample"
# True Positives (TP) = all sites in S2 that match S1
# True Negative (TN) = does not apply
# False Positives (FP) = sites in S2 that are absent in S1 (i.e., "A", "T", "C", or "G" in S2 and "-" in S1)
# False Positives (FP) = sites in S2 that are absent in S1 (i.e., "A", "T", "C", or "G" in S2 and "-" in S1) or sites where S2 differs from S1, excluding sites that are absent in S2 (i.e., "-" in S2)
# False Negative (FN) = sites in S1 that are absent in S2 (i.e., "A", "T", "C", or "G" in S1 and "-" in S2)
# Accuracy = 100 * TP / (TP + FP + FN)

if [[ "${METRIC}" == "accuracy" ]]
then
TP=$(cat transposed.txt | awk '$1 == $2 {print}' | wc -l)
FP=$(cat transposed.txt | awk '$1 == "-" {print}' | wc -l)
FN=$(cat transposed.txt | awk '$1 == "-" {print}' | wc -l)
FP=$(cat transposed.txt | awk '$2 != "-" && $1 != $2 {print}' | wc -l)
FN=$(cat transposed.txt | awk '$2 == "-" {print}' | wc -l)

echo "Sample,Truth,TP,FP,FN,Result" > "${S2}_result.csv"
echo -e "${S2}\t${S1}\t${TP}\t${FP}\t${FN}" | awk -v OFS=',' '{print $1,$2,$3,$4,$5, 100 * $3 / ($3+$4+$5)}' >> "${S2}_result.csv"
Expand Down
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ process {
enabled: true,
mode: "${params.publish_dir_mode}",
path: { "${params.outdir}/validation" },
pattern: "*.csv"
pattern: "*.{csv,aln}"
]
}
withName: 'REPORT' {
Expand Down
1 change: 1 addition & 0 deletions modules/local/val_gather.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ process GATHER {

output:
tuple val(metric), val(precision_type), path("*.csv"), emit: result
path "*.aln", emit: aln
path "versions.yml", emit: versions

when:
Expand Down

0 comments on commit 8cf7c6c

Please sign in to comment.