Skip to content

Commit

Permalink
align: max_errors can take either an integer (absolute) or float (rel…
Browse files Browse the repository at this point in the history
…ative) value
  • Loading branch information
proycon committed Oct 2, 2024
1 parent 578c6b3 commit 1245082
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 4 deletions.
38 changes: 35 additions & 3 deletions src/align.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use stam::*;

use seal::pair::{AlignmentSet, InMemoryAlignmentMatrix, NeedlemanWunsch, SmithWaterman, Step};
use std::str::FromStr;

const TRIM_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];

Expand All @@ -22,13 +23,14 @@ pub struct AlignmentConfig {
/// Only allow for alignments that consist of one contiguous text selection on either side. This is a so-called simple transposition.
pub simple_only: bool,

/// The minimal number of characters that must be aligned (absolute number) for a transposition to be valid
/// The minimal number of characters that must be aligned (absolute number) for a transposition/translation to be valid
pub minimal_align_length: usize,

/// The maximum number of errors that may occur (absolute number) for a transposition to be valid, each insertion/deletion counts as 1. This is more efficient than `minimal_align_length`
/// The maximum number of errors (max edit distance) that may occur for a transposition to be valid.
/// This is either an absolute integer or a relative ratio between 0.0 and 1.0, interpreted in relation to the length of the first text in the alignment.
/// In other words, this represents the number of characters in the search string that may be missed when matching in the larger text.
/// The transposition itself will only consist of fully matching parts, use `grow` if you want to include non-matching parts.
pub max_errors: Option<usize>,
pub max_errors: Option<AbsoluteOrRelative>,

/// Grow aligned parts into larger alignments by incorporating non-matching parts. This will return translations rather than transpositions.
/// You'll want to set `max_errors` in combination with this one to prevent very low-quality alignments.
Expand Down Expand Up @@ -375,6 +377,7 @@ pub fn align_texts<'store>(
}

if let Some(max_errors) = config.max_errors {
let max_errors = max_errors.as_absolute(seq1.len());
if let Some(last) = last {
//everything after the last match (that was not matched, counts as an error)
errors += seq1.len() - last;
Expand Down Expand Up @@ -574,3 +577,32 @@ fn print_alignment<'a>(
}
);
}

/// A quantity expressed either as an absolute count or as a ratio of some total.
///
/// Call [`AbsoluteOrRelative::as_absolute`] with the total to resolve either form
/// to a concrete count.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum AbsoluteOrRelative {
/// An absolute count that is used as-is.
Absolute(usize),
/// A ratio that is multiplied by a total to obtain an absolute count.
Relative(f64),
}

impl AbsoluteOrRelative {
    /// Resolves this value to an absolute count.
    ///
    /// * `Absolute(n)` yields `n` unchanged; `total` is ignored.
    /// * `Relative(r)` yields `r * total`, rounded to the nearest integer.
    ///
    /// The float-to-integer conversion uses an `as` cast, which saturates:
    /// negative or NaN products become `0`, and products beyond `usize::MAX`
    /// become `usize::MAX`.
    pub fn as_absolute(self, total: usize) -> usize {
        let ratio = match self {
            // Absolute counts need no scaling.
            Self::Absolute(count) => return count,
            Self::Relative(ratio) => ratio,
        };
        let scaled = ratio * (total as f64);
        scaled.round() as usize
    }
}

/// Parses a string into an [`AbsoluteOrRelative`].
///
/// * A string that parses as `usize` (e.g. `"5"`) becomes `Absolute`.
/// * Otherwise, a string that parses as `f64` (e.g. `"0.25"`) becomes `Relative`,
///   provided the value is a ratio in the inclusive range `0.0..=1.0`.
///
/// # Errors
///
/// Returns a static message when the input is neither an integer nor a float,
/// or when the float is NaN, infinite, negative or greater than `1.0`. Such
/// values would otherwise silently resolve to a count of `0` or to an
/// effectively unlimited bound.
impl FromStr for AbsoluteOrRelative {
    type Err = &'static str;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Integers take precedence, so "1" is an absolute count and "1.0" is a ratio.
        if let Ok(count) = s.parse::<usize>() {
            return Ok(Self::Absolute(count));
        }
        match s.parse::<f64>() {
            // The range check also rejects NaN and infinities: comparisons with NaN are false.
            Ok(ratio) if (0.0..=1.0).contains(&ratio) => Ok(Self::Relative(ratio)),
            Ok(_) => Err("Relative value must be a ratio between 0.0 and 1.0"),
            Err(_) => Err(
                "Value must be either an integer (absolute) or a floating point value (relative)",
            ),
        }
    }
}
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ fn run<W: Write>(store: &mut AnnotationStore, writer: &mut W, rootargs: &ArgMat
simple_only: args.is_present("simple-only"),
trim: args.is_present("trim"),
max_errors: if args.is_present("max-errors") {
Some(args.value_of("max-errors").unwrap().parse().expect("value for --max-errors must be integer"))
Some(args.value_of("max-errors").unwrap().parse().expect("value for --max-errors must be integer (absolute) or float (relative)"))
} else {
None
},
Expand Down

0 comments on commit 1245082

Please sign in to comment.