Skip to content

Commit

Permalink
Change StringSubstringLength to handle excessive lengths (#494)
Browse files Browse the repository at this point in the history
Completes #492 

- StringSubstringLength now handles excessive lengths
- Both SUBSTR & SUBSTRING now use 1-based indexing
  • Loading branch information
mmarx authored Jun 11, 2024
2 parents 9c8d0b2 + 8a2122b commit f10dcd0
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 19 deletions.
48 changes: 34 additions & 14 deletions nemo-physical/src/function/definitions/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,13 @@ impl BinaryFunction for StringSubstring {
let string = parameter_first.to_plain_string()?;
let start = usize::try_from(parameter_second.to_u64()?).ok()?;

if start >= string.len() {
if start > string.len() || start < 1 {
return None;
}

Some(AnyDataValue::new_plain_string(string[start..].to_string()))
Some(AnyDataValue::new_plain_string(
string[(start - 1)..].to_string(),
))
}

fn type_propagation(&self) -> FunctionTypePropagation {
Expand Down Expand Up @@ -399,8 +401,8 @@ impl UnaryFunction for StringLowercase {
/// and an integer value as the second and third parameter.
///
/// Return a string containing the characters from the first parameter,
/// starting from the position given by the second paramter
/// with the length given by the third parameter.
/// starting from the position given by the second parameter
/// with the maximum length given by the third parameter.
///
/// Returns `None` if the type requirements from above are not met.
#[derive(Debug, Copy, Clone)]
Expand All @@ -414,15 +416,21 @@ impl TernaryFunction for StringSubstringLength {
) -> Option<AnyDataValue> {
let string = parameter_first.to_plain_string()?;
let start = usize::try_from(parameter_second.to_u64()?).ok()?;
let length = usize::try_from(parameter_third.to_u64()?).ok()?;

if start + length > string.len() {
if start > string.len() || start < 1 {
return None;
}

Some(AnyDataValue::new_plain_string(
string[start..(start + length)].to_string(),
))
let length = usize::try_from(parameter_third.to_u64()?).ok()?;
let end = start + length;

let result = if end > string.len() {
string[(start - 1)..].to_string()
} else {
string[(start - 1)..(end - 1)].to_string()
};

Some(AnyDataValue::new_plain_string(result))
}

fn type_propagation(&self) -> FunctionTypePropagation {
Expand All @@ -444,37 +452,49 @@ mod test {
fn test_string_substring_length() {
let string = AnyDataValue::new_plain_string("abc".to_string());

let start1 = AnyDataValue::new_integer_from_u64(0);
let start1 = AnyDataValue::new_integer_from_u64(1);
let length1 = AnyDataValue::new_integer_from_u64(1);
let result1 = AnyDataValue::new_plain_string("a".to_string());
let actual_result1 = StringSubstringLength.evaluate(string.clone(), start1, length1);
assert!(actual_result1.is_some());
assert_eq!(result1, actual_result1.unwrap());

let start2 = AnyDataValue::new_integer_from_u64(1);
let start2 = AnyDataValue::new_integer_from_u64(2);
let length2 = AnyDataValue::new_integer_from_u64(1);
let result2 = AnyDataValue::new_plain_string("b".to_string());
let actual_result2 = StringSubstringLength.evaluate(string.clone(), start2, length2);
assert!(actual_result2.is_some());
assert_eq!(result2, actual_result2.unwrap());

let start3 = AnyDataValue::new_integer_from_u64(2);
let start3 = AnyDataValue::new_integer_from_u64(3);
let length3 = AnyDataValue::new_integer_from_u64(1);
let result3 = AnyDataValue::new_plain_string("c".to_string());
let actual_result3 = StringSubstringLength.evaluate(string.clone(), start3, length3);
assert!(actual_result3.is_some());
assert_eq!(result3, actual_result3.unwrap());

let start4 = AnyDataValue::new_integer_from_u64(3);
let start4 = AnyDataValue::new_integer_from_u64(4);
let length4 = AnyDataValue::new_integer_from_u64(1);
let actual_result4 = StringSubstringLength.evaluate(string.clone(), start4, length4);
assert!(actual_result4.is_none());

let start5 = AnyDataValue::new_integer_from_u64(0);
let start5 = AnyDataValue::new_integer_from_u64(1);
let length5 = AnyDataValue::new_integer_from_u64(3);
let result5 = AnyDataValue::new_plain_string("abc".to_string());
let actual_result5 = StringSubstringLength.evaluate(string.clone(), start5, length5);
assert!(actual_result5.is_some());
assert_eq!(result5, actual_result5.unwrap());

let start6 = AnyDataValue::new_integer_from_u64(1);
let length6 = AnyDataValue::new_integer_from_u64(4);
let result6 = AnyDataValue::new_plain_string("abc".to_string());
let actual_result6 = StringSubstringLength.evaluate(string.clone(), start6, length6);
assert!(actual_result6.is_some());
assert_eq!(result6, actual_result6.unwrap());

let start7 = AnyDataValue::new_integer_from_u64(0);
let length7 = AnyDataValue::new_integer_from_u64(4);
let actual_result7 = StringSubstringLength.evaluate(string.clone(), start7, length7);
assert!(actual_result7.is_none());
}
}
13 changes: 12 additions & 1 deletion nemo-physical/src/function/evaluation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ mod test {
let tree_length = Function::string_length(Function::constant(any_string("12345")));
evaluate_expect(&tree_length, Some(AnyDataValue::new_integer_from_i64(5)));

let tree_string_reverse =
Function::string_reverse(Function::constant(any_string("Hello World")));
evaluate_expect(&tree_string_reverse, Some(any_string("dlroW olleH")));

let tree_lower_case = Function::string_lowercase(Function::constant(any_string("tEsT123")));
evaluate_expect(&tree_lower_case, Some(any_string("test123")));

Expand Down Expand Up @@ -412,9 +416,16 @@ mod test {
);
evaluate_expect(&tree_not_contains, Some(AnyDataValue::new_boolean(false)));

let tree_substring_length = Function::string_subtstring_length(
Function::constant(any_string("Hello World")),
Function::constant(AnyDataValue::new_integer_from_u64(7)),
Function::constant(AnyDataValue::new_integer_from_u64(3)),
);
evaluate_expect(&tree_substring_length, Some(any_string("Wor")));

let tree_substring = Function::string_subtstring(
Function::constant(any_string("Hello World")),
Function::constant(AnyDataValue::new_integer_from_u64(6)),
Function::constant(AnyDataValue::new_integer_from_u64(7)),
);
evaluate_expect(&tree_substring, Some(any_string("World")));

Expand Down
2 changes: 1 addition & 1 deletion nemo-physical/src/function/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ where
/// This evaluates to a string containing the
/// characters from the string that results from evaluating `string`,
/// starting from the position that results from evaluating `start`
/// with the length given by evaluating `length`.
/// with the maximum length given by evaluating `length`.
pub fn string_subtstring_length(string: Self, start: Self, length: Self) -> Self {
Self::Ternary {
function: TernaryFunctionEnum::StringSubstringLength(StringSubstringLength),
Expand Down
2 changes: 1 addition & 1 deletion nemo/src/model/rule_model/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl BinaryOperation {
/// Ternary operation applied to a [Term]
#[derive(Debug, Eq, PartialEq, Copy, Clone, PartialOrd, Ord)]
pub enum TernaryOperation {
/// String starting at some start position with a given length
/// String starting at some start position with a given maximum length
StringSubstringLength,
}

Expand Down
2 changes: 1 addition & 1 deletion resources/testcases/arithmetic/builtins.rls
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ result(compare, ?R) :- strings(?A, ?B), ?R = 10 * COMPARE(?A, ?B).
result(contains, ?R) :- strings(?A, _), ?R = CONTAINS(?A, "lo").
result(subString, ?R) :- strings(?A, ?B), ?R = SUBSTR(?A, STRLEN(?B) / 2).
result(stringreverse, ?R) :- strings(?A, _), ?R = STRREV(?A).
result(subStringLength, ?R) :- strings(?A, _), ?R = SUBSTRING(?A, 1, 3).
result(subStringLength, ?R) :- strings(?A, _), ?R = SUBSTRING(?A, 2, 3).
result(ucase, ?R) :- strings(?A, _), ?R = UCASE(?A).
result(lcase, ?R) :- strings(_, ?B), ?R = LCASE(?B).
result(stringbefore, ?R) :- strings(?A, _), ?R = STRBEFORE(?A, "ll").
Expand Down
2 changes: 1 addition & 1 deletion resources/testcases/arithmetic/builtins/result.csv
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ concat,"""Hello World"""
compare,-10
contains,"""true""^^<http://www.w3.org/2001/XMLSchema#boolean>"
stringreverse,"""olleH"""
subString,"""llo"""
subString,"""ello"""
subStringLength,"""ell"""
ucase,"""hello"""
lcase,"""WORLD"""
Expand Down

0 comments on commit f10dcd0

Please sign in to comment.