Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate documentation for all string functions from scalar_functions.md to code #12775

Merged
merged 8 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,14 @@ fn print_docs(
.find(|f| f.get_name() == name || f.get_aliases().contains(&name))
.unwrap();

let name = f.get_name();
let aliases = f.get_aliases();
let documentation = f.get_documentation();

// if this name is an alias we need to display what it's an alias of
if aliases.contains(&name) {
let _ = write!(docs, "_Alias of [{name}](#{name})._");
let fname = f.get_name();
let _ = writeln!(docs, r#"### `{name}`"#);
let _ = writeln!(docs, "_Alias of [{fname}](#{fname})._");
continue;
}

Expand Down Expand Up @@ -183,10 +184,10 @@ fn print_docs(

// next, aliases
if !f.get_aliases().is_empty() {
let _ = write!(docs, "#### Aliases");
let _ = writeln!(docs, "#### Aliases");

for alias in f.get_aliases() {
let _ = writeln!(docs, "- {alias}");
let _ = writeln!(docs, "- {}", alias.replace("_", r#"\_"#));
}
}

Expand Down
51 changes: 33 additions & 18 deletions datafusion/functions/src/string/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,6 @@ use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::{Arc, OnceLock};

/// Cache for the `ascii` documentation; filled in by the first call to
/// `get_ascii_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `ascii` function, building it on first use.
///
/// The description says "Unicode character code" rather than "ASCII value":
/// for a non-ASCII first character the function yields its code point
/// (e.g. `ascii('🚀')` is `128640`).
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_ascii_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(
                "Returns the Unicode character code of the first character in a string.",
            )
            .with_syntax_example("ascii(str)")
            .with_argument(
                "str",
                "String expression to operate on. Can be a constant, column, or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View.",
            )
            .with_related_udf("chr")
            .build()
            .unwrap()
    })
}

#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
Expand Down Expand Up @@ -96,6 +78,39 @@ impl ScalarUDFImpl for AsciiFunc {
}
}

/// Cache for the `ascii` documentation; filled in by the first call to
/// `get_ascii_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `ascii` function, building it on first use.
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_ascii_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let summary =
            "Returns the Unicode character code of the first character in a string.";
        // Shows both an ASCII input and a multi-byte input.
        let sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#;

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(summary)
            .with_syntax_example("ascii(str)")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_related_udf("chr")
            .build()
            .unwrap()
    })
}

fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
Expand Down
40 changes: 35 additions & 5 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;

use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::utils::utf8_to_int_type;

#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -88,4 +88,34 @@ impl ScalarUDFImpl for BitLengthFunc {
},
}
}

/// Exposes the `bit_length` documentation returned by `get_bit_length_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_bit_length_doc();
    Some(doc)
}
}

/// Cache for the `bit_length` documentation; filled in by the first call to
/// `get_bit_length_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `bit_length` function, building it on
/// first use.
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_bit_length_doc() -> &'static Documentation {
    // One-shot constructor handed to the `OnceLock`.
    fn build_doc() -> Documentation {
        let sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#;

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description("Returns the bit length of a string.")
            .with_syntax_example("bit_length(str)")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_related_udf("length")
            .with_related_udf("octet_length")
            .build()
            .unwrap()
    }

    DOCUMENTATION.get_or_init(build_doc)
}
41 changes: 35 additions & 6 deletions datafusion/functions/src/string/btrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@
// specific language governing permissions and limitations
// under the License.

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
use std::any::Any;

use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use std::any::Any;
use std::sync::OnceLock;

/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed.
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
Expand Down Expand Up @@ -109,6 +109,35 @@ impl ScalarUDFImpl for BTrimFunc {
/// Returns the alternative names held in this function's `aliases` field.
fn aliases(&self) -> &[String] {
&self.aliases
}

/// Exposes the `btrim` documentation returned by `get_btrim_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_btrim_doc();
    Some(doc)
}
}

/// Cache for the `btrim` documentation; filled in by the first call to
/// `get_btrim_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `btrim` function, building it on first use.
///
/// `trim_str` is described as the string that gets trimmed from both ends of
/// the input, so it is not confused with the `str` argument being operated on.
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_btrim_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description("Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.")
            .with_syntax_example("btrim(str[, trim_str])")
            .with_sql_example(r#"```sql
> select btrim('__datafusion____', '_');
+-------------------------------------------+
| btrim(Utf8("__datafusion____"),Utf8("_")) |
+-------------------------------------------+
| datafusion |
+-------------------------------------------+
```"#)
            .with_standard_argument("str", "String")
            .with_argument("trim_str", "String expression to trim from the beginning and end of the input string. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._")
            .with_related_udf("ltrim")
            .with_related_udf("rtrim")
            .build()
            .unwrap()
    })
}

#[cfg(test)]
Expand Down
39 changes: 35 additions & 4 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@
// under the License.

use std::any::Any;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use arrow::array::ArrayRef;
use arrow::array::StringArray;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Int64;
use arrow::datatypes::DataType::Utf8;

use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::{ColumnarValue, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::utils::make_scalar_function;

/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
/// chr(65) = 'A'
pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down Expand Up @@ -99,4 +99,35 @@ impl ScalarUDFImpl for ChrFunc {
/// Evaluates `chr` on `args` by wrapping the array-level `chr` function with
/// `make_scalar_function` (empty second argument) and calling the result.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
    let evaluate = make_scalar_function(chr, vec![]);
    evaluate(args)
}

/// Exposes the `chr` documentation returned by `get_chr_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_chr_doc();
    Some(doc)
}
}

/// Cache for the `chr` documentation; filled in by the first call to
/// `get_chr_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `chr` function, building it on first use.
///
/// `chr` takes an integer character code (the SQL example passes
/// `Int64(128640)`), so the argument is described explicitly instead of being
/// registered as a standard "String" argument.
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_chr_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description(
                "Returns the character with the specified ASCII or Unicode code value.",
            )
            .with_syntax_example("chr(expression)")
            .with_sql_example(
                r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
            )
            .with_argument(
                "expression",
                "Expression containing the ASCII or Unicode code value to operate on. Can be a constant, column, or function, and any combination of arithmetic or string operators.",
            )
            .with_related_udf("ascii")
            .build()
            .unwrap()
    })
}
40 changes: 35 additions & 5 deletions datafusion/functions/src/string/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
use arrow::array::{as_largestring_array, Array};
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use crate::string::common::*;
use crate::string::concat;
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{lit, ColumnarValue, Expr, Volatility};
use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use crate::string::common::*;
use crate::string::concat;

#[derive(Debug)]
pub struct ConcatFunc {
signature: Signature,
Expand Down Expand Up @@ -244,6 +244,36 @@ impl ScalarUDFImpl for ConcatFunc {
) -> Result<ExprSimplifyResult> {
simplify_concat(args)
}

/// Exposes the `concat` documentation returned by `get_concat_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_concat_doc();
    Some(doc)
}
}

/// Cache for the `concat` documentation; filled in by the first call to
/// `get_concat_doc`.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `concat` function, building it on first use.
///
/// # Panics
/// Panics if `build()` does not succeed.
fn get_concat_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        let sql_example = r#"```sql
> select concat('data', 'f', 'us', 'ion');
+-------------------------------------------------------+
| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
+-------------------------------------------------------+
| datafusion |
+-------------------------------------------------------+
```"#;
        let trailing_args_help = "Subsequent string expressions to concatenate.";

        Documentation::builder()
            .with_doc_section(DOC_SECTION_STRING)
            .with_description("Concatenates multiple strings together.")
            .with_syntax_example("concat(str[, ..., str_n])")
            .with_sql_example(sql_example)
            .with_standard_argument("str", "String")
            .with_argument("str_n", trailing_args_help)
            .with_related_udf("concat_ws")
            .build()
            .unwrap()
    })
}

pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
Expand Down
Loading