Skip to content

Commit

Permalink
Preserve field name when casting List (apache#13468)
Browse files Browse the repository at this point in the history
* Add an option to pass a field name when creating the array, so that the field name is retained during a cast

* add unit tests for list casting round trip

* Documentation example was missing a parameter

* Rather than deprecating an existing function or changing a public signature, add a parallel function for the few cases where we want to set the field name explicitly
  • Loading branch information
timsaucer authored and findepi committed Nov 28, 2024
1 parent 2d5fe9d commit 625dcce
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 8 deletions.
72 changes: 64 additions & 8 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ use crate::cast::{
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
use crate::hash_utils::create_hashes;
use crate::utils::{
array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array,
array_into_fixed_size_list_array_with_field_name, array_into_large_list_array,
array_into_large_list_array_with_field_name, array_into_list_array,
array_into_list_array_with_field_name,
};
use arrow::compute::kernels::numeric::*;
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
Expand Down Expand Up @@ -2663,27 +2665,36 @@ impl ScalarValue {
let list_array = array.as_list::<i32>();
let nested_array = list_array.value(index);
// Produces a single element `ListArray` with the value at `index`.
let arr =
Arc::new(array_into_list_array(nested_array, field.is_nullable()));
let arr = Arc::new(array_into_list_array_with_field_name(
nested_array,
field.is_nullable(),
field.name(),
));

ScalarValue::List(arr)
}
DataType::LargeList(_) => {
DataType::LargeList(field) => {
let list_array = as_large_list_array(array);
let nested_array = list_array.value(index);
// Produces a single element `LargeListArray` with the value at `index`.
let arr = Arc::new(array_into_large_list_array(nested_array));
let arr = Arc::new(array_into_large_list_array_with_field_name(
nested_array,
field.name(),
));

ScalarValue::LargeList(arr)
}
// TODO: There is no test for FixedSizeList now, add it later
DataType::FixedSizeList(_, _) => {
DataType::FixedSizeList(field, _) => {
let list_array = as_fixed_size_list_array(array)?;
let nested_array = list_array.value(index);
// Produces a single element `FixedSizeListArray` with the value at `index`.
let list_size = nested_array.len();
let arr =
Arc::new(array_into_fixed_size_list_array(nested_array, list_size));
let arr = Arc::new(array_into_fixed_size_list_array_with_field_name(
nested_array,
list_size,
field.name(),
));

ScalarValue::FixedSizeList(arr)
}
Expand Down Expand Up @@ -5970,6 +5981,51 @@ mod tests {
ScalarValue::from("larger than 12 bytes string"),
DataType::Utf8View,
);
check_scalar_cast(
{
let element_field =
Arc::new(Field::new("element", DataType::Int32, true));

let mut builder =
ListBuilder::new(Int32Builder::new()).with_field(element_field);
builder.append_value([Some(1)]);
builder.append(true);

ScalarValue::List(Arc::new(builder.finish()))
},
DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
);
check_scalar_cast(
{
let element_field =
Arc::new(Field::new("element", DataType::Int32, true));

let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
.with_field(element_field);
builder.values().append_value(1);
builder.append(true);

ScalarValue::FixedSizeList(Arc::new(builder.finish()))
},
DataType::FixedSizeList(
Arc::new(Field::new("element", DataType::Int64, true)),
1,
),
);
check_scalar_cast(
{
let element_field =
Arc::new(Field::new("element", DataType::Int32, true));

let mut builder =
LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
builder.append_value([Some(1)]);
builder.append(true);

ScalarValue::LargeList(Arc::new(builder.finish()))
},
DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
);
}

// Mimics how casting works on scalar values by casting `scalar` to `desired_type`
Expand Down
41 changes: 41 additions & 0 deletions datafusion/common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,20 @@ pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
)
}

/// Wrap an array into a single element `ListArray` whose child field is named
/// `field_name`.
///
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`.
///
/// # Arguments
///
/// * `arr` - the values that become the one and only list entry.
/// * `nullable` - nullability recorded on the child field.
/// * `field_name` - name recorded on the child field.
///
/// The child field's data type is taken from `arr`, and the resulting list
/// is built without a null buffer.
pub fn array_into_list_array_with_field_name(
    arr: ArrayRef,
    nullable: bool,
    field_name: &str,
) -> ListArray {
    // Describe the child field: caller-chosen name and nullability, with the
    // data type copied from the wrapped array.
    let item_type = arr.data_type().clone();
    let item_field = Arc::new(Field::new(field_name, item_type, nullable));

    // A single length entry means one list slot spanning all of `arr`.
    let single_entry = OffsetBuffer::from_lengths([arr.len()]);

    ListArray::new(item_field, single_entry, arr, None)
}

/// Wrap an array into a single element `LargeListArray`.
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
Expand All @@ -354,6 +368,19 @@ pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
)
}

/// Wrap an array into a single element `LargeListArray` whose child field is
/// named `field_name`.
///
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`.
///
/// # Arguments
///
/// * `arr` - the values that become the one and only list entry.
/// * `field_name` - name recorded on the child field.
///
/// The child field's data type is taken from `arr` and the field is always
/// marked nullable. The resulting list is built without a null buffer.
pub fn array_into_large_list_array_with_field_name(
    arr: ArrayRef,
    field_name: &str,
) -> LargeListArray {
    // The child field is unconditionally nullable here; only its name is
    // supplied by the caller.
    let item_type = arr.data_type().clone();
    let item_field = Arc::new(Field::new(field_name, item_type, true));

    // A single length entry means one list slot spanning all of `arr`.
    let single_entry = OffsetBuffer::from_lengths([arr.len()]);

    LargeListArray::new(item_field, single_entry, arr, None)
}

pub fn array_into_fixed_size_list_array(
arr: ArrayRef,
list_size: usize,
Expand All @@ -367,6 +394,20 @@ pub fn array_into_fixed_size_list_array(
)
}

/// Wrap an array into a `FixedSizeListArray` whose child field is named
/// `field_name`.
///
/// # Arguments
///
/// * `arr` - the values stored inside the fixed size list.
/// * `list_size` - number of values per list entry.
/// * `field_name` - name recorded on the child field.
///
/// The child field's data type is taken from `arr` and the field is always
/// marked nullable. The resulting list is built without a null buffer.
pub fn array_into_fixed_size_list_array_with_field_name(
    arr: ArrayRef,
    list_size: usize,
    field_name: &str,
) -> FixedSizeListArray {
    // The child field is unconditionally nullable here; only its name is
    // supplied by the caller.
    let item_type = arr.data_type().clone();
    let item_field = Arc::new(Field::new(field_name, item_type, true));

    // NOTE: `as` performs an unchecked narrowing conversion, so a
    // `list_size` larger than `i32::MAX` would wrap rather than error.
    let size = list_size as i32;

    FixedSizeListArray::new(item_field, size, arr, None)
}

/// Wrap arrays into a single element `ListArray`.
///
/// Example:
Expand Down

0 comments on commit 625dcce

Please sign in to comment.