Skip to content

Commit

Permalink
implement better builder
Browse files Browse the repository at this point in the history
  • Loading branch information
rluvaton committed Dec 10, 2024
1 parent c6e88cd commit a8cb7d0
Show file tree
Hide file tree
Showing 21 changed files with 456 additions and 4,209 deletions.
12 changes: 12 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,18 @@ impl ArrayAccessor for &BooleanArray {
}
}

impl ArrayAccessor for BooleanArray {
type Item = bool;

fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl From<Vec<bool>> for BooleanArray {
fn from(data: Vec<bool>) -> Self {
let mut mut_buf = MutableBuffer::new_null(data.len());
Expand Down
12 changes: 12 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,18 @@ impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize
}
}

impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for GenericListArray<OffsetSize> {
type Item = ArrayRef;

fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let prefix = OffsetSize::PREFIX;
Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,19 @@ impl<T: ArrowPrimitiveType> ArrayAccessor for &PrimitiveArray<T> {
}
}

impl<T: ArrowPrimitiveType> ArrayAccessor for PrimitiveArray<T> {
type Item = T::Native;

fn value(&self, index: usize) -> Self::Item {
self.value(index)
}

#[inline]
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
self.value_unchecked(index)
}
}

impl<T: ArrowTemporalType> PrimitiveArray<T>
where
i64: From<T::Native>,
Expand Down
47 changes: 45 additions & 2 deletions arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
use crate::{ArrayRef, BooleanArray};
use crate::builder::{SpecificArrayBuilder, ArrayBuilder, BooleanBufferBuilder};
use crate::{Array, ArrayRef, BooleanArray};
use arrow_buffer::Buffer;
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -219,6 +219,49 @@ impl ArrayBuilder for BooleanBuilder {
}
}


impl SpecificArrayBuilder for BooleanBuilder {
type Output = BooleanArray;
type Item<'a> = bool;

/// Builds the array and reset this builder.
fn finish(&mut self) -> Arc<BooleanArray> {
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
fn finish_cloned(&self) -> Arc<BooleanArray> {
Arc::new(self.finish_cloned())
}

fn append_value(&mut self, value: bool) {
self.append_value(value)
}

fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
self.append_value(*value)
}

fn append_null(&mut self) {
self.append_null()
}

fn append_nulls(&mut self, n: usize) {
self.append_nulls(n)
}

fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
// TODO - if iterator exists try it?
for i in 0..output.len() {
if output.is_null(i) {
self.append_null();
} else {
self.append_value(output.value(i));
}
}
}
}

impl Extend<Option<bool>> for BooleanBuilder {
#[inline]
fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
Expand Down
39 changes: 37 additions & 2 deletions arrow-array/src/builder/fixed_size_binary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
use crate::{ArrayRef, FixedSizeBinaryArray};
use crate::builder::{ArrayBuilder, SpecificArrayBuilder, UInt8BufferBuilder};
use crate::{Array, ArrayRef, FixedSizeBinaryArray};
use arrow_buffer::Buffer;
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -154,6 +154,41 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
}
}

impl SpecificArrayBuilder for FixedSizeBinaryBuilder {
type Output = FixedSizeBinaryArray;
type Item<'a> = &'a [u8];

fn finish(&mut self) -> Arc<Self::Output> {
Arc::new(self.finish())
}

fn finish_cloned(&self) -> Arc<Self::Output> {
Arc::new(self.finish_cloned())
}

fn append_value<'a>(&'a mut self, value: Self::Item<'a>) {
self.append_value(value).unwrap()
}

fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
self.append_value(value).unwrap()
}

fn append_null(&mut self) {
self.append_null()
}

fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
for i in 0..output.len() {
if output.is_null(i) {
self.append_null();
} else {
self.append_value(output.value(i)).unwrap();
}
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
55 changes: 53 additions & 2 deletions arrow-array/src/builder/fixed_size_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use crate::builder::{ArrayBuilder, SpecificArrayBuilder};
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_schema::{Field, FieldRef};
use std::any::Any;
Expand Down Expand Up @@ -215,6 +215,57 @@ where
}
}


/// Typed builder interface for `FixedSizeListBuilder<T>`.
///
/// `ValuesOutput` is the concrete array type produced by the child builder
/// `T`. One list *item* of this builder is a whole `ValuesOutput` array, and
/// appending an item copies that array's contents into the child builder via
/// the child's `append_output`.
impl<ValuesOutput, T> SpecificArrayBuilder for FixedSizeListBuilder<T>
where
ValuesOutput: Array + 'static,
T: SpecificArrayBuilder<Output = ValuesOutput>,
// NOTE(review): the two higher-ranked bounds below are not used by any method
// body in this impl — confirm whether they are required by the trait
// definition or can be dropped.
for<'a> &'a ValuesOutput: ArrayAccessor,
for<'a> <T as SpecificArrayBuilder>::Item<'a>: From<<&'a ValuesOutput as ArrayAccessor>::Item>
{
type Output = FixedSizeListArray;
// A single list value is represented by the child builder's output array.
type Item<'a> = T::Output;

/// Builds the array and resets this builder.
fn finish(&mut self) -> Arc<FixedSizeListArray> {
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
fn finish_cloned(&self) -> Arc<FixedSizeListArray> {
Arc::new(self.finish_cloned())
}

/// Appends one non-null list whose elements are the contents of `value`.
///
/// # Panics
///
/// Panics if `value.as_any()` cannot be downcast to `ValuesOutput`.
///
/// NOTE(review): no check that `value` holds exactly `list_len` elements is
/// visible here — confirm whether the caller or `finish` enforces it.
fn append_value<'a>(&'a mut self, value: Self::Item<'a>) {
// This builder's item type is the child builder's output type, so the whole
// array is forwarded to the child's `append_output`.
self.values_builder.append_output(value.as_any().downcast_ref::<ValuesOutput>().unwrap());
// Mark the list slot that was just filled as valid.
self.append(true);
}

/// Same as `append_value`, but takes the child array by reference.
///
/// # Panics
///
/// Panics if `value.as_any()` cannot be downcast to `ValuesOutput`.
fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
self.values_builder.append_output(value.as_any().downcast_ref::<ValuesOutput>().unwrap());
self.append(true);
}

/// Appends one null list slot, padding the child builder with `list_len`
/// nulls first.
fn append_null(&mut self) {
// TODO - make sure we should append nulls to the values builder
// NOTE(review): Arrow's fixed-size list layout appears to require child
// slots to exist even for null lists, which would make this padding
// correct — confirm against the format specification.
self.values_builder.append_nulls(self.list_len as usize);
self.append(false);
}

/// Appends every list slot of `output`, in order, keeping nulls as nulls.
///
/// # Panics
///
/// Panics if a non-null element of `output` cannot be downcast to
/// `ValuesOutput`.
fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
// TODO - if iterator exists try it?
for i in 0..output.len() {
if output.is_null(i) {
self.append_null();
} else {
// Copy the i-th child array into the child builder, then record a valid slot.
self.values_builder.append_output(output.value(i).as_any().downcast_ref::<ValuesOutput>().unwrap());
self.append(true);
}
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
7 changes: 2 additions & 5 deletions arrow-array/src/builder/generic_byte_run_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,9 @@
use crate::types::bytes::ByteArrayNativeType;
use std::{any::Any, sync::Arc};

use crate::{
types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
ArrayRef, ArrowPrimitiveType, RunArray,
};
use crate::{types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type}, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, RunArray};

use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder, SpecificArrayBuilder};

use arrow_buffer::ArrowNativeType;

Expand Down
42 changes: 40 additions & 2 deletions arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
use crate::builder::{ArrayBuilder, BufferBuilder, SpecificArrayBuilder, UInt8BufferBuilder};
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
use arrow_buffer::NullBufferBuilder;
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
use arrow_data::ArrayDataBuilder;
Expand Down Expand Up @@ -228,6 +228,44 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
}
}

impl<T: ByteArrayType> SpecificArrayBuilder for GenericByteBuilder<T> {
type Output = GenericByteArray<T>;
type Item<'a> = &'a T::Native;

/// Builds the array and reset this builder.
fn finish(&mut self) -> Arc<GenericByteArray<T>> {
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
fn finish_cloned(&self) -> Arc<GenericByteArray<T>> {
Arc::new(self.finish_cloned())
}

fn append_value(&mut self, value: &T::Native) {
self.append_value(value)
}

fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
self.append_value(value)
}

fn append_null(&mut self) {
self.append_null()
}

fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
// TODO - if iterator exists try it?
for i in 0..output.len() {
if output.is_null(i) {
self.append_null();
} else {
self.append_value(output.value(i));
}
}
}
}

impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
Expand Down
40 changes: 38 additions & 2 deletions arrow-array/src/builder/generic_bytes_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ use arrow_schema::ArrowError;
use hashbrown::hash_table::Entry;
use hashbrown::HashTable;

use crate::builder::ArrayBuilder;
use crate::builder::{ArrayBuilder, SpecificArrayBuilder};
use crate::types::bytes::ByteArrayNativeType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
use crate::{ArrayRef, GenericByteViewArray};
use crate::{Array, ArrayRef, GenericByteViewArray};

const STARTING_BLOCK_SIZE: u32 = 8 * 1024; // 8KiB
const MAX_BLOCK_SIZE: u32 = 2 * 1024 * 1024; // 2MiB
Expand Down Expand Up @@ -452,6 +452,42 @@ impl<T: ByteViewType + ?Sized> ArrayBuilder for GenericByteViewBuilder<T> {
}
}

impl<T: ByteViewType + ?Sized> SpecificArrayBuilder for GenericByteViewBuilder<T> {
type Output = GenericByteViewArray<T>;
type Item<'a> = &'a T::Native;

fn finish(&mut self) -> Arc<GenericByteViewArray<T>> {
Arc::new(self.finish())
}

fn finish_cloned(&self) -> Arc<GenericByteViewArray<T>> {
Arc::new(self.finish_cloned())
}

fn append_value(&mut self, value: &T::Native) {
self.append_value(value)
}

fn append_value_ref<'a>(&'a mut self, value: &'a Self::Item<'a>) {
self.append_value(value)
}

fn append_null(&mut self) {
self.append_null()
}

fn append_output<'a>(&'a mut self, output: &'a Self::Output) {
// TODO - if iterator exists try it?
for i in 0..output.len() {
if output.is_null(i) {
self.append_null();
} else {
self.append_value(output.value(i));
}
}
}
}

impl<T: ByteViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
for GenericByteViewBuilder<T>
{
Expand Down
Loading

0 comments on commit a8cb7d0

Please sign in to comment.