Skip to content

Commit

Permalink
test: generator for temporal types
Browse files Browse the repository at this point in the history
  • Loading branch information
aljazerzen committed Feb 20, 2024
1 parent 61c7320 commit 921508d
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 46 deletions.
2 changes: 1 addition & 1 deletion connector_arrow/src/sqlite/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub(crate) fn table_create(
.fields()
.iter()
.map(|field| {
let ty = ty_from_arrow(field.data_type()).expect("TODO: err message");
let ty = ty_from_arrow(field.data_type());

let not_null = if field.is_nullable() { "" } else { " NOT NULL" };

Expand Down
72 changes: 36 additions & 36 deletions connector_arrow/src/sqlite/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,42 +46,42 @@ pub fn decl_ty_to_arrow(decl_ty: &str, col: &str, table: &str) -> Result<DataTyp
})
}

pub fn ty_from_arrow(ty: &DataType) -> Option<&'static str> {
pub fn ty_from_arrow(ty: &DataType) -> &'static str {
match ty {
DataType::Null => Some("NULL"),
DataType::Boolean => Some("INTEGER"),
DataType::Int8 => Some("INTEGER"),
DataType::Int16 => Some("INTEGER"),
DataType::Int32 => Some("INTEGER"),
DataType::Int64 => Some("INTEGER"),
DataType::UInt8 => Some("INTEGER"),
DataType::UInt16 => Some("INTEGER"),
DataType::UInt32 => Some("INTEGER"),
DataType::UInt64 => Some("TEXT"),
DataType::Float16 => Some("REAL"),
DataType::Float32 => Some("REAL"),
DataType::Float64 => Some("REAL"),
DataType::Timestamp(_, _) => None,
DataType::Date32 => None,
DataType::Date64 => None,
DataType::Time32(_) => None,
DataType::Time64(_) => None,
DataType::Duration(_) => None,
DataType::Interval(_) => None,
DataType::Binary => Some("BLOB"),
DataType::FixedSizeBinary(_) => Some("BLOB"),
DataType::LargeBinary => Some("BLOB"),
DataType::Utf8 => Some("TEXT"),
DataType::LargeUtf8 => Some("TEXT"),
DataType::List(_) => None,
DataType::FixedSizeList(_, _) => None,
DataType::LargeList(_) => None,
DataType::Struct(_) => None,
DataType::Union(_, _) => None,
DataType::Dictionary(_, _) => None,
DataType::Decimal128(_, _) => None,
DataType::Decimal256(_, _) => None,
DataType::Map(_, _) => None,
DataType::RunEndEncoded(_, _) => None,
DataType::Null => "NULL",
DataType::Boolean => "INTEGER",
DataType::Int8 => "INTEGER",
DataType::Int16 => "INTEGER",
DataType::Int32 => "INTEGER",
DataType::Int64 => "INTEGER",
DataType::UInt8 => "INTEGER",
DataType::UInt16 => "INTEGER",
DataType::UInt32 => "INTEGER",
DataType::UInt64 => "TEXT",
DataType::Float16 => "REAL",
DataType::Float32 => "REAL",
DataType::Float64 => "REAL",
DataType::Timestamp(_, _) => unimplemented!(),
DataType::Date32 => unimplemented!(),
DataType::Date64 => unimplemented!(),
DataType::Time32(_) => unimplemented!(),
DataType::Time64(_) => unimplemented!(),
DataType::Duration(_) => unimplemented!(),
DataType::Interval(_) => unimplemented!(),
DataType::Binary => "BLOB",
DataType::FixedSizeBinary(_) => "BLOB",
DataType::LargeBinary => "BLOB",
DataType::Utf8 => "TEXT",
DataType::LargeUtf8 => "TEXT",
DataType::List(_) => unimplemented!(),
DataType::FixedSizeList(_, _) => unimplemented!(),
DataType::LargeList(_) => unimplemented!(),
DataType::Struct(_) => unimplemented!(),
DataType::Union(_, _) => unimplemented!(),
DataType::Dictionary(_, _) => unimplemented!(),
DataType::Decimal128(_, _) => unimplemented!(),
DataType::Decimal256(_, _) => unimplemented!(),
DataType::Map(_, _) => unimplemented!(),
DataType::RunEndEncoded(_, _) => unimplemented!(),
}
}
Binary file added connector_arrow/tests/data/temporal.parquet
Binary file not shown.
10 changes: 10 additions & 0 deletions connector_arrow/tests/it/test_duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ fn roundtrip_numeric() {
super::tests::roundtrip_of_parquet(&mut conn, file_name, table_name);
}

/// Round-trips the generated `temporal.parquet` fixture through the
/// `roundtrip_temporal` table. Currently ignored, so it only runs when
/// ignored tests are requested explicitly.
#[test]
#[ignore]
fn roundtrip_temporal() {
    let mut connection = init();
    super::tests::roundtrip_of_parquet(
        &mut connection,
        "temporal.parquet",
        "roundtrip_temporal",
    );
}

#[test]
fn introspection_basic_small() {
let table_name = "introspection_basic_small";
Expand Down
10 changes: 10 additions & 0 deletions connector_arrow/tests/it/test_sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ fn roundtrip_numeric() {
super::tests::roundtrip_of_parquet(&mut conn, file_name, table_name);
}

/// Round-trips the generated `temporal.parquet` fixture through the
/// `roundtrip_temporal` table.
// NOTE(review): presumably ignored because the SQLite type mapping does not
// cover temporal Arrow types yet — confirm before un-ignoring.
#[test]
#[ignore]
fn roundtrip_temporal() {
    let mut connection = init();
    super::tests::roundtrip_of_parquet(
        &mut connection,
        "temporal.parquet",
        "roundtrip_temporal",
    );
}

#[test]
#[ignore] // cannot introspect the Null column
fn introspection_basic_small() {
Expand Down
163 changes: 154 additions & 9 deletions test_generator/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow::array::*;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::datatypes::{DataType, Field, Int32Type, Int64Type, IntervalUnit, Schema, TimeUnit};
use half::f16;
use rand::{Rng, SeedableRng};
use std::fs::File;
Expand Down Expand Up @@ -156,13 +156,91 @@ fn generate_array<R: Rng>(data_type: &DataType, values: &[ValuesDesc], rng: &mut
rng.gen::<f64>() // TODO: this is standard instead of uniform
]
}
DataType::Timestamp(_, _) => todo!(),
DataType::Date32 => todo!(),
DataType::Date64 => todo!(),
DataType::Time32(_) => todo!(),
DataType::Time64(_) => todo!(),
DataType::Duration(_) => todo!(),
DataType::Interval(_) => todo!(),
DataType::Timestamp(_, _) => {
let array = gen_array![
values,
TimestampMicrosecondBuilder,
i64::MIN,
i64::MAX,
rng.gen_range(i64::MIN..=i64::MAX)
];
arrow::compute::cast(&array, data_type).unwrap()
}
DataType::Date32 => {
gen_array![
values,
Date32Builder,
i32::MIN,
i32::MAX,
rng.gen_range(i32::MIN..=i32::MAX)
]
}
DataType::Date64 => {
gen_array![
values,
Date64Builder,
i64::MIN,
i64::MAX,
rng.gen_range(i64::MIN..=i64::MAX)
]
}
DataType::Time32(_) => {
let array = gen_array![
values,
PrimitiveBuilder<Int32Type>,
i32::MIN,
i32::MAX,
rng.gen_range(i32::MIN..=i32::MAX)
];
arrow::compute::cast(&array, data_type).unwrap()
}
DataType::Time64(_) => {
let array = gen_array![
values,
PrimitiveBuilder<Int64Type>,
i64::MIN,
i64::MAX,
rng.gen_range(i64::MIN..=i64::MAX)
];
arrow::compute::cast(&array, data_type).unwrap()
}
DataType::Duration(_) => {
let array = gen_array![
values,
PrimitiveBuilder<Int64Type>,
i64::MIN,
i64::MAX,
rng.gen_range(i64::MIN..=i64::MAX)
];
arrow::compute::cast(&array, data_type).unwrap()
}
DataType::Interval(IntervalUnit::YearMonth) => {
gen_array![
values,
IntervalYearMonthBuilder,
i32::MIN,
i32::MAX,
rng.gen_range(i32::MIN..=i32::MAX)
]
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
gen_array![
values,
IntervalMonthDayNanoBuilder,
i128::MIN,
i128::MAX,
rng.gen_range(i128::MIN..=i128::MAX)
]
}
DataType::Interval(IntervalUnit::DayTime) => {
gen_array![
values,
IntervalDayTimeBuilder,
i64::MIN,
i64::MAX,
rng.gen_range(i64::MIN..=i64::MAX)
]
}
DataType::Binary => todo!(),
DataType::FixedSizeBinary(_) => todo!(),
DataType::LargeBinary => todo!(),
Expand Down Expand Up @@ -253,8 +331,74 @@ fn numeric() -> Vec<ColumnDesc> {
columns
}

/// Describes the columns of the temporal test data set.
///
/// One column is produced per (data type, nullability) combination. Each
/// column is named after its data type, with a `_null` suffix when nullable,
/// and holds one value per generation process: low, high, null and a
/// uniformly random value. For a non-nullable column the null process is
/// replaced by a random one.
fn temporal() -> Vec<ColumnDesc> {
    let data_types_domain = [
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        DataType::Timestamp(TimeUnit::Microsecond, None),
        DataType::Timestamp(TimeUnit::Millisecond, None),
        DataType::Timestamp(TimeUnit::Second, None),
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+07:30"))),
        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+07:30"))),
        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+07:30"))),
        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+07:30"))),
        DataType::Date32,
        DataType::Date64,
        DataType::Time32(TimeUnit::Millisecond),
        DataType::Time32(TimeUnit::Second),
        DataType::Time64(TimeUnit::Nanosecond),
        DataType::Time64(TimeUnit::Microsecond),
        // Not generated for now:
        // DataType::Duration(TimeUnit::Nanosecond),
        // DataType::Duration(TimeUnit::Microsecond),
        // DataType::Duration(TimeUnit::Millisecond),
        // DataType::Duration(TimeUnit::Second),
        DataType::Interval(IntervalUnit::YearMonth),
        // DataType::Interval(IntervalUnit::MonthDayNano),
        DataType::Interval(IntervalUnit::DayTime),
    ];
    let is_nullable_domain = [true];
    let value_gen_process_domain = [
        ValueGenProcess::Low,
        ValueGenProcess::High,
        ValueGenProcess::Null,
        ValueGenProcess::RandomUniform,
    ];

    data_types_domain
        .iter()
        // Cartesian product of data types and nullability flags.
        .flat_map(|data_type| {
            is_nullable_domain
                .iter()
                .map(move |&is_nullable| (data_type, is_nullable))
        })
        // A Null-typed column cannot be declared non-nullable.
        .filter(|(data_type, is_nullable)| *is_nullable || !matches!(data_type, DataType::Null))
        .map(|(data_type, is_nullable)| {
            let suffix = if is_nullable { "_null" } else { "" };

            let values = value_gen_process_domain
                .iter()
                .map(|&gen_process| ValuesDesc {
                    gen_process: match gen_process {
                        // Nulls are not allowed here, fall back to a random value.
                        ValueGenProcess::Null if !is_nullable => ValueGenProcess::RandomUniform,
                        other => other,
                    },
                    repeat: 1,
                })
                .collect();

            ColumnDesc {
                field_name: format!("{}{}", data_type, suffix),
                data_type: data_type.clone(),
                is_nullable,
                values,
            }
        })
        .collect()
}

fn write_parquet_to_file(batch: RecordBatch, file_name: &str) {
let path = Path::new("../connector_arrow/tests/data/file").with_file_name(file_name);
let path = Path::new("connector_arrow/tests/data/file").with_file_name(file_name);

let mut file = File::create(path).unwrap();

Expand All @@ -269,4 +413,5 @@ fn main() {
let mut rng = rand_chacha::ChaCha8Rng::from_seed([0; 32]);

write_parquet_to_file(generate_batch(numeric(), &mut rng), "numeric.parquet");
write_parquet_to_file(generate_batch(temporal(), &mut rng), "temporal.parquet");
}

0 comments on commit 921508d

Please sign in to comment.