From dc33dcc2bbd305ebecafd5e73729c81dcc7beb3d Mon Sep 17 00:00:00 2001 From: Tim Van Wassenhove Date: Tue, 23 Apr 2024 12:43:46 +0200 Subject: [PATCH] feat: support for deltalake (wip) --- Cargo.lock | 933 +++++++++++++++++++++++++++++++---- Cargo.toml | 5 + examples/demo.rs | 11 +- src/catalog_provider/glue.rs | 96 +++- src/error.rs | 3 + 5 files changed, 962 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8918017..377b620 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + [[package]] name = "apache-avro" version = "0.16.0" @@ -90,7 +96,7 @@ checksum = "ceb7c683b2f8f40970b70e39ff8be514c95b96fcb9c4af87e1ed2cb2e10801a0" dependencies = [ "bzip2", "crc32fast", - "digest", + "digest 0.10.7", "lazy_static", "libflate", "log", @@ -197,7 +203,7 @@ dependencies = [ "arrow-data", "arrow-schema", "arrow-select", - "base64", + "base64 0.21.7", "chrono", "comfy-table", "half", @@ -306,6 +312,9 @@ name = "arrow-schema" version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" +dependencies = [ + "serde", +] [[package]] name = "arrow-select" @@ -395,7 +404,7 @@ dependencies = [ "hex", "http 0.2.12", "hyper", - "ring", + "ring 0.17.8", "time", "tokio", "tracing", @@ -540,12 +549,12 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "hmac", + "hmac 0.12.1", "http 0.2.12", "http 1.1.0", "once_cell", "percent-encoding", - "sha2", + "sha2 0.10.8", "time", "tracing", ] @@ -617,11 +626,11 @@ dependencies = [ "http-body 0.4.6", "http-body 1.0.0", "hyper", - "hyper-rustls", + "hyper-rustls 0.24.2", "once_cell", "pin-project-lite", "pin-utils", - "rustls", + "rustls 0.21.11", "tokio", "tracing", ] @@ -693,6 +702,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "futures-core", + "getrandom", + "instant", + "pin-project-lite", + "rand", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.71" @@ -708,6 +731,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" @@ -742,7 +771,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] @@ -758,6 +787,15 @@ dependencies = [ "constant_time_eq", ] +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -794,6 +832,12 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" + [[package]] name = "byteorder" version = "1.5.0" @@ -856,9 +900,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", @@ -985,6 +1029,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-mac" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714" +dependencies = [ + "generic-array", + "subtle", +] + [[package]] name = "csv" version = "1.3.0" @@ -1006,6 +1060,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ct-logs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1a816186fa68d9e426e3cb4ae4dff1fcd8e4a2c34b781bf7a822574a0d0aac8" +dependencies = [ + "sct 0.6.1", +] + [[package]] name = "dary_heap" version = "0.3.6" @@ -1025,6 +1088,54 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "datafusion" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-common 35.0.0", + "datafusion-execution 35.0.0", + "datafusion-expr 35.0.0", + "datafusion-optimizer 35.0.0", + "datafusion-physical-expr 35.0.0", + "datafusion-physical-plan 35.0.0", + "datafusion-sql 35.0.0", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.3", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "pin-project-lite", + "rand", + "sqlparser 0.41.0", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd 0.13.1", +] + [[package]] name = "datafusion" version = "36.0.0" @@ -1043,15 +1154,15 @@ dependencies = [ "bzip2", "chrono", "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 36.0.0", + "datafusion-execution 36.0.0", + "datafusion-expr 36.0.0", "datafusion-functions", "datafusion-functions-array", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", + "datafusion-optimizer 36.0.0", + "datafusion-physical-expr 36.0.0", + "datafusion-physical-plan 36.0.0", + "datafusion-sql 36.0.0", "flate2", "futures", "glob", @@ -1067,7 +1178,7 @@ dependencies = [ "parquet", "pin-project-lite", "rand", - "sqlparser", + "sqlparser 0.43.1", "tempfile", "tokio", "tokio-util", @@ -1085,7 +1196,8 @@ dependencies = [ "aws-sdk-glue", "aws-types", "dashmap", - "datafusion", + "datafusion 36.0.0", + "deltalake", "object_store", "pest", "pest_derive", @@ -1093,6 +1205,26 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-common" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "libc", + "num_cpus", + "object_store", + "parquet", + "sqlparser 0.41.0", +] + [[package]] name = "datafusion-common" version = "36.0.0" @@ -1111,7 +1243,28 @@ dependencies = [ "num_cpus", "object_store", "parquet", - "sqlparser", + "sqlparser 0.43.1", +] + +[[package]] +name = "datafusion-execution" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "futures", + "hashbrown 0.14.3", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", ] [[package]] @@ -1123,8 +1276,8 @@ dependencies = [ "arrow", "chrono", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 36.0.0", + "datafusion-expr 36.0.0", "futures", "hashbrown 0.14.3", "log", @@ -1135,6 +1288,22 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-expr" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "datafusion-common 35.0.0", + "paste", + "sqlparser 0.41.0", + "strum 0.25.0", + "strum_macros 0.25.3", +] + [[package]] name = "datafusion-expr" version = "36.0.0" @@ -1144,9 +1313,9 @@ dependencies = [ "ahash", "arrow", "arrow-array", - "datafusion-common", + "datafusion-common 36.0.0", "paste", - "sqlparser", + "sqlparser 0.43.1", "strum 0.26.2", "strum_macros 0.26.2", ] @@ -1158,10 +1327,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98f1c73f7801b2b8ba2297b3ad78ffcf6c1fc6b8171f502987eb9ad5cb244ee7" dependencies = [ "arrow", - "base64", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "base64 0.21.7", + "datafusion-common 36.0.0", + "datafusion-execution 36.0.0", + "datafusion-expr 36.0.0", "hex", "log", ] @@ -1173,13 +1342,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42d16a0ddf2c991526f6ffe2f47a72c6da0b7354d6c32411dd20631fe2e38937" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 36.0.0", + "datafusion-execution 36.0.0", + "datafusion-expr 36.0.0", "log", "paste", ] +[[package]] +name = "datafusion-optimizer" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "datafusion-physical-expr 35.0.0", + "hashbrown 0.14.3", + "itertools", + "log", + "regex-syntax", +] + [[package]] name = "datafusion-optimizer" version = "36.0.0" @@ -1189,15 +1376,49 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 36.0.0", + "datafusion-expr 36.0.0", + "datafusion-physical-expr 36.0.0", "hashbrown 0.14.3", "itertools", "log", "regex-syntax", ] +[[package]] +name = "datafusion-physical-expr" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "base64 0.21.7", + "blake2", + "blake3", + "chrono", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "half", + "hashbrown 0.14.3", + "hex", + "indexmap", + "itertools", + "log", + "md-5 0.10.6", + "paste", + "petgraph", + "rand", + "regex", + "sha2 0.10.8", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-physical-expr" version = "36.0.0" @@ -1211,29 +1432,60 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64", + "base64 0.21.7", "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", + "datafusion-common 36.0.0", + "datafusion-execution 36.0.0", + "datafusion-expr 36.0.0", "half", "hashbrown 0.14.3", "hex", "indexmap", "itertools", "log", - "md-5", + "md-5 0.10.6", "paste", "petgraph", "rand", "regex", - "sha2", + "sha2 0.10.8", "unicode-segmentation", "uuid", ] +[[package]] +name = "datafusion-physical-plan" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common 35.0.0", + "datafusion-execution 35.0.0", + "datafusion-expr 35.0.0", + "datafusion-physical-expr 35.0.0", + "futures", + "half", + "hashbrown 0.14.3", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", + "uuid", +] + [[package]] name = "datafusion-physical-plan" version = "36.0.0" @@ -1247,10 +1499,10 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 36.0.0", + "datafusion-execution 36.0.0", + "datafusion-expr 36.0.0", + "datafusion-physical-expr 36.0.0", "futures", "half", "hashbrown 0.14.3", @@ -1265,6 +1517,35 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-proto" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5742f993d1812d6bb3cdc4ce2a0aa99e10f6dc0daa11dd69b0ff57f2d8e7518c" +dependencies = [ + "arrow", + "chrono", + "datafusion 35.0.0", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "object_store", + "prost", +] + +[[package]] +name = "datafusion-sql" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "log", + "sqlparser 0.41.0", +] + [[package]] name = "datafusion-sql" version = "36.0.0" @@ -1273,10 +1554,107 @@ checksum = "21474a95c3a62d113599d21b439fa15091b538bac06bd20be0bb2e7d22903c09" dependencies = [ "arrow", "arrow-schema", - "datafusion-common", - "datafusion-expr", + "datafusion-common 36.0.0", + "datafusion-expr 36.0.0", "log", - "sqlparser", + "sqlparser 0.43.1", +] + +[[package]] +name = "deltalake" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e5fa38c2d00cc8a96789eaf3e7a2b27f0bd4c4c7a23e1e59bd5b7b4ceb796fe" +dependencies = [ + "deltalake-aws", + "deltalake-core", +] + +[[package]] +name = "deltalake-aws" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c27bed8839d53570bbaf9c4ae3a6b187b34f9e07cebd2f0abf2118774f466d9" +dependencies = [ + "async-trait", + "backoff", + "bytes", + "deltalake-core", + "futures", + "lazy_static", + "maplit", + "object_store", + "regex", + "rusoto_core", + "rusoto_credential", + "rusoto_dynamodb", + "rusoto_sts", + "thiserror", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "deltalake-core" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "607be097d9bf5998bfbde3c5e6364f775e5adde0be55843130e5e50f2a2a4387" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-trait", + "bytes", + "cfg-if", + "chrono", + "dashmap", + "datafusion 35.0.0", + "datafusion-common 35.0.0", + "datafusion-expr 35.0.0", + "datafusion-physical-expr 35.0.0", + "datafusion-proto", + "datafusion-sql 35.0.0", + "either", + "errno", + "fix-hidden-lifetime-bug", + "futures", + "hashbrown 0.14.3", + "indexmap", + "itertools", + "lazy_static", + "libc", + "maplit", + "num-bigint", + "num-traits", + "num_cpus", + "object_store", + "once_cell", + "parking_lot", + "parquet", + "percent-encoding", + "pin-project-lite", + "rand", + "regex", + "roaring", + "serde", + "serde_json", + "sqlparser 0.41.0", + "thiserror", + "tokio", + "tracing", + "url", + "uuid", + "z85", ] [[package]] @@ -1289,14 +1667,44 @@ dependencies = [ ] [[package]] -name = "digest" -version = "0.10.7" +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ - "block-buffer", - "crypto-common", - "subtle", + "libc", + "redox_users", + "winapi", ] [[package]] @@ -1342,6 +1750,26 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +[[package]] +name = "fix-hidden-lifetime-bug" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ae9c2016a663983d4e40a9ff967d6dcac59819672f0b47f2b17574e99c33c8" +dependencies = [ + "fix-hidden-lifetime-bug-proc_macros", +] + +[[package]] +name = "fix-hidden-lifetime-bug-proc_macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1572,13 +2000,23 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hmac" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" +dependencies = [ + "crypto-mac", + "digest 0.9.0", +] + [[package]] name = "hmac" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] @@ -1679,6 +2117,23 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64" +dependencies = [ + "ct-logs", + "futures-util", + "hyper", + "log", + "rustls 0.19.1", + "rustls-native-certs 0.5.0", + "tokio", + "tokio-rustls 0.22.0", + "webpki", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -1689,10 +2144,10 @@ dependencies = [ "http 0.2.12", "hyper", "log", - "rustls", - "rustls-native-certs", + "rustls 0.21.11", + "rustls-native-certs 0.6.3", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", ] [[package]] @@ -1738,6 +2193,15 @@ dependencies = [ "hashbrown 0.14.3", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -1889,6 +2353,16 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.5.0", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -1931,6 +2405,23 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "md-5" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +dependencies = [ + "block-buffer 0.9.0", + "digest 0.9.0", + "opaque-debug", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1938,7 +2429,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ "cfg-if", - "digest", + "digest 0.10.7", ] [[package]] @@ -2076,25 +2567,24 @@ dependencies = [ [[package]] name = "object_store" -version = "0.9.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" +checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" dependencies = [ "async-trait", - "base64", + "base64 0.21.7", "bytes", "chrono", "futures", "humantime", "hyper", "itertools", - "md-5", "parking_lot", "percent-encoding", "quick-xml", "rand", "reqwest", - "ring", + "ring 0.17.8", "serde", "serde_json", "snafu", @@ -2110,6 +2600,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssl-probe" version = "0.1.5" @@ -2168,7 +2664,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.21.7", "brotli", "bytes", "chrono", @@ -2252,7 +2748,7 @@ checksum = "2adbf29bb9776f28caece835398781ab24435585fe0d4dc1374a61db5accedca" dependencies = [ "once_cell", "pest", - "sha2", + "sha2 0.10.8", ] [[package]] @@ -2342,6 +2838,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "quad-rand" version = "0.2.1" @@ -2406,6 +2925,17 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_users" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.10.4" @@ -2447,7 +2977,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -2456,7 +2986,7 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "hyper", - "hyper-rustls", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2464,8 +2994,8 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", - "rustls-native-certs", + "rustls 0.21.11", + "rustls-native-certs 0.6.3", "rustls-pemfile", "serde", "serde_json", @@ -2473,7 +3003,7 @@ dependencies = [ "sync_wrapper", "system-configuration", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "tower-service", "url", @@ -2484,6 +3014,21 @@ dependencies = [ "winreg", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.8" @@ -2494,8 +3039,8 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin", - "untrusted", + "spin 0.9.8", + "untrusted 0.9.0", "windows-sys 0.52.0", ] @@ -2505,6 +3050,114 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "roaring" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rusoto_core" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b4f000e8934c1b4f70adde180056812e7ea6b1a247952db8ee98c94cd3116cc" +dependencies = [ + "async-trait", + "base64 0.13.1", + "bytes", + "crc32fast", + "futures", + "http 0.2.12", + "hyper", + "hyper-rustls 0.22.1", + "lazy_static", + "log", + "rusoto_credential", + "rusoto_signature", + "rustc_version", + "serde", + "serde_json", + "tokio", + "xml-rs", +] + +[[package]] +name = "rusoto_credential" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a46b67db7bb66f5541e44db22b0a02fed59c9603e146db3a9e633272d3bac2f" +dependencies = [ + "async-trait", + "chrono", + "dirs-next", + "futures", + "hyper", + "serde", + "serde_json", + "shlex", + "tokio", + "zeroize", +] + +[[package]] +name = "rusoto_dynamodb" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7935e1f9ca57c4ee92a4d823dcd698eb8c992f7e84ca21976ae72cd2b03016e7" +dependencies = [ + "async-trait", + "bytes", + "futures", + "rusoto_core", + "serde", + "serde_json", +] + +[[package]] +name = "rusoto_signature" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6264e93384b90a747758bcc82079711eacf2e755c3a8b5091687b5349d870bcc" +dependencies = [ + "base64 0.13.1", + "bytes", + "chrono", + "digest 0.9.0", + "futures", + "hex", + "hmac 0.11.0", + "http 0.2.12", + "hyper", + "log", + "md-5 0.9.1", + "percent-encoding", + "pin-project-lite", + "rusoto_credential", + "rustc_version", + "serde", + "sha2 0.9.9", + "tokio", +] + +[[package]] +name = "rusoto_sts" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7edd42473ac006fd54105f619e480b0a94136e7f53cf3fb73541363678fd92" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "rusoto_core", + "serde_urlencoded", + "xml-rs", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2533,6 +3186,19 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustls" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" +dependencies = [ + "base64 0.13.1", + "log", + "ring 0.16.20", + "sct 0.6.1", + "webpki", +] + [[package]] name = "rustls" version = "0.21.11" @@ -2540,9 +3206,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" dependencies = [ "log", - "ring", + "ring 0.17.8", "rustls-webpki", - "sct", + "sct 0.7.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a07b7c1885bd8ed3831c289b7870b13ef46fe0e856d288c30d9cc17d75a2092" +dependencies = [ + "openssl-probe", + "rustls 0.19.1", + "schannel", + "security-framework", ] [[package]] @@ -2563,7 +3241,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -2572,8 +3250,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + "ring 0.17.8", + "untrusted 0.9.0", ] [[package]] @@ -2612,14 +3290,24 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" +dependencies = [ + "ring 0.16.20", + "untrusted 0.7.1", +] + [[package]] name = "sct" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring", - "untrusted", + "ring 0.17.8", + "untrusted 0.9.0", ] [[package]] @@ -2700,6 +3388,19 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpufeatures", + "digest 0.9.0", + "opaque-debug", +] + [[package]] name = "sha2" version = "0.10.8" @@ -2708,9 +3409,15 @@ checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -2779,12 +3486,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "sqlparser" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +dependencies = [ + "log", + "sqlparser_derive", +] + [[package]] name = "sqlparser" version = "0.43.1" @@ -2817,6 +3540,9 @@ name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] [[package]] name = "strum" @@ -2855,9 +3581,9 @@ dependencies = [ [[package]] name = "subtle" -version = "2.5.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" @@ -3016,6 +3742,7 @@ dependencies = [ "libc", "mio", "num_cpus", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3034,13 +3761,24 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "tokio-rustls" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" +dependencies = [ + "rustls 0.19.1", + "tokio", + "webpki", +] + [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.11", "tokio", ] @@ -3070,6 +3808,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3176,6 +3915,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -3335,6 +4080,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" +dependencies = [ + "ring 0.16.20", + "untrusted 0.7.1", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3524,6 +4279,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "xml-rs" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" + [[package]] name = "xmlparser" version = "0.13.6" @@ -3539,6 +4300,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "z85" +version = "3.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a599daf1b507819c1121f0bf87fa37eb19daac6aff3aefefd4e6e2e0f2020fc" + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index edd76ef..c0fef31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,4 +27,9 @@ pest_derive = "2" tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "sync", "fs"] } url = "2" dashmap = "5" +deltalake = {version = "0.17", optional=true, features=["datafusion", "deltalake-aws", "s3"]} + +[features] +default = ["deltalake"] +deltalake = ["dep:deltalake"] diff --git a/examples/demo.rs b/examples/demo.rs index f36107b..fd2c151 100644 --- a/examples/demo.rs +++ b/examples/demo.rs @@ -50,7 +50,8 @@ async fn main() -> Result<()> { let mut glue_catalog_provider = GlueCatalogProvider::new(&sdk_config); let register_results = glue_catalog_provider - .register_all_with_options(&TableRegistrationOptions::InferSchemaFromData, &ctx.state()) + .register_tables("oidc_aggregates", &ctx.state()) + //.register_all_with_options(&TableRegistrationOptions::InferSchemaFromData, &ctx.state()) .await?; for result in register_results { if result.is_err() { @@ -66,6 +67,12 @@ async fn main() -> Result<()> { .show() .await?; + ctx.sql("select * from glue.oidc_aggregates.customer_frequency_v2 limit 1") + .await? + .show() + .await?; + + /* let tables = ctx .sql( r#" @@ -104,7 +111,7 @@ async fn main() -> Result<()> { } }; } - } + }*/ Ok(()) } diff --git a/src/catalog_provider/glue.rs b/src/catalog_provider/glue.rs index 6863581..0a7404c 100644 --- a/src/catalog_provider/glue.rs +++ b/src/catalog_provider/glue.rs @@ -17,7 +17,10 @@ use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; +use datafusion::datasource::object_store::ObjectStoreRegistry; +use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; +use deltalake::DeltaTableBuilder; use std::any::Any; use std::collections::HashMap; use std::sync::Arc; @@ -34,6 +37,7 @@ pub enum TableRegistrationOptions { pub struct GlueCatalogProvider { client: Client, schema_provider_by_database: HashMap>, + object_store_registry: Option>, } impl GlueCatalogProvider { @@ -185,6 +189,76 @@ impl GlueCatalogProvider { let sd = Self::get_storage_descriptor(glue_table)?; let storage_location_uri = Self::get_storage_location(&sd)?; + let table_parameters = match &glue_table.parameters { + Some(x) => x.clone(), + None => HashMap::new(), + }; + + let table_type = table_parameters.get("table_type"); + if table_type.unwrap().to_lowercase() == "delta" { + self.register_delta_table( + glue_table, + ctx, + database_name, + table_name, + &sd, + storage_location_uri, + ) + .await?; + } else { + self.register_listing_table( + glue_table, + table_registration_options, + ctx, + database_name, + table_name, + &sd, + storage_location_uri, + ) + .await?; + } + + Ok(()) + } + + async fn register_delta_table( + &mut self, + glue_table: &Table, + ctx: &SessionState, + database_name: &str, + table_name: &String, + sd: &StorageDescriptor, + storage_location_uri: &str, + ) -> Result<()> { + + let mut builder = DeltaTableBuilder::from_uri(&storage_location_uri); + + self. + + if let Ok(object_store) = get_object_store(object_store_registry, &location) { + builder = builder.with_storage_backend(object_store, url); + } + + builder + .with_storage_options(std::env::vars().collect()) + .load() + .await + .map(|t| Arc::new(t) as Arc) + .map_err(|e| GlueError::Deltalake(e)) + + Ok(()) + } + + async fn register_listing_table( + &mut self, + glue_table: &Table, + table_registration_options: &TableRegistrationOptions, + ctx: &SessionState, + database_name: &str, + table_name: &String, + sd: &StorageDescriptor, + storage_location_uri: &str, + ) -> Result<()> { let listing_options = Self::get_listing_options(database_name, table_name, &sd, glue_table)?; @@ -206,7 +280,6 @@ impl GlueCatalogProvider { schema_provider_for_database .register_table(table_name.to_string(), Arc::new(listing_table))?; - Ok(()) } @@ -508,6 +581,27 @@ impl CatalogProvider for GlueCatalogProvider { } } +/* +async fn create_delta_table( + table: &Table, + object_store_registry: &Option>, +) -> Result> { + let location = table_location(table)?; + let url = url::Url::parse(&location)?; + let mut builder = DeltaTableBuilder::from_uri(&location); + + if let Ok(object_store) = get_object_store(object_store_registry, &location) { + builder = builder.with_storage_backend(object_store, url); + } + + builder + .with_storage_options(std::env::vars().collect()) + .load() + .await + .map(|t| Arc::new(t) as Arc) + .map_err(|e| GlueError::Deltalake(e)) +}*/ + #[cfg(test)] mod tests { use super::*; diff --git a/src/error.rs b/src/error.rs index 0703226..5d3cfed 100644 --- a/src/error.rs +++ b/src/error.rs @@ -18,6 +18,8 @@ pub enum GlueError { DataFusion(DataFusionError), /// Error during mapping of GlueDataType GlueDataTypeMapping(String), + /// Error during loading of delta lake table + Deltalake(deltalake::errors::DeltaTableError), } impl Display for GlueError { @@ -29,6 +31,7 @@ impl Display for GlueError { GlueError::GlueDataTypeMapping(desc) => { write!(f, "Could not map glue data type: {}", desc) } + GlueError::Deltalake(e) => e.fmt(f), } } }