Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve CommonSubexprEliminate identifier management (10% faster planning) #10473

Merged
merged 10 commits into from
Jun 24, 2024
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ homepage = "https://datafusion.apache.org"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/datafusion"
rust-version = "1.75"
rust-version = "1.76"
version = "39.0.0"

[workspace.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ license = "Apache-2.0"
homepage = "https://datafusion.apache.org"
repository = "https://github.com/apache/datafusion"
# Specify MSRV here as `cargo msrv` doesn't support workspace version
rust-version = "1.75"
rust-version = "1.76"
readme = "README.md"

[dependencies]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::error::{Result, _internal_err};

// Combines two hashes into one hash
#[inline]
fn combine_hashes(l: u64, r: u64) -> u64 {
pub fn combine_hashes(l: u64, r: u64) -> u64 {
let hash = (17 * 37u64).wrapping_add(l);
hash.wrapping_mul(37).wrapping_add(r)
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ authors = { workspace = true }
# Specify MSRV here as `cargo msrv` doesn't support workspace version and fails with
# "Unable to find key 'package.rust-version' (or 'package.metadata.msrv') in 'arrow-datafusion/Cargo.toml'"
# https://github.com/foresterre/cargo-msrv/issues/590
rust-version = "1.75"
rust-version = "1.76"

[lints]
workspace = true
Expand Down
173 changes: 172 additions & 1 deletion datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
use std::collections::HashSet;
use std::fmt::{self, Display, Formatter, Write};
use std::hash::Hash;
use std::hash::{Hash, Hasher};
use std::mem;
use std::str::FromStr;
use std::sync::Arc;

Expand Down Expand Up @@ -1461,6 +1462,176 @@ impl Expr {
| Expr::Placeholder(..) => false,
}
}

/// Hashes the direct content of an `Expr` without recursing into its children.
///
/// This method is useful to incrementally compute hashes, such as in
/// `CommonSubexprEliminate` which builds a deep hash of a node and its descendants
/// during the bottom-up phase of the first traversal and so avoid computing the hash
/// of the node and then the hash of its descendants separately.
///
/// If a node doesn't have any children then this method is similar to `.hash()`, but
/// not necessarily returns the same value.
///
/// As it is pretty easy to forget changing this method when `Expr` changes the
/// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes
/// compile time.
pub fn hash_node<H: Hasher>(&self, hasher: &mut H) {
mem::discriminant(self).hash(hasher);
match self {
Expr::Alias(Alias {
expr: _expr,
relation,
name,
}) => {
relation.hash(hasher);
name.hash(hasher);
}
Expr::Column(column) => {
column.hash(hasher);
}
Expr::ScalarVariable(data_type, name) => {
data_type.hash(hasher);
name.hash(hasher);
}
Expr::Literal(scalar_value) => {
scalar_value.hash(hasher);
}
Expr::BinaryExpr(BinaryExpr {
left: _left,
op,
right: _right,
}) => {
op.hash(hasher);
}
Expr::Like(Like {
negated,
expr: _expr,
pattern: _pattern,
escape_char,
case_insensitive,
})
| Expr::SimilarTo(Like {
negated,
expr: _expr,
pattern: _pattern,
escape_char,
case_insensitive,
}) => {
negated.hash(hasher);
escape_char.hash(hasher);
case_insensitive.hash(hasher);
}
Expr::Not(_expr)
| Expr::IsNotNull(_expr)
| Expr::IsNull(_expr)
| Expr::IsTrue(_expr)
| Expr::IsFalse(_expr)
| Expr::IsUnknown(_expr)
| Expr::IsNotTrue(_expr)
| Expr::IsNotFalse(_expr)
| Expr::IsNotUnknown(_expr)
| Expr::Negative(_expr) => {}
Expr::Between(Between {
expr: _expr,
negated,
low: _low,
high: _high,
}) => {
negated.hash(hasher);
}
Expr::Case(Case {
expr: _expr,
when_then_expr: _when_then_expr,
else_expr: _else_expr,
}) => {}
Expr::Cast(Cast {
expr: _expr,
data_type,
})
| Expr::TryCast(TryCast {
expr: _expr,
data_type,
}) => {
data_type.hash(hasher);
}
Expr::Sort(Sort {
expr: _expr,
asc,
nulls_first,
}) => {
asc.hash(hasher);
nulls_first.hash(hasher);
}
Expr::ScalarFunction(ScalarFunction { func, args: _args }) => {
func.hash(hasher);
}
Expr::AggregateFunction(AggregateFunction {
func_def,
args: _args,
distinct,
filter: _filter,
order_by: _order_by,
null_treatment,
}) => {
func_def.hash(hasher);
distinct.hash(hasher);
null_treatment.hash(hasher);
}
Expr::WindowFunction(WindowFunction {
fun,
args: _args,
partition_by: _partition_by,
order_by: _order_by,
window_frame,
null_treatment,
}) => {
fun.hash(hasher);
window_frame.hash(hasher);
null_treatment.hash(hasher);
}
Expr::InList(InList {
expr: _expr,
list: _list,
negated,
}) => {
negated.hash(hasher);
}
Expr::Exists(Exists { subquery, negated }) => {
subquery.hash(hasher);
negated.hash(hasher);
}
Expr::InSubquery(InSubquery {
expr: _expr,
subquery,
negated,
}) => {
subquery.hash(hasher);
negated.hash(hasher);
}
Expr::ScalarSubquery(subquery) => {
subquery.hash(hasher);
}
Expr::Wildcard { qualifier } => {
qualifier.hash(hasher);
}
Expr::GroupingSet(grouping_set) => {
mem::discriminant(grouping_set).hash(hasher);
match grouping_set {
GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {}
GroupingSet::GroupingSets(_exprs) => {}
}
}
Expr::Placeholder(place_holder) => {
place_holder.hash(hasher);
}
Expr::OuterReferenceColumn(data_type, column) => {
data_type.hash(hasher);
column.hash(hasher);
}
Expr::Unnest(Unnest { expr: _expr }) => {}
};
}
}

// modifies expr if it is a placeholder with datatype of right
Expand Down
Loading