Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Fix dump_model() information for root node #6569

Merged
merged 45 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
12102cc
Fix value calculation in root node
Jul 24, 2024
c933399
Fix dask tests
Jul 26, 2024
c240016
Merge branch 'master' into fix-root-values
neNasko1 Jul 26, 2024
2f1de57
Create proper tests
Jul 29, 2024
273a1df
Merge branch 'master' into fix-root-values
neNasko1 Jul 29, 2024
208df85
Test only on cpu
Jul 29, 2024
130879b
Merge branch 'fix-root-values' of github.com:neNasko1/LightGBM into f…
Jul 29, 2024
48e6b96
Disable new tests for CUDA
Jul 30, 2024
26b9859
Merge with #5964
Aug 3, 2024
88e3dec
Finish merging with dump_model unification
Aug 3, 2024
e1274dc
Improve tests
Aug 3, 2024
38ee92c
Add linear test for stump
Aug 4, 2024
3b423de
Fix CUDA compilation
Aug 5, 2024
c89e257
Merge branch 'master' into fix-root-values
neNasko1 Aug 5, 2024
3de14d9
Merge branch 'master' into fix-root-values
neNasko1 Aug 6, 2024
fc42c1c
Merge branch 'master' into fix-root-values
neNasko1 Aug 14, 2024
3ffcac6
Comments after code review
Aug 14, 2024
d5a82c4
Fix test
Aug 15, 2024
be7675d
Reenable cuda testing
Aug 15, 2024
f616e03
Tests
Aug 15, 2024
6c6bc33
Merge branch 'microsoft:master' into fix-root-values
neNasko1 Aug 15, 2024
c28a2cf
test cuda
Aug 15, 2024
6113f90
.
Aug 15, 2024
94cf7f0
Fix warning
Aug 15, 2024
01aa952
reenable tests
Aug 15, 2024
fadaa83
.
Aug 15, 2024
b9c681b
Merge branch 'fix-cuda' into fix-root-values
Aug 15, 2024
a323acb
fix cuda
Aug 15, 2024
0fd0c59
Fix compilation error
Aug 15, 2024
4cc5dd4
Fix weight
Aug 15, 2024
a743a87
Fix numerical
Aug 15, 2024
031c945
Make tests more robust
Aug 16, 2024
91993a9
Merge branch 'master' into fix-root-values
neNasko1 Sep 2, 2024
f744f64
Merge branch 'master' into fix-root-values
neNasko1 Sep 5, 2024
634b0fc
Fix test failing because of accuracy reasons
Sep 17, 2024
3fe4577
Fix test_dask::test_init_scores
Sep 21, 2024
9e3e8ed
Decrease size of trees in test
Sep 21, 2024
a01e737
Merge branch 'master' of github.com:microsoft/LightGBM into fix-root-…
jameslamb Oct 8, 2024
e76d5bc
add a test on predictions from a model of all stumps
jameslamb Oct 8, 2024
0af4631
Comments after code review
neNasko1 Oct 8, 2024
04886c0
Small text QOL
neNasko1 Oct 9, 2024
15fc3bf
Add test_predict_stump on dask
neNasko1 Oct 9, 2024
938cb63
Merge branch 'master' into fix-root-values
jameslamb Oct 11, 2024
bed5ded
Update tests/python_package_test/test_dask.py
neNasko1 Oct 11, 2024
ac01d79
Appease linter
neNasko1 Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/LightGBM/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ inline void Tree::Split(int leaf, int feature, int real_feature,
leaf_parent_[leaf] = new_node_idx;
leaf_parent_[num_leaves_] = new_node_idx;
// save current leaf value to internal node before change
internal_weight_[new_node_idx] = leaf_weight_[leaf];
internal_weight_[new_node_idx] = left_weight + right_weight;
internal_value_[new_node_idx] = leaf_value_[leaf];
internal_count_[new_node_idx] = left_cnt + right_cnt;
leaf_value_[leaf] = std::isnan(left_value) ? 0.0f : left_value;
Expand Down
3 changes: 3 additions & 0 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
}
}
new_tree->AsConstantTree(init_scores[cur_tree_id]);
} else {
// extend init_scores with zeros
new_tree->AsConstantTree(0);
}
}
// add model
Expand Down
6 changes: 6 additions & 0 deletions src/treelearner/serial_tree_learner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
auto tree_ptr = tree.get();
constraints_->ShareTreePointer(tree_ptr);

// set the root value by hand, as it is not handled by splits
tree->SetLeafOutput(0, FeatureHistogram::CalculateSplittedLeafOutput<true, true, true, false>(
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
config_->lambda_l1, config_->lambda_l2, config_->max_delta_step,
BasicConstraint(), config_->path_smooth, static_cast<data_size_t>(num_data_), 0));

// root leaf
int left_leaf = 0;
int cur_depth = 1;
Expand Down
3 changes: 1 addition & 2 deletions tests/python_package_test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,8 +1464,7 @@ def test_init_score(task, output, cluster):
init_scores = dy.map_blocks(lambda x: np.full((x.size, size_factor), init_score))
model = model_factory(client=client, **params)
model.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
# value of the root node is 0 when init_score is set
assert model.booster_.trees_to_dataframe()["value"][0] == 0
assert model.fitted_
neNasko1 marked this conversation as resolved.
Show resolved Hide resolved


def sklearn_checks_to_run():
Expand Down
20 changes: 17 additions & 3 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from .utils import (
SERIALIZERS,
assert_all_trees_valid,
dummy_obj,
load_breast_cancer,
load_digits,
Expand Down Expand Up @@ -3857,16 +3858,29 @@ def test_dump_model():
train_data = lgb.Dataset(X, label=y)
params = {"objective": "binary", "verbose": -1}
bst = lgb.train(params, train_data, num_boost_round=5)
dumped_model_str = str(bst.dump_model(5, 0))
dumped_model = bst.dump_model(5, 0)
neNasko1 marked this conversation as resolved.
Show resolved Hide resolved
dumped_model_str = str(dumped_model)
assert "leaf_features" not in dumped_model_str
assert "leaf_coeff" not in dumped_model_str
assert "leaf_const" not in dumped_model_str
assert "leaf_value" in dumped_model_str
assert "leaf_count" in dumped_model_str
params["linear_tree"] = True

# CUDA does not return correct values for the root
if getenv("TASK", "") != "cuda":
for tree in dumped_model["tree_info"]:
assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
assert_all_trees_valid(dumped_model)


def test_dump_model_linear():
X, y = load_breast_cancer(return_X_y=True)
params = {"objective": "binary", "verbose": -1, "linear_tree": True}
train_data = lgb.Dataset(X, label=y)
bst = lgb.train(params, train_data, num_boost_round=5)
dumped_model_str = str(bst.dump_model(5, 0))
dumped_model = bst.dump_model(5, 0)
neNasko1 marked this conversation as resolved.
Show resolved Hide resolved
assert_all_trees_valid(dumped_model)
dumped_model_str = str(dumped_model)
assert "leaf_features" in dumped_model_str
assert "leaf_coeff" in dumped_model_str
assert "leaf_const" in dumped_model_str
Expand Down
35 changes: 35 additions & 0 deletions tests/python_package_test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,38 @@ def np_assert_array_equal(*args, **kwargs):
if not _numpy_testing_supports_strict_kwarg:
kwargs.pop("strict")
np.testing.assert_array_equal(*args, **kwargs)


def assert_subtree_valid(root):
    """Recursively check the validity of a subtree rooted at ``root``.

    Currently it only checks whether weights and counts are consistent between
    all parent nodes and their children: each internal node's
    ``internal_weight`` / ``internal_count`` must match the sum of the
    corresponding values reported by its two children.

    Parameters
    ----------
    root : dict
        A dictionary representing the root of the subtree.
        It should be produced by dump_model().

    Returns
    -------
    tuple
        A tuple containing the weight and count of the subtree rooted at `root`.

    Raises
    ------
    AssertionError
        If any internal node's weight or count disagrees with the sum over
        its children.
    """
    # A node carrying "leaf_count" is treated as a leaf and ends the recursion.
    if "leaf_count" in root:
        return (root["leaf_weight"], root["leaf_count"])

    # Validate both children first; they return their own (weight, count).
    (l_w, l_c) = assert_subtree_valid(root["left_child"])
    (r_w, r_c) = assert_subtree_valid(root["right_child"])

    weight = root["internal_weight"]
    count = root["internal_count"]
    # np.allclose tolerates small floating point differences in the sums.
    assert np.allclose(weight, l_w + r_w), (
        f"internal_weight {weight} does not match sum of child weights {l_w} + {r_w}"
    )
    assert np.allclose(count, l_c + r_c), (
        f"internal_count {count} does not match sum of child counts {l_c} + {r_c}"
    )
    return (weight, count)


def assert_all_trees_valid(model_dump):
    """Check every tree of a dumped model for internal consistency.

    For each entry of ``model_dump["tree_info"]`` this verifies that its
    ``"tree_index"`` equals its position in the list and then validates its
    ``"tree_structure"`` with ``assert_subtree_valid``.

    Parameters
    ----------
    model_dump : dict
        A dictionary with a ``"tree_info"`` list, as passed by the callers
        that obtain it from dump_model().

    Raises
    ------
    AssertionError
        If a tree is stored at the wrong index or a subtree check fails.
    """
    for idx, tree in enumerate(model_dump["tree_info"]):
        assert tree["tree_index"] == idx, (
            f"tree at position {idx} reports tree_index {tree['tree_index']}"
        )
        assert_subtree_valid(tree["tree_structure"])
Loading