From ea11ec19cd24b600f11aa1e7c0967d93f6700d5d Mon Sep 17 00:00:00 2001 From: Dmitry Khominich Date: Sat, 16 Mar 2019 23:36:16 +0200 Subject: [PATCH] [+] NOTES.md & README.md --- NOTES.md | 28 ++++++++++++++++++++++++++++ README.md | 10 +++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 NOTES.md diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..8d589f9 --- /dev/null +++ b/NOTES.md @@ -0,0 +1,28 @@ +## 16.03.2019 + +Transformation functions are introduced. Before this step `leaves` was able to output only raw predictions. A new bool option named `loadTransformation` has been added to all model load functions: `XGEnsembleFromReader`, `XGEnsembleFromFile`, `XGBLinearFromReader`, `XGBLinearFromFile`, `SKEnsembleFromReader`, `SKEnsembleFromFile`, `LGEnsembleFromJSON`, `LGEnsembleFromReader`, `LGEnsembleFromFile`. + +For example, the line: +```go +model, err := leaves.LGEnsembleFromFile("lg_breast_cancer.model") +``` + +should be changed to: +```go +model, err := leaves.LGEnsembleFromFile("lg_breast_cancer.model", false) +``` + +if one wants to keep the old behaviour. + + +Also, the `NClasses` method of `Ensemble` will be renamed to `NRawOutputGroups` while keeping the same meaning - the number of values that the model provides for every object in raw predictions. `NOutputGroups` is also added - the number of values that the model provides for every object after applying the transformation function. Generally, that means that the transformation function can change the output dimensionality. 
Please note, if the current transformation function is `raw`: + +```go +model.Transformation().Name() == "raw" +``` + +then + +```go +model.NRawOutputGroups() == model.NOutputGroups() +``` diff --git a/README.md b/README.md index 6f2f9f6..d03facf 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ _leaves_ is a library implementing prediction code for GBRT (Gradient Boosting R ## Features * General Features: * support parallel predictions for batches + * support sigmoid, softmax transformation functions * Support LightGBM ([repo](https://github.com/Microsoft/LightGBM)) models: * read models from `text` format and from `JSON` format * support `gbdt`, `rf` (random forest) and `dart` models @@ -51,7 +52,8 @@ import ( func main() { // 1. Read model - model, err := leaves.LGEnsembleFromFile("lightgbm_model.txt") + useTransformation := true + model, err := leaves.LGEnsembleFromFile("lightgbm_model.txt", useTransformation) if err != nil { panic(err) } @@ -69,6 +71,8 @@ In order to use XGBoost model, just change `leaves.LGEnsembleFromFile`, to `leav Documentation is hosted on godoc ([link](https://godoc.org/github.com/dmitryikh/leaves)). Documentation contains complex usage examples and full API reference. Some additional information about usage examples can be found in [leaves_test.go](leaves_test.go). +Some additional information on new features and backward compatibility can be found in [NOTES.md](NOTES.md). + ## Benchmark Below are comparisons of prediction speed on batches (~1000 objects in 1 API @@ -104,9 +108,9 @@ Single thread: ## Limitations * LightGBM models: - * no support transformations functions (sigmoid, lambdarank, etc). Output scores is _raw scores_ + * limited support of transformation functions (support only sigmoid, softmax) * XGBoost models: - * no support transformations functions. Output scores is _raw scores_ + * limited support of transformation functions (support only sigmoid, softmax) * could be slight divergence between C API predictions vs. 
_leaves_ because of floating point convertions and comparisons tolerances * scikit-learn tree models: * no support transformations functions. Output scores is _raw scores_ (as from `GradientBoostingClassifier.decision_function`)