From 17c644ea0febc047b84bf930106a25c323c59cb9 Mon Sep 17 00:00:00 2001 From: Dmitry Khominich Date: Wed, 19 Sep 2018 08:54:51 +0300 Subject: [PATCH] [!] change uint32 -> int in API signatures --- leaves.go | 4 ++-- leaves_test.go | 4 ++-- lgensemble.go | 16 ++++++++-------- lgensemble_io.go | 2 +- lgensemble_test.go | 4 ++-- mat.go | 32 ++++++++++++++++---------------- mat_io.go | 34 +++++++++++++++++----------------- mat_test.go | 8 ++++---- xgensemble.go | 16 ++++++++-------- xgensemble_io.go | 2 +- 10 files changed, 61 insertions(+), 61 deletions(-) diff --git a/leaves.go b/leaves.go index b00d843..c0fc2a7 100644 --- a/leaves.go +++ b/leaves.go @@ -5,7 +5,7 @@ const BatchSize = 16 // Ensemble is common interface that every model in leaves should implement type Ensemble interface { - PredictDense(vals []float64, nrows uint32, ncols uint32, predictions []float64, nTrees int, nThreads int) error - PredictCSR(indptr []uint32, cols []uint32, vals []float64, predictions []float64, nTrees int, nThreads int) + PredictDense(vals []float64, nrows int, ncols int, predictions []float64, nTrees int, nThreads int) error + PredictCSR(indptr []int, cols []int, vals []float64, predictions []float64, nTrees int, nThreads int) Predict(fvals []float64, nTrees int) float64 } diff --git a/leaves_test.go b/leaves_test.go index 40819b9..3c37a88 100644 --- a/leaves_test.go +++ b/leaves_test.go @@ -111,7 +111,7 @@ func InnerTestHiggs(t *testing.T, model Ensemble, nThreads int, dense bool, true bufReader := bufio.NewReader(reader) var denseMat DenseMat var csrMat CSRMat - var nRows uint32 + var nRows int if dense { denseMat, err = DenseMatFromLibsvm(bufReader, 0, true) if err != nil { @@ -312,7 +312,7 @@ func InnerBenchmarkHiggs(b *testing.B, model Ensemble, nThreads int, dense bool) bufReader := bufio.NewReader(reader) var denseMat DenseMat var csrMat CSRMat - var nRows uint32 + var nRows int if dense { denseMat, err = DenseMatFromLibsvm(bufReader, 0, true) if err != nil { diff --git a/lgensemble.go b/lgensemble.go index 4b3732f..3cd77e0 100644 --- a/lgensemble.go +++ b/lgensemble.go @@ -10,7 +10,7 @@ import ( // LGEnsemble is LightGBM model (ensemble of trees) type LGEnsemble struct { Trees []lgTree - MaxFeatureIdx uint32 + MaxFeatureIdx int } // NTrees returns number of trees in ensemble @@ -23,7 +23,7 @@ func (e *LGEnsemble) NTrees() int { // return 0.0. Note, that result is a raw score (before sigmoid function // transformation and etc) func (e *LGEnsemble) Predict(fvals []float64, nTrees int) float64 { - if e.MaxFeatureIdx+1 > uint32(len(fvals)) { + if e.MaxFeatureIdx+1 > len(fvals) { return 0.0 } ret := 0.0 @@ -45,7 +45,7 @@ func (e *LGEnsemble) Predict(fvals []float64, nTrees int) float64 { // threads that will be utilized (maximum is GO_MAX_PROCS) // Note, that result is a raw score (before sigmoid function transformation and etc). // Note, `predictions` slice should be properly allocated on call side -func (e *LGEnsemble) PredictCSR(indptr []uint32, cols []uint32, vals []float64, predictions []float64, nTrees int, nThreads int) { +func (e *LGEnsemble) PredictCSR(indptr []int, cols []int, vals []float64, predictions []float64, nTrees int, nThreads int) { nRows := len(indptr) - 1 if nRows <= BatchSize || nThreads == 0 || nThreads == 1 { fvals := make([]float64, e.MaxFeatureIdx+1) @@ -89,18 +89,18 @@ func (e *LGEnsemble) PredictCSR(indptr []uint32, cols []uint32, vals []float64, wg.Wait() } -func (e *LGEnsemble) predictCSRInner(indptr []uint32, cols []uint32, vals []float64, startIndex int, endIndex int, predictions []float64, nTrees int, fvals []float64) { +func (e *LGEnsemble) predictCSRInner(indptr []int, cols []int, vals []float64, startIndex int, endIndex int, predictions []float64, nTrees int, fvals []float64) { for i := startIndex; i < endIndex; i++ { start := indptr[i] end := indptr[i+1] for j := start; j < end; j++ { - if cols[j] < uint32(len(fvals)) { + if cols[j] < len(fvals) { fvals[cols[j]] = vals[j] } } predictions[i] = e.Predict(fvals, nTrees) for j := start; j < end; j++ { - if cols[j] < uint32(len(fvals)) { + if cols[j] < len(fvals) { fvals[cols[j]] = 0.0 } } @@ -113,8 +113,8 @@ func (e *LGEnsemble) predictCSRInner(indptr []uint32, cols []uint32, vals []floa // threads that will be utilized (maximum is GO_MAX_PROCS) // Note, that result is a raw score (before sigmoid function transformation and etc). // Note, `predictions` slice should be properly allocated on call side -func (e *LGEnsemble) PredictDense(vals []float64, nrows uint32, ncols uint32, predictions []float64, nTrees int, nThreads int) error { - nRows := int(nrows) +func (e *LGEnsemble) PredictDense(vals []float64, nrows int, ncols int, predictions []float64, nTrees int, nThreads int) error { + nRows := nrows if ncols == 0 || e.MaxFeatureIdx > ncols-1 { return fmt.Errorf("incorrect number of columns") } diff --git a/lgensemble_io.go b/lgensemble_io.go index 473aa84..e7a08b0 100644 --- a/lgensemble_io.go +++ b/lgensemble_io.go @@ -226,7 +226,7 @@ func LGEnsembleFromReader(reader *bufio.Reader) (*LGEnsemble, error) { if err != nil { return nil, err } - e.MaxFeatureIdx = uint32(maxFeatureIdx) + e.MaxFeatureIdx = maxFeatureIdx treeSizesStr, isFound := params["tree_sizes"] if !isFound { diff --git a/lgensemble_test.go b/lgensemble_test.go index 6768b78..93fb462 100644 --- a/lgensemble_test.go +++ b/lgensemble_test.go @@ -168,8 +168,8 @@ func TestLGEnsemble(t *testing.T) { math.NaN(), math.NaN(), } - denseRows := uint32(7) - denseCols := uint32(2) + denseRows := 7 + denseCols := 2 // check predictions predictions := make([]float64, denseRows) diff --git a/mat.go b/mat.go index 1fca488..2ee7d3a 100644 --- a/mat.go +++ b/mat.go @@ -7,15 +7,15 @@ import ( // DenseMat is dense matrix data structure type DenseMat struct { Values []float64 - Cols uint32 - Rows uint32 + Cols int + Rows int } // DenseMatFromArray converts arrays of `values` to DenseMat using shape // information `rows` and `cols` -func DenseMatFromArray(values []float64, rows uint32, cols uint32) (DenseMat, error) { +func DenseMatFromArray(values []float64, rows int, cols int) (DenseMat, error) { mat := DenseMat{} - if uint32(len(values)) != cols*rows { + if len(values) != cols*rows { return mat, fmt.Errorf("wrong dimensions") } mat.Values = append(mat.Values, values...) @@ -26,36 +26,36 @@ func DenseMatFromArray(values []float64, rows uint32, cols uint32) (DenseMat, er // CSRMat is Compressed Sparse Row matrix data structure type CSRMat struct { - RowHeaders []uint32 - ColIndexes []uint32 + RowHeaders []int + ColIndexes []int Values []float64 } // Rows returns number of rows in the matrix -func (m *CSRMat) Rows() uint32 { +func (m *CSRMat) Rows() int { if len(m.RowHeaders) == 0 { return 0 } - return uint32(len(m.RowHeaders)) - 1 + return len(m.RowHeaders) - 1 } // CSRMatFromArray converts arrays of `values` to CSRMat using shape information // `rows` and `cols`. See also DenseMatFromArray to store dense data in matrix -func CSRMatFromArray(values []float64, rows uint32, cols uint32) (CSRMat, error) { +func CSRMatFromArray(values []float64, rows int, cols int) (CSRMat, error) { mat := CSRMat{} - if uint32(len(values)) != cols*rows { + if len(values) != cols*rows { return mat, fmt.Errorf("wrong dimensions") } mat.Values = append(mat.Values, values...) - mat.ColIndexes = make([]uint32, 0, len(values)) - mat.RowHeaders = make([]uint32, 0, rows+1) + mat.ColIndexes = make([]int, 0, len(values)) + mat.RowHeaders = make([]int, 0, rows+1) - for i := uint32(0); i < rows; i++ { - mat.RowHeaders = append(mat.RowHeaders, uint32(len(mat.ColIndexes))) - for j := uint32(0); j < cols; j++ { + for i := 0; i < rows; i++ { + mat.RowHeaders = append(mat.RowHeaders, len(mat.ColIndexes)) + for j := 0; j < cols; j++ { mat.ColIndexes = append(mat.ColIndexes, j) } } - mat.RowHeaders = append(mat.RowHeaders, uint32(len(mat.ColIndexes))) + mat.RowHeaders = append(mat.RowHeaders, len(mat.ColIndexes)) return mat, nil } diff --git a/mat_io.go b/mat_io.go index 9dc901c..35a9be7 100644 --- a/mat_io.go +++ b/mat_io.go @@ -11,9 +11,9 @@ import ( // DenseMatFromLibsvm reads dense matrix from libsvm format from `reader` // stream. If `limit` > 0, reads only first limit `rows`. First colums is label, // and usually you should set `skipFirstColumn` = true -func DenseMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) (DenseMat, error) { +func DenseMatFromLibsvm(reader *bufio.Reader, limit int, skipFirstColumn bool) (DenseMat, error) { mat := DenseMat{} - startIndex := uint32(0) + startIndex := 0 if skipFirstColumn { startIndex = 1 } @@ -31,8 +31,8 @@ func DenseMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool return mat, fmt.Errorf("too few columns") } - var column uint32 - for col := startIndex; col < uint32(len(tokens)); col++ { + var column int + for col := startIndex; col < len(tokens); col++ { if len(tokens[col]) == 0 { break } @@ -41,7 +41,7 @@ func DenseMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool return mat, fmt.Errorf("can't parse %s", tokens[col]) } columnUint64, err := strconv.ParseUint(pair[0], 10, 32) - column = uint32(columnUint64) + column = int(columnUint64) if err != nil { return mat, fmt.Errorf("can't convert to float %s: %s", pair[0], err.Error()) } @@ -71,13 +71,13 @@ func DenseMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool // CSRMatFromLibsvm reads CSR (Compressed Sparse Row) matrix from libsvm format // from `reader` stream. If `limit` > 0, reads only first limit `rows`. First // colums is label, and usually you should set `skipFirstColumn` = true -func CSRMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) (CSRMat, error) { +func CSRMatFromLibsvm(reader *bufio.Reader, limit int, skipFirstColumn bool) (CSRMat, error) { mat := CSRMat{} - startIndex := uint32(0) + startIndex := 0 if skipFirstColumn { startIndex = 1 } - rows := uint32(0) + rows := 0 for { line, err := reader.ReadString('\n') if err != nil && err != io.EOF { @@ -92,9 +92,9 @@ func CSRMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) return mat, fmt.Errorf("too few columns") } - mat.RowHeaders = append(mat.RowHeaders, uint32(len(mat.Values))) - var column uint32 - for col := startIndex; col < uint32(len(tokens)); col++ { + mat.RowHeaders = append(mat.RowHeaders, len(mat.Values)) + var column int + for col := startIndex; col < len(tokens); col++ { if len(tokens[col]) == 0 { break } @@ -103,7 +103,7 @@ func CSRMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) return mat, fmt.Errorf("can't parse %s", tokens[col]) } columnUint64, err := strconv.ParseUint(pair[0], 10, 32) - column = uint32(columnUint64) + column = int(columnUint64) if err != nil { return mat, fmt.Errorf("can't convert to float %s: %s", pair[0], err.Error()) } @@ -120,7 +120,7 @@ func CSRMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) break } } - mat.RowHeaders = append(mat.RowHeaders, uint32(len(mat.Values))) + mat.RowHeaders = append(mat.RowHeaders, len(mat.Values)) return mat, nil } @@ -129,13 +129,13 @@ func CSRMatFromLibsvm(reader *bufio.Reader, limit uint32, skipFirstColumn bool) // is label, and usually you should set `skipFirstColumn` = true. If value is // absent `defValue` will be used instead func DenseMatFromCsv(reader *bufio.Reader, - limit uint32, + limit int, skipFirstColumn bool, delimiter string, defValue float64) (DenseMat, error) { mat := DenseMat{} - startIndex := uint32(0) + startIndex := 0 if skipFirstColumn { startIndex = 1 } @@ -150,8 +150,8 @@ func DenseMatFromCsv(reader *bufio.Reader, } tokens := strings.Split(line, delimiter) - var column uint32 - for col := startIndex; col < uint32(len(tokens)); col++ { + var column int + for col := startIndex; col < len(tokens); col++ { var value float64 if len(tokens[col]) == 0 { value = defValue diff --git a/mat_test.go b/mat_test.go index fbba7e3..6db1c95 100644 --- a/mat_test.go +++ b/mat_test.go @@ -84,12 +84,12 @@ func TestCSRMatFromLibsvm(t *testing.T) { t.Errorf("mat.Values incorrect: %s", err.Error()) } - trueRowHeaders := []uint32{0, 3, 5} + trueRowHeaders := []int{0, 3, 5} if !reflect.DeepEqual(mat.RowHeaders, trueRowHeaders) { t.Error("mat.RowHeaders are incorrect") } - trueColIndexes := []uint32{0, 10, 12, 4, 5} + trueColIndexes := []int{0, 10, 12, 4, 5} if !reflect.DeepEqual(mat.ColIndexes, trueColIndexes) { t.Error("mat.ColIndexes are incorrect") } @@ -107,12 +107,12 @@ func TestCSRMatFromLibsvm(t *testing.T) { t.Errorf("mat.Values incorrect: %s", err.Error()) } - trueRowHeaders = []uint32{0, 3} + trueRowHeaders = []int{0, 3} if !reflect.DeepEqual(mat.RowHeaders, trueRowHeaders) { t.Error("mat.RowHeaders are incorrect") } - trueColIndexes = []uint32{0, 10, 12} + trueColIndexes = []int{0, 10, 12} if !reflect.DeepEqual(mat.ColIndexes, trueColIndexes) { t.Error("mat.ColIndexes are incorrect") } diff --git a/xgensemble.go b/xgensemble.go index 49521c9..ac17550 100644 --- a/xgensemble.go +++ b/xgensemble.go @@ -10,7 +10,7 @@ import ( // XGEnsemble is XGBoost model (ensemble of trees) type XGEnsemble struct { Trees []lgTree - MaxFeatureIdx uint32 + MaxFeatureIdx int } // NTrees returns number of trees in ensemble @@ -24,7 +24,7 @@ func (e *XGEnsemble) NTrees() int { // Note, that result is a raw score (before sigmoid function transformation and etc). // Note, nan feature values treated as missing values func (e *XGEnsemble) Predict(fvals []float64, nTrees int) float64 { - if e.MaxFeatureIdx+1 > uint32(len(fvals)) { + if e.MaxFeatureIdx+1 > len(fvals) { return 0.0 } ret := 0.0 @@ -46,7 +46,7 @@ func (e *XGEnsemble) Predict(fvals []float64, nTrees int) float64 { // threads that will be utilized (maximum is GO_MAX_PROCS) // Note, that result is a raw score (before sigmoid function transformation and etc). // Note, `predictions` slice should be properly allocated on call side -func (e *XGEnsemble) PredictCSR(indptr []uint32, cols []uint32, vals []float64, predictions []float64, nTrees int, nThreads int) { +func (e *XGEnsemble) PredictCSR(indptr []int, cols []int, vals []float64, predictions []float64, nTrees int, nThreads int) { nRows := len(indptr) - 1 if nRows <= BatchSize || nThreads == 0 || nThreads == 1 { fvals := make([]float64, e.MaxFeatureIdx+1) @@ -96,18 +96,18 @@ func (e *XGEnsemble) PredictCSR(indptr []uint32, cols []uint32, vals []float64, wg.Wait() } -func (e *XGEnsemble) predictCSRInner(indptr []uint32, cols []uint32, vals []float64, startIndex int, endIndex int, predictions []float64, nTrees int, fvals []float64) { +func (e *XGEnsemble) predictCSRInner(indptr []int, cols []int, vals []float64, startIndex int, endIndex int, predictions []float64, nTrees int, fvals []float64) { for i := startIndex; i < endIndex; i++ { start := indptr[i] end := indptr[i+1] for j := start; j < end; j++ { - if cols[j] < uint32(len(fvals)) { + if cols[j] < len(fvals) { fvals[cols[j]] = vals[j] } } predictions[i] = e.Predict(fvals, nTrees) for j := start; j < end; j++ { - if cols[j] < uint32(len(fvals)) { + if cols[j] < len(fvals) { fvals[cols[j]] = math.NaN() } } @@ -120,8 +120,8 @@ func (e *XGEnsemble) predictCSRInner(indptr []uint32, cols []uint32, vals []floa // threads that will be utilized (maximum is GO_MAX_PROCS) // Note, that result is a raw score (before sigmoid function transformation and etc). // Note, `predictions` slice should be properly allocated on call side -func (e *XGEnsemble) PredictDense(vals []float64, nrows uint32, ncols uint32, predictions []float64, nTrees int, nThreads int) error { - nRows := int(nrows) +func (e *XGEnsemble) PredictDense(vals []float64, nrows int, ncols int, predictions []float64, nTrees int, nThreads int) error { + nRows := nrows if ncols == 0 || e.MaxFeatureIdx > ncols-1 { return fmt.Errorf("incorrect number of columns") } diff --git a/xgensemble_io.go b/xgensemble_io.go index 18c1e17..0fb913a 100644 --- a/xgensemble_io.go +++ b/xgensemble_io.go @@ -156,7 +156,7 @@ func XGEnsembleFromReader(reader *bufio.Reader) (*XGEnsemble, error) { if header.Param.NumFeatures == 0 { return nil, fmt.Errorf("zero number of features") } - e.MaxFeatureIdx = header.Param.NumFeatures - 1 + e.MaxFeatureIdx = int(header.Param.NumFeatures) - 1 // reading gbtree origModel, err := xgbin.ReadGBTreeModel(reader)