diff --git a/CMakeLists.txt b/CMakeLists.txt index 803e5ac..a3aefda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/utils.cmake) project(pico_tree LANGUAGES CXX - VERSION 0.8.0 + VERSION 0.8.1 DESCRIPTION "PicoTree is a C++ header only library for fast nearest neighbor searches and range searches using a KdTree." HOMEPAGE_URL "https://github.com/Jaybro/pico_tree") diff --git a/README.md b/README.md index 6e02e0d..5137884 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ PicoTree is a C++ header only library with [Python bindings](https://github.com/ | [Scikit-learn KDTree][skkd] 1.2.2 | ... | 6.2s | ... | 42.2s | | [pykdtree][pykd] 1.3.7 | ... | 1.0s | ... | 6.6s | | [OpenCV FLANN][cvfn] 4.6.0 | 1.9s | ... | 4.7s | ... | -| PicoTree KdTree v0.8.0 | 0.9s | 1.0s | 2.8s | 3.1s | +| PicoTree KdTree v0.8.1 | 0.9s | 1.0s | 2.8s | 3.1s | Two [LiDAR](./docs/benchmark.md) based point clouds of sizes 7733372 and 7200863 were used to generate these numbers. The first point cloud was the input to the build algorithm and the second to the query algorithm. All benchmarks were run on a single thread with the following parameters: `max_leaf_size=10` and `knn=1`. A more detailed [C++ comparison](./docs/benchmark.md) of PicoTree is available with respect to [nanoflann][nano]. @@ -61,6 +61,7 @@ PicoTree can interface with different types of points and point sets through tra * Creating a [custom search visitor](./examples/kd_tree/kd_tree_custom_search_visitor.cpp). * [Saving and loading](./examples/kd_tree/kd_tree_save_and_load.cpp) a KdTree to and from a file. * Support for [Eigen](./examples/eigen/eigen.cpp) and [OpenCV](./examples/opencv/opencv.cpp) data types. +* Running the KdTree on the [MNIST](./examples/mnist/mnist.cpp) [database](http://yann.lecun.com/exdb/mnist/). * How to use the [KdTree with Python](./examples/python/kd_tree.py). # Requirements diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 1d475f5..f891860 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -35,6 +35,10 @@ else() message(STATUS "benchmark not found. PicoTree benchmarks skipped.") endif() +if(Eigen3_FOUND) + add_subdirectory(mnist) +endif() + # The Python examples only get copied when the bindings module will be build. if(TARGET _pyco_tree) add_subdirectory(python) diff --git a/examples/benchmark/bm_opencv_flann.cpp b/examples/benchmark/bm_opencv_flann.cpp index 5b7195f..b52a52c 100644 --- a/examples/benchmark/bm_opencv_flann.cpp +++ b/examples/benchmark/bm_opencv_flann.cpp @@ -116,9 +116,10 @@ BENCHMARK_DEFINE_F(BmOpenCvFlann, KnnCt)(benchmark::State& state) { // There is also the option to query them all at once, but this doesn't really // change performance and this version looks more like the other benchmarks. for (auto _ : state) { - std::vector indices(knn_count); + // The only supported index type is int. + std::vector indices(knn_count); std::vector distances(knn_count); - fl::Matrix mat_indices(indices.data(), 1, knn_count); + fl::Matrix mat_indices(indices.data(), 1, knn_count); fl::Matrix mat_distances(distances.data(), 1, knn_count); for (auto& p : points_test_) { diff --git a/examples/kd_tree/kd_tree_custom_search_visitor.cpp b/examples/kd_tree/kd_tree_custom_search_visitor.cpp index 7252967..9649a9c 100644 --- a/examples/kd_tree/kd_tree_custom_search_visitor.cpp +++ b/examples/kd_tree/kd_tree_custom_search_visitor.cpp @@ -3,8 +3,8 @@ #include #include -//! \brief Search visitor that counts how many points were considered as a -//! nearest neighbor. +// Search visitor that counts how many points were considered as a possible +// nearest neighbor. template class SearchNnCounter { public: @@ -12,31 +12,30 @@ class SearchNnCounter { using IndexType = typename Neighbor::IndexType; using ScalarType = typename Neighbor::ScalarType; - //! \brief Creates a visitor for approximate nearest neighbor searching. - //! \param nn Search result. + // Create a visitor for approximate nearest neighbor searching. The argument + // is the search result. inline SearchNnCounter(Neighbor& nn) : count_(0), nn_(nn) { // Initial search distance. nn_.distance = std::numeric_limits::max(); } - //! \brief Visit current point. - //! \details This method is required. The KdTree calls this function when it - //! finds a point that is closer to the query than the result of this - //! visitors' max() function. I.e., it found a new nearest neighbor. - //! \param idx Point index. - //! \param d Point distance (that depends on the metric). + // Visit current point. This method is required. The search algorithm calls + // this function for every point it encounters in the KdTree. The arguments of + // the method are respectively the index and distance of the visited point. inline void operator()(IndexType const idx, ScalarType const dst) { + // Only update the nearest neighbor when the point we visit is actually + // closer to the query point. + if (max() > dst) { + nn_ = {idx, dst}; + } count_++; - nn_ = {idx, dst}; } - //! \brief Maximum search distance with respect to the query point. - //! \details This method is required. + // Maximum search distance with respect to the query point. This method is + // required. The nodes of the KdTree are filtered using this method. inline ScalarType const& max() const { return nn_.distance; } - //! \brief Returns the number of points that were considered the nearest - //! neighbor. - //! \details This method is not required. + // The amount of points visited during a query. inline IndexType const& count() const { return count_; } private: @@ -62,7 +61,7 @@ int main() { SearchNnCounter v(nn); tree.SearchNearest(q, v); - std::cout << "Custom visitor # nns considered: " << v.count() << std::endl; + std::cout << "Number of points visited: " << v.count() << std::endl; return 0; } diff --git a/examples/mnist/CMakeLists.txt b/examples/mnist/CMakeLists.txt new file mode 100644 index 0000000..1c67a14 --- /dev/null +++ b/examples/mnist/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable(mnist mnist.cpp) +set_default_target_properties(mnist) +target_link_libraries(mnist PUBLIC pico_toolshed pico_understory Eigen3::Eigen) diff --git a/examples/mnist/mnist.cpp b/examples/mnist/mnist.cpp new file mode 100644 index 0000000..466f784 --- /dev/null +++ b/examples/mnist/mnist.cpp @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +std::array Cast(std::array const& i) { + std::array c; + std::transform(i.begin(), i.end(), c.begin(), [](T a) -> U { + return static_cast(a); + }); + return c; +} + +template +std::vector> Cast( + std::vector> const& i) { + std::vector> c; + std::transform( + i.begin(), + i.end(), + std::back_inserter(c), + [](std::array const& a) -> std::array { + return Cast(a); + }); + return c; +} + +int main(int argc, char** argv) { + using ImageByte = std::array; + using ImageFloat = std::array; + + std::string fn_images_train = "train-images.idx3-ubyte"; + std::string fn_images_test = "t10k-images.idx3-ubyte"; + std::string fn_mnist_nns_gt = "mnist_nns_gt.bin"; + + if (!std::filesystem::exists(fn_images_train)) { + std::cout << fn_images_train << " doesn't exist." << std::endl; + return 0; + } + + if (!std::filesystem::exists(fn_images_test)) { + std::cout << fn_images_test << " doesn't exist." << std::endl; + return 0; + } + + std::vector images_train; + { + std::vector images_train_u8; + pico_tree::ReadMnistImages(fn_images_train, images_train_u8); + images_train = Cast(images_train_u8); + } + + std::vector images_test; + { + std::vector images_test_u8; + pico_tree::ReadMnistImages(fn_images_test, images_test_u8); + images_test = Cast(images_test_u8); + } + + std::size_t max_leaf_size_ex = 16; + std::size_t max_leaf_size_rp = 128; + // With 16 trees we can get a precision of around 85-90%. + // With 32 trees we can get a precision of around 95-97%. + std::size_t forest_size = 2; + std::size_t count = images_test.size(); + std::vector> nns(count); + + if (!std::filesystem::exists(fn_images_train)) { + auto kd_tree = [&images_train, &max_leaf_size_ex]() { + ScopedTimer t0("kd_tree build"); + return pico_tree::KdTree>>( + images_train, max_leaf_size_ex); + }(); + + { + ScopedTimer t1("kd_tree query"); + for (std::size_t i = 0; i < nns.size(); ++i) { + kd_tree.SearchNn(images_test[i], nns[i]); + } + } + + std::cout << "Writing " << fn_mnist_nns_gt << "." << std::endl; + pico_tree::WriteBin(fn_mnist_nns_gt, nns); + } else { + std::cout << "Reading " << fn_mnist_nns_gt << "." << std::endl; + pico_tree::ReadBin(fn_mnist_nns_gt, nns); + } + + std::size_t equal = 0; + + { + auto rkd_tree = [&images_train, &max_leaf_size_rp, &forest_size]() { + ScopedTimer t0("rkd_tree build"); + return pico_tree::RKdTree< + std::reference_wrapper>>( + images_train, max_leaf_size_rp, forest_size); + }(); + + ScopedTimer t1("rkd_tree query"); + pico_tree::Neighbor nn; + for (std::size_t i = 0; i < nns.size(); ++i) { + rkd_tree.SearchNn(images_test[i], nn); + + if (nns[i].index == nn.index) { + ++equal; + } + } + } + + std::cout << "Precision: " + << (static_cast(equal) / static_cast(count)) + << std::endl; + + return 0; +} diff --git a/examples/pico_understory/CMakeLists.txt b/examples/pico_understory/CMakeLists.txt index 87b1b7c..786cfcc 100644 --- a/examples/pico_understory/CMakeLists.txt +++ b/examples/pico_understory/CMakeLists.txt @@ -3,6 +3,16 @@ target_include_directories(pico_understory INTERFACE ${CMAKE_CURRENT_LIST_DIR}) target_link_libraries(pico_understory INTERFACE PicoTree::PicoTree) target_sources(pico_understory INTERFACE - ${CMAKE_CURRENT_LIST_DIR}/pico_understory/cover_tree.hpp - ${CMAKE_CURRENT_LIST_DIR}/pico_understory/metric.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/cover_tree_base.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/cover_tree_builder.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/cover_tree_data.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/cover_tree_node.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/cover_tree_search.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/rkd_tree_builder.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/rkd_tree_rr_data.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/rkd_tree_search.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/internal/static_buffer.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/cover_tree.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/metric.hpp + ${CMAKE_CURRENT_LIST_DIR}/pico_understory/rkd_tree.hpp ) diff --git a/examples/pico_understory/pico_understory/cover_tree.hpp b/examples/pico_understory/pico_understory/cover_tree.hpp index 3f2cef0..27072d8 100644 --- a/examples/pico_understory/pico_understory/cover_tree.hpp +++ b/examples/pico_understory/pico_understory/cover_tree.hpp @@ -80,21 +80,22 @@ class CoverTree { data_(BuildCoverTreeType()(SpaceWrapperType(space_), metric_, base)) {} //! \brief Searches for the nearest neighbor of point \p x. - //! \details Interpretation of the output distance depends on the Metric. The - //! default distance metric equals L2. template inline void SearchNn(P const& x, NeighborType& nn) const { internal::SearchNn v(nn); SearchNearest(data_.root_node, x, v); } + //! \brief Searches for an approximate nearest neighbor of point \p x. + template + inline void SearchNn(P const& x, ScalarType const e, NeighborType& nn) const { + internal::SearchApproximateNn v(e, nn); + SearchNearest(data_.root_node, x, v); + } + //! \brief Searches for the k nearest neighbors of point \p x, where k equals //! std::distance(begin, end). It is expected that the value type of the //! iterator equals Neighbor. - //! \details Interpretation of the output distances depend on the Metric. The - //! default L2 metric results in Euclidean distances. - //! \tparam P Point type. - //! \tparam RandomAccessIterator Iterator type. template inline void SearchKnn( P const& x, RandomAccessIterator begin, RandomAccessIterator end) const { @@ -102,7 +103,7 @@ class CoverTree { std::is_same_v< typename std::iterator_traits::value_type, NeighborType>, - "SEARCH_ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_INDEX_SCALAR"); + "ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_TYPE"); internal::SearchKnn v(begin, end); SearchNearest(data_.root_node, x, v); @@ -110,9 +111,6 @@ class CoverTree { //! \brief Searches for the \p k nearest neighbors of point \p x and stores //! the results in output vector \p knn. - //! \tparam P Point type. - //! \see template void SearchKnn(P - //! const&, RandomAccessIterator, RandomAccessIterator) const template inline void SearchKnn( P const& x, Size const k, std::vector& knn) const { @@ -122,52 +120,9 @@ class CoverTree { SearchKnn(x, knn.begin(), knn.end()); } - //! \brief Searches for all the neighbors of point \p x that are within radius - //! \p radius and stores the results in output vector \p n. - //! \details Interpretation of the in and output distances depend on the - //! Metric. The default L2 results in squared distances. - //! \tparam P Point type. - //! \param x Input point. - //! \param radius Search radius. - //! \param n Output points. - //! \param sort If true, the result set is sorted from closest to farthest - //! distance with respect to the query point. - template - inline void SearchRadius( - P const& x, - Scalar const radius, - std::vector& n, - bool const sort = false) const { - internal::SearchRadius v(radius, n); - SearchNearest(data_.root_node, x, v); - - if (sort) { - v.Sort(); - } - } - //! \brief Searches for the k approximate nearest neighbors of point \p x, //! where k equals std::distance(begin, end). It is expected that the value //! type of the iterator equals Neighbor. - //! \details This function can result in faster search queries compared to - //! KdTree::SearchKnn by skipping points and tree nodes. This is achieved by - //! scaling down the search distance, possibly not visiting the true nearest - //! neighbor. An approximate nearest neighbor will at most be a factor of - //! distance ratio \p e farther from the query point than the true nearest - //! neighbor: max_ann_distance = true_nn_distance * e. This holds true for - //! each respective nn index i, 0 <= i < k. - //! - //! Interpretation of both the input error ratio and output distances depend - //! on the Metric. The default L2 metric calculates Euclidean distances. - //! - //! Example: - //! \code{.cpp} - //! // A max error of 15%. I.e. max 15% farther away from the true nn. - //! Scalar max_error = Scalar(0.15); - //! Scalar e = Scalar(1.0) + max_error; - //! std::vector> knn(k); - //! tree.SearchKnn(x, e, knn.begin(), knn.end()); - //! \endcode template inline void SearchKnn( P const& x, @@ -178,17 +133,14 @@ class CoverTree { std::is_same_v< typename std::iterator_traits::value_type, NeighborType>, - "SEARCH_ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_INDEX_SCALAR"); + "ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_TYPE"); - internal::SearchAknn v(e, begin, end); + internal::SearchApproximateKnn v(e, begin, end); SearchNearest(data_.root_node, x, v); } //! \brief Searches for the \p k approximate nearest neighbors of point \p x //! and stores the results in output vector \p knn. - //! \tparam P Point type. - //! \see template void - //! SearchKnn(P const&, RandomAccessIterator, RandomAccessIterator) const template inline void SearchKnn( P const& x, @@ -201,6 +153,39 @@ class CoverTree { SearchKnn(x, e, knn.begin(), knn.end()); } + //! \brief Searches for all the neighbors of point \p x that are within radius + //! \p radius and stores the results in output vector \p n. + template + inline void SearchRadius( + P const& x, + Scalar const radius, + std::vector& n, + bool const sort = false) const { + internal::SearchRadius v(radius, n); + SearchNearest(data_.root_node, x, v); + + if (sort) { + v.Sort(); + } + } + + //! \brief Searches for the approximate neighbors of point \p x that are + //! within radius \p radius and stores the results in output vector \p n. + template + inline void SearchRadius( + P const& x, + Scalar const radius, + Scalar const e, + std::vector& n, + bool const sort = false) const { + internal::SearchApproximateRadius v(e, radius, n); + SearchNearest(data_.root_node, x, v); + + if (sort) { + v.Sort(); + } + } + //! \brief Point set used by the tree. inline Space const& points() const { return space_; } diff --git a/examples/pico_understory/pico_understory/internal/eigen3.hpp b/examples/pico_understory/pico_understory/internal/eigen3.hpp new file mode 100644 index 0000000..3b7d1da --- /dev/null +++ b/examples/pico_understory/pico_understory/internal/eigen3.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include "pico_tree/core.hpp" + +namespace pico_tree::internal { + +constexpr int PicoDimToEigenDim(pico_tree::Size dim) { + return dim == pico_tree::kDynamicSize ? Eigen::Dynamic + : static_cast(dim); +} + +template +using VectorDX = Eigen::Matrix; + +} // namespace pico_tree::internal diff --git a/examples/pico_understory/pico_understory/internal/rkd_tree_builder.hpp b/examples/pico_understory/pico_understory/internal/rkd_tree_builder.hpp new file mode 100644 index 0000000..5632c0f --- /dev/null +++ b/examples/pico_understory/pico_understory/internal/rkd_tree_builder.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include + +#include "pico_tree/internal/kd_tree_builder.hpp" +#include "rkd_tree_rr_data.hpp" + +namespace pico_tree::internal { + +template +class BuildRKdTree { + public: + using RKdTreeDataType = RKdTreeRrData; + + template + std::vector operator()( + SpaceWrapper_ space, Size max_leaf_size, Size forest_size) { + assert(space.size() > 0); + assert(max_leaf_size > 0); + assert(forest_size > 0); + + using SpaceWrapperType = typename RKdTreeDataType::SpaceWrapperType; + using BuildKdTreeType = BuildKdTree; + + std::vector trees(forest_size); + for (std::size_t i = 0; i < forest_size; ++i) { + auto r = RKdTreeDataType::RandomRotation(space); + auto s = RKdTreeDataType::RotateSpace(r, space); + auto t = BuildKdTreeType()(SpaceWrapperType(s), max_leaf_size); + trees[i] = {std::move(r), std::move(s), std::move(t)}; + } + return trees; + } +}; + +} // namespace pico_tree::internal diff --git a/examples/pico_understory/pico_understory/internal/rkd_tree_rr_data.hpp b/examples/pico_understory/pico_understory/internal/rkd_tree_rr_data.hpp new file mode 100644 index 0000000..709337c --- /dev/null +++ b/examples/pico_understory/pico_understory/internal/rkd_tree_rr_data.hpp @@ -0,0 +1,93 @@ +#pragma once + +#include + +#include "eigen3.hpp" +#include "pico_tree/eigen3_traits.hpp" +#include "pico_tree/internal/kd_tree_data.hpp" +#include "pico_tree/internal/space_wrapper.hpp" + +namespace pico_tree::internal { + +//! \brief Sample from the uniform distribution on the Stiefel manifold (the set +//! of all orthonormal k-frames in R^n). +//! \details Sample = Z(Z^TZ)^(-1/2). Z has elements drawn from N(0,1). +//! The matrix may be improper. +template +inline Eigen::Matrix +RandomOrthogonalMatrix(pico_tree::Size dim) { + // Working with floats makes all but JacobiSVD fail. Results for all are not + // that accurate as well ((m * m.transpose()).diagonal().sum()). + // * JacobiSVD<,Eigen::NoQRPreconditioner> never failed but slow. + // * BDCSVD / bdcSvd may lose a rank or 2. + // * SelfAdjointEigenSolver may lose 1 rank. + // Solution: don't use float. + // Reproduce: Matrix S = d.matrixU() * + // d.singularValues().cwiseInverse().cwiseSqrt().asDiagonal() * + // d.matrixU().transpose(); + // // Eigen::DecompositionOptions::ComputeThinU + using T = double; + using Matrix = Eigen::Matrix; + + std::random_device rd; + std::mt19937 e(rd()); + std::normal_distribution gaussian(T(0), T(1)); + + Matrix X = Matrix::Zero(dim, dim).unaryExpr( + [&gaussian, &e](T dummy) { return gaussian(e); }); + + Matrix XtX = X.transpose() * X; + Eigen::SelfAdjointEigenSolver d(XtX); + Matrix S = d.operatorInverseSqrt(); + // Returns the frames per row. Doesn't matter. + if constexpr (std::is_same_v) { + return X * S; + } else { + return (X * S).cast(); + } +} + +template +class RKdTreeRrData { + template + using VectorType = VectorDX; + + public: + using ScalarType = typename Node_::ScalarType; + static Size constexpr Dim = Dim_; + using NodeType = Node_; + using RotationType = + Eigen::Matrix; + using SpaceType = + Eigen::Matrix; + using SpaceWrapperType = SpaceWrapper; + + template + static inline auto RandomRotation(SpaceWrapper_ space) { + return RandomOrthogonalMatrix(space.sdim()); + } + + template + static inline SpaceType RotateSpace( + RotationType const& rotation, SpaceWrapper_ space) { + SpaceType s(static_cast(space.sdim()), static_cast(space.size())); + for (std::size_t i = 0; i < space.size(); ++i) { + Eigen::Map const> p( + space[i], static_cast(space.sdim())); + s.col(i) = rotation * p; + } + return s; + } + + template + VectorType RotatePoint(PointWrapper_ w) const { + Eigen::Map const> p(w.begin(), space.rows()); + return rotation * p; + } + + RotationType rotation; + SpaceType space; + KdTreeData tree; +}; + +} // namespace pico_tree::internal diff --git a/examples/pico_understory/pico_understory/internal/rkd_tree_search.hpp b/examples/pico_understory/pico_understory/internal/rkd_tree_search.hpp new file mode 100644 index 0000000..5307110 --- /dev/null +++ b/examples/pico_understory/pico_understory/internal/rkd_tree_search.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include + +#include "pico_tree/internal/kd_tree_node.hpp" +#include "pico_tree/internal/point.hpp" +#include "pico_tree/metric.hpp" + +namespace pico_tree::internal { + +//! \brief This class provides a search nearest function for Euclidean spaces. +template < + typename SpaceWrapper_, + typename Metric_, + typename PointWrapper_, + typename Visitor_, + typename Index_> +class SearchNearestEuclideanDefeatist { + public: + using IndexType = Index_; + using ScalarType = typename SpaceWrapper_::ScalarType; + //! \brief Node type supported by this SearchNearestEuclideanDefeatist. + using NodeType = KdTreeNodeEuclidean; + + inline SearchNearestEuclideanDefeatist( + SpaceWrapper_ space, + Metric_ metric, + std::vector const& indices, + PointWrapper_ query, + Visitor_& visitor) + : space_(space), + metric_(metric), + indices_(indices), + query_(query), + visitor_(visitor) {} + + //! \brief Search nearest neighbors starting from \p node. + inline void operator()(NodeType const* const node) { SearchNearest(node); } + + private: + inline void SearchNearest(NodeType const* const node) { + if (node->IsLeaf()) { + for (IndexType i = node->data.leaf.begin_idx; i < node->data.leaf.end_idx; + ++i) { + ScalarType const d = + metric_(query_.begin(), query_.end(), space_[indices_[i]]); + if (visitor_.max() > d) { + visitor_(indices_[i], d); + } + } + } else { + ScalarType const v = query_[node->data.branch.split_dim]; + NodeType const* node_1st; + + // On equals we would possibly need to go left as well if we wanted exact + // nearest neighbors. However, this search algorithm is solely used for + // approximate nearest neighbor searches. So we'll consider it bad luck. + // If left_max - v > 0, this means that the query is inside the left node, + // if right_min - v < 0 it's inside the right one. For the area in between + // we just pick the closest one by summing them. + if ((node->data.branch.left_max + node->data.branch.right_min - v - v) > + 0) { + node_1st = node->left; + } else { + node_1st = node->right; + } + + SearchNearest(node_1st); + } + } + + SpaceWrapper_ space_; + Metric_ metric_; + std::vector const& indices_; + PointWrapper_ query_; + Visitor_& visitor_; +}; + +} // namespace pico_tree::internal diff --git a/examples/pico_understory/pico_understory/rkd_tree.hpp b/examples/pico_understory/pico_understory/rkd_tree.hpp new file mode 100644 index 0000000..d18225f --- /dev/null +++ b/examples/pico_understory/pico_understory/rkd_tree.hpp @@ -0,0 +1,115 @@ +#pragma once + +#include +#include +#include + +#include "pico_understory/internal/rkd_tree_builder.hpp" +#include "pico_understory/internal/rkd_tree_search.hpp" + +namespace pico_tree { + +template < + typename Space_, + typename Metric_ = L2Squared, + SplittingRule SplittingRule_ = SplittingRule::kSlidingMidpoint, + typename Index_ = int> +class RKdTree { + using SpaceWrapperType = internal::SpaceWrapper; + //! \brief Node type based on Metric_::SpaceTag. + using NodeType = + typename internal::KdTreeSpaceTagTraits:: + template NodeType; + using BuildRKdTreeType = + internal::BuildRKdTree; + using RKdTreeDataType = typename BuildRKdTreeType::RKdTreeDataType; + + public: + //! \brief Size type. + using SizeType = Size; + //! \brief Index type. + using IndexType = Index_; + //! \brief Scalar type. + using ScalarType = typename SpaceWrapperType::ScalarType; + //! \brief KdTree dimension. It equals pico_tree::kDynamicSize in case Dim is + //! only known at run-time. + static SizeType constexpr Dim = SpaceWrapperType::Dim; + //! \brief Point set or adaptor type. + using SpaceType = Space_; + //! \brief The metric used for various searches. + using MetricType = Metric_; + //! \brief Neighbor type of various search resuls. + using NeighborType = Neighbor; + + RKdTree(SpaceType space, SizeType max_leaf_size, SizeType forest_size) + : space_(std::move(space)), + metric_(), + data_(BuildRKdTreeType()( + SpaceWrapperType(space_), max_leaf_size, forest_size)) {} + + //! \brief The RKdTree cannot be copied. + //! \details The RKdTree uses pointers to nodes and copying pointers is not + //! the same as creating a deep copy. + RKdTree(RKdTree const&) = delete; + + //! \brief Move constructor of the RKdTree. + RKdTree(RKdTree&&) = default; + + //! \brief RKdTree copy assignment. + RKdTree& operator=(RKdTree const& other) = delete; + + //! \brief RKdTree move assignment. + RKdTree& operator=(RKdTree&& other) = default; + + //! \brief Returns the nearest neighbor (or neighbors) of point \p x depending + //! on their selection by visitor \p visitor . + template + inline void SearchNearest(P const& x, V& visitor) const { + internal::PointWrapper

p(x); + SearchNearest(p, visitor, typename Metric_::SpaceTag()); + } + + //! \brief Searches for the nearest neighbor of point \p x. + //! \details Interpretation of the output distance depends on the Metric. The + //! default L2Squared results in a squared distance. + template + inline void SearchNn(P const& x, NeighborType& nn) const { + internal::SearchNn v(nn); + SearchNearest(x, v); + } + + private: + //! \brief Returns the nearest neighbor (or neighbors) of point \p x depending + //! on their selection by visitor \p visitor for node \p node. + template + inline void SearchNearest( + PointWrapper_ point, Visitor_& visitor, EuclideanSpaceTag) const { + // Range based for loop (rightfully) results in a warning that shouldn't be + // needed if the user creates the forest with at least a single tree. + for (std::size_t i = 0; i < data_.size(); ++i) { + auto p = data_[i].RotatePoint(point); + using PointWrapperType = internal::PointWrapper; + PointWrapperType w(p); + internal::SearchNearestEuclideanDefeatist< + typename RKdTreeDataType::SpaceWrapperType, + Metric_, + PointWrapperType, + Visitor_, + IndexType>( + typename RKdTreeDataType::SpaceWrapperType(data_[i].space), + metric_, + data_[i].tree.indices, + w, + visitor)(data_[i].tree.root_node); + } + } + + //! \brief Point set used for querying point data. + SpaceType space_; + //! \brief Metric used for comparing distances. + MetricType metric_; + //! \brief Data structure of the KdTree. + std::vector data_; +}; + +} // namespace pico_tree diff --git a/setup.py b/setup.py index 6fec325..3a3b215 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup(name='pico_tree', # The same as the CMake project version. - version='0.8.0', + version='0.8.1', description='PicoTree Python Bindings', author='Jonathan Broere', url='https://github.com/Jaybro/pico_tree', diff --git a/src/pico_tree/pico_tree/eigen3_traits.hpp b/src/pico_tree/pico_tree/eigen3_traits.hpp index d24a04e..ccae17f 100644 --- a/src/pico_tree/pico_tree/eigen3_traits.hpp +++ b/src/pico_tree/pico_tree/eigen3_traits.hpp @@ -31,7 +31,7 @@ template inline constexpr bool is_matrix_base_v = is_matrix_base::value; template -constexpr Eigen::Index EigenVectorDim() { +constexpr int EigenVectorDim() { static_assert( (!Derived::IsRowMajor && Derived::ColsAtCompileTime == 1) || (Derived::IsRowMajor && Derived::RowsAtCompileTime == 1), @@ -40,7 +40,7 @@ constexpr Eigen::Index EigenVectorDim() { : Derived::RowsAtCompileTime; } -constexpr Size EigenDimToPicoDim(Eigen::Index dim) { +constexpr Size EigenDimToPicoDim(int dim) { return dim == Eigen::Dynamic ? kDynamicSize : static_cast(dim); } @@ -184,6 +184,27 @@ struct SpaceTraits> {}; +//! \brief EigenTraits provides an interface for Eigen::Map +//! const>. +template < + typename Scalar_, + int Rows_, + int Cols_, + int Options_, + int MaxRows_, + int MaxCols_, + int MapOptions_, + typename StrideType_> +struct SpaceTraits const, + MapOptions_, + StrideType_>> + : public internal::EigenTraitsImpl const, + MapOptions_, + StrideType_>> {}; + //! \brief PointTraits provides an interface for Eigen::Matrix<>. template < typename Scalar_, @@ -217,6 +238,27 @@ struct PointTraits> {}; +//! \brief PointTraits provides an interface for Eigen::Map +//! const>. +template < + typename Scalar_, + int Rows_, + int Cols_, + int Options_, + int MaxRows_, + int MaxCols_, + int MapOptions_, + typename StrideType_> +struct PointTraits const, + MapOptions_, + StrideType_>> + : public internal::EigenPointTraits const, + MapOptions_, + StrideType_>> {}; + //! \brief PointTraits provides an interface for Eigen::Block<>. template struct PointTraits> diff --git a/src/pico_tree/pico_tree/internal/kd_tree_builder.hpp b/src/pico_tree/pico_tree/internal/kd_tree_builder.hpp index eaa5e09..1890866 100644 --- a/src/pico_tree/pico_tree/internal/kd_tree_builder.hpp +++ b/src/pico_tree/pico_tree/internal/kd_tree_builder.hpp @@ -290,7 +290,7 @@ class BuildKdTreeImpl { node->left = SplitIndices(depth + 1, begin, split, box); node->right = SplitIndices(depth + 1, split, end, right); - SetBranch(box, right, split_dim, *node); + node->SetBranch(box, right, split_dim); // Merges both child boxes. We can expect any of the min max values to // change except for the ones of split_dim. @@ -311,28 +311,6 @@ class BuildKdTreeImpl { } } - inline void SetBranch( - BoxType const& left, - BoxType const& right, - SizeType const split_dim, - KdTreeNodeEuclidean& node) const { - node.data.branch.split_dim = static_cast(split_dim); - node.data.branch.left_max = left.max(split_dim); - node.data.branch.right_min = right.min(split_dim); - } - - inline void SetBranch( - BoxType const& left, - BoxType const& right, - SizeType const split_dim, - KdTreeNodeTopological& node) const { - node.data.branch.split_dim = static_cast(split_dim); - node.data.branch.left_min = left.min(split_dim); - node.data.branch.left_max = left.max(split_dim); - node.data.branch.right_min = right.min(split_dim); - node.data.branch.right_max = right.max(split_dim); - } - SpaceType const& space_; typename std::vector::difference_type const max_leaf_size_; SplitterType splitter_; @@ -360,38 +338,34 @@ struct KdTreeSpaceTagTraits { using NodeType = KdTreeNodeTopological; }; -template < - typename SpaceWrapper_, - typename Metric_, - SplittingRule SplittingRule_, - typename Index_> +template class BuildKdTree { - using IndexType = Index_; - using ScalarType = typename SpaceWrapper_::ScalarType; - static Size constexpr Dim = SpaceWrapper_::Dim; - //! \brief Node type based on Metric_::SpaceTag. - using NodeType = typename KdTreeSpaceTagTraits< - typename Metric_::SpaceTag>::template NodeType; + using IndexType = typename Node_::IndexType; + using ScalarType = typename Node_::ScalarType; public: - using KdTreeDataType = KdTreeData; + using KdTreeDataType = KdTreeData; //! \brief Construct a KdTree given \p points , \p max_leaf_size and //! SplitterType. + template KdTreeDataType operator()(SpaceWrapper_ space, Size max_leaf_size) { + static_assert( + std::is_same_v); + static_assert(Dim_ == SpaceWrapper_::Dim); assert(space.size() > 0); assert(max_leaf_size > 0); using BuildKdTreeImplType = BuildKdTreeImpl; using NodeAllocatorType = typename KdTreeDataType::NodeAllocatorType; - using BoxType = Box; + using BoxType = Box; std::vector indices(space.size()); std::iota(indices.begin(), indices.end(), 0); BoxType root_box = space.ComputeBoundingBox(); NodeAllocatorType allocator; - NodeType* root_node = + Node_* root_node = BuildKdTreeImplType{space, max_leaf_size, indices, allocator}(root_box); return KdTreeDataType{ diff --git a/src/pico_tree/pico_tree/internal/kd_tree_node.hpp b/src/pico_tree/pico_tree/internal/kd_tree_node.hpp index 76e41a4..bdfdb36 100644 --- a/src/pico_tree/pico_tree/internal/kd_tree_node.hpp +++ b/src/pico_tree/pico_tree/internal/kd_tree_node.hpp @@ -71,6 +71,14 @@ struct KdTreeNodeEuclidean using IndexType = Index_; using ScalarType = Scalar_; + template + inline void SetBranch( + Box_ const& left_box, Box_ const& right_box, Size const split_dim) { + data.branch.split_dim = static_cast(split_dim); + data.branch.left_max = left_box.max(split_dim); + data.branch.right_min = right_box.min(split_dim); + } + //! \brief Node data as a union of a leaf and branch. KdTreeNodeData, KdTreeBranchSplit> data; }; @@ -82,6 +90,16 @@ struct KdTreeNodeTopological using IndexType = Index_; using ScalarType = Scalar_; + template + inline void SetBranch( + Box_ const& left_box, Box_ const& right_box, Size const split_dim) { + data.branch.split_dim = static_cast(split_dim); + data.branch.left_min = left_box.min(split_dim); + data.branch.left_max = left_box.max(split_dim); + data.branch.right_min = right_box.min(split_dim); + data.branch.right_max = right_box.max(split_dim); + } + //! \brief Node data as a union of a leaf and branch. KdTreeNodeData, KdTreeBranchRange> data; }; diff --git a/src/pico_tree/pico_tree/internal/kd_tree_search.hpp b/src/pico_tree/pico_tree/internal/kd_tree_search.hpp index 4f7d5e0..7b5b4d4 100644 --- a/src/pico_tree/pico_tree/internal/kd_tree_search.hpp +++ b/src/pico_tree/pico_tree/internal/kd_tree_search.hpp @@ -49,11 +49,9 @@ class SearchNearestEuclidean { if (node->IsLeaf()) { for (IndexType i = node->data.leaf.begin_idx; i < node->data.leaf.end_idx; ++i) { - ScalarType const d = - metric_(query_.begin(), query_.end(), space_[indices_[i]]); - if (visitor_.max() > d) { - visitor_(indices_[i], d); - } + visitor_( + indices_[i], + metric_(query_.begin(), query_.end(), space_[indices_[i]])); } } else { // Go left or right and then check if we should still go down the other @@ -156,11 +154,9 @@ class SearchNearestTopological { if (node->IsLeaf()) { for (IndexType i = node->data.leaf.begin_idx; i < node->data.leaf.end_idx; ++i) { - ScalarType const d = - metric_(query_.begin(), query_.end(), space_[indices_[i]]); - if (visitor_.max() > d) { - visitor_(indices_[i], d); - } + visitor_( + indices_[i], + metric_(query_.begin(), query_.end(), space_[indices_[i]])); } } else { // Go left or right and then check if we should still go down the other diff --git a/src/pico_tree/pico_tree/internal/search_visitor.hpp b/src/pico_tree/pico_tree/internal/search_visitor.hpp index 0d91d39..cc33373 100644 --- a/src/pico_tree/pico_tree/internal/search_visitor.hpp +++ b/src/pico_tree/pico_tree/internal/search_visitor.hpp @@ -52,7 +52,9 @@ class SearchNn { //! \brief Visit current point. inline void operator()(IndexType const idx, ScalarType const dst) const { - nn_ = {idx, dst}; + if (max() > dst) { + nn_ = {idx, dst}; + } } //! \brief Maximum search distance with respect to the query point. @@ -85,7 +87,7 @@ class SearchKnn { std::random_access_iterator_tag, typename std::iterator_traits< RandomAccessIterator_>::iterator_category>, - "SEARCH_KNN_EXPECTED_RANDOM_ACCESS_ITERATOR"); + "EXPECTED_RANDOM_ACCESS_ITERATOR"); using NeighborType = typename std::iterator_traits::value_type; @@ -102,11 +104,13 @@ class SearchKnn { //! \brief Visit current point. inline void operator()(IndexType const idx, ScalarType const dst) { - if (active_end_ < end_) { - ++active_end_; - } + if (max() > dst) { + if (active_end_ < end_) { + ++active_end_; + } - InsertSorted(begin_, active_end_, NeighborType{idx, dst}); + InsertSorted(begin_, active_end_, NeighborType{idx, dst}); + } } //! \brief Maximum search distance with respect to the query point. @@ -134,7 +138,9 @@ class SearchRadius { //! \brief Visit current point. inline void operator()(IndexType const idx, ScalarType const dst) const { - n_.push_back({idx, dst}); + if (max() > dst) { + n_.push_back({idx, dst}); + } } //! \brief Sort the neighbors by distance from the query point. Can be used @@ -149,28 +155,53 @@ class SearchRadius { std::vector& n_; }; -//! \brief Search visitor for finding approximate nearest neighbors. -//! \details Points and tree nodes are skipped by scaling down the search -//! distance, possibly not visiting the true nearest neighbor. An approximate -//! nearest neighbor will at most be a factor of distance ratio \p e farther -//! from the query point than the true nearest neighbor: max_ann_distance = +//! \brief Search visitor for finding an approximate nearest neighbor. +//! \details Tree nodes are skipped by scaling down the search distance, +//! possibly not visiting the true nearest neighbor. An approximate nearest +//! neighbor will at most be a factor of distance ratio e farther from the +//! query point than the true nearest neighbor: max_ann_distance = //! true_nn_distance * e. -//! -//! There are different possible implementations to get an approximate nearest -//! neighbor but this one is (probably) the cheapest by skipping both points -//! inside leafs and complete tree nodes. Even though all points are checked -//! inside a leaf, not all of them are visited. This saves on scaling and heap -//! updates. +template +class SearchApproximateNn { + public: + using NeighborType = Neighbor_; + using IndexType = typename Neighbor_::IndexType; + using ScalarType = typename Neighbor_::ScalarType; + + //! \private + inline SearchApproximateNn(ScalarType const e, NeighborType& nn) + : e_inv_{ScalarType(1.0) / e}, nn_{nn} { + nn_.distance = std::numeric_limits::max(); + } + + //! \brief Visit current point. + inline void operator()(IndexType const idx, ScalarType const dst) const { + ScalarType sdst = dst * e_inv_; + if (max() > sdst) { + nn_ = {idx, sdst}; + } + } + + //! \brief Maximum search distance with respect to the query point. + inline ScalarType max() const { return nn_.distance; } + + private: + ScalarType e_inv_; + NeighborType& nn_; +}; + +//! \brief Search visitor for finding approximate nearest neighbors. +//! \see SearchApproximateNn //! \see SearchKnn template -class SearchAknn { +class SearchApproximateKnn { public: static_assert( std::is_base_of_v< std::random_access_iterator_tag, typename std::iterator_traits< RandomAccessIterator_>::iterator_category>, - "SEARCH_AKNN_EXPECTED_RANDOM_ACCESS_ITERATOR"); + "EXPECTED_RANDOM_ACCESS_ITERATOR"); using NeighborType = typename std::iterator_traits::value_type; @@ -178,11 +209,14 @@ class SearchAknn { using ScalarType = typename NeighborType::ScalarType; //! \private - inline SearchAknn( + inline SearchApproximateKnn( ScalarType const e, RandomAccessIterator_ begin, RandomAccessIterator_ end) - : re_{ScalarType(1.0) / e}, begin_{begin}, end_{end}, active_end_{begin} { + : e_inv_{ScalarType(1.0) / e}, + begin_{begin}, + end_{end}, + active_end_{begin} { // Initial search distance that gets updated once k neighbors have been // found. std::prev(end_)->distance = std::numeric_limits::max(); @@ -190,23 +224,65 @@ class SearchAknn { //! \brief Visit current point. inline void operator()(IndexType const idx, ScalarType const dst) { - if (active_end_ < end_) { - ++active_end_; - } + ScalarType sdst = dst * e_inv_; + if (max() > sdst) { + if (active_end_ < end_) { + ++active_end_; + } - // Replace the current maximum for which the distance is scaled to be: - // d = d / e. - InsertSorted(begin_, active_end_, NeighborType{idx, dst * re_}); + // Replace the current maximum for which the distance is scaled to be: + // d = d / e. + InsertSorted(begin_, active_end_, NeighborType{idx, sdst}); + } } //! \brief Maximum search distance with respect to the query point. inline ScalarType max() const { return std::prev(end_)->distance; } private: - ScalarType re_; + ScalarType e_inv_; RandomAccessIterator_ begin_; RandomAccessIterator_ end_; RandomAccessIterator_ active_end_; }; +//! \brief KdTree search visitor for finding the approximate neighbors within a +//! radius. +//! \see SearchApproximateNn +//! \see SearchRadius +template +class SearchApproximateRadius { + public: + using NeighborType = Neighbor_; + using IndexType = typename Neighbor_::IndexType; + using ScalarType = typename Neighbor_::ScalarType; + + //! \private + inline SearchApproximateRadius( + ScalarType const e, ScalarType const radius, std::vector& n) + : e_inv_{ScalarType(1.0) / e}, radius_{radius * e_inv_}, n_{n} { + n_.clear(); + } + + //! \brief Visit current point. + inline void operator()(IndexType const idx, ScalarType const dst) const { + ScalarType sdst = dst * e_inv_; + if (max() > sdst) { + n_.push_back({idx, sdst}); + } + } + + //! \brief Sort the neighbors by distance from the query point. Can be used + //! after the search has ended. + inline void Sort() const { std::sort(n_.begin(), n_.end()); } + + //! \brief Maximum search distance with respect to the query point. + inline ScalarType max() const { return radius_; } + + private: + ScalarType e_inv_; + ScalarType radius_; + std::vector& n_; +}; + } // namespace pico_tree::internal diff --git a/src/pico_tree/pico_tree/kd_tree.hpp b/src/pico_tree/pico_tree/kd_tree.hpp index b9c1646..2bb5cad 100644 --- a/src/pico_tree/pico_tree/kd_tree.hpp +++ b/src/pico_tree/pico_tree/kd_tree.hpp @@ -22,10 +22,13 @@ template < typename Index_ = int> class KdTree { using SpaceWrapperType = internal::SpaceWrapper; + //! \brief Node type based on Metric_::SpaceTag. + using NodeType = + typename internal::KdTreeSpaceTagTraits:: + template NodeType; using BuildKdTreeType = - internal::BuildKdTree; + internal::BuildKdTree; using KdTreeDataType = typename BuildKdTreeType::KdTreeDataType; - using NodeType = typename KdTreeDataType::NodeType; public: //! \brief Size type. @@ -78,10 +81,6 @@ class KdTree { //! \brief Returns the nearest neighbor (or neighbors) of point \p x depending //! on their selection by visitor \p visitor . - //! \see internal::SearchNn - //! \see internal::SearchKnn - //! \see internal::SearchRadius - //! \see internal::SearchAknn template inline void SearchNearest(P const& x, V& visitor) const { internal::PointWrapper

p(x); @@ -97,6 +96,35 @@ class KdTree { SearchNearest(x, v); } + //! \brief Searches for the approximate nearest neighbor of point \p x. + //! \details Nodes in the tree are skipped by scaling down the search + //! distance and as a result the true nearest neighbor may not be found. An + //! approximate nearest neighbor will at most be a factor of distance ratio \p + //! e farther from the query point than the true nearest neighbor: + //! max_ann_distance = true_nn_distance * e. + //! + //! Interpretation of both the input error ratio and output distances + //! depend on the Metric. The default L2Squared calculates squared + //! distances. Using this metric, the input error ratio should be the + //! squared error ratio and the output distances will be squared distances + //! scaled by the inverse error ratio. + //! + //! Example: + //! \code{.cpp} + //! // A max error of 15%. I.e. max 15% farther away from the true nn. + //! ScalarType max_error = ScalarType(0.15); + //! ScalarType e = tree.metric()(ScalarType(1.0) + max_error); + //! Neighbor nn; + //! tree.SearchNn(p, e, nn); + //! // Optionally scale back to the actual metric distance. + //! nn.second *= e; + //! \endcode + template + inline void SearchNn(P const& x, ScalarType const e, NeighborType& nn) const { + internal::SearchApproximateNn v(e, nn); + SearchNearest(x, v); + } + //! \brief Searches for the k nearest neighbors of point \p x, where k equals //! std::distance(begin, end). It is expected that the value type of the //! iterator equals Neighbor. @@ -111,7 +139,7 @@ class KdTree { std::is_same_v< typename std::iterator_traits::value_type, NeighborType>, - "SEARCH_ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_INDEX_SCALAR"); + "ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_TYPE"); internal::SearchKnn v(begin, end); SearchNearest(x, v); @@ -134,36 +162,10 @@ class KdTree { //! \brief Searches for the k approximate nearest neighbors of point \p x, //! where k equals std::distance(begin, end). It is expected that the value //! type of the iterator equals Neighbor. - //! \details This function can result in faster search queries compared to - //! KdTree::SearchKnn by skipping points and tree nodes. This is achieved by - //! scaling down the search distance, possibly not visiting the true nearest - //! neighbor. An approximate nearest neighbor will at most be a factor of - //! distance ratio \p e farther from the query point than the true nearest - //! neighbor: max_ann_distance = true_nn_distance * e. This holds true for - //! each respective nn index i, 0 <= i < k. - //! - //! The number of requested neighbors, k, should be sufficiently large to get - //! a noticeable speed increase from this method. Within a leaf all points are - //! compared to the query anyway, even if they are skipped. These calculations - //! can be avoided by skipping leafs completely, which will never happen if - //! all requested neighbors reside within a single one. - //! - //! Interpretation of both the input error ratio and output distances - //! depend on the Metric. The default L2Squared calculates squared - //! distances. Using this metric, the input error ratio should be the squared - //! error ratio and the output distances will be squared distances scaled by - //! the inverse error ratio. - //! - //! Example: - //! \code{.cpp} - //! // A max error of 15%. I.e. max 15% farther away from the true nn. - //! ScalarType max_error = ScalarType(0.15); - //! ScalarType e = tree.metric()(ScalarType(1.0) + max_error); - //! std::vector> knn(k); - //! tree.SearchKnn(p, e, knn.begin(), knn.end()); - //! // Optionally scale back to the actual metric distance. - //! for (auto& nn : knn) { nn.second *= e; } - //! \endcode + //! \see template void + //! SearchKnn(P const&, RandomAccessIterator, RandomAccessIterator) const + //! \see template void SearchNn(P + //! const&, ScalarType, NeighborType&) const template inline void SearchKnn( P const& x, @@ -174,9 +176,9 @@ class KdTree { std::is_same_v< typename std::iterator_traits::value_type, NeighborType>, - "SEARCH_ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_INDEX_SCALAR"); + "ITERATOR_VALUE_TYPE_DOES_NOT_EQUAL_NEIGHBOR_TYPE"); - internal::SearchAknn v(e, begin, end); + internal::SearchApproximateKnn v(e, begin, end); SearchNearest(x, v); } @@ -185,6 +187,8 @@ class KdTree { //! \tparam P Point type. //! \see template void //! SearchKnn(P const&, RandomAccessIterator, RandomAccessIterator) const + //! \see template void SearchNn(P + //! const&, ScalarType, NeighborType&) const template inline void SearchKnn( P const& x, @@ -228,6 +232,27 @@ class KdTree { } } + //! \brief Searches for all approximate neighbors of point \p x that are + //! within radius \p radius and stores the results in output vector \p n. + //! \see template void + //! SearchRadius(P const&, ScalarType, std::vector&, bool) const + //! \see template void SearchNn(P + //! const&, ScalarType, NeighborType&) const + template + inline void SearchRadius( + P const& x, + ScalarType const radius, + ScalarType const e, + std::vector& n, + bool const sort = false) const { + internal::SearchApproximateRadius v(e, radius, n); + SearchNearest(x, v); + + if (sort) { + v.Sort(); + } + } + //! \brief Returns all points within the box defined by \p min and \p max. //! Query time is bounded by O(n^(1-1/Dim)+k). //! \tparam P Point type. diff --git a/src/pyco_tree/pico_tree/_pyco_tree/def_kd_tree.cpp b/src/pyco_tree/pico_tree/_pyco_tree/def_kd_tree.cpp index 31e54e5..aeb16a5 100644 --- a/src/pyco_tree/pico_tree/_pyco_tree/def_kd_tree.cpp +++ b/src/pyco_tree/pico_tree/_pyco_tree/def_kd_tree.cpp @@ -197,6 +197,38 @@ and store the result in the specified output. py::arg("sort").none(false) = false, R"ptdoc( Search for all neighbors within a radius of each of the input points. +)ptdoc") + .def( + "search_radius", + static_cast const, + Scalar const, + Scalar const, + Neighborhoods&, + bool const) const>(&KdTree::SearchRadius), + py::arg("pts").noconvert().none(false), + py::arg("radius").none(false), + py::arg("e").none(false), + py::arg("nns").noconvert().none(false), + py::arg("sort").none(false) = false, + R"ptdoc( +Search for the approximate neighbors within a radius of each of the +input points and store the result in the specified output. +)ptdoc") + .def( + "search_radius", + static_cast const, + Scalar const, + Scalar const, + bool const) const>(&KdTree::SearchRadius), + py::arg("pts").noconvert().none(false), + py::arg("radius").none(false), + py::arg("e").none(false), + py::arg("sort").none(false) = false, + R"ptdoc( +Search for the approximate neighbors within a radius of each of the +input points. )ptdoc") .def( "search_box", diff --git a/src/pyco_tree/pico_tree/_pyco_tree/kd_tree.hpp b/src/pyco_tree/pico_tree/_pyco_tree/kd_tree.hpp index b9b3764..8a0323a 100644 --- a/src/pyco_tree/pico_tree/_pyco_tree/kd_tree.hpp +++ b/src/pyco_tree/pico_tree/_pyco_tree/kd_tree.hpp @@ -50,7 +50,8 @@ class KdTree : public pico_tree::KdTree { #pragma omp parallel for schedule(dynamic, kChunkSize) for (SSize i = 0; i < static_cast(query.size()); ++i) { - Base::SearchKnn(query[i], output + i * k, output + (i + 1) * k); + auto index = static_cast(i * k); + Base::SearchKnn(query[i], output + index, output + index + k); } } @@ -72,7 +73,8 @@ class KdTree : public pico_tree::KdTree { #pragma omp parallel for schedule(dynamic, kChunkSize) for (SSize i = 0; i < static_cast(query.size()); ++i) { - Base::SearchKnn(query[i], e, output + i * k, output + (i + 1) * k); + auto index = static_cast(i * k); + Base::SearchKnn(query[i], e, output + index, output + index + k); } } @@ -111,6 +113,34 @@ class KdTree : public pico_tree::KdTree { return nns; } + void SearchRadius( + py::array_t const pts, + ScalarType const radius, + ScalarType const e, + DArray& nns, + bool const sort) const { + auto query = MakeMap(pts); + + auto& nns_data = nns.data(); + nns_data.resize(query.size()); + +#pragma omp parallel for schedule(dynamic, kChunkSize) + // TODO Reduce the vector resize overhead + for (SSize i = 0; i < static_cast(query.size()); ++i) { + Base::SearchRadius(query[i], radius, e, nns_data[i], sort); + } + } + + DArray SearchRadius( + py::array_t const pts, + ScalarType const radius, + ScalarType const e, + bool const sort) const { + DArray nns = DArray(std::vector>()); + SearchRadius(pts, radius, e, nns, sort); + return nns; + } + void SearchBox( py::array_t const boxes, DArray& indices) const { auto query = MakeMap(boxes); @@ -126,7 +156,7 @@ class KdTree : public pico_tree::KdTree { #pragma omp parallel for schedule(dynamic, kChunkSize) // TODO Reduce the vector resize overhead for (SSize i = 0; i < static_cast(box_count); ++i) { - auto index = static_cast(i * 2); + auto index = static_cast(i * 2); Base::SearchBox(query[index + 0], query[index + 1], indices_data[i]); } } diff --git a/test/pico_tree/common.hpp b/test/pico_tree/common.hpp index 51a445b..445e431 100644 --- a/test/pico_tree/common.hpp +++ b/test/pico_tree/common.hpp @@ -132,17 +132,28 @@ void TestRadius(Tree const& tree, typename Tree::ScalarType const radius) { points[points.size() / 2], points.sdim()); auto const& metric = tree.metric(); - Scalar const lp_radius = metric(radius); - std::vector> results; - tree.SearchRadius(p, lp_radius, results); + Scalar lp_radius = metric(radius); + Scalar lp_scale = metric(Scalar(1.5)); - for (auto const& r : results) { + std::vector> results_exact; + std::vector> results_apprx; + tree.SearchRadius(p, lp_radius, results_exact); + tree.SearchRadius(p, lp_radius, lp_scale, results_apprx); + + for (auto const& r : results_exact) { Scalar d = metric(p.data(), p.data() + p.size(), points[r.index]); EXPECT_LE(d, lp_radius); EXPECT_EQ(d, r.distance); } + for (auto const& r : results_apprx) { + Scalar d = metric(p.data(), p.data() + p.size(), points[r.index]); + + EXPECT_LE(d, lp_radius); + FloatEq(d, r.distance * lp_scale); + } + std::size_t count = 0; for (pico_tree::Size j = 0; j < points.size(); ++j) { @@ -151,7 +162,8 @@ void TestRadius(Tree const& tree, typename Tree::ScalarType const radius) { } } - EXPECT_EQ(count, results.size()); + EXPECT_EQ(count, results_exact.size()); + EXPECT_GE(count, results_apprx.size()); } template @@ -160,14 +172,13 @@ void TestKnn(Tree const& tree, pico_tree::Size const k, Point const& p) { using Index = typename Tree::IndexType; using Scalar = typename Tree::ScalarType; - // The data doesn't have to be by reference_wrapper, but that prevents a copy. auto const points = tree.points(); - Scalar ratio = tree.metric()(Scalar(1.5)); + Scalar lp_scale = tree.metric()(Scalar(1.5)); std::vector> results_exact; std::vector> results_apprx; tree.SearchKnn(p, k, results_exact); - tree.SearchKnn(p, k, ratio, results_apprx); + tree.SearchKnn(p, k, lp_scale, results_apprx); std::vector> compare; SearchKnn(p, points, k, tree.metric(), &compare); @@ -177,8 +188,8 @@ void TestKnn(Tree const& tree, pico_tree::Size const k, Point const& p) { // Index is not tested in case it happens points have an equal distance. // TODO Would be nicer to test indices too. FloatEq(results_exact[i].distance, compare[i].distance); - // Because results_apprx[i] is already scaled: approx = approx / ratio, - // the check below is the same as: approx <= exact * ratio + // Because results_apprx[i] is already scaled: approx = approx / lp_scale, + // the check below is the same as: approx <= exact * lp_scale. FloatLe(results_apprx[i].distance, results_exact[i].distance); } } diff --git a/test/pyco_tree/kd_tree_test.py b/test/pyco_tree/kd_tree_test.py index 2ee19fb..b8b9a2d 100644 --- a/test/pyco_tree/kd_tree_test.py +++ b/test/pyco_tree/kd_tree_test.py @@ -51,10 +51,11 @@ def test_metric(self): def test_search_knn(self): a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32) t = pt.KdTree(a, pt.Metric.L2Squared, 10) + k = 2 # Test if the query actually works - nns = t.search_knn(a, 2) - self.assertEqual(nns.shape, (3, 2)) + nns = t.search_knn(a, k) + self.assertEqual(nns.shape, (3, k)) for i in range(len(nns)): self.assertEqual(nns[i][0][0], i) @@ -62,19 +63,18 @@ def test_search_knn(self): # Test that the memory is re-used data = nns.ctypes.data - t.search_knn(a, 2, nns) + t.search_knn(a, k, nns) self.assertEqual(nns.ctypes.data, data) - t.search_knn(a, 3, nns) - self.assertNotEqual(nns.ctypes.data, data) - def test_search_aknn(self): + def test_search_approximate_knn(self): a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32) t = pt.KdTree(a, pt.Metric.L2Squared, 10) - r = 1.0 + k = 2 + s = 1.0 # Test if the query actually works - nns = t.search_knn(a, 2, r) - self.assertEqual(nns.shape, (3, 2)) + nns = t.search_knn(a, k, s) + self.assertEqual(nns.shape, (3, k)) for i in range(len(nns)): self.assertEqual(nns[i][0][0], i) @@ -82,17 +82,15 @@ def test_search_aknn(self): # Test that the memory is re-used data = nns.ctypes.data - t.search_knn(a, 2, r, nns) + t.search_knn(a, k, s, nns) self.assertEqual(nns.ctypes.data, data) - t.search_knn(a, 3, nns) - self.assertNotEqual(nns.ctypes.data, data) def test_search_radius(self): a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32) t = pt.KdTree(a, pt.Metric.L2Squared, 10) - search_radius = t.metric(2.5) - nns = t.search_radius(a, search_radius) + radius = t.metric(2.5) + nns = t.search_radius(a, radius) self.assertEqual(len(nns), 3) self.assertEqual(nns.dtype, t.dtype_neighbor) self.assertTrue(nns) @@ -114,10 +112,39 @@ def addresses(nns): return [x.ctypes.data if len(x) else 0 for x in nns] datas = addresses(nns) - t.search_radius(a, search_radius, nns) + t.search_radius(a, radius, nns) + self.assertEqual(addresses(nns), datas) + + def test_search_approximate_radius(self): + a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32) + t = pt.KdTree(a, pt.Metric.L2Squared, 10) + s = 1.0 + + radius = t.metric(2.5) + nns = t.search_radius(a, radius, s) + self.assertEqual(len(nns), 3) + self.assertEqual(nns.dtype, t.dtype_neighbor) + self.assertTrue(nns) + + for i, n in enumerate(nns): + self.assertEqual(len(n), 1) + self.assertEqual(n[0][0], i) + self.assertAlmostEqual(n[0][1], 0) + + # This checks if DArray is also a sequence. + for i in range(len(nns)): + self.assertEqual(nns[i][0][0], i) + self.assertAlmostEqual(nns[i][0][1], 0) + + # Test that the memory is re-used by comparing memory addresses. + # In case the size of an array equals zero, its memory address is + # random. See darray.hpp for more details. + def addresses(nns): + return [x.ctypes.data if len(x) else 0 for x in nns] + + datas = addresses(nns) + t.search_radius(a, radius, s, nns) self.assertEqual(addresses(nns), datas) - t.search_radius(a, search_radius**2, nns) - self.assertNotEqual(addresses(nns), datas) def test_search_box(self): a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32) @@ -173,7 +200,10 @@ def test_creation_darray(self): self.assertEqual(d.dtype, t.dtype_neighbor) self.assertFalse(d) - # 2) {'names':['index','distance'], 'formats':['