Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime Performance Optimization #46

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/gcem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ namespace gcem
#include "gcem_incl/gcd.hpp"
#include "gcem_incl/lcm.hpp"

#include "gcem_incl/tan.hpp"
#include "gcem_incl/cos.hpp"
#include "gcem_incl/sin.hpp"
#include "gcem_incl/tan.hpp"


#include "gcem_incl/atan.hpp"
#include "gcem_incl/atan2.hpp"
Expand Down
32 changes: 20 additions & 12 deletions include/gcem_incl/cos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
################################################################################*/

/*
* compile-time cosine function using tan(x/2)
* compile-time cosine function using 30th degree Chebyshev approximation
*/

#ifndef _gcem_cos_HPP
Expand All @@ -37,6 +37,24 @@ noexcept
return( T(1) - x*x)/(T(1) + x*x );
}

template<typename T>
constexpr
T
cos_compute_chebyshev_inner(const T x2)
noexcept {
// Chebyshev approximation of sin(x) with degree 30
// weights are from: https://publik-void.github.io/sin-cos-approximations/#_cos_rel_error_minimized_degree_30
return 1. + x2*(-0.499999999999999999999999999999999997 + x2*(0.0416666666666666666666666666666665864 + x2*(-0.00138888888888888888888888888888792216 + x2*(0.0000248015873015873015873015872954247692 + x2*(-2.75573192239858906525573168323374212e-7 + x2*(2.08767569878680989792094774070130099e-9 + x2*(-1.147074559772972471374259582051505e-11 + x2*(4.77947733238738528351115798998742201e-14 + x2*(-1.56192069685862134697357839719554288e-16 + x2*(4.11031762331127151219297331433042247e-19 + x2*(-8.8967913919984472368244650875181655e-22 + x2*(1.61173755446230144849643763129805917e-24 + x2*(-2.47959193649582299714152529144837331e-27 + x2*(3.27913485904815466449434894827885198e-30 - 3.69081529290172836989264646507263976e-33*x2))))))))))))));
}

template<typename T>
constexpr
T
cos_compute_chebyshev(const T x1)
noexcept {
return cos_compute_chebyshev_inner(x1 * x1);
}

template<typename T>
constexpr
T
Expand All @@ -49,17 +67,7 @@ noexcept
// indistinguishable from 0
GCLIM<T>::min() > abs(x) ?
T(1) :
// special cases: pi/2 and pi
GCLIM<T>::min() > abs(x - T(GCEM_HALF_PI)) ? \
T(0) :
GCLIM<T>::min() > abs(x + T(GCEM_HALF_PI)) ? \
T(0) :
GCLIM<T>::min() > abs(x - T(GCEM_PI)) ? \
- T(1) :
GCLIM<T>::min() > abs(x + T(GCEM_PI)) ? \
- T(1) :
// else
cos_compute( tan(x/T(2)) ) );
cos_compute_chebyshev(x - T(GCEM_2PI) * floor(x / T(GCEM_2PI))));
}

}
Expand Down
4 changes: 4 additions & 0 deletions include/gcem_incl/gcem_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ namespace gcem
#define GCEM_PI 3.1415926535897932384626433832795028841972L
#endif

#ifndef GCEM_2PI
#define GCEM_2PI 6.2831853071795864769252867665590057683943L
#endif

#ifndef GCEM_LOG_PI
#define GCEM_LOG_PI 1.1447298858494001741434273513530587116473L
#endif
Expand Down
34 changes: 20 additions & 14 deletions include/gcem_incl/sin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
################################################################################*/

/*
* compile-time sine function using tan(x/2)
*
* see eq. 5.4.8 in Numerical Recipes
* compile-time sine function using 29th degree Chebyshev polynomial approximation
*/

#ifndef _gcem_sin_HPP
Expand All @@ -39,6 +37,24 @@ noexcept
return T(2)*x/(T(1) + x*x);
}

template<typename T>
constexpr
T
sin_compute_chebyshev_inner(const T x1, const T x2)
noexcept {
// Chebyshev approximation of sin(x) with degree 29
// weights are from: https://publik-void.github.io/sin-cos-approximations/#_sin_rel_error_minimized_degree_29
return -x1*(1. + x2*(-0.166666666666666666666666666666666629 + x2*(0.00833333333333333333333333333333222318 + x2*(-0.000198412698412698412698412698399723487 + x2*(2.75573192239858906525573184281026561e-6 + x2*(-2.50521083854417187750518138734779333e-8 + x2*(1.60590438368216145993211483359730933e-10 + x2*(-7.64716373181981646407639862566185217e-13 + x2*(2.81145725434551937513944561966331531e-15 + x2*(-8.22063524662315930664562316486611394e-18 + x2*(1.9572941062679701610307767721364446e-20 + x2*(-3.8681701397118386284141935821892017e-23 + x2*(6.44694091890808795934274904084719604e-26 + x2*(-9.18181103282828321298337267365314117e-29 + 1.10854165613066936680763885008011392e-31*x2))))))))))))));
}

template<typename T>
constexpr
T
sin_compute_chebyshev(const T x1)
noexcept {
return sin_compute_chebyshev_inner(x1, x1 * x1);
}

template<typename T>
constexpr
T
Expand All @@ -51,17 +67,7 @@ noexcept
// indistinguishable from zero
GCLIM<T>::min() > abs(x) ? \
T(0) :
// special cases: pi/2 and pi
GCLIM<T>::min() > abs(x - T(GCEM_HALF_PI)) ? \
T(1) :
GCLIM<T>::min() > abs(x + T(GCEM_HALF_PI)) ? \
- T(1) :
GCLIM<T>::min() > abs(x - T(GCEM_PI)) ? \
T(0) :
GCLIM<T>::min() > abs(x + T(GCEM_PI)) ? \
- T(0) :
// else
sin_compute( tan(x/T(2)) ) );
sin_compute_chebyshev(x - T(GCEM_2PI) * floor(x / T(GCEM_2PI)) - T(GCEM_PI)));
}

}
Expand Down
28 changes: 27 additions & 1 deletion include/gcem_incl/sqrt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,31 @@ noexcept
m_val * sqrt_recur(x, x / T(2), 0) );
}

// template<typename T>
// T
// sqrt_babylonian(const T x)
// noexcept
// {
// // see here for more details: https://blogs.sas.com/content/iml/2016/05/16/babylonian-square-roots.html
// T xn = x;
// T xn1 = (xn + x/xn) / T(2);
// while(abs(xn1 - xn) > GCLIM<T>::epsilon()) {
// xn = xn1;
// xn1 = (xn + x/xn) / T(2);
// }
// return xn1;
// }

template<typename T>
T
sqrt_babylonian(const T x, const T xn, const T xn1)
noexcept
{
return ( abs(xn1 - xn) < GCLIM<T>::epsilon() ? \
xn1 :
sqrt_babylonian(x, xn1, (xn1 + x/xn1) / T(2)));
}

template<typename T>
constexpr
T
Expand All @@ -84,7 +109,8 @@ noexcept
GCLIM<T>::min() > abs(T(1) - x) ? \
x :
// else
sqrt_simplify(x, T(1)) );
// sqrt_simplify(x, T(1)) );
sqrt_babylonian(x, x, (x + T(1)) / T(2)) );
}

}
Expand Down
33 changes: 30 additions & 3 deletions include/gcem_incl/tan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,32 @@ noexcept
tan_cf_main(x) );
}

template<typename T>
constexpr
T
tan_from_sin_cos_inner(const T x1)
noexcept
{
// There should be faster ways to calculate tan(x) directly
// At least as fast as calculating sin(x) and cos(x) using Chebyshev polynomials
return (
x1 < T(GCEM_HALF_PI) ? sin(x1) / cos(x1) :
x1 > T(GCEM_HALF_PI) ? - sin(T(GCEM_PI) - x1) / cos(T(GCEM_PI) - x1) :
GCLIM<T>::quiet_NaN() // x1 == 1/2 pi
);
}


template<typename T>
constexpr
T
tan_from_sin_cos(const T x)
noexcept
{
// limit x to range(0, pi)
return tan_from_sin_cos_inner(x - T(GCEM_PI) * floor(x / T(GCEM_PI)));
}

template<typename T>
constexpr
T
Expand All @@ -110,9 +136,10 @@ noexcept
GCLIM<T>::min() > abs(x) ? \
T(0) :
// else
x < T(0) ? \
- tan_begin(-x) :
tan_begin( x) );
tan_from_sin_cos(x));
// x < T(0) ? \
// - tan_begin(-x) :
// tan_begin( x) );
}

}
Expand Down
5 changes: 4 additions & 1 deletion tests/cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ int main()
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,11.1L);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,50.0L);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,150.0L);

GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,GCEM_PI);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,GCEM_HALF_PI);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,-GCEM_PI);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,-GCEM_HALF_PI);
GCEM_TEST_COMPARE_VALS(gcem::cos,std::cos,TEST_NAN);

//
Expand Down
5 changes: 4 additions & 1 deletion tests/sin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ int main()
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,11.1L);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,50.0L);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,150.0L);

GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,GCEM_PI);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,GCEM_HALF_PI);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,-GCEM_PI);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,-GCEM_HALF_PI);
GCEM_TEST_COMPARE_VALS(gcem::sin,std::sin,TEST_NAN);

//
Expand Down
6 changes: 5 additions & 1 deletion tests/tan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ int main()

// static constexpr long double lrgval = std::numeric_limits<int>::max()*100.0L;

std::cout << "tan(pi/2) = " << std::tan(GCEM_HALF_PI) << std::endl;
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, 0.0L);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, 0.001L);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, 1.001L);
Expand All @@ -39,7 +40,10 @@ int main()
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, 50.0L);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, -1.5L);
// GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, lrgval);

GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, GCEM_HALF_PI - 1e-6);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, GCEM_HALF_PI + 1e-6);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, -GCEM_HALF_PI - 1e-6);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, -GCEM_HALF_PI + 1e-6);
GCEM_TEST_COMPARE_VALS(gcem::tan,std::tan, TEST_NAN);

//
Expand Down