Skip to content

Commit

Permalink
update Eigen (to version 3.4.0)
Browse files Browse the repository at this point in the history
  • Loading branch information
mlivesu committed Apr 18, 2024
1 parent c6f5746 commit 015aeaf
Show file tree
Hide file tree
Showing 96 changed files with 981 additions and 4,268 deletions.
3 changes: 3 additions & 0 deletions external/eigen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ else()
ei_add_cxx_compiler_flag("-std=c++03")
endif()

# Determine if we should build shared libraries on this platform.
get_cmake_property(EIGEN_BUILD_SHARED_LIBS TARGET_SUPPORTS_SHARED_LIBS)

#############################################################################
# find how to link to the standard libraries #
#############################################################################
Expand Down
2 changes: 1 addition & 1 deletion external/eigen/Eigen/src/Core/Dot.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const

//---------- implementation of L2 norm and related functions ----------

/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
* In both cases, it consists in the sum of the square of all the matrix entries.
* For vectors, this is also equal to the dot product of \c *this with itself.
*
Expand Down
220 changes: 174 additions & 46 deletions external/eigen/Eigen/src/Core/GenericPacketMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits

template<typename T> struct packet_traits<const T> : packet_traits<T> { };

/** \internal Fallback traits for a type \c T that has no dedicated specialization.
  * The type is described as a single-element, non-vectorizable "packet" of
  * itself: both \c type and \c half are \c T, and neither masked loads nor
  * masked stores are advertised.
  */
template<typename T>
struct unpacket_traits {
  typedef T type;
  typedef T half;
  enum {
    size                   = 1,
    alignment              = 1,
    vectorizable           = false,
    masked_load_available  = false,
    masked_store_available = false
  };
};

// A const-qualified type inherits the traits of the unqualified type.
template<typename T>
struct unpacket_traits<const T> : unpacket_traits<T> {};

template <typename Src, typename Tgt> struct type_casting_traits {
enum {
VectorizedCast = 0,
Expand All @@ -154,6 +170,18 @@ struct eigen_packet_wrapper
T m_val;
};


/** \internal Trait reporting whether \c Packet is a plain scalar.
  * \c value is non-zero exactly when \c unpacket_traits<Packet>::type is
  * \c Packet itself. The generic packet helpers use it to enable their
  * scalar-specific implementations.
  */
template<typename Packet>
struct is_scalar {
  typedef typename unpacket_traits<Packet>::type Scalar;
  enum { value = internal::is_same<Packet, Scalar>::value };
};

/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
Expand Down Expand Up @@ -215,13 +243,59 @@ pmul(const bool& a, const bool& b) { return a && b; }
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pdiv(const Packet& a, const Packet& b) { return a/b; }

/** \internal \returns one bits */
// Generic implementation: every byte of the returned packet is set to 0xff.
// The argument is only used to select the packet type.
template<typename Packet, typename EnableIf = void>
struct ptrue_impl {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
    Packet all_ones;
    memset(static_cast<void*>(&all_ones), 0xff, sizeof(Packet));
    return all_ones;
  }
};

// Scalars whose NumTraits report RequireInitialization are not filled with
// memset; they are constructed as T(1), i.e. some non-zero value.
// That is not a real all-ones bitmask, but the scalar path of pselect only
// compares the mask against zero, so a non-zero value should behave as "true"
// in most cases. No alternative exists because such a scalar type has to be
// properly initialized.
template<typename T>
struct ptrue_impl<T,
    typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type> {
  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
    return T(1);
  }
};

/** \internal \returns one bits. */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
ptrue(const Packet& a) {
return ptrue_impl<Packet>::run(a);
}

/** \internal \returns zero bits */
// Generic implementation: every byte of the returned packet is cleared.
// The argument is only used to select the packet type.
template<typename Packet, typename EnableIf = void>
struct pzero_impl {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
    Packet all_zeros;
    memset(static_cast<void*>(&all_zeros), 0x00, sizeof(Packet));
    return all_zeros;
  }
};

// Scalar implementation: build the value as T(0) rather than clearing bytes,
// because a scalar type's zero is not guaranteed to be represented by
// all-zero bits.
template<typename T>
struct pzero_impl<T, typename internal::enable_if<is_scalar<T>::value>::type> {
  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
    return T(0);
  }
};

/** \internal \returns packet of zeros */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;}
pzero(const Packet& a) {
return pzero_impl<Packet>::run(a);
}

/** \internal \returns a <= b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
Expand All @@ -238,33 +312,6 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
/** \internal \returns a bit mask that is set when a < b, or when a or b is NaN */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pcmp_lt_or_nan(const Packet& a, const Packet& b) {
  // Testing a >= b (instead of a < b) sends every case where the comparison
  // does not hold, NaN operands of floating-point types included, to the
  // "true" mask below.
  if (a >= b) {
    return pzero(a);
  }
  return ptrue(a);
}
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
EIGEN_UNUSED_VARIABLE(a)
return 0.f;
}

template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
EIGEN_UNUSED_VARIABLE(a)
return 0.;
}

template <typename RealScalar>
EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
RealScalar b = ptrue(RealScalar(0));
return std::complex<RealScalar>(b, b);
}

template <typename Packet, typename Op>
EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) {
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
Packet c;
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
for (size_t i = 0; i < sizeof(Packet); ++i) {
*c_ptr++ = op(*a_ptr++, *b_ptr++);
}
return c;
}

template<typename T>
struct bit_and {
Expand All @@ -287,42 +334,123 @@ struct bit_xor {
}
};

// Functor returning the bitwise complement (operator~) of its argument,
// converted back to T.
template<typename T>
struct bit_not {
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE
  T operator()(const T& a) const { return ~a; }
};

// Bitwise helper that forwards each operation to the matching functor
// (bit_and, bit_or, bit_xor, bit_not) instantiated for T itself, so the
// operation is applied to the whole value at once.
template<typename T>
struct operator_bitwise_helper {
  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
    return bit_and<T>()(a, b);
  }
  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
    return bit_or<T>()(a, b);
  }
  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
    return bit_xor<T>()(a, b);
  }
  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
    return bit_not<T>()(a);
  }
};

// Bitwise helper that works on the raw bytes of T: each operation is applied
// to the operands one unsigned char at a time and the result bytes are
// written into a fresh T.
template<typename T>
struct bytewise_bitwise_helper {
  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
    return binary(a, b, bit_and<unsigned char>());
  }

  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
    return binary(a, b, bit_or<unsigned char>());
  }

  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
    return binary(a, b, bit_xor<unsigned char>());
  }

  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
    return unary(a, bit_not<unsigned char>());
  }

 private:
  // Applies op to every byte of a; returns a T assembled from the results.
  template<typename Op>
  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
    T result;
    const unsigned char* src = reinterpret_cast<const unsigned char*>(&a);
    unsigned char* dst = reinterpret_cast<unsigned char*>(&result);
    for (size_t i = 0; i < sizeof(T); ++i) {
      dst[i] = op(src[i]);
    }
    return result;
  }

  // Applies op to every pair of corresponding bytes of a and b; returns a T
  // assembled from the results.
  template<typename Op>
  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
    T result;
    const unsigned char* lhs = reinterpret_cast<const unsigned char*>(&a);
    const unsigned char* rhs = reinterpret_cast<const unsigned char*>(&b);
    unsigned char* dst = reinterpret_cast<unsigned char*>(&result);
    for (size_t i = 0; i < sizeof(T); ++i) {
      dst[i] = op(lhs[i], rhs[i]);
    }
    return result;
  }
};

// Selects how bitwise operations are carried out for T.
// Default: operate on the bytes of the object, one at a time.
template<typename T, typename EnableIf = void>
struct bitwise_helper : public bytewise_bitwise_helper<T> {};

// Scalars that are integers, or whose NumTraits report RequireInitialization,
// go through the type's own bitwise operators instead.
template<typename T>
struct bitwise_helper<
    T,
    typename internal::enable_if<
        is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type>
    : public operator_bitwise_helper<T> {};

/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pand(const Packet& a, const Packet& b) {
return bitwise_helper(a, b, bit_and<unsigned char>());
return bitwise_helper<Packet>::bitwise_and(a, b);
}

/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
por(const Packet& a, const Packet& b) {
return bitwise_helper(a ,b, bit_or<unsigned char>());
return bitwise_helper<Packet>::bitwise_or(a, b);
}

/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pxor(const Packet& a, const Packet& b) {
return bitwise_helper(a ,b, bit_xor<unsigned char>());
return bitwise_helper<Packet>::bitwise_xor(a, b);
}

/** \internal \returns the bitwise complement of \a a, as computed by bitwise_helper<Packet> */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pnot(const Packet& a) {
  typedef bitwise_helper<Packet> Helper;
  return Helper::bitwise_not(a);
}

/** \internal \returns the bitwise and of \a a and not \a b */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); }
pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }

// Generic implementation: blend the operands bitwise, keeping the bits of a
// where mask is set and the bits of b where it is clear.
template<typename Packet, typename EnableIf = void>
struct pselect_impl {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
    const Packet kept_from_a = pand(a, mask);
    const Packet kept_from_b = pandnot(b, mask);
    return por(kept_from_a, kept_from_b);
  }
};

// Scalar implementation: no bit blending; a mask that compares strictly equal
// to Packet(0) selects b, any other mask value selects a.
template<typename Packet>
struct pselect_impl<Packet, typename internal::enable_if<is_scalar<Packet>::value>::type> {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
    if (numext::equal_strict(mask, Packet(0))) {
      return b;
    }
    return a;
  }
};

/** \internal \returns \a a or \a b for each field in packet according to \a mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pselect(const Packet& mask, const Packet& a, const Packet& b) {
return por(pand(a,mask),pandnot(b,mask));
}

template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
const float& cond, const float& a, const float&b) {
return numext::equal_strict(cond,0.f) ? b : a;
}

template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
const double& cond, const double& a, const double& b) {
return numext::equal_strict(cond,0.) ? b : a;
return pselect_impl<Packet>::run(mask, a, b);
}

template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
Expand Down
2 changes: 1 addition & 1 deletion external/eigen/Eigen/src/Core/Map.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
* \brief A matrix or vector expression mapping an existing array of data.
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
Expand Down
4 changes: 3 additions & 1 deletion external/eigen/Eigen/src/Core/MathFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,9 @@ struct rint_retval
* Implementation of arg *
****************************************************************************/

#if EIGEN_HAS_CXX11_MATH
// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
// This seems to be fixed in VS 2019.
#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
// std::arg is only defined for types of std::complex, or integer types or float/double/long double
template<typename Scalar,
bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
Expand Down
2 changes: 1 addition & 1 deletion external/eigen/Eigen/src/Core/Solve.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<R

protected:
const Decomposition &m_dec;
const typename internal::ref_selector<RhsType>::type m_rhs;
const RhsType &m_rhs;
};


Expand Down
12 changes: 9 additions & 3 deletions external/eigen/Eigen/src/Core/arch/AVX/Complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,15 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
}


EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)

template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
return pdiv_complex(a, b);
Packet4cf num = pmul(a, pconj(b));
__m256 tmp = _mm256_mul_ps(b.v, b.v);
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
__m256 denom = _mm256_add_ps(tmp, tmp2);
return Packet4cf(_mm256_div_ps(num.v, denom));
}

template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
Expand Down Expand Up @@ -318,7 +321,10 @@ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)

template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
return pdiv_complex(a, b);
Packet2cd num = pmul(a, pconj(b));
__m256d tmp = _mm256_mul_pd(b.v, b.v);
__m256d denom = _mm256_hadd_pd(tmp, tmp);
return Packet2cd(_mm256_div_pd(num.v, denom));
}

template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
Expand Down
11 changes: 3 additions & 8 deletions external/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -1274,12 +1274,7 @@ EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf& a) {
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) {
Packet8bf r;

// Flush input denormals value to zero with hardware capability.
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
__m256 flush = _mm256_and_ps(a, a);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);

__m256i input = _mm256_castps_si256(flush);
__m256i input = _mm256_castps_si256(a);

#ifdef EIGEN_VECTORIZE_AVX2
// uint32_t lsb = (input >> 16);
Expand All @@ -1293,7 +1288,7 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) {
// input = input >> 16;
t = _mm256_srli_epi32(t, 16);
// Check NaN before converting back to bf16
__m256 mask = _mm256_cmp_ps(flush, flush, _CMP_ORD_Q);
__m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);
__m256i nan = _mm256_set1_epi32(0x7fc0);
t = _mm256_blendv_epi8(nan, t, _mm256_castps_si256(mask));
// output = numext::bit_cast<uint16_t>(input);
Expand All @@ -1316,7 +1311,7 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) {
lo = _mm_srli_epi32(lo, 16);
hi = _mm_srli_epi32(hi, 16);
// Check NaN before converting back to bf16
__m256 mask = _mm256_cmp_ps(flush, flush, _CMP_ORD_Q);
__m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);
__m128i nan = _mm_set1_epi32(0x7fc0);
lo = _mm_blendv_epi8(nan, lo, _mm_castps_si128(_mm256_castps256_ps128(mask)));
hi = _mm_blendv_epi8(nan, hi, _mm_castps_si128(_mm256_extractf128_ps(mask, 1)));
Expand Down
Loading

0 comments on commit 015aeaf

Please sign in to comment.