Skip to content

Commit

Permalink
Fix fma4 support
Browse files Browse the repository at this point in the history
Fix #1071
  • Loading branch information
serge-sans-paille committed Dec 14, 2024
1 parent 0252ae7 commit e92be56
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
16 changes: 8 additions & 8 deletions include/xsimd/arch/xsimd_fma4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,52 +23,52 @@ namespace xsimd

// fnma
// Single-precision fnma kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_nmacc_ps` intrinsic and returns
// the result as a batch<float, A>. Declared noexcept.
// NOTE(review): presumably yields -(x * y) + z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<float, A> fnma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_nmacc_ps(x, y, z);
}

// Double-precision fnma kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_nmacc_pd` intrinsic and returns
// the result as a batch<double, A>. Declared noexcept.
// NOTE(review): presumably yields -(x * y) + z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<double, A> fnma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_nmacc_pd(x, y, z);
}

// fnms
// Single-precision fnms kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_nmsub_ps` intrinsic and returns
// the result as a batch<float, A>. Declared noexcept.
// NOTE(review): presumably yields -(x * y) - z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<float, A> fnms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_nmsub_ps(x, y, z);
}

// Double-precision fnms kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_nmsub_pd` intrinsic and returns
// the result as a batch<double, A>. Declared noexcept.
// NOTE(review): presumably yields -(x * y) - z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<double, A> fnms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_nmsub_pd(x, y, z);
}

// fma
// Single-precision fma kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_macc_ps` intrinsic and returns
// the result as a batch<float, A>. Declared noexcept.
// NOTE(review): presumably yields (x * y) + z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<float, A> fma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_macc_ps(x, y, z);
}

// Double-precision fma kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_macc_pd` intrinsic and returns
// the result as a batch<double, A>. Declared noexcept.
// NOTE(review): presumably yields (x * y) + z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<double, A> fma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_macc_pd(x, y, z);
}

// fms
// Single-precision fms kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_msub_ps` intrinsic and returns
// the result as a batch<float, A>. Declared noexcept.
// NOTE(review): presumably yields (x * y) - z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<float, A> fms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_msub_ps(x, y, z);
}

// Double-precision fms kernel selected by the `requires_arch<fma4>` tag.
// Forwards x, y and z unchanged to the `_mm_msub_pd` intrinsic and returns
// the result as a batch<double, A>. Declared noexcept.
// NOTE(review): presumably yields (x * y) - z per lane — confirm against the
// FMA4 intrinsic reference.
// NOTE(review): the two signature lines below look like the before
// (simd_register) and after (batch) sides of this diff hunk; only one of them
// can exist in the real header — confirm against the repository.
template <class A>
XSIMD_INLINE batch<double, A> fms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
{
return _mm_msub_pd(x, y, z);
}
Expand Down
4 changes: 4 additions & 0 deletions include/xsimd/types/xsimd_fma4_register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

#include "./xsimd_sse4_2_register.hpp"

#if XSIMD_WITH_FMA4
#include <x86intrin.h>
#endif

namespace xsimd
{
/**
Expand Down
3 changes: 1 addition & 2 deletions test/architectures/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# List of processor names iterated by the foreach loop that follows.
# NOTE(review): presumably each entry is used as a -march value for an
# architecture build test — confirm against the loop body.
# NOTE(review): the "knl ..." and "bdver1 knl ..." lines look like the before
# and after sides of this diff hunk (the change prepends bdver1); as written
# both would be part of the list — confirm against the repository.
set(INTEL_PROCESSORS
knl knm skylake-avx512 cannonlake icelake-client
bdver1 knl knm skylake-avx512 cannonlake icelake-client
icelake-server cascadelake cooperlake tigerlake sapphirerapids alderlake
rocketlake graniterapids graniterapids-d znver4)
# Replaces CMAKE_CXX_FLAGS with a single flag that promotes the
# unused-command-line-argument diagnostic to an error.
set(CMAKE_CXX_FLAGS "-Werror=unused-command-line-argument")

foreach(INTEL_PROCESSOR ${INTEL_PROCESSORS})
# Adding the werror here to choke if the -march is incompatible with the
Expand Down

0 comments on commit e92be56

Please sign in to comment.