From e92be561f1c3fc5219f144605e446966f1e49eb1 Mon Sep 17 00:00:00 2001
From: serge-sans-paille
Date: Sat, 14 Dec 2024 22:30:57 +0100
Subject: [PATCH] Fix fma4 support

Fix #1071
---
 include/xsimd/arch/xsimd_fma4.hpp           | 16 ++++++++--------
 include/xsimd/types/xsimd_fma4_register.hpp |  4 ++++
 test/architectures/CMakeLists.txt           |  3 +--
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/xsimd/arch/xsimd_fma4.hpp b/include/xsimd/arch/xsimd_fma4.hpp
index e51c7c52a..423331230 100644
--- a/include/xsimd/arch/xsimd_fma4.hpp
+++ b/include/xsimd/arch/xsimd_fma4.hpp
@@ -23,52 +23,52 @@ namespace xsimd
 
         // fnma
         template <class A>
-        XSIMD_INLINE batch<float, A> fnma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_nmacc_ps(x, y, z);
         }
 
         template <class A>
-        XSIMD_INLINE batch<double, A> fnma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_nmacc_pd(x, y, z);
         }
 
         // fnms
         template <class A>
-        XSIMD_INLINE batch<float, A> fnms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_nmsub_ps(x, y, z);
         }
 
         template <class A>
-        XSIMD_INLINE batch<double, A> fnms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_nmsub_pd(x, y, z);
         }
 
         // fma
         template <class A>
-        XSIMD_INLINE batch<float, A> fma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_macc_ps(x, y, z);
         }
 
         template <class A>
-        XSIMD_INLINE batch<double, A> fma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_macc_pd(x, y, z);
         }
 
         // fms
         template <class A>
-        XSIMD_INLINE batch<float, A> fms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_msub_ps(x, y, z);
         }
 
         template <class A>
-        XSIMD_INLINE batch<double, A> fms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
+        XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
         {
             return _mm_msub_pd(x, y, z);
         }
diff --git a/include/xsimd/types/xsimd_fma4_register.hpp b/include/xsimd/types/xsimd_fma4_register.hpp
index 1a066cd20..70f889abb 100644
--- a/include/xsimd/types/xsimd_fma4_register.hpp
+++ b/include/xsimd/types/xsimd_fma4_register.hpp
@@ -14,6 +14,10 @@
 
 #include "./xsimd_sse4_2_register.hpp"
 
+#if XSIMD_WITH_FMA4
+#include <x86intrin.h>
+#endif
+
 namespace xsimd
 {
     /**
diff --git a/test/architectures/CMakeLists.txt b/test/architectures/CMakeLists.txt
index 491f95629..597803904 100644
--- a/test/architectures/CMakeLists.txt
+++ b/test/architectures/CMakeLists.txt
@@ -1,8 +1,7 @@
 set(INTEL_PROCESSORS
-    knl knm skylake-avx512 cannonlake icelake-client
+    bdver1 knl knm skylake-avx512 cannonlake icelake-client
     icelake-server cascadelake cooperlake tigerlake sapphirerapids alderlake
     rocketlake graniterapids graniterapids-d znver4)
-set(CMAKE_CXX_FLAGS "-Werror=unused-command-line-argument")
 
 foreach(INTEL_PROCESSOR ${INTEL_PROCESSORS})
   # Adding the werror here to choke if the -march is incompatible with the