Skip to content

Commit

Permalink
Replaced FMA feature flag for relaxed which also ties in the other pl…
Browse files Browse the repository at this point in the history
…atform precision variation functions like invsqrt and reciprocal. Fixed WASM arch detect
  • Loading branch information
Auburn committed Mar 26, 2024
1 parent 9fa0b71 commit 185d7ef
Show file tree
Hide file tree
Showing 20 changed files with 202 additions and 137 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ else()
target_compile_definitions(FastSIMD PUBLIC FASTSIMD_STATIC_LIB)
endif()

# When the target system is Emscripten, add -msimd128 to FastSIMD as a PUBLIC
# compile option, so it applies to FastSIMD's own sources and is propagated to
# every target that links against FastSIMD.
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_compile_options(FastSIMD PUBLIC "-msimd128")
endif()

target_include_directories(FastSIMD PUBLIC
$<BUILD_INTERFACE:${FastSIMD_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
Expand Down
56 changes: 36 additions & 20 deletions dispatch/cmake/ClassSIMD.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

function(fastsimd_add_feature_set_source simd_inl feature_set)
function(fastsimd_add_feature_set_source simd_inl feature_set is_relaxed)
set(feature_set_source "${simd_library_source_dir}/${simd_library_name}_${feature_set}.cpp")
set(simd_inl_full "${CMAKE_CURRENT_LIST_DIR}/${simd_inl}")

Expand All @@ -9,52 +9,64 @@ function(fastsimd_add_feature_set_source simd_inl feature_set)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
# MSVC 32bit needs SSE2 flag for all SSE levels
if(${feature_set} MATCHES "SSE[^(0-9)]" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS /arch:SSE2)

elseif(${feature_set} MATCHES "AVX[^(0-9)]")
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "/arch:AVX")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS /arch:AVX)

elseif(${feature_set} MATCHES AVX2)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "/arch:AVX2")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS /arch:AVX2)

elseif(${feature_set} MATCHES AVX512)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "/arch:AVX512")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS /arch:AVX512)
endif()
else()
if(${feature_set} MATCHES SSE2 AND CMAKE_SIZEOF_VOID_P EQUAL 4)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-msse2")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -msse2)

elseif(${feature_set} MATCHES SSE3)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-msse3")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -msse3)

elseif(${feature_set} MATCHES SSSE3)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-mssse3")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mssse3)

elseif(${feature_set} MATCHES SSE41)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-msse4.1")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -msse4.1)

elseif(${feature_set} MATCHES SSE42)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-msse4.2")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -msse4.2)

elseif(${feature_set} MATCHES "AVX[^(0-9)]")
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-mavx")
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mavx)

elseif(${feature_set} MATCHES AVX2)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
if(is_relaxed)
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mfma)
else()
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mno-fma)
endif()
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mavx2)

elseif(${feature_set} MATCHES AVX512)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma")
if(is_relaxed)
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mfma)
else()
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mno-fma)
endif()
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mavx512f -mavx512dq -mavx512vl -mavx512bw)

elseif(${feature_set} MATCHES WASM)
set_source_files_properties(${feature_set_source} PROPERTIES COMPILE_FLAGS "-msimd128 -mrelaxed-simd")
if(is_relaxed)
set_property(SOURCE ${feature_set_source} APPEND PROPERTY COMPILE_OPTIONS -mrelaxed-simd)
endif()
endif()
endif()

endfunction()

function(fastsimd_create_dispatch_library simd_library_name)

cmake_parse_arguments(PARSE_ARGV 0 fastsimd_create_dispatch_library "" "" "SOURCES;FEATURE_SETS")
cmake_parse_arguments(PARSE_ARGV 0 fastsimd_create_dispatch_library "RELAXED" "" "SOURCES;FEATURE_SETS")

list(LENGTH fastsimd_create_dispatch_library_FEATURE_SETS FEATURE_SET_COUNT)
list(LENGTH fastsimd_create_dispatch_library_SOURCES SOURCES_COUNT)
Expand All @@ -68,10 +80,10 @@ function(fastsimd_create_dispatch_library simd_library_name)
set(fastsimd_create_dispatch_library_FEATURE_SETS
SSE2
SSE41
AVX2_FMA
AVX512_FMA
NEON_FMA
AARCH64_FMA
AVX2
AVX512
NEON
AARCH64
WASM)
endif()

Expand All @@ -93,6 +105,10 @@ function(fastsimd_create_dispatch_library simd_library_name)
if(CMAKE_COMPILER_IS_GNUCC)
set_target_properties(${simd_library_name} PROPERTIES COMPILE_FLAGS "-Wno-ignored-attributes")
endif()

if(fastsimd_create_dispatch_library_RELAXED)
target_compile_definitions(${simd_library_name} PUBLIC FASTSIMD_IS_RELAXED=1)
endif()

set(feature_set_list "")
set(feature_set_list_debug "")
Expand All @@ -112,7 +128,7 @@ function(fastsimd_create_dispatch_library simd_library_name)
if ("${COMPILE_OUTPUT}" MATCHES "TEST_FEATURE_SET_ACTIVE_SUCCESS")
list(APPEND feature_set_list "FastSIMD::FeatureSet::${feature_set}")
list(APPEND feature_set_list_debug "${feature_set}")
fastsimd_add_feature_set_source(${simd_inl} ${feature_set})
fastsimd_add_feature_set_source(${simd_inl} ${feature_set} ${fastsimd_create_dispatch_library_RELAXED})
endif()
endforeach()
endforeach()
Expand Down
8 changes: 4 additions & 4 deletions dispatch/impl/DispatchClassImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ namespace FastSIMD
};


template<typename T, FastSIMD::FeatureSet SIMD>
FS_FORCEINLINE static T* DispatchClassFactoryIterator( FastSIMD::FeatureSet maxFeatureSet, MemoryAllocator allocator )
template<typename T, FeatureSet SIMD>
FS_FORCEINLINE static T* DispatchClassFactoryIterator( FeatureSet maxFeatureSet, MemoryAllocator allocator )
{
if( maxFeatureSet < SIMD )
{
Expand All @@ -68,7 +68,7 @@ namespace FastSIMD

constexpr auto NextCompiled = FastSIMD::FASTSIMD_LIBRARY_NAME::CompiledFeatureSets::NextAfter<SIMD>;

if constexpr( NextCompiled != FastSIMD::FeatureSet::Max )
if constexpr( NextCompiled != FeatureSet::Max )
{
if( maxFeatureSet >= NextCompiled )
{
Expand All @@ -80,7 +80,7 @@ namespace FastSIMD
}

template<typename T>
FASTSIMD_API T* NewDispatchClass( FastSIMD::FeatureSet maxFeatureSet, MemoryAllocator allocator )
FASTSIMD_API T* NewDispatchClass( FeatureSet maxFeatureSet, MemoryAllocator allocator )
{
if( maxFeatureSet == FeatureSet::Max )
{
Expand Down
2 changes: 1 addition & 1 deletion examples/header_only/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ int main()

auto out = FS::Masked( invSqrt != FS::f32<4>( INFINITY ), dist );

return 0;
return FS::Extract0( FS::Convert<int>( out ) );
}
6 changes: 6 additions & 0 deletions include/FastSIMD/ToolSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ namespace FastSIMD
{
return SIMD;
}

// Compile-time query for "relaxed" mode.
// Returns the RELAXED template argument, which defaults to the value of the
// FASTSIMD_IS_RELAXED macro. The leading unnamed `auto` parameter is not used
// in the body; it only allows callers to pass an arbitrary value as the first
// template argument.
// NOTE(review): presumably that first argument exists so the call can be made
// dependent on a template parameter (e.g. a feature set) for SFINAE - confirm.
template<auto = 0, bool RELAXED = FASTSIMD_IS_RELAXED>
static constexpr bool IsRelaxed()
{
return RELAXED;
}
} // namespace FastSIMD

#include "ToolSet/Generic/Functions.h"
Expand Down
18 changes: 16 additions & 2 deletions include/FastSIMD/ToolSet/Generic/Functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,22 @@ namespace FS
template<typename T, std::size_t N, FastSIMD::FeatureSet SIMD>
FS_FORCEINLINE Register<T, N, SIMD> InvSqrt( const Register<T, N, SIMD>& a )
{
static_assert( !IsNativeV<Register<T, N, SIMD>>, "FastSIMD: FS::InvSqrt not supported with provided types" );
return Register<T, N, SIMD>{ InvSqrt( a.v0 ), InvSqrt( a.v1 ) };
if constexpr( IsNativeV<Register<T, N, SIMD>> )
{
return Register<T, N, SIMD>( 1 ) / Sqrt( a );
}
else
{
return Register<T, N, SIMD>{ InvSqrt( a.v0 ), InvSqrt( a.v1 ) };
}
}

// Sqrt
// Generic fallback for registers that are not native: takes the square root of
// the two halves (a.v0 and a.v1) separately and rebuilds a register from the
// results. Instantiating this overload for a native register type is a compile
// error via the static_assert below.
template<typename T, std::size_t N, FastSIMD::FeatureSet SIMD>
FS_FORCEINLINE Register<T, N, SIMD> Sqrt( const Register<T, N, SIMD>& a )
{
static_assert( !IsNativeV<Register<T, N, SIMD>>, "FastSIMD: FS::Sqrt not supported with provided types" );
return Register<T, N, SIMD>{ Sqrt( a.v0 ), Sqrt( a.v1 ) };
}

template<std::size_t N, FastSIMD::FeatureSet SIMD>
Expand Down
3 changes: 3 additions & 0 deletions include/FastSIMD/ToolSet/Generic/Register.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ namespace FS
template<typename T>
constexpr bool IsNativeV = IsNative<T>::value;

// SFINAE helper: names the type std::enable_if_t<...> (i.e. void) when
// FastSIMD::IsRelaxed<T>() is true, and is ill-formed otherwise, which removes
// the enclosing template from overload resolution. T is only forwarded to
// IsRelaxed as its first template argument.
template<auto T = 0>
using EnableIfRelaxed = std::enable_if_t<FastSIMD::IsRelaxed<T>()>;


template<std::size_t N, FastSIMD::FeatureSet SIMD = FastSIMD::FeatureSetDefault()>
using i32 = Register<std::int32_t, N, SIMD>;
Expand Down
4 changes: 2 additions & 2 deletions include/FastSIMD/ToolSet/Generic/Scalar/f32x1.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ namespace FS


template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<1, SIMD>>>
FS_FORCEINLINE f32<1, SIMD> InvSqrt( const f32<1, SIMD>& a )
FS_FORCEINLINE f32<1, SIMD> Sqrt( const f32<1, SIMD>& a )
{
return 1 / std::sqrt( a.native.f );
return std::sqrt( a.native.f );
}
}
47 changes: 22 additions & 25 deletions include/FastSIMD/ToolSet/WASM/128/f32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,27 @@ namespace FS
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> Min( const f32<4, SIMD>& a, const f32<4, SIMD>& b )
{
return wasm_f32x4_pmin( a.native, b.native );
if constexpr( FastSIMD::IsRelaxed() )
{
return wasm_f32x4_relaxed_min( a.native, b.native );
}
else
{
return wasm_f32x4_min( a.native, b.native );
}
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> Max( const f32<4, SIMD>& a, const f32<4, SIMD>& b )
{
return wasm_f32x4_pmax( a.native, b.native );
if constexpr( FastSIMD::IsRelaxed() )
{
return wasm_f32x4_relaxed_max( a.native, b.native );
}
else
{
return wasm_f32x4_max( a.native, b.native );
}
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
Expand All @@ -182,50 +196,33 @@ namespace FS
FS_FORCEINLINE f32<4, SIMD> MaskedIncrement( const typename f32<4, SIMD>::MaskTypeArg& mask, const f32<4, SIMD>& a )
{
return wasm_f32x4_sub( a.native,
wasm_f32x4_convert_i32x4(static_cast<v128_t>(mask.native)) );
wasm_f32x4_convert_i32x4( static_cast<v128_t>( mask.native ) ) );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> MaskedDecrement( const typename f32<4, SIMD>::MaskTypeArg& mask, const f32<4, SIMD>& a )
{
return wasm_f32x4_add( a.native,
wasm_f32x4_convert_i32x4(static_cast<v128_t>(mask.native)) );
wasm_f32x4_convert_i32x4( static_cast<v128_t>( mask.native ) ) );
}


template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> Reciprocal( const f32<4, SIMD>& a )
FS_FORCEINLINE f32<4, SIMD> Sqrt( const f32<4, SIMD>& a )
{
return wasm_f32x4_div( f32<4, SIMD>{1.0f}.native, a.native );
return wasm_f32x4_sqrt( a.native );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> InvSqrt( const f32<4, SIMD>& a )
{
return wasm_f32x4_div( f32<4, SIMD>{1.0f}.native, wasm_f32x4_sqrt( a.native ) );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = std::enable_if_t<SIMD & FastSIMD::FeatureFlag::FMA>>
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = EnableIfRelaxed<SIMD>()>
FS_FORCEINLINE f32<4, SIMD> FMulAdd( const f32<4, SIMD>& a, const f32<4, SIMD>& b, const f32<4, SIMD>& c )
{
return wasm_f32x4_relaxed_madd( c.native, a.native, b.native );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = std::enable_if_t<SIMD & FastSIMD::FeatureFlag::FMA>>
FS_FORCEINLINE f32<4, SIMD> FMulSub( const f32<4, SIMD>& a, const f32<4, SIMD>& b, const f32<4, SIMD>& c )
{
return wasm_f32x4_relaxed_msub( c.native, a.native, b.native );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = std::enable_if_t<SIMD & FastSIMD::FeatureFlag::FMA>>
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = EnableIfRelaxed<SIMD>()>
FS_FORCEINLINE f32<4, SIMD> FNMulAdd( const f32<4, SIMD>& a, const f32<4, SIMD>& b, const f32<4, SIMD>& c )
{
return wasm_f32x4_relaxed_nmadd( c.native, a.native, b.native );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = std::enable_if_t<SIMD & FastSIMD::FeatureFlag::FMA>>
FS_FORCEINLINE f32<4, SIMD> FNMulSub( const f32<4, SIMD>& a, const f32<4, SIMD>& b, const f32<4, SIMD>& c )
{
return wasm_f32x4_relaxed_nmsub( c.native, a.native, b.native );
}
}
9 changes: 8 additions & 1 deletion include/FastSIMD/ToolSet/WASM/WASM.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ namespace FS
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE i32<4, SIMD> Convert( const f32<4, SIMD>& a, TypeDummy<int32_t> )
{
return wasm_i32x4_trunc_sat_f32x4( Round( a ).native );
if constexpr( FastSIMD::IsRelaxed() )
{
return wasm_i32x4_relaxed_trunc_f32x4( Round( a ).native );
}
else
{
return wasm_i32x4_trunc_sat_f32x4( Round( a ).native );
}
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<i32<4, SIMD>>>
Expand Down
10 changes: 8 additions & 2 deletions include/FastSIMD/ToolSet/x86/128/f32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,21 @@ namespace FS
}


template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = EnableIfRelaxed<SIMD>()>
FS_FORCEINLINE f32<4, SIMD> Reciprocal( const f32<4, SIMD>& a )
{
return _mm_rcp_ps( a.native );
}

template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>, typename = EnableIfRelaxed<SIMD>()>
FS_FORCEINLINE f32<4, SIMD> InvSqrt( const f32<4, SIMD>& a )
{
return _mm_rsqrt_ps( a.native );
}

// Square root of a 4-wide float register, implemented with _mm_sqrt_ps on the
// underlying native value. Participates in overload resolution only when
// f32<4, SIMD> satisfies EnableIfNative.
template<FastSIMD::FeatureSet SIMD, typename = EnableIfNative<f32<4, SIMD>>>
FS_FORCEINLINE f32<4, SIMD> Sqrt( const f32<4, SIMD>& a )
{
return _mm_sqrt_ps( a.native );
}
}
Loading

0 comments on commit 185d7ef

Please sign in to comment.