diff --git a/include/FastSIMD/ToolSet/ARM/128/i32x4.h b/include/FastSIMD/ToolSet/ARM/128/i32x4.h index 27b3be6..de9c8d8 100644 --- a/include/FastSIMD/ToolSet/ARM/128/i32x4.h +++ b/include/FastSIMD/ToolSet/ARM/128/i32x4.h @@ -12,7 +12,7 @@ namespace FS using NativeType = int32x4_t; using ElementType = std::int32_t; - using MaskType = m32; + using MaskType = m32; using MaskTypeArg = m32; FS_FORCEINLINE Register() = default; diff --git a/include/FastSIMD/ToolSet/ARM/128/m32x4.h b/include/FastSIMD/ToolSet/ARM/128/m32x4.h index 239a618..2074f14 100644 --- a/include/FastSIMD/ToolSet/ARM/128/m32x4.h +++ b/include/FastSIMD/ToolSet/ARM/128/m32x4.h @@ -4,54 +4,56 @@ namespace FS { + namespace impl + { + struct ArmMaskBase32x4 + { + uint32x4_t native; + }; + } + template - struct Register, 4, SIMD, std::enable_if_t> + struct Register, 4, SIMD, std::enable_if_t> + : std::conditional_t, 4, SIMD>> { static constexpr size_t ElementCount = 4; static constexpr auto FeatureFlags = SIMD; - using NativeType = uint32x4_t; + using NativeType = decltype(ArmMaskBase32x4::native); using ElementType = Mask<32, OPTIMISE_FLOAT>; using MaskType = Register; using MaskTypeArg = Register; FS_FORCEINLINE Register() = default; - - template - FS_FORCEINLINE Register( std::enable_if_t v ) : native( v ) { } - - template, 4, SIMD>> - FS_FORCEINLINE Register( const std::enable_if_t& v ) : native( v.native ) { } + FS_FORCEINLINE Register( NativeType v ) { this->native = v; } FS_FORCEINLINE NativeType GetNative() const { - return native; + return this->native; } FS_FORCEINLINE Register& operator &=( const Register& rhs ) { - native = vandq_u32( native, rhs.native ); + this->native = vandq_u32( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register& operator |=( const Register& rhs ) { - native = vorrq_u32( native, rhs.native ); + this->native = vorrq_u32( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register& operator ^=( const Register& rhs ) { - native = veorq_u32( native, rhs.native ); + this->native = veorq_u32( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register operator ~() const { - return vmvnq_u32( native ); + return vmvnq_u32( this->native ); } - - NativeType native; }; template, 4, SIMD>>> diff --git a/include/FastSIMD/ToolSet/Generic/Scalar/mNx1.h b/include/FastSIMD/ToolSet/Generic/Scalar/mNx1.h index e6355cc..afe3a3b 100644 --- a/include/FastSIMD/ToolSet/Generic/Scalar/mNx1.h +++ b/include/FastSIMD/ToolSet/Generic/Scalar/mNx1.h @@ -4,49 +4,56 @@ namespace FS { - template - struct Register, 1, SIMD> + namespace impl + { + struct GenericMaskBase + { + bool native; + }; + } + + template + struct Register, 1, SIMD> + : std::conditional_t, 1, SIMD>> { static constexpr size_t ElementCount = 1; static constexpr auto FeatureFlags = SIMD; using NativeType = bool; - using ElementType = Mask; + using ElementType = Mask; using MaskType = Register; using MaskTypeArg = Register; FS_FORCEINLINE Register() = default; - FS_FORCEINLINE Register( NativeType v ) : native( v ) { } + FS_FORCEINLINE Register( NativeType v ) { this->native = v; } FS_FORCEINLINE NativeType GetNative() const { - return native; + return this->native; } FS_FORCEINLINE Register& operator &=( const Register& rhs ) { - native = native && rhs.native; + this->native = this->native && rhs.native; return *this; } FS_FORCEINLINE Register& operator |=( const Register& rhs ) { - native = native || rhs.native; + this->native = this->native || rhs.native; return *this; } FS_FORCEINLINE Register& operator ^=( const Register& rhs ) { - native = native ^ rhs.native; + this->native = this->native ^ rhs.native; return *this; } FS_FORCEINLINE Register operator ~() const { - return !native; + return !this->native; } - - NativeType native; }; template, 1, SIMD>>> @@ -66,24 +73,4 @@ namespace FS { return static_cast>( a.native ); } - - template - struct Register, 1, SIMD> : Register, 1, SIMD> - { - static constexpr size_t ElementCount = 1; - static constexpr auto FeatureFlags = SIMD; - - using NativeType = bool; - using ElementType = Mask<32, false>; - using MaskType = Register; - using MaskTypeArg = Register, 1, SIMD>; - - FS_FORCEINLINE Register() = default; - FS_FORCEINLINE Register( NativeType v ) : Register, 1, SIMD>( v ) { } - - FS_FORCEINLINE Register operator ~() const - { - return !this->native; - } - }; } diff --git a/include/FastSIMD/ToolSet/WASM/128/i32x4.h b/include/FastSIMD/ToolSet/WASM/128/i32x4.h index 6df5d00..f879354 100644 --- a/include/FastSIMD/ToolSet/WASM/128/i32x4.h +++ b/include/FastSIMD/ToolSet/WASM/128/i32x4.h @@ -12,7 +12,7 @@ namespace FS using NativeType = v128_t; using ElementType = std::int32_t; - using MaskType = m32; + using MaskType = m32; using MaskTypeArg = m32; FS_FORCEINLINE Register() = default; diff --git a/include/FastSIMD/ToolSet/WASM/128/m32x4.h b/include/FastSIMD/ToolSet/WASM/128/m32x4.h index 886760c..846bab6 100644 --- a/include/FastSIMD/ToolSet/WASM/128/m32x4.h +++ b/include/FastSIMD/ToolSet/WASM/128/m32x4.h @@ -4,54 +4,56 @@ namespace FS { + namespace impl + { + struct WasmMaskBase32x4 + { + v128_t native; + }; + } + template struct Register, 4, SIMD, std::enable_if_t> + : std::conditional_t, 4, SIMD>> { static constexpr size_t ElementCount = 4; static constexpr auto FeatureFlags = SIMD; - using NativeType = v128_t; + using NativeType = decltype(WasmMaskBase32x4::native); using ElementType = Mask<32, OPTIMISE_FLOAT>; using MaskType = Register; using MaskTypeArg = Register; FS_FORCEINLINE Register() = default; - - template - FS_FORCEINLINE Register( std::enable_if_t v ) : native( v ) { } - - template, 4, SIMD>> - FS_FORCEINLINE Register( const std::enable_if_t& v ) : native( v.native ) { } + FS_FORCEINLINE Register( NativeType v ) { this->native = v; } FS_FORCEINLINE NativeType GetNative() const { - return native; + return this->native; } FS_FORCEINLINE Register& operator &=( const Register& rhs ) { - native = wasm_v128_and( native, rhs.native ); + this->native = wasm_v128_and( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register& operator |=( const Register& rhs ) { - native = wasm_v128_or( native, rhs.native ); + this->native = wasm_v128_or( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register& operator ^=( const Register& rhs ) { - native = wasm_v128_xor( native, rhs.native ); + this->native = wasm_v128_xor( this->native, rhs.native ); return *this; } FS_FORCEINLINE Register operator ~() const { - return wasm_v128_not( native ); + return wasm_v128_not( this->native ); } - - NativeType native; }; template, 4, SIMD>>> diff --git a/include/FastSIMD/ToolSet/x86/512/i32x16.h b/include/FastSIMD/ToolSet/x86/512/i32x16.h index 2c8de8f..00c07d3 100644 --- a/include/FastSIMD/ToolSet/x86/512/i32x16.h +++ b/include/FastSIMD/ToolSet/x86/512/i32x16.h @@ -12,7 +12,7 @@ namespace FS using NativeType = __m512i; using ElementType = std::int32_t; - using MaskType = m32; + using MaskType = m32; using MaskTypeArg = m32; FS_FORCEINLINE Register() = default; diff --git a/include/FastSIMD/ToolSet/x86/512/mNx16.h b/include/FastSIMD/ToolSet/x86/512/mNx16.h index 9b36edb..70b2af9 100644 --- a/include/FastSIMD/ToolSet/x86/512/mNx16.h +++ b/include/FastSIMD/ToolSet/x86/512/mNx16.h @@ -4,55 +4,56 @@ namespace FS { + namespace impl + { + struct AVX512MaskBase + { + __mmask16 native; + }; + } + template struct Register, 16, SIMD, std::enable_if_t> + : std::conditional_t, 16, SIMD>> { static constexpr size_t ElementCount = 16; static constexpr auto FeatureFlags = SIMD; - using NativeType = __mmask16; - using ElementType = Mask<32, OPTIMISE_FLOAT>; + using NativeType = decltype(AVX512MaskBase::native); + using ElementType = Mask; using MaskType = Register; using MaskTypeArg = Register; FS_FORCEINLINE Register() = default; - - template - FS_FORCEINLINE Register( std::enable_if_t v ) : native( v ) { } - - template, 16, SIMD>> - FS_FORCEINLINE Register( const std::enable_if_t& v ) : native( v.native ) { } - + FS_FORCEINLINE Register( NativeType v ) { this->native = v; } FS_FORCEINLINE NativeType GetNative() const { - return native; + return this->native; } FS_FORCEINLINE Register& operator &=( const Register& rhs ) { - native = ( native & rhs.native ); + this->native = ( this->native & rhs.native ); return *this; } FS_FORCEINLINE Register& operator |=( const Register& rhs ) { - native = ( native | rhs.native ); + this->native = ( this->native | rhs.native ); return *this; } FS_FORCEINLINE Register& operator ^=( const Register& rhs ) { - native = ( native ^ rhs.native ); + this->native = ( this->native ^ rhs.native ); return *this; } FS_FORCEINLINE Register operator ~() const { - return ~native; + return ~this->native; } - - NativeType native; }; template, 16, SIMD>>> diff --git a/tests/test.inl b/tests/test.inl index fac0ffc..a4da5b8 100644 --- a/tests/test.inl +++ b/tests/test.inl @@ -201,7 +201,7 @@ class FastSIMD::DispatchClass, SIMD> : publ struct GenArg, N, S>> { template - static FS::Register, N, S> Load( size_t inIdx, size_t argIdx, float* rnd, ARGs... ) + static FS::Register, N, S> Load( size_t inIdx, size_t argIdx, std::conditional_t* rnd, ARGs... ) { return FS::Load( rnd + inIdx + argIdx * N ) > FS::Load( rnd + inIdx + argIdx * N + 1 ); } @@ -279,6 +279,7 @@ class FastSIMD::DispatchClass, SIMD> : publ using TestRegi32 = TestReg; using TestRegf32 = TestReg; using TestRegm32 = typename TestRegf32::MaskType; + using TestRegm32i = typename TestRegi32::MaskType; RegisterTest( tests, "m32 bit mask", []( TestRegm32 a ) { return a; } ); @@ -290,6 +291,15 @@ class FastSIMD::DispatchClass, SIMD> : publ RegisterTest( tests, "m32 bit not operator", std::bit_not() ); RegisterTest( tests, "m32 bit and not", []( TestRegm32 a, TestRegm32 b ) { return FS::BitwiseAndNot( a, b ); } ); + RegisterTest( tests, "m32i bit mask", []( TestRegm32 a ) { return a; } ); + RegisterTest( tests, "m32i any mask", []( TestRegm32 a ) { return FS::AnyMask( a ); } ); + + RegisterTest( tests, "m32i bit and operator", std::bit_and() ); + RegisterTest( tests, "m32i bit or operator", std::bit_or() ); + RegisterTest( tests, "m32i bit xor operator", std::bit_xor() ); + RegisterTest( tests, "m32i bit not operator", std::bit_not() ); + RegisterTest( tests, "m32i bit and not", []( TestRegm32i a, TestRegm32i b ) { return FS::BitwiseAndNot( a, b ); } ); + RegisterTest( tests, "i32 load store", []( TestRegi32 a ) { return a; } ); RegisterTest( tests, "i32 load scalar", []( int32_t a ) { return TestRegi32( a ); } ); RegisterTest( tests, "i32 splat", []( int32_t a ) { return FS::Splat( a ); } ); @@ -402,17 +412,27 @@ class FastSIMD::DispatchClass, SIMD> : publ return FS::Reciprocal( FS::Select( a > TestRegf32( 0 ), clamped, -clamped ) ); } ).relaxedAccuracy = 8192; - RegisterTest( tests, "f32 cos", []( TestRegf32 a ) { return FS::Cos( a ); } ).relaxedAccuracy = 8192; - RegisterTest( tests, "f32 sin", []( TestRegf32 a ) { return FS::Sin( a ); } ).relaxedAccuracy = 8192; - RegisterTest( tests, "f32 exp", []( TestRegf32 a ) { return FS::Exp( a ); } ).relaxedAccuracy = 8192; - RegisterTest( tests, "f32 log", []( TestRegf32 a ) { return FS::Log( a ); } ).relaxedAccuracy = 8192; + RegisterTest( tests, "f32 cos", []( TestRegf32 a ) { return FS::Cos( FS::Min( FS::Max( a, TestRegf32( -1.e+16f ) ), TestRegf32( 1.e+16f ) ) ); } ).relaxedAccuracy = 8192; + RegisterTest( tests, "f32 sin", []( TestRegf32 a ) { return FS::Sin( FS::Min( FS::Max( a, TestRegf32( -1.e+16f ) ), TestRegf32( 1.e+16f ) ) ); } ).relaxedAccuracy = 8192; + RegisterTest( tests, "f32 exp", []( TestRegf32 a ) { return FS::Exp( FS::Min( FS::Max( a, TestRegf32( -1.e+16f ) ), TestRegf32( 1.e+16f ) ) ); } ).relaxedAccuracy = 8192; + RegisterTest( tests, "f32 log", []( TestRegf32 a ) { return FS::Log( FS::Min( FS::Max( a, TestRegf32( -1.e+16f ) ), TestRegf32( 1.e+16f ) ) ); } ).relaxedAccuracy = 8192; RegisterTest( tests, "f32 pow", []( TestRegf32 a, TestRegf32 b ) { return FS::Pow( a, b ); } ).relaxedAccuracy = 8192; RegisterTest( tests, "i32 convert to f32", []( TestRegi32 a ) { return FS::Convert( a ); } ); - RegisterTest( tests, "i32 cast to f32", []( TestRegi32 a ) { return FS::Cast( a ); } ); RegisterTest( tests, "f32 convert to i32", []( TestRegf32 a ) { return FS::Convert( FS::Min( FS::Max( a, TestRegf32( 2147483648 ) ), TestRegf32( 2147483520 ) ) ); } ); + RegisterTest( tests, "f32 cast to i32", []( TestRegf32 a ) { return FS::Cast( a ); } ); + RegisterTest( tests, "i32 cast to f32", []( TestRegi32 a ) { return FS::Cast( a ); } ); + if constexpr( !( SIMD & FeatureFlag::AVX512_F ) ) + { + RegisterTest( tests, "f32 cast to m32", []( TestRegf32 a ) { return FS_BIND_INTRINSIC( FS::Cast> )( a ); } ); + RegisterTest( tests, "i32 cast to m32", []( TestRegi32 a ) { return FS_BIND_INTRINSIC( FS::Cast> )( a ); } ); + RegisterTest( tests, "m32 cast to i32", []( TestRegm32 a ) { return FS_BIND_INTRINSIC( FS::Cast )( a ) ; } ); + RegisterTest( tests, "m32 cast to f32", []( TestRegm32 a ) { return FS_BIND_INTRINSIC( FS::Cast )( a ); } ); + RegisterTest( tests, "m32i cast to i32", []( TestRegm32i a ) { return FS_BIND_INTRINSIC( FS::Cast )( a ); } ); + RegisterTest( tests, "m32i cast to f32", []( TestRegm32i a ) { return FS_BIND_INTRINSIC( FS::Cast )( a ); } ); + } return tests; } };