Skip to content

Commit

Permalink
Better register templates for non-specialised mask types
Browse files Browse the repository at this point in the history
  • Loading branch information
Auburn committed Apr 23, 2024
1 parent 176afe5 commit 397dea9
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 85 deletions.
2 changes: 1 addition & 1 deletion include/FastSIMD/ToolSet/ARM/128/i32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace FS

using NativeType = int32x4_t;
using ElementType = std::int32_t;
using MaskType = m32<ElementCount, true, SIMD>;
using MaskType = m32<ElementCount, false, SIMD>;
using MaskTypeArg = m32<ElementCount, true, SIMD>;

FS_FORCEINLINE Register() = default;
Expand Down
32 changes: 17 additions & 15 deletions include/FastSIMD/ToolSet/ARM/128/m32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,56 @@

namespace FS
{
namespace impl
{
// Storage-only base for the 4 x 32-bit mask register.
// Register<Mask<32, OPTIMISE_FLOAT>, 4, SIMD> derives from this type directly when
// OPTIMISE_FLOAT is true; the OPTIMISE_FLOAT == false specialisation derives from the
// true one, so both variants end up sharing this single `native` member.
struct ArmMaskBase32x4
{
// Raw mask value read and written by the Register specialisation's operators.
uint32x4_t native;
};
}

// Mask register for four 32-bit lanes on ARM targets.
//
// Storage comes from the base class: when OPTIMISE_FLOAT is true the base is
// impl::ArmMaskBase32x4 (which owns `native`); when it is false the base is the
// OPTIMISE_FLOAT == true specialisation, so a false-variant mask converts to the
// true variant through the ordinary derived-to-base conversion.
template<FastSIMD::FeatureSet SIMD, bool OPTIMISE_FLOAT>
struct Register<Mask<32, OPTIMISE_FLOAT>, 4, SIMD, std::enable_if_t<SIMD & FastSIMD::FeatureFlag::ARM>>
    : std::conditional_t<OPTIMISE_FLOAT, impl::ArmMaskBase32x4, Register<Mask<32, true>, 4, SIMD>>
{
    static constexpr size_t ElementCount = 4;
    static constexpr auto FeatureFlags = SIMD;

    // The helper lives in FS::impl and the base class is dependent, so the name
    // must be qualified for lookup to succeed at template definition time.
    using NativeType = decltype(impl::ArmMaskBase32x4::native);
    using ElementType = Mask<32, OPTIMISE_FLOAT>;
    using MaskType = Register;
    using MaskTypeArg = Register;

    FS_FORCEINLINE Register() = default;

    // `native` belongs to the (dependent) base, so it cannot appear in a
    // mem-initializer list; assign it in the body instead.
    FS_FORCEINLINE Register( NativeType v ) { this->native = v; }

    // Returns the underlying native mask value.
    FS_FORCEINLINE NativeType GetNative() const
    {
        return this->native;
    }

    // Bitwise AND with rhs, stored in place.
    FS_FORCEINLINE Register& operator &=( const Register& rhs )
    {
        this->native = vandq_u32( this->native, rhs.native );
        return *this;
    }

    // Bitwise OR with rhs, stored in place.
    FS_FORCEINLINE Register& operator |=( const Register& rhs )
    {
        this->native = vorrq_u32( this->native, rhs.native );
        return *this;
    }

    // Bitwise XOR with rhs, stored in place.
    FS_FORCEINLINE Register& operator ^=( const Register& rhs )
    {
        this->native = veorq_u32( this->native, rhs.native );
        return *this;
    }

    // Returns the bitwise complement as a new mask (built via the NativeType constructor).
    FS_FORCEINLINE Register operator ~() const
    {
        return vmvnq_u32( this->native );
    }
};

template<FastSIMD::FeatureSet SIMD, bool B, typename = EnableIfNative<Register<Mask<32, B>, 4, SIMD>>>
Expand Down
49 changes: 18 additions & 31 deletions include/FastSIMD/ToolSet/Generic/Scalar/mNx1.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,56 @@

namespace FS
{
namespace impl
{
    // Storage-only base for the scalar (single element) mask register.
    struct GenericMaskBase
    {
        bool native;
    };
}

// Scalar mask register holding a single bool for any mask width N.
//
// Storage comes from the base class: impl::GenericMaskBase when OPTIMISE_FLOAT
// is true, otherwise the OPTIMISE_FLOAT == true specialisation, so the false
// variant converts to the true variant through derived-to-base conversion.
template<FastSIMD::FeatureSet SIMD, std::size_t N, bool OPTIMISE_FLOAT>
struct Register<Mask<N, OPTIMISE_FLOAT>, 1, SIMD>
    : std::conditional_t<OPTIMISE_FLOAT, impl::GenericMaskBase, Register<Mask<N, true>, 1, SIMD>>
{
    static constexpr size_t ElementCount = 1;
    static constexpr auto FeatureFlags = SIMD;

    using NativeType = bool;
    using ElementType = Mask<N, OPTIMISE_FLOAT>;
    using MaskType = Register;
    using MaskTypeArg = Register;

    FS_FORCEINLINE Register() = default;

    // `native` belongs to the (dependent) base, so it cannot appear in a
    // mem-initializer list; assign it in the body instead.
    FS_FORCEINLINE Register( NativeType v ) { this->native = v; }

    // Returns the stored bool.
    FS_FORCEINLINE NativeType GetNative() const
    {
        return this->native;
    }

    // Logical AND with rhs, stored in place.
    FS_FORCEINLINE Register& operator &=( const Register& rhs )
    {
        this->native = this->native && rhs.native;
        return *this;
    }

    // Logical OR with rhs, stored in place.
    FS_FORCEINLINE Register& operator |=( const Register& rhs )
    {
        this->native = this->native || rhs.native;
        return *this;
    }

    // XOR with rhs, stored in place.
    FS_FORCEINLINE Register& operator ^=( const Register& rhs )
    {
        this->native = this->native ^ rhs.native;
        return *this;
    }

    // Returns the logical negation as a new mask.
    FS_FORCEINLINE Register operator ~() const
    {
        return !this->native;
    }
};

template<FastSIMD::FeatureSet SIMD, size_t N, bool B, typename = EnableIfNative<Register<Mask<N, B>, 1, SIMD>>>
Expand All @@ -66,24 +73,4 @@ namespace FS
{
return static_cast<BitStorage<1>>( a.native );
}

// Scalar mask register for Mask<N, false>.
//
// Reuses the storage and compound-assignment operators of the Mask<N, true>
// specialisation by inheriting from it; only the type aliases, the constructors
// and operator~ (which must return this derived type) are redeclared here.
template<FastSIMD::FeatureSet SIMD, size_t N>
struct Register<Mask<N, false>, 1, SIMD> : Register<Mask<N, true>, 1, SIMD>
{
    static constexpr size_t ElementCount = 1;
    static constexpr auto FeatureFlags = SIMD;

    using NativeType = bool;
    // Must follow the template parameter N; a hard-coded width of 32 would
    // mislabel every other mask width.
    using ElementType = Mask<N, false>;
    using MaskType = Register;
    using MaskTypeArg = Register<Mask<N, true>, 1, SIMD>;

    FS_FORCEINLINE Register() = default;

    // Forwards the value to the base specialisation, which owns `native`.
    FS_FORCEINLINE Register( NativeType v ) : Register<Mask<N, true>, 1, SIMD>( v ) { }

    // Returns the logical negation as a new Mask<N, false> register.
    FS_FORCEINLINE Register operator ~() const
    {
        return !this->native;
    }
};
}
2 changes: 1 addition & 1 deletion include/FastSIMD/ToolSet/WASM/128/i32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace FS

using NativeType = v128_t;
using ElementType = std::int32_t;
using MaskType = m32<ElementCount, true, SIMD>;
using MaskType = m32<ElementCount, false, SIMD>;
using MaskTypeArg = m32<ElementCount, true, SIMD>;

FS_FORCEINLINE Register() = default;
Expand Down
30 changes: 16 additions & 14 deletions include/FastSIMD/ToolSet/WASM/128/m32x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,56 @@

namespace FS
{
namespace impl
{
// Storage-only base for the 4 x 32-bit WASM mask register.
// Register<Mask<32, OPTIMISE_FLOAT>, 4, SIMD> derives from this type directly when
// OPTIMISE_FLOAT is true; the OPTIMISE_FLOAT == false specialisation derives from the
// true one, so both variants end up sharing this single `native` member.
struct WasmMaskBase32x4
{
// Raw mask value read and written by the Register specialisation's operators.
v128_t native;
};
}

// Mask register for four 32-bit lanes on WASM targets.
//
// Storage comes from the base class: when OPTIMISE_FLOAT is true the base is
// impl::WasmMaskBase32x4 (which owns `native`); when it is false the base is the
// OPTIMISE_FLOAT == true specialisation, so a false-variant mask converts to the
// true variant through the ordinary derived-to-base conversion.
template<FastSIMD::FeatureSet SIMD, bool OPTIMISE_FLOAT>
struct Register<Mask<32, OPTIMISE_FLOAT>, 4, SIMD, std::enable_if_t<SIMD & FastSIMD::FeatureFlag::WASM>>
    : std::conditional_t<OPTIMISE_FLOAT, impl::WasmMaskBase32x4, Register<Mask<32, true>, 4, SIMD>>
{
    static constexpr size_t ElementCount = 4;
    static constexpr auto FeatureFlags = SIMD;

    // The helper lives in FS::impl and the base class is dependent, so the name
    // must be qualified for lookup to succeed at template definition time.
    using NativeType = decltype(impl::WasmMaskBase32x4::native);
    using ElementType = Mask<32, OPTIMISE_FLOAT>;
    using MaskType = Register;
    using MaskTypeArg = Register;

    FS_FORCEINLINE Register() = default;

    // `native` belongs to the (dependent) base, so it cannot appear in a
    // mem-initializer list; assign it in the body instead.
    FS_FORCEINLINE Register( NativeType v ) { this->native = v; }

    // Returns the underlying native mask value.
    FS_FORCEINLINE NativeType GetNative() const
    {
        return this->native;
    }

    // Bitwise AND with rhs, stored in place.
    FS_FORCEINLINE Register& operator &=( const Register& rhs )
    {
        this->native = wasm_v128_and( this->native, rhs.native );
        return *this;
    }

    // Bitwise OR with rhs, stored in place.
    FS_FORCEINLINE Register& operator |=( const Register& rhs )
    {
        this->native = wasm_v128_or( this->native, rhs.native );
        return *this;
    }

    // Bitwise XOR with rhs, stored in place.
    FS_FORCEINLINE Register& operator ^=( const Register& rhs )
    {
        this->native = wasm_v128_xor( this->native, rhs.native );
        return *this;
    }

    // Returns the bitwise complement as a new mask (built via the NativeType constructor).
    FS_FORCEINLINE Register operator ~() const
    {
        return wasm_v128_not( this->native );
    }
};

template<FastSIMD::FeatureSet SIMD, bool B, typename = EnableIfNative<Register<Mask<32, B>, 4, SIMD>>>
Expand Down
2 changes: 1 addition & 1 deletion include/FastSIMD/ToolSet/x86/512/i32x16.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace FS

using NativeType = __m512i;
using ElementType = std::int32_t;
using MaskType = m32<ElementCount, true, SIMD>;
using MaskType = m32<ElementCount, false, SIMD>;
using MaskTypeArg = m32<ElementCount, true, SIMD>;

FS_FORCEINLINE Register() = default;
Expand Down
33 changes: 17 additions & 16 deletions include/FastSIMD/ToolSet/x86/512/mNx16.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,56 @@

namespace FS
{
namespace impl
{
// Storage-only base for the 16-element AVX-512 mask register.
// Register<Mask<N, OPTIMISE_FLOAT>, 16, SIMD> derives from this type directly when
// OPTIMISE_FLOAT is true; the OPTIMISE_FLOAT == false specialisation derives from the
// true one, so both variants end up sharing this single `native` member.
struct AVX512MaskBase
{
// Raw mask value read and written by the Register specialisation's operators.
__mmask16 native;
};
}

// Mask register for sixteen elements on AVX-512 targets, for any mask width N.
//
// Storage comes from the base class: when OPTIMISE_FLOAT is true the base is
// impl::AVX512MaskBase (which owns `native`); when it is false the base is the
// OPTIMISE_FLOAT == true specialisation, so a false-variant mask converts to the
// true variant through the ordinary derived-to-base conversion.
template<FastSIMD::FeatureSet SIMD, std::size_t N, bool OPTIMISE_FLOAT>
struct Register<Mask<N, OPTIMISE_FLOAT>, 16, SIMD, std::enable_if_t<SIMD & FastSIMD::FeatureFlag::AVX512_F>>
    : std::conditional_t<OPTIMISE_FLOAT, impl::AVX512MaskBase, Register<Mask<N, true>, 16, SIMD>>
{
    static constexpr size_t ElementCount = 16;
    static constexpr auto FeatureFlags = SIMD;

    // The helper lives in FS::impl and the base class is dependent, so the name
    // must be qualified for lookup to succeed at template definition time.
    using NativeType = decltype(impl::AVX512MaskBase::native);
    // Tracks the template parameter N rather than a fixed width.
    using ElementType = Mask<N, OPTIMISE_FLOAT>;
    using MaskType = Register;
    using MaskTypeArg = Register;

    FS_FORCEINLINE Register() = default;

    // `native` belongs to the (dependent) base, so it cannot appear in a
    // mem-initializer list; assign it in the body instead.
    FS_FORCEINLINE Register( NativeType v ) { this->native = v; }

    // Returns the underlying native mask value.
    FS_FORCEINLINE NativeType GetNative() const
    {
        return this->native;
    }

    // Bitwise AND with rhs, stored in place.
    FS_FORCEINLINE Register& operator &=( const Register& rhs )
    {
        this->native = ( this->native & rhs.native );
        return *this;
    }

    // Bitwise OR with rhs, stored in place.
    FS_FORCEINLINE Register& operator |=( const Register& rhs )
    {
        this->native = ( this->native | rhs.native );
        return *this;
    }

    // Bitwise XOR with rhs, stored in place.
    FS_FORCEINLINE Register& operator ^=( const Register& rhs )
    {
        this->native = ( this->native ^ rhs.native );
        return *this;
    }

    // Returns the bitwise complement as a new mask (built via the NativeType constructor).
    FS_FORCEINLINE Register operator ~() const
    {
        return ~this->native;
    }
};

template<FastSIMD::FeatureSet SIMD, std::size_t N, bool B, typename = EnableIfNative<Register<Mask<N, B>, 16, SIMD>>>
Expand Down
Loading

0 comments on commit 397dea9

Please sign in to comment.