diff --git a/Runtime/AssemblyInfo.cs b/Runtime/AssemblyInfo.cs
index 1ea114b..0a91020 100644
--- a/Runtime/AssemblyInfo.cs
+++ b/Runtime/AssemblyInfo.cs
@@ -32,9 +32,9 @@
// Build Number
// Revision
//
-[assembly: AssemblyVersion("2.3.0")]
-[assembly: AssemblyFileVersion("2.3.0")]
-[assembly: AssemblyInformationalVersion("2.3 Release")]
+[assembly: AssemblyVersion("2.3.5")]
+[assembly: AssemblyFileVersion("2.3.5")]
+[assembly: AssemblyInformationalVersion("2.3.5 Release")]
[assembly: SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "Unity.Mathematics API consistency")]
[assembly: CompilationRelaxationsAttribute(CompilationRelaxations.NoStringInterning)]
\ No newline at end of file
diff --git a/Runtime/Math Lib/Constants.cs b/Runtime/Math Lib/Constants.cs
index c5843a2..17aa5ee 100644
--- a/Runtime/Math Lib/Constants.cs
+++ b/Runtime/Math Lib/Constants.cs
@@ -11,6 +11,9 @@ unsafe public static partial class maxmath
/// The square root 3. Approximately 1.73. This is a f64/double precision constant.
public const double SQRT3_DBL = 1.73205080756887729352d;
+ /// The square root of 5. Approximately 2.23. This is a f64/double precision constant.
+ public const double SQRT5_DBL = 2.23606797749978969640d;
+
/// The cube root of 2. Approximately 1.26. This is a f64/double precision constant.
public const double CBRT2_DBL = 1.25992104989487316476d;
@@ -27,6 +30,9 @@ unsafe public static partial class maxmath
/// The square root of 3. Approximately 1.73.
public const float SQRT3 = 1.73205080f;
+ /// The square root of 5. Approximately 2.23.
+ public const float SQRT5 = 2.23606797f;
+
/// The cube root of 2. Approximately 1.26.
public const float CBRT2 = 1.25992104f;
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Add-Subtract.cs b/Runtime/Math Lib/Functions/Arithmetic/Add-Subtract.cs
index 729b9af..ba51f00 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Add-Subtract.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Add-Subtract.cs
@@ -264,7 +264,7 @@ public static float2 addsub(float2 a, float2 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat2(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -278,7 +278,7 @@ public static float3 addsub(float3 a, float3 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat3(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -292,7 +292,7 @@ public static float4 addsub(float4 a, float4 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat4(Xse.subadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -321,7 +321,7 @@ public static double2 addsub(double2 a, double2 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subadd_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToDouble2(Xse.subadd_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -335,7 +335,7 @@ public static double3 addsub(double3 a, double3 b)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_subadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
+ return RegisterConversion.ToDouble3(Xse.mm256_subadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
}
else
{
@@ -349,7 +349,7 @@ public static double4 addsub(double4 a, double4 b)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_subadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
+ return RegisterConversion.ToDouble4(Xse.mm256_subadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
}
else
{
@@ -599,7 +599,7 @@ public static uint2 addsub(uint2 a, uint2 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 2));
+ return RegisterConversion.ToUInt2(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 2));
}
else
{
@@ -613,7 +613,7 @@ public static uint3 addsub(uint3 a, uint3 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 3));
+ return RegisterConversion.ToUInt3(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 3));
}
else
{
@@ -627,7 +627,7 @@ public static uint4 addsub(uint4 a, uint4 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 4));
+ return RegisterConversion.ToUInt4(Xse.subadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Average.cs b/Runtime/Math Lib/Functions/Arithmetic/Average.cs
index f73b935..1d5ae6d 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Average.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Average.cs
@@ -811,7 +811,7 @@ public static uint2 avg(uint2 x, uint2 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
+ return RegisterConversion.ToUInt2(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
}
else
{
@@ -833,7 +833,7 @@ public static uint3 avg(uint3 x, uint3 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
+ return RegisterConversion.ToUInt3(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
}
else
{
@@ -855,7 +855,7 @@ public static uint4 avg(uint4 x, uint4 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
+ return RegisterConversion.ToUInt4(Xse.avg_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow)));
}
else
{
@@ -902,7 +902,7 @@ public static int2 avg(int2 x, int2 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 2));
+ return RegisterConversion.ToInt2(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 2));
}
else
{
@@ -917,7 +917,7 @@ public static int3 avg(int3 x, int3 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 3));
+ return RegisterConversion.ToInt3(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 3));
}
else
{
@@ -932,7 +932,7 @@ public static int4 avg(int4 x, int4 y, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 4));
+ return RegisterConversion.ToInt4(Xse.avg_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), noOverflow.Promises(Promise.NoOverflow), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Divide With Remainder.cs b/Runtime/Math Lib/Functions/Arithmetic/Divide With Remainder.cs
index 6c740c7..a593908 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Divide With Remainder.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Divide With Remainder.cs
@@ -1192,8 +1192,8 @@ public static int2 divrem(int2 dividend, int2 divisor, out int2 remainder)
{
if (Sse2.IsSse2Supported)
{
- int2 ret = RegisterConversion.ToType(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 2));
- remainder = RegisterConversion.ToType(rem);
+ int2 ret = RegisterConversion.ToInt2(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 2));
+ remainder = RegisterConversion.ToInt2(rem);
return ret;
}
@@ -1211,8 +1211,8 @@ public static int3 divrem(int3 dividend, int3 divisor, out int3 remainder)
{
if (Sse2.IsSse2Supported)
{
- int3 ret = RegisterConversion.ToType(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 3));
- remainder = RegisterConversion.ToType(rem);
+ int3 ret = RegisterConversion.ToInt3(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 3));
+ remainder = RegisterConversion.ToInt3(rem);
return ret;
}
@@ -1230,8 +1230,8 @@ public static int4 divrem(int4 dividend, int4 divisor, out int4 remainder)
{
if (Sse2.IsSse2Supported)
{
- int4 ret = RegisterConversion.ToType(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 4));
- remainder = RegisterConversion.ToType(rem);
+ int4 ret = RegisterConversion.ToInt4(Xse.divrem_epi32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 4));
+ remainder = RegisterConversion.ToInt4(rem);
return ret;
}
@@ -1280,8 +1280,8 @@ public static uint2 divrem(uint2 dividend, uint2 divisor, out uint2 remainder)
{
if (Sse2.IsSse2Supported)
{
- uint2 ret = RegisterConversion.ToType(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 2));
- remainder = RegisterConversion.ToType(rem);
+ uint2 ret = RegisterConversion.ToUInt2(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 2));
+ remainder = RegisterConversion.ToUInt2(rem);
return ret;
}
@@ -1299,8 +1299,8 @@ public static uint3 divrem(uint3 dividend, uint3 divisor, out uint3 remainder)
{
if (Sse2.IsSse2Supported)
{
- uint3 ret = RegisterConversion.ToType(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 3));
- remainder = RegisterConversion.ToType(rem);
+ uint3 ret = RegisterConversion.ToUInt3(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 3));
+ remainder = RegisterConversion.ToUInt3(rem);
return ret;
}
@@ -1318,8 +1318,8 @@ public static uint4 divrem(uint4 dividend, uint4 divisor, out uint4 remainder)
{
if (Sse2.IsSse2Supported)
{
- uint4 ret = RegisterConversion.ToType(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 4));
- remainder = RegisterConversion.ToType(rem);
+ uint4 ret = RegisterConversion.ToUInt4(Xse.divrem_epu32(RegisterConversion.ToV128(dividend), RegisterConversion.ToV128(divisor), out v128 rem, 4));
+ remainder = RegisterConversion.ToUInt4(rem);
return ret;
}
diff --git a/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Add-Subtract.cs b/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Add-Subtract.cs
index 40668e6..152df69 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Add-Subtract.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Add-Subtract.cs
@@ -15,7 +15,7 @@ public static float2 dadsub(float2 a, float2 b, float2 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat2(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -31,7 +31,7 @@ public static float3 dadsub(float3 a, float3 b, float3 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat3(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -47,7 +47,7 @@ public static float4 dadsub(float4 a, float4 b, float4 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat4(Xse.fmsubadd_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -78,7 +78,7 @@ public static double2 dadsub(double2 a, double2 b, double2 c, bool fast = false)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_pd(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToDouble2(Xse.fmsubadd_pd(RegisterConversion.ToV128(a),
fast ? Xse.rcp_pd(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -105,7 +105,7 @@ public static double3 dadsub(double3 a, double3 b, double3 c, bool fast = false)
divisor = RegisterConversion.ToV256(math.rcp(b));
}
- return RegisterConversion.ToType(Xse.mm256_fmsubadd_ps(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble3(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
}
else
{
@@ -130,7 +130,7 @@ public static double4 dadsub(double4 a, double4 b, double4 c, bool fast = false)
divisor = RegisterConversion.ToV256(math.rcp(b));
}
- return RegisterConversion.ToType(Xse.mm256_fmsubadd_ps(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble4(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Subtract-Add.cs.cs b/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Subtract-Add.cs.cs
index 3802eed..ca30808 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Subtract-Add.cs.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/FMA/Float Divide-Subtract-Add.cs.cs
@@ -15,7 +15,7 @@ public static float2 dsubadd(float2 a, float2 b, float2 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat2(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -31,7 +31,7 @@ public static float3 dsubadd(float3 a, float3 b, float3 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat3(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -47,7 +47,7 @@ public static float4 dsubadd(float4 a, float4 b, float4 c, bool fast = false)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToFloat4(Xse.fmaddsub_ps(RegisterConversion.ToV128(a),
fast ? Sse.rcp_ps(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -78,7 +78,7 @@ public static double2 dsubadd(double2 a, double2 b, double2 c, bool fast = false
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_pd(RegisterConversion.ToV128(a),
+ return RegisterConversion.ToDouble2(Xse.fmaddsub_pd(RegisterConversion.ToV128(a),
fast ? Xse.rcp_pd(RegisterConversion.ToV128(b)) : RegisterConversion.ToV128(math.rcp(b)),
RegisterConversion.ToV128(c)));
}
@@ -105,7 +105,7 @@ public static double3 dsubadd(double3 a, double3 b, double3 c, bool fast = false
divisor = RegisterConversion.ToV256(math.rcp(b));
}
- return RegisterConversion.ToType(Xse.mm256_fmaddsub_ps(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble3(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
}
else
{
@@ -130,7 +130,7 @@ public static double4 dsubadd(double4 a, double4 b, double4 c, bool fast = false
divisor = RegisterConversion.ToV256(math.rcp(b));
}
- return RegisterConversion.ToType(Xse.mm256_fmaddsub_ps(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble4(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), divisor, RegisterConversion.ToV256(c)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Add-Subtract.cs b/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Add-Subtract.cs
index 3553674..20e5bf4 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Add-Subtract.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Add-Subtract.cs
@@ -208,7 +208,7 @@ public static float2 madsub(float2 a, float2 b, float2 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat2(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -222,7 +222,7 @@ public static float3 madsub(float3 a, float3 b, float3 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat3(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -236,7 +236,7 @@ public static float4 madsub(float4 a, float4 b, float4 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat4(Xse.fmsubadd_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -265,7 +265,7 @@ public static double2 madsub(double2 a, double2 b, double2 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToDouble2(Xse.fmsubadd_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -279,7 +279,7 @@ public static double3 madsub(double3 a, double3 b, double3 c)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble3(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
}
else
{
@@ -293,7 +293,7 @@ public static double4 madsub(double4 a, double4 b, double4 c)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble4(Xse.mm256_fmsubadd_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
}
else
{
@@ -543,7 +543,7 @@ public static uint2 madsub(uint2 a, uint2 b, uint2 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 2));
+ return RegisterConversion.ToUInt2(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 2));
}
else
{
@@ -557,7 +557,7 @@ public static uint3 madsub(uint3 a, uint3 b, uint3 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 3));
+ return RegisterConversion.ToUInt3(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 3));
}
else
{
@@ -571,7 +571,7 @@ public static uint4 madsub(uint4 a, uint4 b, uint4 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 4));
+ return RegisterConversion.ToUInt4(Xse.fmsubadd_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Subtract-Add.cs.cs b/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Subtract-Add.cs.cs
index f78045e..6a05893 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Subtract-Add.cs.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/FMA/Multiply-Subtract-Add.cs.cs
@@ -208,7 +208,7 @@ public static float2 msubadd(float2 a, float2 b, float2 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat2(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -222,7 +222,7 @@ public static float3 msubadd(float3 a, float3 b, float3 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat3(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -236,7 +236,7 @@ public static float4 msubadd(float4 a, float4 b, float4 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToFloat4(Xse.fmaddsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -265,7 +265,7 @@ public static double2 msubadd(double2 a, double2 b, double2 c)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
+ return RegisterConversion.ToDouble2(Xse.fmaddsub_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c)));
}
else
{
@@ -279,7 +279,7 @@ public static double3 msubadd(double3 a, double3 b, double3 c)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble3(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
}
else
{
@@ -293,7 +293,7 @@ public static double4 msubadd(double4 a, double4 b, double4 c)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
+ return RegisterConversion.ToDouble4(Xse.mm256_fmaddsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b), RegisterConversion.ToV256(c)));
}
else
{
@@ -543,7 +543,7 @@ public static uint2 msubadd(uint2 a, uint2 b, uint2 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 2));
+ return RegisterConversion.ToUInt2(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 2));
}
else
{
@@ -557,7 +557,7 @@ public static uint3 msubadd(uint3 a, uint3 b, uint3 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 3));
+ return RegisterConversion.ToUInt3(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 3));
}
else
{
@@ -571,7 +571,7 @@ public static uint4 msubadd(uint4 a, uint4 b, uint4 c)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 4));
+ return RegisterConversion.ToUInt4(Xse.fmaddsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), RegisterConversion.ToV128(c), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/(Reciprocal) Cube Root.cs b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/(Reciprocal) Cube Root.cs
index fcc50fb..db97cf9 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/(Reciprocal) Cube Root.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/(Reciprocal) Cube Root.cs
@@ -21,7 +21,10 @@ public static v128 cbrt_ps(v128 a, bool promise = false, byte elements = 4)
{
/*const*/ bool NEED_TO_SAVE_SIGN = !(promise || constexpr.ALL_LT_EPU32(a, 1u << 31, elements));
+ v128 ONE = Sse.set1_ps(1f);
+ v128 ONE_THIRD = Sse.set1_ps(1f / 3f);
v128 TWO_THIRDS = Sse.set1_ps(2f / 3f);
+
v128 absX = a;
if (NEED_TO_SAVE_SIGN)
@@ -35,11 +38,11 @@ public static v128 cbrt_ps(v128 a, bool promise = false, byte elements = 4)
v128 c = Sse.mul_ps(Sse.mul_ps(absX, y), Sse.mul_ps(y, y));
y = Sse.mul_ps(y, fnmadd_ps(fnmadd_ps(Sse.set1_ps(F32_C2), c, Sse.set1_ps(F32_C1)), c, Sse.set1_ps(F32_C0)));
v128 d = Sse.mul_ps(absX, y);
- c = fnmadd_ps(d, Sse.mul_ps(y, y), Sse.set1_ps(1f));
+ c = fnmadd_ps(d, Sse.mul_ps(y, y), ONE);
- y = Sse.mul_ps(Sse.mul_ps(d, y), fmadd_ps(c, Sse.set1_ps(1f / 3f), Sse.set1_ps(1f)));
+ y = Sse.mul_ps(Sse.mul_ps(d, y), fmadd_ps(c, ONE_THIRD, ONE));
// additional NR
- y = fdadd_ps(Sse.mul_ps(Sse.set1_ps(1f / 3f), a), Sse.mul_ps(y, y), Sse.mul_ps(TWO_THIRDS, y));
+ y = fdadd_ps(Sse.mul_ps(ONE_THIRD, a), Sse.mul_ps(y, y), Sse.mul_ps(TWO_THIRDS, y));
// FloatPrecision.Low 2nd to last y (!!!save sign somehow!!!)
// FloatPrecision.Medium+ last y
@@ -56,7 +59,10 @@ public static v256 mm256_cbrt_ps(v256 a, bool promise = false)
{
/*const*/ bool NEED_TO_SAVE_SIGN = !(promise || constexpr.ALL_LT_EPU32(a, 1u << 31));
+ v256 ONE = Avx.mm256_set1_ps(1f);
+ v256 ONE_THIRD = Avx.mm256_set1_ps(1f / 3f);
v256 TWO_THIRDS = Avx.mm256_set1_ps(2f / 3f);
+
v256 absX = a;
if (NEED_TO_SAVE_SIGN)
@@ -82,11 +88,11 @@ public static v256 mm256_cbrt_ps(v256 a, bool promise = false)
v256 c = Avx.mm256_mul_ps(Avx.mm256_mul_ps(absX, y), Avx.mm256_mul_ps(y, y));
y = Avx.mm256_mul_ps(y, mm256_fnmadd_ps(mm256_fnmadd_ps(Avx.mm256_set1_ps(F32_C2), c, Avx.mm256_set1_ps(F32_C1)), c, Avx.mm256_set1_ps(F32_C0)));
v256 d = Avx.mm256_mul_ps(absX, y);
- c = mm256_fnmadd_ps(d, Avx.mm256_mul_ps(y, y), Avx.mm256_set1_ps(1f));
+ c = mm256_fnmadd_ps(d, Avx.mm256_mul_ps(y, y), ONE);
- y = Avx.mm256_mul_ps(Avx.mm256_mul_ps(d, y), mm256_fmadd_ps(c, Avx.mm256_set1_ps(1f / 3f), Avx.mm256_set1_ps(1f)));
+ y = Avx.mm256_mul_ps(Avx.mm256_mul_ps(d, y), mm256_fmadd_ps(c, ONE_THIRD, ONE));
// additional NR
- y = mm256_fdadd_ps(Avx.mm256_mul_ps(Avx.mm256_set1_ps(1f / 3f), a), Avx.mm256_mul_ps(y, y), Avx.mm256_mul_ps(TWO_THIRDS, y));
+ y = mm256_fdadd_ps(Avx.mm256_mul_ps(ONE_THIRD, a), Avx.mm256_mul_ps(y, y), Avx.mm256_mul_ps(TWO_THIRDS, y));
// FloatPrecision.Low 2nd to last y (!!!save sign somehow!!!)
// FloatPrecision.Medium+ last y
@@ -130,7 +136,7 @@ public static v128 cbrt_pd(v128 a)
v128 t = Sse2.and_si128(a, Sse2.set1_epi64x(1L << 63));
t = Sse2.or_si128(t, Sse2.slli_epi64(absHi, 32));
- r = Sse2.mul_pd(Sse2.mul_pd(t, t), Sse2.mul_pd(t, Sse2.div_pd(Sse2.set1_pd(1d), a)));
+ r = Sse2.mul_pd(Sse2.mul_pd(t, Sse2.mul_pd(t, t)), Sse2.div_pd(Sse2.set1_pd(1d), a));
t = Sse2.mul_pd(t, fmadd_pd(Sse2.mul_pd(Sse2.mul_pd(r, r), r), fmadd_pd(Sse2.set1_pd(F64_C4), r, Sse2.set1_pd(F64_C3)), fmadd_pd(fmadd_pd(Sse2.set1_pd(F64_C2), r, Sse2.set1_pd(F64_C1)), r, Sse2.set1_pd(F64_C0))));
t = Sse2.and_si128(Sse2.add_epi64(t, Sse2.set1_epi64x(0x8000_0000)), Sse2.set1_epi64x(unchecked((long)0xFFFF_FFFF_C000_0000ul)));
@@ -166,7 +172,7 @@ public static v256 mm256_cbrt_pd(v256 a)
v256 t = Avx2.mm256_and_si256(a, Avx.mm256_set1_epi64x(1L << 63));
t = Avx2.mm256_or_si256(t, Avx2.mm256_slli_epi64(absHi, 32));
- r = Avx.mm256_mul_pd(Avx.mm256_mul_pd(t, t), Avx.mm256_mul_pd(t, Avx.mm256_div_pd(Avx.mm256_set1_pd(1d), a)));
+ r = Avx.mm256_mul_pd(Avx.mm256_mul_pd(t, Avx.mm256_mul_pd(t, t)), Avx.mm256_div_pd(Avx.mm256_set1_pd(1d), a));
t = Avx.mm256_mul_pd(t, mm256_fmadd_pd(Avx.mm256_mul_pd(Avx.mm256_mul_pd(r, r), r), mm256_fmadd_pd(Avx.mm256_set1_pd(F64_C4), r, Avx.mm256_set1_pd(F64_C3)), mm256_fmadd_pd(mm256_fmadd_pd(Avx.mm256_set1_pd(F64_C2), r, Avx.mm256_set1_pd(F64_C1)), r, Avx.mm256_set1_pd(F64_C0))));
t = Avx2.mm256_and_si256(Avx2.mm256_add_epi64(t, Avx.mm256_set1_epi64x(0x8000_0000)), Avx.mm256_set1_epi64x(unchecked((long)0xFFFF_FFFF_C000_0000ul)));
@@ -292,7 +298,7 @@ public static v128 rcbrt_pd(v128 a)
v128 t = Sse2.and_si128(a, Sse2.set1_epi64x(1L << 63));
t = Sse2.or_si128(t, Sse2.slli_epi64(absHi, 32));
- r = Sse2.mul_pd(Sse2.mul_pd(t, t), Sse2.mul_pd(t, Sse2.div_pd(Sse2.set1_pd(1d), a)));
+ r = Sse2.mul_pd(Sse2.mul_pd(t, Sse2.mul_pd(t, t)), Sse2.div_pd(Sse2.set1_pd(1d), a));
t = Sse2.mul_pd(t, fmadd_pd(Sse2.mul_pd(Sse2.mul_pd(r, r), r), fmadd_pd(Sse2.set1_pd(F64_C4), r, Sse2.set1_pd(F64_C3)), fmadd_pd(fmadd_pd(Sse2.set1_pd(F64_C2), r, Sse2.set1_pd(F64_C1)), r, Sse2.set1_pd(F64_C0))));
t = Sse2.and_si128(Sse2.add_epi64(t, Sse2.set1_epi64x(0x8000_0000)), Sse2.set1_epi64x(unchecked((long)0xFFFF_FFFF_C000_0000ul)));
@@ -330,7 +336,7 @@ public static v256 mm256_rcbrt_pd(v256 a)
v256 t = Avx2.mm256_and_si256(a, Avx.mm256_set1_epi64x(1L << 63));
t = Avx2.mm256_or_si256(t, Avx2.mm256_slli_epi64(absHi, 32));
- r = Avx.mm256_mul_pd(Avx.mm256_mul_pd(t, t), Avx.mm256_mul_pd(t, Avx.mm256_div_pd(Avx.mm256_set1_pd(1d), a)));
+ r = Avx.mm256_mul_pd(Avx.mm256_mul_pd(t, Avx.mm256_mul_pd(t, t)), Avx.mm256_div_pd(Avx.mm256_set1_pd(1d), a));
t = Avx.mm256_mul_pd(t, mm256_fmadd_pd(Avx.mm256_mul_pd(Avx.mm256_mul_pd(r, r), r), mm256_fmadd_pd(Avx.mm256_set1_pd(F64_C4), r, Avx.mm256_set1_pd(F64_C3)), mm256_fmadd_pd(mm256_fmadd_pd(Avx.mm256_set1_pd(F64_C2), r, Avx.mm256_set1_pd(F64_C1)), r, Avx.mm256_set1_pd(F64_C0))));
t = Avx2.mm256_and_si256(Avx2.mm256_add_epi64(t, Avx.mm256_set1_epi64x(0x8000_0000)), Avx.mm256_set1_epi64x(unchecked((long)0xFFFF_FFFF_C000_0000ul)));
@@ -397,7 +403,7 @@ public static float2 cbrt(float2 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 2));
+ return RegisterConversion.ToFloat2(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 2));
}
else
{
@@ -412,7 +418,7 @@ public static float3 cbrt(float3 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 3));
+ return RegisterConversion.ToFloat3(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 3));
}
else
{
@@ -427,7 +433,7 @@ public static float4 cbrt(float4 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 4));
+ return RegisterConversion.ToFloat4(Xse.cbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 4));
}
else
{
@@ -479,7 +485,7 @@ public static double cbrt(double x)
u |= (ulong)hi << 32;
double t = *(double*)&u;
- r = (t * t) * (t * (1d / x));
+ r = (t * t * t) * (1d / x);
t *= math.mad(r * r * r, math.mad(F64_C4, r, F64_C3), math.mad(math.mad(F64_C2, r, F64_C1), r, F64_C0));
u = (*(ulong*)&t + 0x8000_0000) & 0xFFFF_FFFF_C000_0000ul;
t = *(double*)&u;
@@ -497,7 +503,7 @@ public static double2 cbrt(double2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_pd(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToDouble2(Xse.cbrt_pd(RegisterConversion.ToV128(x)));
}
else
{
@@ -511,7 +517,7 @@ public static double3 cbrt(double3 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_cbrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble3(Xse.mm256_cbrt_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -525,7 +531,7 @@ public static double4 cbrt(double4 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_cbrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble4(Xse.mm256_cbrt_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -581,7 +587,7 @@ public static float2 rcbrt(float2 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 2));
+ return RegisterConversion.ToFloat2(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 2));
}
else
{
@@ -596,7 +602,7 @@ public static float3 rcbrt(float3 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 3));
+ return RegisterConversion.ToFloat3(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 3));
}
else
{
@@ -611,7 +617,7 @@ public static float4 rcbrt(float4 x, Promise positive = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 4));
+ return RegisterConversion.ToFloat4(Xse.rcbrt_ps(RegisterConversion.ToV128(x), positive.Promises(Promise.Positive), 4));
}
else
{
@@ -663,7 +669,7 @@ public static double rcbrt(double x)
u |= (ulong)hi << 32;
double t = *(double*)&u;
- r = (t * t) * (t * (1d / x));
+ r = (t * t * t) * (1d / x);
t *= math.mad(r * r * r, math.mad(F64_C4, r, F64_C3), math.mad(math.mad(F64_C2, r, F64_C1), r, F64_C0));
u = (*(ulong*)&t + 0x8000_0000) & 0xFFFF_FFFF_C000_0000ul;
t = *(double*)&u;
@@ -682,7 +688,7 @@ public static double2 rcbrt(double2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rcbrt_pd(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToDouble2(Xse.rcbrt_pd(RegisterConversion.ToV128(x)));
}
else
{
@@ -696,7 +702,7 @@ public static double3 rcbrt(double3 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_rcbrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble3(Xse.mm256_rcbrt_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -710,7 +716,7 @@ public static double4 rcbrt(double4 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_rcbrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble4(Xse.mm256_rcbrt_pd(RegisterConversion.ToV256(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Inverse Square Root.cs b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Inverse Square Root.cs
index 53cf02b..02f8913 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Inverse Square Root.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Inverse Square Root.cs
@@ -28,7 +28,7 @@ public static float4 fastrsqrt(float4 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat4(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -42,7 +42,7 @@ public static float3 fastrsqrt(float3 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat3(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -56,7 +56,7 @@ public static float2 fastrsqrt(float2 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat2(Sse.rsqrt_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -85,7 +85,7 @@ public static double4 fastrsqrt(double4 x)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_rsqrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble4(Xse.mm256_rsqrt_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -99,7 +99,7 @@ public static double3 fastrsqrt(double3 x)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_rsqrt_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble3(Xse.mm256_rsqrt_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -113,7 +113,7 @@ public static double2 fastrsqrt(double2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rsqrt_pd(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToDouble2(Xse.rsqrt_pd(RegisterConversion.ToV128(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Reciprocal.cs b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Reciprocal.cs
index 2d988d3..85bed46 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Reciprocal.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Fast Approximate Reciprocal.cs
@@ -28,7 +28,7 @@ public static float4 fastrcp(float4 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rcp_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat4(Sse.rcp_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -42,7 +42,7 @@ public static float3 fastrcp(float3 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rcp_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat3(Sse.rcp_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -56,7 +56,7 @@ public static float2 fastrcp(float2 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Sse.rcp_ps(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToFloat2(Sse.rcp_ps(RegisterConversion.ToV128(x)));
}
else
{
@@ -85,7 +85,7 @@ public static double4 fastrcp(double4 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_rcp_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble4(Xse.mm256_rcp_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -99,7 +99,7 @@ public static double3 fastrcp(double3 x)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_rcp_pd(RegisterConversion.ToV256(x)));
+ return RegisterConversion.ToDouble3(Xse.mm256_rcp_pd(RegisterConversion.ToV256(x)));
}
else
{
@@ -113,7 +113,7 @@ public static double2 fastrcp(double2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.rcp_pd(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToDouble2(Xse.rcp_pd(RegisterConversion.ToV128(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Logarithm Any Base.cs b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Logarithm Any Base.cs
index 324de2c..b015233 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Logarithm Any Base.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Floating Point/Logarithm Any Base.cs
@@ -74,7 +74,7 @@ public static float3 log(float3 x, float3 b)
v128 _x = Avx.mm256_castps256_ps128(ln);
v128 _b = Avx.mm256_extractf128_ps(ln, 1);
- return RegisterConversion.ToType(Sse.div_ps(_x, _b));
+ return RegisterConversion.ToFloat3(Sse.div_ps(_x, _b));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Base-10 Logarithm.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Base-10 Logarithm.cs
index 2bf3320..99de3ee 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Base-10 Logarithm.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Base-10 Logarithm.cs
@@ -124,6 +124,8 @@ public static v128 log10_epu16(v128 a, byte elements = 8)
cmp = negmask_epi16(cmp_negated);
cmp = Sse2.sub_epi16(cmp, Sse2.bsrli_si128(cmp_negated, 4 * sizeof(short)));
cmp = Sse2.add_epi16(cmp, Sse2.bsrli_si128(cmp, 2 * sizeof(short)));
+
+ constexpr.ASSUME_LE_EPU16(cmp, 4);
return cmp;
}
@@ -150,7 +152,7 @@ public static v128 log10_epu16(v128 a, byte elements = 8)
v128 _2 = Sse2.set1_epi16(999);
v128 _3 = Sse2.set1_epi16(9_999);
- result = neg_epi16(cmpgt_epu16(a, _0, elements));
+ result = negmask_epi16(cmpgt_epu16(a, _0, elements));
result = Sse2.sub_epi16(result, cmpgt_epu16(a, _1, elements));
result = Sse2.sub_epi16(result, cmpgt_epu16(a, _2, elements));
result = Sse2.sub_epi16(result, cmpgt_epu16(a, _3, elements));
@@ -231,8 +233,6 @@ public static v128 log10_epi16(v128 a, byte elements = 8)
result = Sse2.sub_epi16(result, Sse2.cmpgt_epi16(a, NINETY_NINE));
result = Sse2.sub_epi16(result, Sse2.cmpgt_epi16(a, _999));
result = Sse2.sub_epi16(result, Sse2.cmpgt_epi16(a, _9_999));
-
- return result;
}
constexpr.ASSUME_LE_EPU16(result, 4);
@@ -303,15 +303,15 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 MASK_100000000 = Sse2.set1_epi32(100_000_000);
v128 MASK_1000000000 = Sse2.set1_epi32(1_000_000_000);
- v128 result_10 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_10));
- v128 result_100 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_100));
- v128 result_1000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_1000));
- v128 result_10000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_10000));
- v128 result_100000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_100000));
- v128 result_1000000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_1000000));
- v128 result_10000000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_10000000));
- v128 result_100000000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_100000000));
- v128 result_1000000000 = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_1000000000));
+ v128 result_10 = cmpge_epu32(a, MASK_10);
+ v128 result_100 = cmpge_epu32(a, MASK_100);
+ v128 result_1000 = cmpge_epu32(a, MASK_1000);
+ v128 result_10000 = cmpge_epu32(a, MASK_10000);
+ v128 result_100000 = cmpge_epu32(a, MASK_100000);
+ v128 result_1000000 = cmpge_epu32(a, MASK_1000000);
+ v128 result_10000000 = cmpge_epu32(a, MASK_10000000);
+ v128 result_100000000 = cmpge_epu32(a, MASK_100000000);
+ v128 result_1000000000 = cmpge_epu32(a, MASK_1000000000);
result_10 = Ssse3.abs_epi32(result_10);
result_100 = Ssse3.abs_epi32(result_100);
@@ -341,18 +341,18 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 zzzz = Sse2.shuffle_epi32(a, Sse.SHUFFLE(2, 2, 2, 2));
v128 wwww = Sse2.shuffle_epi32(a, Sse.SHUFFLE(3, 3, 3, 3));
- v128 xResult = Xse.cmpgt_epu32(xxxx, MASK_SMALL);
- v128 yResult = Xse.cmpgt_epu32(yyyy, MASK_SMALL);
- v128 zResult = Xse.cmpgt_epu32(zzzz, MASK_SMALL);
- v128 wResult = Xse.cmpgt_epu32(wwww, MASK_SMALL);
- xResult = Xse.neg_epi32(xResult);
- yResult = Xse.neg_epi32(yResult);
- zResult = Xse.neg_epi32(zResult);
- wResult = Xse.neg_epi32(wResult);
- xResult = Sse2.sub_epi32(xResult, Xse.cmpgt_epu32(xxxx, MASK_LARGE));
- yResult = Sse2.sub_epi32(yResult, Xse.cmpgt_epu32(yyyy, MASK_LARGE));
- zResult = Sse2.sub_epi32(zResult, Xse.cmpgt_epu32(zzzz, MASK_LARGE));
- wResult = Sse2.sub_epi32(wResult, Xse.cmpgt_epu32(wwww, MASK_LARGE));
+ v128 xResult = cmpgt_epu32(xxxx, MASK_SMALL);
+ v128 yResult = cmpgt_epu32(yyyy, MASK_SMALL);
+ v128 zResult = cmpgt_epu32(zzzz, MASK_SMALL);
+ v128 wResult = cmpgt_epu32(wwww, MASK_SMALL);
+ xResult = negmask_epi32(xResult);
+ yResult = negmask_epi32(yResult);
+ zResult = negmask_epi32(zResult);
+ wResult = negmask_epi32(wResult);
+ xResult = Sse2.sub_epi32(xResult, cmpgt_epu32(xxxx, MASK_LARGE));
+ yResult = Sse2.sub_epi32(yResult, cmpgt_epu32(yyyy, MASK_LARGE));
+ zResult = Sse2.sub_epi32(zResult, cmpgt_epu32(zzzz, MASK_LARGE));
+ wResult = Sse2.sub_epi32(wResult, cmpgt_epu32(wwww, MASK_LARGE));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 2 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 2 * sizeof(uint)));
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 2 * sizeof(uint)));
@@ -362,7 +362,7 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 1 * sizeof(uint)));
wResult = Sse2.add_epi32(wResult, Sse2.bsrli_si128(wResult, 1 * sizeof(uint)));
- v128 lastCMP = Sse2.cmpeq_epi32(Xse.max_epu32(a, LAST), a);
+ v128 lastCMP = cmpgt_epu32(a, LAST);
v128 xy = Sse2.unpacklo_epi32(xResult, yResult);
v128 zw = Sse2.unpacklo_epi32(zResult, wResult);
@@ -383,18 +383,18 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v256 _y = Avx2.mm256_broadcastd_epi32(Sse2.bsrli_si128(a, 1 * sizeof(uint)));
v256 _z = Avx2.mm256_broadcastd_epi32(Sse2.bsrli_si128(a, 2 * sizeof(uint)));
- v256 resultX = Xse.mm256_cmpge_epu32(_x, MASK);
+ v256 resultX = mm256_cmpge_epu32(_x, MASK);
v128 hiX = Avx2.mm256_extracti128_si256(resultX, 1);
- v256 resultY = Xse.mm256_cmpge_epu32(_y, MASK);
+ v256 resultY = mm256_cmpge_epu32(_y, MASK);
v128 hiY = Avx2.mm256_extracti128_si256(resultY, 1);
- v256 resultZ = Xse.mm256_cmpge_epu32(_z, MASK);
+ v256 resultZ = mm256_cmpge_epu32(_z, MASK);
v128 hiZ = Avx2.mm256_extracti128_si256(resultZ, 1);
v128 resultX128 = Ssse3.abs_epi32(Avx.mm256_castsi256_si128(resultX));
v128 resultY128 = Ssse3.abs_epi32(Avx.mm256_castsi256_si128(resultY));
v128 resultZ128 = Ssse3.abs_epi32(Avx.mm256_castsi256_si128(resultZ));
- v128 lastCMP = Sse2.cmpeq_epi32(Sse4_1.max_epu32(a, LAST), a);
+ v128 lastCMP = cmpge_epu32(a, LAST);
resultX128 = Sse2.sub_epi32(resultX128, hiX);
resultY128 = Sse2.sub_epi32(resultY128, hiY);
@@ -423,15 +423,15 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 MASK_LARGE = new v128(100_000, 1_000_000, 10_000_000, 100_000_000);
v128 MASK_LAST = new v128(1_000_000_000);
- v128 xResult = Sse2.cmpeq_epi32(xxxx, Sse4_1.max_epu32(xxxx, MASK_SMALL));
- v128 yResult = Sse2.cmpeq_epi32(yyyy, Sse4_1.max_epu32(yyyy, MASK_SMALL));
- v128 zResult = Sse2.cmpeq_epi32(zzzz, Sse4_1.max_epu32(zzzz, MASK_SMALL));
+ v128 xResult = cmpge_epu32(xxxx, MASK_SMALL);
+ v128 yResult = cmpge_epu32(yyyy, MASK_SMALL);
+ v128 zResult = cmpge_epu32(zzzz, MASK_SMALL);
xResult = Ssse3.abs_epi32(xResult);
yResult = Ssse3.abs_epi32(yResult);
zResult = Ssse3.abs_epi32(zResult);
- xResult = Sse2.sub_epi32(xResult, Sse2.cmpeq_epi32(xxxx, Sse4_1.max_epu32(xxxx, MASK_LARGE)));
- yResult = Sse2.sub_epi32(yResult, Sse2.cmpeq_epi32(yyyy, Sse4_1.max_epu32(yyyy, MASK_LARGE)));
- zResult = Sse2.sub_epi32(zResult, Sse2.cmpeq_epi32(zzzz, Sse4_1.max_epu32(zzzz, MASK_LARGE)));
+ xResult = Sse2.sub_epi32(xResult, cmpge_epu32(xxxx, MASK_LARGE));
+ yResult = Sse2.sub_epi32(yResult, cmpge_epu32(yyyy, MASK_LARGE));
+ zResult = Sse2.sub_epi32(zResult, cmpge_epu32(zzzz, MASK_LARGE));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 2 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 2 * sizeof(uint)));
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 2 * sizeof(uint)));
@@ -439,7 +439,7 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 1 * sizeof(uint)));
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 1 * sizeof(uint)));
- v128 lastCMP = Xse.cmpgt_epu32(a, MASK_LAST);
+ v128 lastCMP = cmpgt_epu32(a, MASK_LAST);
v128 xy = Sse2.unpacklo_epi32(xResult, yResult);
result = Sse2.unpacklo_epi64(xy, zResult);
@@ -451,16 +451,16 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 MASK_LARGE = new v128(99_999, 999_999, 9_999_999, 99_999_999);
v128 MASK_LAST = new v128(999_999_999);
- v128 xResult = Xse.cmpgt_epu32(xxxx, MASK_SMALL);
- v128 yResult = Xse.cmpgt_epu32(yyyy, MASK_SMALL);
- v128 zResult = Xse.cmpgt_epu32(zzzz, MASK_SMALL);
+ v128 xResult = cmpgt_epu32(xxxx, MASK_SMALL);
+ v128 yResult = cmpgt_epu32(yyyy, MASK_SMALL);
+ v128 zResult = cmpgt_epu32(zzzz, MASK_SMALL);
- xResult = Xse.neg_epi32(xResult);
- yResult = Xse.neg_epi32(yResult);
- zResult = Xse.neg_epi32(zResult);
- xResult = Sse2.sub_epi32(xResult, Xse.cmpgt_epu32(xxxx, MASK_LARGE));
- yResult = Sse2.sub_epi32(yResult, Xse.cmpgt_epu32(yyyy, MASK_LARGE));
- zResult = Sse2.sub_epi32(zResult, Xse.cmpgt_epu32(zzzz, MASK_LARGE));
+ xResult = negmask_epi32(xResult);
+ yResult = negmask_epi32(yResult);
+ zResult = negmask_epi32(zResult);
+ xResult = Sse2.sub_epi32(xResult, cmpgt_epu32(xxxx, MASK_LARGE));
+ yResult = Sse2.sub_epi32(yResult, cmpgt_epu32(yyyy, MASK_LARGE));
+ zResult = Sse2.sub_epi32(zResult, cmpgt_epu32(zzzz, MASK_LARGE));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 2 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 2 * sizeof(uint)));
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 2 * sizeof(uint)));
@@ -468,7 +468,7 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 1 * sizeof(uint)));
zResult = Sse2.add_epi32(zResult, Sse2.bsrli_si128(zResult, 1 * sizeof(uint)));
- v128 lastCMP = Xse.cmpgt_epu32(a, MASK_LAST);
+ v128 lastCMP = cmpgt_epu32(a, MASK_LAST);
v128 xy = Sse2.unpacklo_epi32(xResult, yResult);
result = Sse2.unpacklo_epi64(xy, zResult);
@@ -488,15 +488,15 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v256 _x = Avx2.mm256_broadcastd_epi32(a);
v256 _y = Avx2.mm256_broadcastd_epi32(Sse2.bsrli_si128(a, 1 * sizeof(uint)));
- v256 resultX = Xse.mm256_cmpge_epu32(_x, MASK);
+ v256 resultX = mm256_cmpge_epu32(_x, MASK);
v128 hiX = Avx2.mm256_extracti128_si256(resultX, 1);
- v256 resultY = Xse.mm256_cmpge_epu32(_y, MASK);
+ v256 resultY = mm256_cmpge_epu32(_y, MASK);
v128 hiY = Avx2.mm256_extracti128_si256(resultY, 1);
v128 resultX128 = Ssse3.abs_epi32(Avx.mm256_castsi256_si128(resultX));
v128 resultY128 = Ssse3.abs_epi32(Avx.mm256_castsi256_si128(resultY));
- v128 lastCMP = Sse2.cmpeq_epi32(Sse4_1.max_epu32(a, LAST), a);
+ v128 lastCMP = cmpge_epu32(a, LAST);
resultX128 = Sse2.sub_epi32(resultX128, hiX);
resultY128 = Sse2.sub_epi32(resultY128, hiY);
@@ -520,18 +520,18 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 MASK_LARGE = new v128(100_000, 1_000_000, 10_000_000, 100_000_000);
v128 MASK_LAST = Sse2.cvtsi64x_si128((1_000_000_000L << 32) | 1_000_000_000L);
- v128 xResult = Sse2.cmpeq_epi32(xxxx, Sse4_1.max_epu32(xxxx, MASK_SMALL));
- v128 yResult = Sse2.cmpeq_epi32(yyyy, Sse4_1.max_epu32(yyyy, MASK_SMALL));
+ v128 xResult = cmpge_epu32(xxxx, MASK_SMALL);
+ v128 yResult = cmpge_epu32(yyyy, MASK_SMALL);
xResult = Ssse3.abs_epi32(xResult);
yResult = Ssse3.abs_epi32(yResult);
- xResult = Sse2.sub_epi32(xResult, Sse2.cmpeq_epi32(xxxx, Sse4_1.max_epu32(xxxx, MASK_LARGE)));
- yResult = Sse2.sub_epi32(yResult, Sse2.cmpeq_epi32(yyyy, Sse4_1.max_epu32(yyyy, MASK_LARGE)));
+ xResult = Sse2.sub_epi32(xResult, cmpge_epu32(xxxx, MASK_LARGE));
+ yResult = Sse2.sub_epi32(yResult, cmpge_epu32(yyyy, MASK_LARGE));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 2 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 2 * sizeof(uint)));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 1 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 1 * sizeof(uint)));
- v128 lastCMP = Sse2.cmpeq_epi32(a, Sse4_1.max_epu32(a, MASK_LAST));
+ v128 lastCMP = cmpge_epu32(a, MASK_LAST);
v128 xy = Sse2.unpacklo_epi32(xResult, yResult);
result = Sse2.sub_epi32(xy, lastCMP);
@@ -542,18 +542,18 @@ public static v128 log10_epu32(v128 a, byte elements = 4)
v128 MASK_LARGE = new v128(99_999, 999_999, 9_999_999, 99_999_999);
v128 MASK_LAST = Sse2.cvtsi64x_si128((999_999_999L << 32) | 999_999_999L);
- v128 xResult = Xse.cmpgt_epu32(xxxx, MASK_SMALL);
- v128 yResult = Xse.cmpgt_epu32(yyyy, MASK_SMALL);
- xResult = Xse.neg_epi32(xResult);
- yResult = Xse.neg_epi32(yResult);
- xResult = Sse2.sub_epi32(xResult, Xse.cmpgt_epu32(xxxx, MASK_LARGE));
- yResult = Sse2.sub_epi32(yResult, Xse.cmpgt_epu32(yyyy, MASK_LARGE));
+ v128 xResult = cmpgt_epu32(xxxx, MASK_SMALL);
+ v128 yResult = cmpgt_epu32(yyyy, MASK_SMALL);
+ xResult = negmask_epi32(xResult);
+ yResult = negmask_epi32(yResult);
+ xResult = Sse2.sub_epi32(xResult, cmpgt_epu32(xxxx, MASK_LARGE));
+ yResult = Sse2.sub_epi32(yResult, cmpgt_epu32(yyyy, MASK_LARGE));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 2 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 2 * sizeof(uint)));
xResult = Sse2.add_epi32(xResult, Sse2.bsrli_si128(xResult, 1 * sizeof(uint)));
yResult = Sse2.add_epi32(yResult, Sse2.bsrli_si128(yResult, 1 * sizeof(uint)));
- v128 lastCMP = Xse.cmpgt_epu32(a, MASK_LAST);
+ v128 lastCMP = cmpgt_epu32(a, MASK_LAST);
v128 xy = Sse2.unpacklo_epi32(xResult, yResult);
result = Sse2.sub_epi32(xy, lastCMP);
@@ -594,15 +594,15 @@ public static v256 mm256_log10_epu32(v256 a)
v256 MASK_100000000 = Avx.mm256_set1_epi32(100_000_000);
v256 MASK_1000000000 = Avx.mm256_set1_epi32(1_000_000_000);
- v256 result_10 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_10));
- v256 result_100 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_100));
- v256 result_1000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_1000));
- v256 result_10000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_10000));
- v256 result_100000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_100000));
- v256 result_1000000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_1000000));
- v256 result_10000000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_10000000));
- v256 result_100000000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_100000000));
- v256 result_1000000000 = Avx2.mm256_cmpeq_epi32(a, Avx2.mm256_max_epu32(a, MASK_1000000000));
+ v256 result_10 = mm256_cmpge_epu32(a, MASK_10);
+ v256 result_100 = mm256_cmpge_epu32(a, MASK_100);
+ v256 result_1000 = mm256_cmpge_epu32(a, MASK_1000);
+ v256 result_10000 = mm256_cmpge_epu32(a, MASK_10000);
+ v256 result_100000 = mm256_cmpge_epu32(a, MASK_100000);
+ v256 result_1000000 = mm256_cmpge_epu32(a, MASK_1000000);
+ v256 result_10000000 = mm256_cmpge_epu32(a, MASK_10000000);
+ v256 result_100000000 = mm256_cmpge_epu32(a, MASK_100000000);
+ v256 result_1000000000 = mm256_cmpge_epu32(a, MASK_1000000000);
result_10 = Avx2.mm256_abs_epi32(result_10);
result_100 = Avx2.mm256_abs_epi32(result_100);
@@ -619,7 +619,11 @@ public static v256 mm256_log10_epu32(v256 a)
result_100 = Avx2.mm256_add_epi32(result_100, result_1000);
result_1000 = Avx2.mm256_add_epi32(result_10000, result_10);
- return Avx2.mm256_add_epi32(result_100, result_1000);
+ v256 result = Avx2.mm256_add_epi32(result_100, result_1000);
+
+ constexpr.ASSUME_LE_EPU32(result, 9);
+
+ return result;
}
}
else throw new IllegalInstructionException();
@@ -873,7 +877,11 @@ public static v256 mm256_log10_epi32(v256 a)
result_99 = Avx2.mm256_add_epi32(result_99, result_999);
result_999 = Avx2.mm256_add_epi32(result_9999, result_9);
- return Avx2.mm256_add_epi32(result_99, result_999);
+ v256 result = Avx2.mm256_add_epi32(result_99, result_999);
+
+ constexpr.ASSUME_LE_EPU32(result, 9);
+
+ return result;
}
}
else throw new IllegalInstructionException();
@@ -965,7 +973,11 @@ public static v128 log10_epu64(v128 a)
UInt128 adjust0 = guess[math.lzcnt(a.ULong0)];
UInt128 adjust1 = guess[math.lzcnt(a.ULong1)];
- return new v128((adjust0 + a.ULong0).hi64, (adjust1 + a.ULong1).hi64);
+ v128 result = new v128((adjust0 + a.ULong0).hi64, (adjust1 + a.ULong1).hi64);
+
+ constexpr.ASSUME_LE_EPU64(result, 19);
+
+ return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1062,7 +1074,11 @@ public static v256 mm256_log10_epu64(v256 a, byte elements = 4)
hi = Sse2.unpacklo_epi64(hi, Sse2.cvtsi64x_si128((long)((adjust3 + a.ULong3).hi64)));
}
- return new v256(lo, hi);
+ v256 result = new v256(lo, hi);
+
+ constexpr.ASSUME_LE_EPU64(result, 19);
+
+ return result;
}
}
else throw new IllegalInstructionException();
@@ -1638,7 +1654,7 @@ public static uint2 intlog10(uint2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epu32(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToUInt2(Xse.log10_epu32(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -1671,7 +1687,7 @@ public static uint3 intlog10(uint3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epu32(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToUInt3(Xse.log10_epu32(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -1706,7 +1722,7 @@ public static uint4 intlog10(uint4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epu32(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToUInt4(Xse.log10_epu32(RegisterConversion.ToV128(x), 4));
}
else
{
@@ -1824,7 +1840,7 @@ public static int2 intlog10(int2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epi32(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToInt2(Xse.log10_epi32(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -1839,7 +1855,7 @@ public static int3 intlog10(int3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epi32(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToInt3(Xse.log10_epi32(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -1854,7 +1870,7 @@ public static int4 intlog10(int4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.log10_epi32(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToInt4(Xse.log10_epi32(RegisterConversion.ToV128(x), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs
new file mode 100644
index 0000000..52a9484
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs
@@ -0,0 +1,2878 @@
+using System.Runtime.CompilerServices;
+using Unity.Burst.Intrinsics;
+using Unity.Burst.CompilerServices;
+using Unity.Mathematics;
+using MaxMath.Intrinsics;
+using DevTools;
+
+using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FACTORIAL;
+
+namespace MaxMath
+{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naivecomb_epu8(v128 n, v128 k, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu8(n, true, elements);
+ v128 denom = mullo_epi8(gamma_epu8(k, true, elements), gamma_epu8(Sse2.sub_epi8(n, k), true, elements), elements);
+
+ return div_epu8(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naivecomb_epu8(v256 n, v256 k)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu8(n, true);
+ v256 denom = mm256_mullo_epi8(mm256_gamma_epu8(k, true), mm256_gamma_epu8(Avx2.mm256_sub_epi8(n, k), true));
+
+ return mm256_div_epu8(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naivecomb_epu16(v128 n, v128 k, bool epu8range = false, byte elements = 8)
+ {
+ v128 nom;
+ v128 denom;
+ if (Sse2.IsSse2Supported)
+ {
+ if (epu8range || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8, elements))
+ {
+ if (Ssse3.IsSsse3Supported)
+ {
+ nom = gamma_epu16_epu8range(n);
+ denom = Sse2.mullo_epi16(gamma_epu16_epu8range(k), gamma_epu16_epu8range(Sse2.sub_epi16(n, k)));
+
+ return div_epu16(nom, denom, elements);
+ }
+ }
+
+ nom = gamma_epu16(n, true, elements);
+ denom = Sse2.mullo_epi16(gamma_epu16(k, true, elements), gamma_epu16(Sse2.sub_epi16(n, k), true, elements));
+
+ return div_epu16(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naivecomb_epu16(v256 n, v256 k, bool epu8range = false)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (epu8range || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8))
+ {
+ v256 nom = mm256_gamma_epu16_epu8range(n);
+ v256 denom = Avx2.mm256_mullo_epi16(mm256_gamma_epu16_epu8range(k), mm256_gamma_epu16_epu8range(Avx2.mm256_sub_epi16(n, k)));
+
+ return mm256_div_epu16(nom, denom);
+ }
+ else
+ {
+ v256 nom = mm256_gamma_epu16(n, true);
+ v256 denom = Avx2.mm256_mullo_epi16(mm256_gamma_epu16(k, true), mm256_gamma_epu16(Avx2.mm256_sub_epi16(n, k), true));
+
+ return mm256_div_epu16(nom, denom);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naivecomb_epu32(v128 n, v128 k, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu32(n, true, elements);
+ v128 denom = mullo_epi32(gamma_epu32(k, true, elements), gamma_epu32(Sse2.sub_epi32(n, k), true, elements));
+
+ return div_epu32(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naivecomb_epu32(v256 n, v256 k)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu32(n, true);
+ v256 denom = Avx2.mm256_mullo_epi32(mm256_gamma_epu32(k, true), mm256_gamma_epu32(Avx2.mm256_sub_epi32(n, k), true));
+
+ return mm256_div_epu32(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naivecomb_epu64(v128 n, v128 k)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu64(n, true);
+ v128 denom = mullo_epi64(gamma_epu64(k, true), gamma_epu64(Sse2.sub_epi64(n, k), true));
+
+ return div_epu64(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naivecomb_epu64(v256 n, v256 k, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu64(n, true, elements);
+ v256 denom = mm256_mullo_epi64(mm256_gamma_epu64(k, true), mm256_gamma_epu64(Avx2.mm256_sub_epi64(n, k), true, elements));
+
+ return mm256_div_epu64(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+        /// Per-byte-lane binomial coefficient C(n, k) for unsigned 8-bit lanes.
+        /// 'unsafeLevels' promises progressively smaller n so cheaper intermediate
+        /// widths can be used; 'elements' is the count of meaningful lanes.
+        /// Lanes where k > n are undefined (asserted below in debug builds).
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v128 comb_epu8(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 16)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+// Debug-only argument validation: requires k <= n in every active lane.
+Assert.IsNotGreater(k.Byte0, n.Byte0);
+Assert.IsNotGreater(k.Byte1, n.Byte1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.Byte2, n.Byte2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.Byte3, n.Byte3);
+}
+if (elements > 4)
+{
+Assert.IsNotGreater(k.Byte4, n.Byte4);
+Assert.IsNotGreater(k.Byte5, n.Byte5);
+Assert.IsNotGreater(k.Byte6, n.Byte6);
+Assert.IsNotGreater(k.Byte7, n.Byte7);
+}
+if (elements > 8)
+{
+Assert.IsNotGreater(k.Byte8, n.Byte8);
+Assert.IsNotGreater(k.Byte9, n.Byte9);
+Assert.IsNotGreater(k.Byte10, n.Byte10);
+Assert.IsNotGreater(k.Byte11, n.Byte11);
+Assert.IsNotGreater(k.Byte12, n.Byte12);
+Assert.IsNotGreater(k.Byte13, n.Byte13);
+Assert.IsNotGreater(k.Byte14, n.Byte14);
+Assert.IsNotGreater(k.Byte15, n.Byte15);
+}
+
+                // When every n fits in the signed-byte range the signed implementation
+                // is valid and preferred.
+                if (unsafeLevels != 0 || constexpr.ALL_LE_EPU8(n, (byte)sbyte.MaxValue, elements))
+                {
+                    return comb_epi8(n, k, unsafeLevels, elements);
+                }
+                else
+                {
+                    if (elements <= 8)
+                    {
+                        // Few lanes: widen to 16 bit, compute there, narrow back.
+                        return cvtepi16_epi8(comb_epi16(cvtepu8_epi16(n), cvtepu8_epi16(k), elements: elements), elements);
+                    }
+                    else
+                    {
+                        if (Avx2.IsAvx2Supported)
+                        {
+                            return mm256_cvtepi16_epi8(mm256_comb_epi16(Avx2.mm256_cvtepu8_epi16(n), Avx2.mm256_cvtepu8_epi16(k)));
+                        }
+                        else
+                        {
+                            v128 ONE = Sse2.set1_epi8(1);
+
+                            // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                            k = Sse2.min_epu8(k, Sse2.sub_epi8(n, k));
+
+                            v128 n2 = n;
+                            n = Sse2.sub_epi8(n, ONE);
+                            // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid 8-bit overflow.
+                            v128 c = Sse2.add_epi8(mullo_epi8(srli_epi8(n2, 1), n, 16), Sse2.and_si128(neg_epi8(Sse2.and_si128(n2, ONE)), srli_epi8(n, 1)));
+                            // Seed results for the trivial cases k == 0 (-> 1) and k == 1 (-> n).
+                            v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi8(k, ONE)), ONE, Sse2.cmpeq_epi8(k, Sse2.setzero_si128()));
+                            v128 i = Sse2.add_epi8(ONE, ONE);
+                            v128 cmp = cmple_epu8(k, i);
+
+                            // Multiplicative recurrence: advance c from C(n0, i) to C(n0, i + 1)
+                            // as c * (n0 - i) / (i + 1), done as q*n + (r*n)/i to avoid overflow.
+                            while (Hint.Likely(notalltrue_epi128(cmp, 16)))
+                            {
+                                i = Sse2.add_epi8(i, ONE);
+                                v128 q = divrem_epu8(c, i, out v128 r);
+                                n = Sse2.sub_epi8(n, ONE);
+                                c = Sse2.add_epi8(mullo_epi8(q, n, 16), div_epu8(mullo_epi8(r, n, 16), i));
+
+                                // Lanes whose k was already reached keep their earlier result.
+                                results = blendv_si128(c, results, cmp);
+                                cmp = cmple_epu8(k, i);
+                            }
+
+                            return results;
+                        }
+                    }
+                }
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        /// 256-bit counterpart of comb_epu8: per-byte-lane C(n, k) over all 32 lanes.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v256 mm256_comb_epu8(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+// Debug-only argument validation: requires k <= n in every lane.
+Assert.IsNotGreater(k.Byte0, n.Byte0);
+Assert.IsNotGreater(k.Byte1, n.Byte1);
+Assert.IsNotGreater(k.Byte2, n.Byte2);
+Assert.IsNotGreater(k.Byte3, n.Byte3);
+Assert.IsNotGreater(k.Byte4, n.Byte4);
+Assert.IsNotGreater(k.Byte5, n.Byte5);
+Assert.IsNotGreater(k.Byte6, n.Byte6);
+Assert.IsNotGreater(k.Byte7, n.Byte7);
+Assert.IsNotGreater(k.Byte8, n.Byte8);
+Assert.IsNotGreater(k.Byte9, n.Byte9);
+Assert.IsNotGreater(k.Byte10, n.Byte10);
+Assert.IsNotGreater(k.Byte11, n.Byte11);
+Assert.IsNotGreater(k.Byte12, n.Byte12);
+Assert.IsNotGreater(k.Byte13, n.Byte13);
+Assert.IsNotGreater(k.Byte14, n.Byte14);
+Assert.IsNotGreater(k.Byte15, n.Byte15);
+Assert.IsNotGreater(k.Byte16, n.Byte16);
+Assert.IsNotGreater(k.Byte17, n.Byte17);
+Assert.IsNotGreater(k.Byte18, n.Byte18);
+Assert.IsNotGreater(k.Byte19, n.Byte19);
+Assert.IsNotGreater(k.Byte20, n.Byte20);
+Assert.IsNotGreater(k.Byte21, n.Byte21);
+Assert.IsNotGreater(k.Byte22, n.Byte22);
+Assert.IsNotGreater(k.Byte23, n.Byte23);
+Assert.IsNotGreater(k.Byte24, n.Byte24);
+Assert.IsNotGreater(k.Byte25, n.Byte25);
+Assert.IsNotGreater(k.Byte26, n.Byte26);
+Assert.IsNotGreater(k.Byte27, n.Byte27);
+Assert.IsNotGreater(k.Byte28, n.Byte28);
+Assert.IsNotGreater(k.Byte29, n.Byte29);
+Assert.IsNotGreater(k.Byte30, n.Byte30);
+Assert.IsNotGreater(k.Byte31, n.Byte31);
+
+                // Defer to the signed path when n fits in an sbyte in every lane.
+                if (unsafeLevels != 0 || constexpr.ALL_LE_EPU8(n, (byte)sbyte.MaxValue))
+                {
+                    return mm256_comb_epi8(n, k, unsafeLevels);
+                }
+                else
+                {
+                    v256 ONE = Avx.mm256_set1_epi8(1);
+
+                    // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                    k = Avx2.mm256_min_epu8(k, Avx2.mm256_sub_epi8(n, k));
+
+                    v256 n2 = n;
+                    n = Avx2.mm256_sub_epi8(n, ONE);
+                    // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid 8-bit overflow.
+                    v256 c = Avx2.mm256_add_epi8(mm256_mullo_epi8(mm256_srli_epi8(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi8(Avx2.mm256_and_si256(n2, ONE)), mm256_srli_epi8(n, 1)));
+                    // Seed results for k == 0 (-> 1) and k == 1 (-> n).
+                    v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi8(k, ONE)), ONE, Avx2.mm256_cmpeq_epi8(k, Avx.mm256_setzero_si256()));
+                    v256 i = Avx2.mm256_add_epi8(ONE, ONE);
+                    v256 cmp = mm256_cmple_epu8(k, i);
+
+                    // Overflow-safe multiplicative recurrence; loop until every lane's k is reached.
+                    while (Hint.Likely(mm256_notalltrue_epi256(cmp, 32)))
+                    {
+                        i = Avx2.mm256_add_epi8(i, ONE);
+                        v256 q = mm256_divrem_epu8(c, i, out v256 r);
+                        n = Avx2.mm256_sub_epi8(n, ONE);
+                        c = Avx2.mm256_add_epi8(mm256_mullo_epi8(q, n), mm256_div_epu8(mm256_mullo_epi8(r, n), i));
+
+                        results = mm256_blendv_si256(c, results, cmp);
+                        cmp = mm256_cmple_epu8(k, i);
+                    }
+
+                    return results;
+                }
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        /// Per-byte-lane binomial coefficient C(n, k) for non-negative signed 8-bit lanes.
+        /// The unsafeLevels/MAX_INVERSE_FACTORIAL_* ladder selects the narrowest
+        /// intermediate width for which the naive factorial formula cannot overflow;
+        /// otherwise lanes are widened (or computed scalarly) before narrowing back.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v128 comb_epi8(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 16)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+// Debug-only argument validation: requires 0 <= k <= n in every active lane.
+Assert.IsNotGreater(k.SByte0, n.SByte0);
+Assert.IsNotGreater(k.SByte1, n.SByte1);
+Assert.IsNonNegative(k.SByte0);
+Assert.IsNonNegative(n.SByte0);
+Assert.IsNonNegative(k.SByte1);
+Assert.IsNonNegative(n.SByte1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.SByte2, n.SByte2);
+Assert.IsNonNegative(k.SByte2);
+Assert.IsNonNegative(n.SByte2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.SByte3, n.SByte3);
+Assert.IsNonNegative(k.SByte3);
+Assert.IsNonNegative(n.SByte3);
+}
+if (elements > 4)
+{
+Assert.IsNotGreater(k.SByte4, n.SByte4);
+Assert.IsNotGreater(k.SByte5, n.SByte5);
+Assert.IsNotGreater(k.SByte6, n.SByte6);
+Assert.IsNotGreater(k.SByte7, n.SByte7);
+Assert.IsNonNegative(k.SByte4);
+Assert.IsNonNegative(k.SByte5);
+Assert.IsNonNegative(k.SByte6);
+Assert.IsNonNegative(k.SByte7);
+Assert.IsNonNegative(n.SByte4);
+Assert.IsNonNegative(n.SByte5);
+Assert.IsNonNegative(n.SByte6);
+Assert.IsNonNegative(n.SByte7);
+}
+if (elements > 8)
+{
+Assert.IsNotGreater(k.SByte8, n.SByte8);
+Assert.IsNotGreater(k.SByte9, n.SByte9);
+Assert.IsNotGreater(k.SByte10, n.SByte10);
+Assert.IsNotGreater(k.SByte11, n.SByte11);
+Assert.IsNotGreater(k.SByte12, n.SByte12);
+Assert.IsNotGreater(k.SByte13, n.SByte13);
+Assert.IsNotGreater(k.SByte14, n.SByte14);
+Assert.IsNotGreater(k.SByte15, n.SByte15);
+Assert.IsNonNegative(k.SByte8);
+Assert.IsNonNegative(k.SByte9);
+Assert.IsNonNegative(k.SByte10);
+Assert.IsNonNegative(k.SByte11);
+Assert.IsNonNegative(k.SByte12);
+Assert.IsNonNegative(k.SByte13);
+Assert.IsNonNegative(k.SByte14);
+Assert.IsNonNegative(k.SByte15);
+Assert.IsNonNegative(n.SByte8);
+Assert.IsNonNegative(n.SByte9);
+Assert.IsNonNegative(n.SByte10);
+Assert.IsNonNegative(n.SByte11);
+Assert.IsNonNegative(n.SByte12);
+Assert.IsNonNegative(n.SByte13);
+Assert.IsNonNegative(n.SByte14);
+Assert.IsNonNegative(n.SByte15);
+}
+
+                // Each nesting level narrows the intermediate width needed so the naive
+                // factorial-based formula stays overflow-free; 64-bit falls back to scalar
+                // maxmath.comb per lane where no SIMD widening path is profitable.
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U64, elements))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U32, elements))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U16, elements))
+                        {
+                            if (unsafeLevels > 3 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U8, elements))
+                            {
+                                return naivecomb_epu8(n, k, elements);
+                            }
+                            else
+                            {
+                                if (elements <= 8)
+                                {
+                                    return cvtepi16_epi8(naivecomb_epu16(cvtepu8_epi16(n), cvtepu8_epi16(k), false, elements), elements);
+                                }
+                                else
+                                {
+                                    if (Avx2.IsAvx2Supported)
+                                    {
+                                        return mm256_cvtepi16_epi8(mm256_naivecomb_epu16(Avx2.mm256_cvtepu8_epi16(n), Avx2.mm256_cvtepu8_epi16(k), false));
+                                    }
+                                    else
+                                    {
+                                        // No AVX2: split into two 16-bit halves, compute, repack.
+                                        v128 nLo16 = cvt2x2epu8_epi16(n, out v128 nHi16);
+                                        v128 kLo16 = cvt2x2epu8_epi16(k, out v128 kHi16);
+
+                                        v128 resultLo = naivecomb_epu16(nLo16, kLo16, false, elements);
+                                        v128 resultHi = naivecomb_epu16(nHi16, kHi16, false, elements);
+
+                                        return cvt2x2epi16_epi8(resultLo, resultHi);
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Needs 32-bit intermediates.
+                            if (elements <= 4)
+                            {
+                                return cvtepi32_epi8(naivecomb_epu32(cvtepu8_epi32(n), cvtepu8_epi32(k), elements));
+                            }
+                            else
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    if (elements == 8)
+                                    {
+                                        return mm256_cvtepi32_epi8(mm256_naivecomb_epu32(Avx2.mm256_cvtepu8_epi32(n), Avx2.mm256_cvtepu8_epi32(k)));
+                                    }
+                                    else
+                                    {
+                                        // 16 lanes: two 8-lane 32-bit passes, low/high halves.
+                                        v256 loN32 = Avx2.mm256_cvtepu8_epi32(n);
+                                        v256 hiN32 = Avx2.mm256_cvtepu8_epi32(Sse2.bsrli_si128(n, 8 * sizeof(byte)));
+                                        v256 loK32 = Avx2.mm256_cvtepu8_epi32(k);
+                                        v256 hiK32 = Avx2.mm256_cvtepu8_epi32(Sse2.bsrli_si128(k, 8 * sizeof(byte)));
+
+                                        v128 resultLo = mm256_cvtepi32_epi8(mm256_naivecomb_epu32(loN32, loK32));
+                                        v128 resultHi = mm256_cvtepi32_epi8(mm256_naivecomb_epu32(hiN32, hiK32));
+
+                                        return Sse2.unpacklo_epi64(resultLo, resultHi);
+                                    }
+                                }
+                                else
+                                {
+                                    if (elements == 8)
+                                    {
+                                        v128 loN32 = cvtepu8_epi32(n);
+                                        v128 hiN32 = cvtepu8_epi32(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+                                        v128 loK32 = cvtepu8_epi32(k);
+                                        v128 hiK32 = cvtepu8_epi32(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+
+                                        v128 resultLo = naivecomb_epu32(loN32, loK32);
+                                        v128 resultHi = naivecomb_epu32(hiN32, hiK32);
+
+                                        v128 result = cvt2x2epi32_epi16(resultLo, resultHi);
+
+                                        // Only the low 8 bytes are meaningful; packing 'result'
+                                        // with itself keeps them in the low half.
+                                        return cvt2x2epi16_epi8(result, result);
+                                    }
+                                    else
+                                    {
+                                        // 16 lanes without AVX2: four 4-lane 32-bit passes.
+                                        v128 loN16 = cvt2x2epu8_epi16(n, out v128 hiN16);
+                                        v128 loK16 = cvt2x2epu8_epi16(k, out v128 hiK16);
+
+                                        v128 n32_0 = cvt2x2epu16_epi32(loN16, out v128 n32_1);
+                                        v128 n32_2 = cvt2x2epu16_epi32(hiN16, out v128 n32_3);
+                                        v128 k32_0 = cvt2x2epu16_epi32(loK16, out v128 k32_1);
+                                        v128 k32_2 = cvt2x2epu16_epi32(hiK16, out v128 k32_3);
+
+                                        v128 result32_0 = naivecomb_epu32(n32_0, k32_0);
+                                        v128 result32_1 = naivecomb_epu32(n32_1, k32_1);
+                                        v128 result32_2 = naivecomb_epu32(n32_2, k32_2);
+                                        v128 result32_3 = naivecomb_epu32(n32_3, k32_3);
+
+                                        v128 result16_0 = cvt2x2epi32_epi16(result32_0, result32_1);
+                                        v128 result16_1 = cvt2x2epi32_epi16(result32_2, result32_3);
+
+                                        return cvt2x2epi16_epi8(result16_0, result16_1);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // Needs 64-bit intermediates; use SIMD where AVX2 allows, else scalar lanes.
+                        switch (elements)
+                        {
+                            case 2:
+                            {
+                                return Sse2.unpacklo_epi8(Sse2.cvtsi64x_si128((long)maxmath.comb((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe0)),
+                                                          Sse2.cvtsi64x_si128((long)maxmath.comb((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe0)));
+                            }
+
+                            case 3:
+                            case 4:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    return mm256_cvtepi64_epi8(mm256_naivecomb_epu64(Avx2.mm256_cvtepu8_epi64(n), Avx2.mm256_cvtepu8_epi64(k), elements));
+                                }
+                                else
+                                {
+                                    return new v128((byte)maxmath.comb((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe0),
+                                                    (byte)(elements == 4 ? maxmath.comb((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe0) : 0),
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0);
+                                }
+                            }
+
+                            case 8:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    v256 n64Lo = Avx2.mm256_cvtepu8_epi64(n);
+                                    v256 k64Lo = Avx2.mm256_cvtepu8_epi64(k);
+                                    v256 n64Hi = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+                                    v256 k64Hi = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+
+                                    v128 lo = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n64Lo, k64Lo));
+                                    v128 hi = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n64Hi, k64Hi));
+
+                                    return Sse2.unpacklo_epi32(lo, hi);
+                                }
+                                else
+                                {
+                                    return new v128((byte)maxmath.comb((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 4), (ulong)extract_epi8(k, 4), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 5), (ulong)extract_epi8(k, 5), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 6), (ulong)extract_epi8(k, 6), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 7), (ulong)extract_epi8(k, 7), maxmath.Promise.Unsafe0),
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0);
+                                }
+                            }
+
+                            default:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    // 16 lanes: four 4-lane 64-bit passes, then re-interleave.
+                                    v256 n0 = Avx2.mm256_cvtepu8_epi64(n);
+                                    v256 k0 = Avx2.mm256_cvtepu8_epi64(k);
+                                    v256 n1 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+                                    v256 k1 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+                                    v256 n2 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 8 * sizeof(byte)));
+                                    v256 k2 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 8 * sizeof(byte)));
+                                    v256 n3 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 12 * sizeof(byte)));
+                                    v256 k3 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 12 * sizeof(byte)));
+
+                                    v128 result0 = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n0, k0));
+                                    v128 result1 = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n1, k1));
+                                    v128 result2 = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n2, k2));
+                                    v128 result3 = mm256_cvtepi64_epi8(mm256_naivecomb_epu64(n3, k3));
+
+                                    return Sse2.unpacklo_epi64(Sse2.unpacklo_epi32(result0, result1), Sse2.unpacklo_epi32(result2, result3));
+                                }
+                                else
+                                {
+                                    return new v128((byte)maxmath.comb((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 4), (ulong)extract_epi8(k, 4), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 5), (ulong)extract_epi8(k, 5), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 6), (ulong)extract_epi8(k, 6), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 7), (ulong)extract_epi8(k, 7), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 8), (ulong)extract_epi8(k, 8), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 9), (ulong)extract_epi8(k, 9), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 10), (ulong)extract_epi8(k, 10), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 11), (ulong)extract_epi8(k, 11), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 12), (ulong)extract_epi8(k, 12), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 13), (ulong)extract_epi8(k, 13), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 14), (ulong)extract_epi8(k, 14), maxmath.Promise.Unsafe0),
+                                                    (byte)maxmath.comb((ulong)extract_epi8(n, 15), (ulong)extract_epi8(k, 15), maxmath.Promise.Unsafe0));
+                                }
+                            }
+                        }
+                    }
+                }
+
+
+                // n too large for the naive factorial ladder: fall back to the
+                // iterative multiplicative recurrence (or a 16-bit widened pass).
+                if (elements <= 8)
+                {
+                    return cvtepi16_epi8(comb_epi16(cvtepu8_epi16(n), cvtepu8_epi16(k), elements: elements), elements);
+                }
+                else
+                {
+                    if (Avx2.IsAvx2Supported)
+                    {
+                        return mm256_cvtepi16_epi8(mm256_comb_epi16(Avx2.mm256_cvtepu8_epi16(n), Avx2.mm256_cvtepu8_epi16(k)));
+                    }
+                    else
+                    {
+                        v128 ONE = Sse2.set1_epi8(1);
+
+                        // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                        k = Sse2.min_epu8(k, Sse2.sub_epi8(n, k));
+
+                        v128 n2 = n;
+                        n = Sse2.sub_epi8(n, ONE);
+                        // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid overflow.
+                        v128 c = Sse2.add_epi8(mullo_epi8(srli_epi8(n2, 1), n, 16), Sse2.and_si128(neg_epi8(Sse2.and_si128(n2, ONE)), srli_epi8(n, 1)));
+                        v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi8(k, ONE)), ONE, Sse2.cmpeq_epi8(k, Sse2.setzero_si128()));
+                        v128 i = Sse2.add_epi8(ONE, ONE);
+                        // Signed compare is valid here: all lanes asserted non-negative.
+                        v128 cmp = Sse2.cmpgt_epi8(k, i);
+
+                        while (Hint.Likely(notallfalse_epi128(cmp, 16)))
+                        {
+                            i = Sse2.add_epi8(i, ONE);
+                            v128 q = divrem_epu8(c, i, out v128 r);
+                            n = Sse2.sub_epi8(n, ONE);
+                            c = Sse2.add_epi8(mullo_epi8(q, n, 16), div_epu8(mullo_epi8(r, n, 16), i));
+
+                            results = blendv_si128(results, c, cmp);
+                            cmp = Sse2.cmpgt_epi8(k, i);
+                        }
+
+                        return results;
+                    }
+                }
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        /// 256-bit counterpart of comb_epi8: per-byte-lane C(n, k) over 32 lanes with
+        /// 0 <= k <= n <= 127, widening intermediates per the unsafeLevels ladder.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v256 mm256_comb_epi8(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+// Debug-only argument validation: requires 0 <= k <= n in every lane.
+Assert.IsNotGreater(k.SByte0, n.SByte0);
+Assert.IsNotGreater(k.SByte1, n.SByte1);
+Assert.IsNotGreater(k.SByte2, n.SByte2);
+Assert.IsNotGreater(k.SByte3, n.SByte3);
+Assert.IsNotGreater(k.SByte4, n.SByte4);
+Assert.IsNotGreater(k.SByte5, n.SByte5);
+Assert.IsNotGreater(k.SByte6, n.SByte6);
+Assert.IsNotGreater(k.SByte7, n.SByte7);
+Assert.IsNotGreater(k.SByte8, n.SByte8);
+Assert.IsNotGreater(k.SByte9, n.SByte9);
+Assert.IsNotGreater(k.SByte10, n.SByte10);
+Assert.IsNotGreater(k.SByte11, n.SByte11);
+Assert.IsNotGreater(k.SByte12, n.SByte12);
+Assert.IsNotGreater(k.SByte13, n.SByte13);
+Assert.IsNotGreater(k.SByte14, n.SByte14);
+Assert.IsNotGreater(k.SByte15, n.SByte15);
+Assert.IsNotGreater(k.SByte16, n.SByte16);
+Assert.IsNotGreater(k.SByte17, n.SByte17);
+Assert.IsNotGreater(k.SByte18, n.SByte18);
+Assert.IsNotGreater(k.SByte19, n.SByte19);
+Assert.IsNotGreater(k.SByte20, n.SByte20);
+Assert.IsNotGreater(k.SByte21, n.SByte21);
+Assert.IsNotGreater(k.SByte22, n.SByte22);
+Assert.IsNotGreater(k.SByte23, n.SByte23);
+Assert.IsNotGreater(k.SByte24, n.SByte24);
+Assert.IsNotGreater(k.SByte25, n.SByte25);
+Assert.IsNotGreater(k.SByte26, n.SByte26);
+Assert.IsNotGreater(k.SByte27, n.SByte27);
+Assert.IsNotGreater(k.SByte28, n.SByte28);
+Assert.IsNotGreater(k.SByte29, n.SByte29);
+Assert.IsNotGreater(k.SByte30, n.SByte30);
+Assert.IsNotGreater(k.SByte31, n.SByte31);
+Assert.IsNonNegative(k.SByte0);
+Assert.IsNonNegative(k.SByte1);
+Assert.IsNonNegative(k.SByte2);
+Assert.IsNonNegative(k.SByte3);
+Assert.IsNonNegative(k.SByte4);
+Assert.IsNonNegative(k.SByte5);
+Assert.IsNonNegative(k.SByte6);
+Assert.IsNonNegative(k.SByte7);
+Assert.IsNonNegative(k.SByte8);
+Assert.IsNonNegative(k.SByte9);
+Assert.IsNonNegative(k.SByte10);
+Assert.IsNonNegative(k.SByte11);
+Assert.IsNonNegative(k.SByte12);
+Assert.IsNonNegative(k.SByte13);
+Assert.IsNonNegative(k.SByte14);
+Assert.IsNonNegative(k.SByte15);
+Assert.IsNonNegative(k.SByte16);
+Assert.IsNonNegative(k.SByte17);
+Assert.IsNonNegative(k.SByte18);
+Assert.IsNonNegative(k.SByte19);
+Assert.IsNonNegative(k.SByte20);
+Assert.IsNonNegative(k.SByte21);
+Assert.IsNonNegative(k.SByte22);
+Assert.IsNonNegative(k.SByte23);
+Assert.IsNonNegative(k.SByte24);
+Assert.IsNonNegative(k.SByte25);
+Assert.IsNonNegative(k.SByte26);
+Assert.IsNonNegative(k.SByte27);
+Assert.IsNonNegative(k.SByte28);
+Assert.IsNonNegative(k.SByte29);
+Assert.IsNonNegative(k.SByte30);
+Assert.IsNonNegative(k.SByte31);
+Assert.IsNonNegative(n.SByte0);
+Assert.IsNonNegative(n.SByte1);
+Assert.IsNonNegative(n.SByte2);
+Assert.IsNonNegative(n.SByte3);
+Assert.IsNonNegative(n.SByte4);
+Assert.IsNonNegative(n.SByte5);
+Assert.IsNonNegative(n.SByte6);
+Assert.IsNonNegative(n.SByte7);
+Assert.IsNonNegative(n.SByte8);
+Assert.IsNonNegative(n.SByte9);
+Assert.IsNonNegative(n.SByte10);
+Assert.IsNonNegative(n.SByte11);
+Assert.IsNonNegative(n.SByte12);
+Assert.IsNonNegative(n.SByte13);
+Assert.IsNonNegative(n.SByte14);
+Assert.IsNonNegative(n.SByte15);
+Assert.IsNonNegative(n.SByte16);
+Assert.IsNonNegative(n.SByte17);
+Assert.IsNonNegative(n.SByte18);
+Assert.IsNonNegative(n.SByte19);
+Assert.IsNonNegative(n.SByte20);
+Assert.IsNonNegative(n.SByte21);
+Assert.IsNonNegative(n.SByte22);
+Assert.IsNonNegative(n.SByte23);
+Assert.IsNonNegative(n.SByte24);
+Assert.IsNonNegative(n.SByte25);
+Assert.IsNonNegative(n.SByte26);
+Assert.IsNonNegative(n.SByte27);
+Assert.IsNonNegative(n.SByte28);
+Assert.IsNonNegative(n.SByte29);
+Assert.IsNonNegative(n.SByte30);
+Assert.IsNonNegative(n.SByte31);
+
+                // Overflow ladder: pick the narrowest intermediate width for which the
+                // naive factorial formula is exact; split/repack lanes as needed.
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U64))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U32))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U16))
+                        {
+                            if (unsafeLevels > 3 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U8))
+                            {
+                                return mm256_naivecomb_epu8(n, k);
+                            }
+                            else
+                            {
+                                // 16-bit intermediates: two widened passes, repacked.
+                                v256 nLo16 = mm256_cvt2x2epu8_epi16(n, out v256 nHi16);
+                                v256 kLo16 = mm256_cvt2x2epu8_epi16(k, out v256 kHi16);
+
+                                return mm256_cvt2x2epi16_epi8(mm256_naivecomb_epu16(nLo16, kLo16), mm256_naivecomb_epu16(nHi16, kHi16));
+                            }
+                        }
+                        else
+                        {
+                            // 32-bit intermediates: four widened passes, repacked.
+                            v256 loN16 = mm256_cvt2x2epu8_epi16(n, out v256 hiN16);
+                            v256 loK16 = mm256_cvt2x2epu8_epi16(k, out v256 hiK16);
+
+                            v256 n32_0 = mm256_cvt2x2epu16_epi32(loN16, out v256 n32_1);
+                            v256 n32_2 = mm256_cvt2x2epu16_epi32(hiN16, out v256 n32_3);
+                            v256 k32_0 = mm256_cvt2x2epu16_epi32(loK16, out v256 k32_1);
+                            v256 k32_2 = mm256_cvt2x2epu16_epi32(hiK16, out v256 k32_3);
+
+                            v256 result32_0 = mm256_naivecomb_epu32(n32_0, k32_0);
+                            v256 result32_1 = mm256_naivecomb_epu32(n32_1, k32_1);
+                            v256 result32_2 = mm256_naivecomb_epu32(n32_2, k32_2);
+                            v256 result32_3 = mm256_naivecomb_epu32(n32_3, k32_3);
+
+                            v256 result16_0 = mm256_cvt2x2epi32_epi16(result32_0, result32_1);
+                            v256 result16_1 = mm256_cvt2x2epi32_epi16(result32_2, result32_3);
+
+                            return mm256_cvt2x2epi16_epi8(result16_0, result16_1);
+                        }
+                    }
+                    else
+                    {
+                        // 64-bit intermediates: eight widened passes, repacked stepwise.
+                        v256 loN16 = mm256_cvt2x2epu8_epi16(n, out v256 hiN16);
+                        v256 loK16 = mm256_cvt2x2epu8_epi16(k, out v256 hiK16);
+
+                        v256 n32_0 = mm256_cvt2x2epu16_epi32(loN16, out v256 n32_1);
+                        v256 n32_2 = mm256_cvt2x2epu16_epi32(hiN16, out v256 n32_3);
+                        v256 k32_0 = mm256_cvt2x2epu16_epi32(loK16, out v256 k32_1);
+                        v256 k32_2 = mm256_cvt2x2epu16_epi32(hiK16, out v256 k32_3);
+
+                        v256 n64_0 = mm256_cvt2x2epu32_epi64(n32_0, out v256 n64_1);
+                        v256 n64_2 = mm256_cvt2x2epu32_epi64(n32_1, out v256 n64_3);
+                        v256 n64_4 = mm256_cvt2x2epu32_epi64(n32_2, out v256 n64_5);
+                        v256 n64_6 = mm256_cvt2x2epu32_epi64(n32_3, out v256 n64_7);
+                        v256 k64_0 = mm256_cvt2x2epu32_epi64(k32_0, out v256 k64_1);
+                        v256 k64_2 = mm256_cvt2x2epu32_epi64(k32_1, out v256 k64_3);
+                        v256 k64_4 = mm256_cvt2x2epu32_epi64(k32_2, out v256 k64_5);
+                        v256 k64_6 = mm256_cvt2x2epu32_epi64(k32_3, out v256 k64_7);
+
+                        v256 result64_0 = mm256_naivecomb_epu64(n64_0, k64_0);
+                        v256 result64_1 = mm256_naivecomb_epu64(n64_1, k64_1);
+                        v256 result64_2 = mm256_naivecomb_epu64(n64_2, k64_2);
+                        v256 result64_3 = mm256_naivecomb_epu64(n64_3, k64_3);
+                        v256 result64_4 = mm256_naivecomb_epu64(n64_4, k64_4);
+                        v256 result64_5 = mm256_naivecomb_epu64(n64_5, k64_5);
+                        v256 result64_6 = mm256_naivecomb_epu64(n64_6, k64_6);
+                        v256 result64_7 = mm256_naivecomb_epu64(n64_7, k64_7);
+
+                        v256 result32_0 = mm256_cvt2x2epi64_epi32(result64_0, result64_1);
+                        v256 result32_1 = mm256_cvt2x2epi64_epi32(result64_2, result64_3);
+                        v256 result32_2 = mm256_cvt2x2epi64_epi32(result64_4, result64_5);
+                        v256 result32_3 = mm256_cvt2x2epi64_epi32(result64_6, result64_7);
+
+                        v256 result16_0 = mm256_cvt2x2epi32_epi16(result32_0, result32_1);
+                        v256 result16_1 = mm256_cvt2x2epi32_epi16(result32_2, result32_3);
+
+                        return mm256_cvt2x2epi16_epi8(result16_0, result16_1);
+                    }
+                }
+
+
+                // Fallback: iterative multiplicative recurrence, overflow-free in 8 bits.
+                v256 ONE = Avx.mm256_set1_epi8(1);
+
+                // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                k = Avx2.mm256_min_epu8(k, Avx2.mm256_sub_epi8(n, k));
+
+                v256 n2 = n;
+                n = Avx2.mm256_sub_epi8(n, ONE);
+                // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid overflow.
+                v256 c = Avx2.mm256_add_epi8(mm256_mullo_epi8(mm256_srli_epi8(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi8(Avx2.mm256_and_si256(n2, ONE)), mm256_srli_epi8(n, 1)));
+                v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi8(k, ONE)), ONE, Avx2.mm256_cmpeq_epi8(k, Avx.mm256_setzero_si256()));
+                v256 i = Avx2.mm256_add_epi8(ONE, ONE);
+                // Signed compare is valid: all lanes asserted non-negative.
+                v256 cmp = Avx2.mm256_cmpgt_epi8(k, i);
+
+                while (Hint.Likely(mm256_notallfalse_epi256(cmp, 32)))
+                {
+                    i = Avx2.mm256_add_epi8(i, ONE);
+                    v256 q = mm256_divrem_epu8(c, i, out v256 r);
+                    n = Avx2.mm256_sub_epi8(n, ONE);
+                    c = Avx2.mm256_add_epi8(mm256_mullo_epi8(q, n), mm256_div_epu8(mm256_mullo_epi8(r, n), i));
+
+                    results = mm256_blendv_si256(results, c, cmp);
+                    cmp = Avx2.mm256_cmpgt_epi8(k, i);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+
+        /// Per-16-bit-lane binomial coefficient C(n, k) for unsigned shorts.
+        /// Defers to the signed path when all n fit in a short; otherwise runs the
+        /// iterative multiplicative recurrence with unsigned comparisons.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v128 comb_epu16(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 8)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+// Debug-only argument validation: requires k <= n in every active lane.
+Assert.IsNotGreater(k.UShort0, n.UShort0);
+Assert.IsNotGreater(k.UShort1, n.UShort1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.UShort2, n.UShort2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.UShort3, n.UShort3);
+}
+if (elements > 4)
+{
+Assert.IsNotGreater(k.UShort4, n.UShort4);
+Assert.IsNotGreater(k.UShort5, n.UShort5);
+Assert.IsNotGreater(k.UShort6, n.UShort6);
+Assert.IsNotGreater(k.UShort7, n.UShort7);
+}
+
+                if (unsafeLevels != 0 || constexpr.ALL_LE_EPU16(n, (ushort)short.MaxValue, elements))
+                {
+                    return comb_epi16(n, k, unsafeLevels, elements);
+                }
+                else
+                {
+                    v128 ONE = Sse2.set1_epi16(1);
+
+                    // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                    k = min_epu16(k, Sse2.sub_epi16(n, k));
+
+                    v128 n2 = n;
+                    n = Sse2.sub_epi16(n, ONE);
+                    // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid overflow.
+                    v128 c = Sse2.add_epi16(Sse2.mullo_epi16(Sse2.srli_epi16(n2, 1), n), Sse2.and_si128(neg_epi16(Sse2.and_si128(n2, ONE)), Sse2.srli_epi16(n, 1)));
+                    // Seed results for k == 0 (-> 1) and k == 1 (-> n).
+                    v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi16(k, ONE)), ONE, Sse2.cmpeq_epi16(k, Sse2.setzero_si128()));
+                    v128 i = Sse2.add_epi16(ONE, ONE);
+
+                    // Two equivalent loop shapes: SSE4.1 has cheap unsigned <=, SSE2 uses
+                    // the inverted (k > i) condition with swapped blend operands.
+                    if (Sse4_1.IsSse41Supported)
+                    {
+                        v128 cmp = cmple_epu16(k, i);
+                        while (Hint.Likely(notalltrue_epi128(cmp, 8)))
+                        {
+                            i = Sse2.add_epi16(i, ONE);
+                            v128 q = divrem_epu16(c, i, out v128 r);
+                            n = Sse2.sub_epi16(n, ONE);
+                            c = Sse2.add_epi16(Sse2.mullo_epi16(q, n), div_epu16(Sse2.mullo_epi16(r, n), i));
+
+                            results = blendv_si128(c, results, cmp);
+                            cmp = cmple_epu16(k, i);
+                        }
+                    }
+                    else
+                    {
+                        v128 cmp = cmpgt_epu16(k, i);
+                        while (Hint.Likely(notallfalse_epi128(cmp, 8)))
+                        {
+                            i = Sse2.add_epi16(i, ONE);
+                            v128 q = divrem_epu16(c, i, out v128 r);
+                            n = Sse2.sub_epi16(n, ONE);
+                            c = Sse2.add_epi16(Sse2.mullo_epi16(q, n), div_epu16(Sse2.mullo_epi16(r, n), i));
+
+                            results = blendv_si128(results, c, cmp);
+                            cmp = cmpgt_epu16(k, i);
+                        }
+                    }
+
+
+                    return results;
+                }
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        /// 256-bit counterpart of comb_epu16: per-16-bit-lane C(n, k) over 16 lanes.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v256 mm256_comb_epu16(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+// Debug-only argument validation: requires k <= n in every lane.
+Assert.IsNotGreater(k.UShort0, n.UShort0);
+Assert.IsNotGreater(k.UShort1, n.UShort1);
+Assert.IsNotGreater(k.UShort2, n.UShort2);
+Assert.IsNotGreater(k.UShort3, n.UShort3);
+Assert.IsNotGreater(k.UShort4, n.UShort4);
+Assert.IsNotGreater(k.UShort5, n.UShort5);
+Assert.IsNotGreater(k.UShort6, n.UShort6);
+Assert.IsNotGreater(k.UShort7, n.UShort7);
+Assert.IsNotGreater(k.UShort8, n.UShort8);
+Assert.IsNotGreater(k.UShort9, n.UShort9);
+Assert.IsNotGreater(k.UShort10, n.UShort10);
+Assert.IsNotGreater(k.UShort11, n.UShort11);
+Assert.IsNotGreater(k.UShort12, n.UShort12);
+Assert.IsNotGreater(k.UShort13, n.UShort13);
+Assert.IsNotGreater(k.UShort14, n.UShort14);
+Assert.IsNotGreater(k.UShort15, n.UShort15);
+
+                // Defer to the signed path when every n fits in a short.
+                if (unsafeLevels != 0 || constexpr.ALL_LE_EPU16(n, (ushort)short.MaxValue))
+                {
+                    return mm256_comb_epi16(n, k, unsafeLevels);
+                }
+                else
+                {
+                    v256 ONE = Avx.mm256_set1_epi16(1);
+
+                    // Symmetry C(n, k) == C(n, n - k): iterate over the smaller k.
+                    k = Avx2.mm256_min_epu16(k, Avx2.mm256_sub_epi16(n, k));
+
+                    v256 n2 = n;
+                    n = Avx2.mm256_sub_epi16(n, ONE);
+                    // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid overflow.
+                    v256 c = Avx2.mm256_add_epi16(Avx2.mm256_mullo_epi16(Avx2.mm256_srli_epi16(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi16(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi16(n, 1)));
+                    v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi16(k, ONE)), ONE, Avx2.mm256_cmpeq_epi16(k, Avx.mm256_setzero_si256()));
+                    v256 i = Avx2.mm256_add_epi16(ONE, ONE);
+                    v256 cmp = mm256_cmple_epu16(k, i);
+
+                    // Overflow-safe multiplicative recurrence; finished lanes keep their result.
+                    while (Hint.Likely(mm256_notalltrue_epi256(cmp, 16)))
+                    {
+                        i = Avx2.mm256_add_epi16(i, ONE);
+                        v256 q = mm256_divrem_epu16(c, i, out v256 r);
+                        n = Avx2.mm256_sub_epi16(n, ONE);
+                        c = Avx2.mm256_add_epi16(Avx2.mm256_mullo_epi16(q, n), mm256_div_epu16(Avx2.mm256_mullo_epi16(r, n), i));
+
+                        results = mm256_blendv_si256(c, results, cmp);
+                        cmp = mm256_cmple_epu16(k, i);
+                    }
+
+                    return results;
+                }
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        /// Per-16-bit-lane binomial coefficient C(n, k) for non-negative signed shorts,
+        /// with the same unsafeLevels/MAX_INVERSE_FACTORIAL_* overflow ladder as comb_epi8.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static v128 comb_epi16(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 8)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+// Debug-only argument validation: requires 0 <= k <= n in every active lane.
+Assert.IsNotGreater(k.SShort0, n.SShort0);
+Assert.IsNotGreater(k.SShort1, n.SShort1);
+Assert.IsNonNegative(k.SShort0);
+Assert.IsNonNegative(n.SShort0);
+Assert.IsNonNegative(k.SShort1);
+Assert.IsNonNegative(n.SShort1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.SShort2, n.SShort2);
+Assert.IsNonNegative(k.SShort2);
+Assert.IsNonNegative(n.SShort2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.SShort3, n.SShort3);
+Assert.IsNonNegative(k.SShort3);
+Assert.IsNonNegative(n.SShort3);
+}
+if (elements > 4)
+{
+Assert.IsNotGreater(k.SShort4, n.SShort4);
+Assert.IsNotGreater(k.SShort5, n.SShort5);
+Assert.IsNotGreater(k.SShort6, n.SShort6);
+Assert.IsNotGreater(k.SShort7, n.SShort7);
+Assert.IsNonNegative(k.SShort4);
+Assert.IsNonNegative(k.SShort5);
+Assert.IsNonNegative(k.SShort6);
+Assert.IsNonNegative(k.SShort7);
+Assert.IsNonNegative(n.SShort4);
+Assert.IsNonNegative(n.SShort5);
+Assert.IsNonNegative(n.SShort6);
+Assert.IsNonNegative(n.SShort7);
+}
+
+                // Overflow ladder: choose the narrowest exact intermediate width.
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U64, elements))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U32, elements))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U16, elements))
+                        {
+                            return naivecomb_epu16(n, k, unsafeLevels > 3 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8, elements), elements);
+                        }
+                        else
+                        {
+                            // Needs 32-bit intermediates.
+                            if (elements <= 4)
+                            {
+                                return cvtepi32_epi16(naivecomb_epu32(cvtepu16_epi32(n), cvtepu16_epi32(k), elements), elements);
+                            }
+                            else
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    return mm256_cvtepi32_epi16(mm256_naivecomb_epu32(Avx2.mm256_cvtepu16_epi32(n), Avx2.mm256_cvtepu16_epi32(k)));
+                                }
+                                else
+                                {
+                                    v128 nLo32 = cvt2x2epu16_epi32(n, out v128 nHi32);
+                                    v128 kLo32 = cvt2x2epu16_epi32(k, out v128 kHi32);
+
+                                    return cvt2x2epi32_epi16(naivecomb_epu32(nLo32, kLo32), naivecomb_epu32(nHi32, kHi32));
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // Needs 64-bit intermediates; SIMD where AVX2 allows, else scalar lanes.
+                        switch (elements)
+                        {
+                            case 2:
+                            {
+                                return Sse2.unpacklo_epi16(Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe0)),
+                                                           Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe0)));
+                            }
+
+                            case 3:
+                            case 4:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    return mm256_cvtepi64_epi16(mm256_naivecomb_epu64(Avx2.mm256_cvtepu16_epi64(n), Avx2.mm256_cvtepu16_epi64(k), elements));
+                                }
+                                else
+                                {
+                                    return new v128((ushort)maxmath.comb((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 2), (ulong)extract_epi16(k, 2), maxmath.Promise.Unsafe0),
+                                                    (ushort)(elements == 4 ? maxmath.comb((ulong)extract_epi16(n, 3), (ulong)extract_epi16(k, 3), maxmath.Promise.Unsafe0) : 0),
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0);
+                                }
+                            }
+
+                            default:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    v256 n64Lo = Avx2.mm256_cvtepu16_epi64(n);
+                                    v256 k64Lo = Avx2.mm256_cvtepu16_epi64(k);
+                                    v256 n64Hi = Avx2.mm256_cvtepu16_epi64(Sse2.bsrli_si128(n, 4 * sizeof(ushort)));
+                                    v256 k64Hi = Avx2.mm256_cvtepu16_epi64(Sse2.bsrli_si128(k, 4 * sizeof(ushort)));
+
+                                    v128 result64Lo = mm256_cvtepi64_epi16(mm256_naivecomb_epu64(n64Lo, k64Lo));
+                                    v128 result64Hi = mm256_cvtepi64_epi16(mm256_naivecomb_epu64(n64Hi, k64Hi));
+
+                                    return Sse2.unpacklo_epi64(result64Lo, result64Hi);
+                                }
+                                else
+                                {
+                                    return new v128((ushort)maxmath.comb((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 2), (ulong)extract_epi16(k, 2), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 3), (ulong)extract_epi16(k, 3), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 4), (ulong)extract_epi16(k, 4), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 5), (ulong)extract_epi16(k, 5), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 6), (ulong)extract_epi16(k, 6), maxmath.Promise.Unsafe0),
+                                                    (ushort)maxmath.comb((ulong)extract_epi16(n, 7), (ulong)extract_epi16(k, 7), maxmath.Promise.Unsafe0));
+                                }
+                            }
+                        }
+                    }
+                }
+
+
+                // Fallback: iterative multiplicative recurrence in 16 bits.
+                v128 ONE = Sse2.set1_epi16(1);
+
+                // Symmetry C(n, k) == C(n, n - k). Signed min is valid here since this
+                // signed entry point asserts all lanes non-negative (unlike comb_epu16's
+                // unsigned min) — worth confirming the intent is identical.
+                k = Sse2.min_epi16(k, Sse2.sub_epi16(n, k));
+
+                v128 n2 = n;
+                n = Sse2.sub_epi16(n, ONE);
+                // c = n*(n-1)/2 == C(n, 2), split by parity of n to avoid overflow.
+                v128 c = Sse2.add_epi16(Sse2.mullo_epi16(Sse2.srli_epi16(n2, 1), n), Sse2.and_si128(neg_epi16(Sse2.and_si128(n2, ONE)), Sse2.srli_epi16(n, 1)));
+                v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi16(k, ONE)), ONE, Sse2.cmpeq_epi16(k, Sse2.setzero_si128()));
+                v128 i = Sse2.add_epi16(ONE, ONE);
+                v128 cmp = Sse2.cmpgt_epi16(k, i);
+
+                while (Hint.Likely(notallfalse_epi128(cmp, elements)))
+                {
+                    i = Sse2.add_epi16(i, ONE);
+                    v128 q = divrem_epu16(c, i, out v128 r, elements);
+                    n = Sse2.sub_epi16(n, ONE);
+                    c = Sse2.add_epi16(Sse2.mullo_epi16(q, n), div_epu16(Sse2.mullo_epi16(r, n), i, elements));
+
+                    results = blendv_si128(results, c, cmp);
+                    cmp = Sse2.cmpgt_epi16(k, i);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_comb_epi16(v256 n, v256 k, byte unsafeLevels = 0)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNotGreater(k.SShort0, n.SShort0);
+Assert.IsNotGreater(k.SShort1, n.SShort1);
+Assert.IsNotGreater(k.SShort2, n.SShort2);
+Assert.IsNotGreater(k.SShort3, n.SShort3);
+Assert.IsNotGreater(k.SShort4, n.SShort4);
+Assert.IsNotGreater(k.SShort5, n.SShort5);
+Assert.IsNotGreater(k.SShort6, n.SShort6);
+Assert.IsNotGreater(k.SShort7, n.SShort7);
+Assert.IsNotGreater(k.SShort8, n.SShort8);
+Assert.IsNotGreater(k.SShort9, n.SShort9);
+Assert.IsNotGreater(k.SShort10, n.SShort10);
+Assert.IsNotGreater(k.SShort11, n.SShort11);
+Assert.IsNotGreater(k.SShort12, n.SShort12);
+Assert.IsNotGreater(k.SShort13, n.SShort13);
+Assert.IsNotGreater(k.SShort14, n.SShort14);
+Assert.IsNotGreater(k.SShort15, n.SShort15);
+Assert.IsNonNegative(k.SShort0);
+Assert.IsNonNegative(k.SShort1);
+Assert.IsNonNegative(k.SShort2);
+Assert.IsNonNegative(k.SShort3);
+Assert.IsNonNegative(k.SShort4);
+Assert.IsNonNegative(k.SShort5);
+Assert.IsNonNegative(k.SShort6);
+Assert.IsNonNegative(k.SShort7);
+Assert.IsNonNegative(k.SShort8);
+Assert.IsNonNegative(k.SShort9);
+Assert.IsNonNegative(k.SShort10);
+Assert.IsNonNegative(k.SShort11);
+Assert.IsNonNegative(k.SShort12);
+Assert.IsNonNegative(k.SShort13);
+Assert.IsNonNegative(k.SShort14);
+Assert.IsNonNegative(k.SShort15);
+Assert.IsNonNegative(n.SShort0);
+Assert.IsNonNegative(n.SShort1);
+Assert.IsNonNegative(n.SShort2);
+Assert.IsNonNegative(n.SShort3);
+Assert.IsNonNegative(n.SShort4);
+Assert.IsNonNegative(n.SShort5);
+Assert.IsNonNegative(n.SShort6);
+Assert.IsNonNegative(n.SShort7);
+Assert.IsNonNegative(n.SShort8);
+Assert.IsNonNegative(n.SShort9);
+Assert.IsNonNegative(n.SShort10);
+Assert.IsNonNegative(n.SShort11);
+Assert.IsNonNegative(n.SShort12);
+Assert.IsNonNegative(n.SShort13);
+Assert.IsNonNegative(n.SShort14);
+Assert.IsNonNegative(n.SShort15);
+
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U32))
+ {
+ if (unsafeLevels > 2 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U16))
+ {
+ return mm256_naivecomb_epu16(n, k, unsafeLevels > 3 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8));
+ }
+ else
+ {
+ v256 nLo32 = mm256_cvt2x2epu16_epi32(n, out v256 nHi32);
+ v256 kLo32 = mm256_cvt2x2epu16_epi32(k, out v256 kHi32);
+
+ v256 resultLo = mm256_naivecomb_epu32(nLo32, kLo32);
+ v256 resultHi = mm256_naivecomb_epu32(nHi32, kHi32);
+
+ return mm256_cvt2x2epi32_epi16(resultLo, resultHi);
+ }
+ }
+ else
+ {
+ v256 nLo32 = mm256_cvt2x2epu16_epi32(n, out v256 nHi32);
+ v256 kLo32 = mm256_cvt2x2epu16_epi32(k, out v256 kHi32);
+ v256 n64LoLo = mm256_cvt2x2epu32_epi64(nLo32, out v256 n64LoHi);
+ v256 n64HiLo = mm256_cvt2x2epu32_epi64(nHi32, out v256 n64HiHi);
+ v256 k64LoLo = mm256_cvt2x2epu32_epi64(kLo32, out v256 k64LoHi);
+ v256 k64HiLo = mm256_cvt2x2epu32_epi64(kHi32, out v256 k64HiHi);
+
+ v256 resultLoLo = mm256_naivecomb_epu64(n64LoLo, k64LoLo);
+ v256 resultLoHi = mm256_naivecomb_epu64(n64LoHi, k64LoHi);
+ v256 resultHiLo = mm256_naivecomb_epu64(n64HiLo, k64HiLo);
+ v256 resultHiHi = mm256_naivecomb_epu64(n64HiHi, k64HiHi);
+
+ v256 result32Lo = mm256_cvt2x2epi64_epi32(resultLoLo, resultLoHi);
+ v256 result32Hi = mm256_cvt2x2epi64_epi32(resultHiLo, resultHiHi);
+
+ return mm256_cvt2x2epi32_epi16(result32Lo, result32Hi);
+ }
+ }
+
+
+ v256 ONE = Avx.mm256_set1_epi16(1);
+
+ k = Avx2.mm256_min_epu16(k, Avx2.mm256_sub_epi16(n, k));
+
+ v256 n2 = n;
+ n = Avx2.mm256_sub_epi16(n, ONE);
+ v256 c = Avx2.mm256_add_epi16(Avx2.mm256_mullo_epi16(Avx2.mm256_srli_epi16(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi16(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi16(n, 1)));
+ v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi16(k, ONE)), ONE, Avx2.mm256_cmpeq_epi16(k, Avx.mm256_setzero_si256()));
+ v256 i = Avx2.mm256_add_epi16(ONE, ONE);
+ v256 cmp = Avx2.mm256_cmpgt_epi16(k, i);
+
+ while (Hint.Likely(mm256_notallfalse_epi256(cmp, 16)))
+ {
+ i = Avx2.mm256_add_epi16(i, ONE);
+ v256 q = mm256_divrem_epu16(c, i, out v256 r);
+ n = Avx2.mm256_sub_epi16(n, ONE);
+ c = Avx2.mm256_add_epi16(Avx2.mm256_mullo_epi16(q, n), mm256_div_epu16(Avx2.mm256_mullo_epi16(r, n), i));
+
+ results = mm256_blendv_si256(results, c, cmp);
+ cmp = Avx2.mm256_cmpgt_epi16(k, i);
+ }
+
+ return results;
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 comb_epi32(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNotGreater(k.SInt0, n.SInt0);
+Assert.IsNotGreater(k.SInt1, n.SInt1);
+Assert.IsNonNegative(k.SInt0);
+Assert.IsNonNegative(k.SInt1);
+Assert.IsNonNegative(n.SInt0);
+Assert.IsNonNegative(n.SInt1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.SInt2, n.SInt2);
+Assert.IsNonNegative(k.SInt2);
+Assert.IsNonNegative(n.SInt2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.SInt3, n.SInt3);
+Assert.IsNonNegative(k.SInt3);
+Assert.IsNonNegative(n.SInt3);
+}
+
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U64, elements))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U32, elements))
+ {
+ return naivecomb_epu32(n, k, elements);
+ }
+ else
+ {
+ if (elements > 2)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return mm256_cvtepi64_epi32(mm256_naivecomb_epu64(Avx2.mm256_cvtepu32_epi64(n), Avx2.mm256_cvtepu32_epi64(k), elements));
+ }
+ else
+ {
+ v128 lo = Sse2.unpacklo_epi32(Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 0), (ulong)extract_epi32(k, 0), maxmath.Promise.Unsafe0)),
+ Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 1), (ulong)extract_epi32(k, 1), maxmath.Promise.Unsafe0)));
+ v128 hi = Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 2), (ulong)extract_epi32(k, 2), maxmath.Promise.Unsafe0));
+
+ if (elements == 4)
+ {
+ hi = Sse2.unpacklo_epi32(hi, Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 3), (ulong)extract_epi32(k, 3), maxmath.Promise.Unsafe0)));
+ }
+
+ return Sse2.unpacklo_epi64(lo, hi);
+ }
+ }
+ else
+ {
+ return Sse2.unpacklo_epi32(Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 0), (ulong)extract_epi32(k, 0), maxmath.Promise.Unsafe0)),
+ Sse2.cvtsi32_si128((int)maxmath.comb((ulong)extract_epi32(n, 1), (ulong)extract_epi32(k, 1), maxmath.Promise.Unsafe0)));
+ }
+ }
+ }
+
+
+ v128 ONE = Sse2.set1_epi32(1);
+
+ k = min_epi32(k, Sse2.sub_epi32(n, k));
+
+ v128 n2 = n;
+ n = Sse2.sub_epi32(n, ONE);
+ v128 c = Sse2.add_epi32(mullo_epi32(Sse2.srli_epi32(n2, 1), n, elements), Sse2.and_si128(neg_epi32(Sse2.and_si128(n2, ONE)), Sse2.srli_epi32(n, 1)));
+ v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi32(k, ONE)), ONE, Sse2.cmpeq_epi32(k, Sse2.setzero_si128()));
+ v128 i = Sse2.add_epi32(ONE, ONE);
+ v128 cmp = Sse2.cmpgt_epi32(k, i);
+
+ while (Hint.Likely(notallfalse_epi128(cmp, elements)))
+ {
+ i = Sse2.add_epi32(i, ONE);
+ v128 q = divrem_epu32(c, i, out v128 r, elements);
+ n = Sse2.sub_epi32(n, ONE);
+ c = Sse2.add_epi32(mullo_epi32(q, n, elements), div_epu32(mullo_epi32(r, n, elements), i, elements));
+
+ results = blendv_si128(results, c, cmp);
+ cmp = Sse2.cmpgt_epi32(k, i);
+ }
+
+ return results;
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_comb_epi32(v256 n, v256 k, byte unsafeLevels = 0)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNotGreater(k.SInt0, n.SInt0);
+Assert.IsNotGreater(k.SInt1, n.SInt1);
+Assert.IsNotGreater(k.SInt2, n.SInt2);
+Assert.IsNotGreater(k.SInt3, n.SInt3);
+Assert.IsNotGreater(k.SInt4, n.SInt4);
+Assert.IsNotGreater(k.SInt5, n.SInt5);
+Assert.IsNotGreater(k.SInt6, n.SInt6);
+Assert.IsNotGreater(k.SInt7, n.SInt7);
+Assert.IsNonNegative(k.SInt0);
+Assert.IsNonNegative(k.SInt1);
+Assert.IsNonNegative(k.SInt2);
+Assert.IsNonNegative(k.SInt3);
+Assert.IsNonNegative(k.SInt4);
+Assert.IsNonNegative(k.SInt5);
+Assert.IsNonNegative(k.SInt6);
+Assert.IsNonNegative(k.SInt7);
+Assert.IsNonNegative(n.SInt0);
+Assert.IsNonNegative(n.SInt1);
+Assert.IsNonNegative(n.SInt2);
+Assert.IsNonNegative(n.SInt3);
+Assert.IsNonNegative(n.SInt4);
+Assert.IsNonNegative(n.SInt5);
+Assert.IsNonNegative(n.SInt6);
+Assert.IsNonNegative(n.SInt7);
+
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U32))
+ {
+ return mm256_naivecomb_epu32(n, k);
+ }
+ else
+ {
+ v256 n64Lo = mm256_cvt2x2epu32_epi64(n, out v256 n64Hi);
+ v256 k64Lo = mm256_cvt2x2epu32_epi64(k, out v256 k64Hi);
+
+ v256 resultLo = mm256_naivecomb_epu64(n64Lo, k64Lo);
+ v256 resultHi = mm256_naivecomb_epu64(n64Hi, k64Hi);
+
+ return mm256_cvt2x2epi64_epi32(resultLo, resultHi);
+ }
+ }
+
+
+ v256 ONE = Avx.mm256_set1_epi32(1);
+
+ k = Avx2.mm256_min_epi32(k, Avx2.mm256_sub_epi32(n, k));
+
+ v256 n2 = n;
+ n = Avx2.mm256_sub_epi32(n, ONE);
+ v256 c = Avx2.mm256_add_epi32(Avx2.mm256_mullo_epi32(Avx2.mm256_srli_epi32(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi32(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi32(n, 1)));
+ v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi32(k, ONE)), ONE, Avx2.mm256_cmpeq_epi32(k, Avx.mm256_setzero_si256()));
+ v256 i = Avx2.mm256_add_epi32(ONE, ONE);
+ v256 cmp = Avx2.mm256_cmpgt_epi32(k, i);
+
+ while (Hint.Likely(mm256_notallfalse_epi256(cmp, 8)))
+ {
+ i = Avx2.mm256_add_epi32(i, ONE);
+ v256 q = mm256_divrem_epu32(c, i, out v256 r);
+ n = Avx2.mm256_sub_epi32(n, ONE);
+ c = Avx2.mm256_add_epi32(Avx2.mm256_mullo_epi32(q, n), mm256_div_epu32(Avx2.mm256_mullo_epi32(r, n), i));
+
+ results = mm256_blendv_si256(results, c, cmp);
+ cmp = Avx2.mm256_cmpgt_epi32(k, i);
+ }
+
+ return results;
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 comb_epu32(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNotGreater(k.UInt0, n.UInt0);
+Assert.IsNotGreater(k.UInt1, n.UInt1);
+if (elements > 2)
+{
+Assert.IsNotGreater(k.UInt2, n.UInt2);
+}
+if (elements > 3)
+{
+Assert.IsNotGreater(k.UInt3, n.UInt3);
+}
+
+            if (unsafeLevels != 0 || constexpr.ALL_LE_EPU32(n, int.MaxValue, elements))
+ {
+ return comb_epi32(n, k, unsafeLevels, elements);
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi32(1);
+
+ k = min_epu32(k, Sse2.sub_epi32(n, k));
+
+ v128 n2 = n;
+ n = Sse2.sub_epi32(n, ONE);
+ v128 c = Sse2.add_epi32(mullo_epi32(Sse2.srli_epi32(n2, 1), n, elements), Sse2.and_si128(neg_epi32(Sse2.and_si128(n2, ONE)), Sse2.srli_epi32(n, 1)));
+ v128 results = blendv_si128(blendv_si128(c, n2, Sse2.cmpeq_epi32(k, ONE)), ONE, Sse2.cmpeq_epi32(k, Sse2.setzero_si128()));
+ v128 i = Sse2.add_epi32(ONE, ONE);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ v128 cmp = cmple_epu32(k, i, elements);
+ while (Hint.Likely(notalltrue_epi128(cmp, elements)))
+ {
+ i = Sse2.add_epi32(i, ONE);
+                        v128 q = divrem_epu32(c, i, out v128 r, elements);
+ n = Sse2.sub_epi32(n, ONE);
+ c = Sse2.add_epi32(mullo_epi32(q, n, elements), div_epu32(mullo_epi32(r, n, elements), i, elements));
+
+ results = blendv_si128(c, results, cmp);
+ cmp = cmple_epu32(k, i, elements);
+ }
+ }
+ else
+ {
+ v128 cmp = cmpgt_epu32(k, i, elements);
+ while (Hint.Likely(notallfalse_epi128(cmp, elements)))
+ {
+ i = Sse2.add_epi32(i, ONE);
+                        v128 q = divrem_epu32(c, i, out v128 r, elements);
+ n = Sse2.sub_epi32(n, ONE);
+ c = Sse2.add_epi32(mullo_epi32(q, n, elements), div_epu32(mullo_epi32(r, n, elements), i, elements));
+
+ results = blendv_si128(results, c, cmp);
+ cmp = cmpgt_epu32(k, i, elements);
+ }
+ }
+
+ return results;
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_comb_epu32(v256 n, v256 k, byte unsafeLevels = 0)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNotGreater(k.UInt0, n.UInt0);
+Assert.IsNotGreater(k.UInt1, n.UInt1);
+Assert.IsNotGreater(k.UInt2, n.UInt2);
+Assert.IsNotGreater(k.UInt3, n.UInt3);
+Assert.IsNotGreater(k.UInt4, n.UInt4);
+Assert.IsNotGreater(k.UInt5, n.UInt5);
+Assert.IsNotGreater(k.UInt6, n.UInt6);
+Assert.IsNotGreater(k.UInt7, n.UInt7);
+
+ if (unsafeLevels != 0 || constexpr.ALL_LE_EPU32(n, int.MaxValue))
+ {
+ return mm256_comb_epi32(n, k, unsafeLevels);
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi32(1);
+
+ k = Avx2.mm256_min_epu32(k, Avx2.mm256_sub_epi32(n, k));
+
+ v256 n2 = n;
+ n = Avx2.mm256_sub_epi32(n, ONE);
+ v256 c = Avx2.mm256_add_epi32(Avx2.mm256_mullo_epi32(Avx2.mm256_srli_epi32(n2, 1), n), Avx2.mm256_and_si256(mm256_neg_epi32(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi32(n, 1)));
+ v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi32(k, ONE)), ONE, Avx2.mm256_cmpeq_epi32(k, Avx.mm256_setzero_si256()));
+ v256 i = Avx2.mm256_add_epi32(ONE, ONE);
+ v256 cmp = mm256_cmple_epu32(k, i);
+
+ while (Hint.Likely(mm256_notalltrue_epi256(cmp, 8)))
+ {
+ i = Avx2.mm256_add_epi32(i, ONE);
+ v256 q = mm256_divrem_epu32(c, i, out v256 r);
+ n = Avx2.mm256_sub_epi32(n, ONE);
+ c = Avx2.mm256_add_epi32(Avx2.mm256_mullo_epi32(q, n), mm256_div_epu32(Avx2.mm256_mullo_epi32(r, n), i));
+
+ results = mm256_blendv_si256(c, results, cmp);
+ cmp = mm256_cmple_epu32(k, i);
+ }
+
+ return results;
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 comb_epi64(v128 n, v128 k, byte unsafeLevels = 0)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNotGreater(k.SLong0, n.SLong0);
+Assert.IsNotGreater(k.SLong1, n.SLong1);
+Assert.IsNonNegative(k.SLong0);
+Assert.IsNonNegative(k.SLong1);
+Assert.IsNonNegative(n.SLong0);
+Assert.IsNonNegative(n.SLong1);
+
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U32))
+ {
+ v128 nFactorial = gamma_epu64(n, true);
+ v128 kFactorial = gamma_epu64(k, true);
+ v128 nkFactorial = gamma_epu64(Sse2.sub_epi64(n, k), true);
+
+ return usfcvttpd_epu64(Sse2.div_pd(usfcvtepu64_pd(nFactorial), usfcvtepu64_pd(mullo_epi64(kFactorial, nkFactorial))));
+ }
+ else
+ {
+ return Sse2.unpacklo_epi64(Sse2.cvtsi64x_si128((long)maxmath.comb(extract_epi64(n, 0), extract_epi64(k, 0), maxmath.Promise.Unsafe0)),
+ Sse2.cvtsi64x_si128((long)maxmath.comb(extract_epi64(n, 1), extract_epi64(k, 1), maxmath.Promise.Unsafe0)));
+ }
+ }
+
+
+ v128 ONE = Sse2.set1_epi64x(1);
+
+ k = min_epi64(k, Sse2.sub_epi64(n, k));
+
+ v128 n2 = n;
+ n = Sse2.sub_epi64(n, ONE);
+ v128 c = Sse2.add_epi64(mullo_epi64(Sse2.srli_epi64(n2, 1), n), Sse2.and_si128(neg_epi64(Sse2.and_si128(n2, ONE)), Sse2.srli_epi64(n, 1)));
+ v128 results = blendv_si128(blendv_si128(c, n2, cmpeq_epi64(k, ONE)), ONE, cmpeq_epi64(k, Sse2.setzero_si128()));
+ v128 i = Sse2.add_epi64(ONE, ONE);
+ v128 cmp = cmpgt_epi64(k, i);
+
+ while (Hint.Likely(notallfalse_epi128(cmp, 2)))
+ {
+ i = Sse2.add_epi64(i, ONE);
+ v128 q = divrem_epu64(c, i, out v128 r);
+ n = Sse2.sub_epi64(n, ONE);
+ c = Sse2.add_epi64(mullo_epi64(q, n), div_epu64(mullo_epi64(r, n), i));
+
+ results = blendv_si128(results, c, cmp);
+ cmp = cmpgt_epi64(k, i);
+ }
+
+ return results;
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_comb_epi64(v256 n, v256 k, byte unsafeLevels = 0, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNotGreater(k.SLong0, n.SLong0);
+Assert.IsNotGreater(k.SLong1, n.SLong1);
+Assert.IsNotGreater(k.SLong2, n.SLong2);
+Assert.IsNonNegative(k.SLong0);
+Assert.IsNonNegative(k.SLong1);
+Assert.IsNonNegative(k.SLong2);
+Assert.IsNonNegative(n.SLong0);
+Assert.IsNonNegative(n.SLong1);
+Assert.IsNonNegative(n.SLong2);
+if (elements > 3)
+{
+ Assert.IsNotGreater(k.SLong3, n.SLong3);
+ Assert.IsNonNegative(k.SLong3);
+ Assert.IsNonNegative(n.SLong3);
+}
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U32))
+ {
+ v256 nFactorial = mm256_gamma_epu64(n, true);
+ v256 kFactorial = mm256_gamma_epu64(k, true);
+ v256 nkFactorial = mm256_gamma_epu64(Avx2.mm256_sub_epi64(n, k), true);
+
+ return mm256_usfcvttpd_epu64(Avx.mm256_div_pd(mm256_usfcvtepu64_pd(nFactorial), mm256_usfcvtepu64_pd(mm256_mullo_epi64(kFactorial, nkFactorial, elements))));
+ }
+ else
+ {
+ return mm256_naivecomb_epu64(n, k, elements);
+ }
+ }
+
+
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+
+ k = mm256_min_epi64(k, Avx2.mm256_sub_epi64(n, k));
+
+ v256 n2 = n;
+ n = Avx2.mm256_sub_epi64(n, ONE);
+ v256 c = Avx2.mm256_add_epi64(mm256_mullo_epi64(Avx2.mm256_srli_epi64(n2, 1), n, elements), Avx2.mm256_and_si256(mm256_neg_epi64(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi64(n, 1)));
+ v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi64(k, ONE)), ONE, Avx2.mm256_cmpeq_epi64(k, Avx.mm256_setzero_si256()));
+ v256 i = Avx2.mm256_add_epi64(ONE, ONE);
+ v256 cmp = mm256_cmpgt_epi64(k, i, elements);
+
+ while (Hint.Likely(mm256_notallfalse_epi256(cmp, elements)))
+ {
+ i = Avx2.mm256_add_epi64(i, ONE);
+ v256 q = mm256_divrem_epu64(c, i, out v256 r, elements);
+ n = Avx2.mm256_sub_epi64(n, ONE);
+ c = Avx2.mm256_add_epi64(mm256_mullo_epi64(q, n, elements), mm256_div_epu64(mm256_mullo_epi64(r, n, elements), i, elements));
+
+ results = mm256_blendv_si256(results, c, cmp);
+ cmp = mm256_cmpgt_epi64(k, i, elements);
+ }
+
+ return results;
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 comb_epu64(v128 n, v128 k, byte unsafeLevels = 0)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNotGreater(k.ULong0, n.ULong0);
+Assert.IsNotGreater(k.ULong1, n.ULong1);
+
+ if (unsafeLevels != 0 || constexpr.ALL_LE_EPU64(n, long.MaxValue))
+ {
+ return comb_epi64(n, k, unsafeLevels);
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi64x(1);
+
+ k = min_epu64(k, Sse2.sub_epi64(n, k));
+
+ v128 n2 = n;
+ n = Sse2.sub_epi64(n, ONE);
+ v128 c = Sse2.add_epi64(mullo_epi64(Sse2.srli_epi64(n2, 1), n), Sse2.and_si128(neg_epi64(Sse2.and_si128(n2, ONE)), Sse2.srli_epi64(n, 1)));
+ v128 results = blendv_si128(blendv_si128(c, n2, cmpeq_epi64(k, ONE)), ONE, cmpeq_epi64(k, Sse2.setzero_si128()));
+ v128 i = Sse2.add_epi64(ONE, ONE);
+ v128 cmp = cmpgt_epu64(k, i);
+
+ while (Hint.Likely(notallfalse_epi128(cmp, 2)))
+ {
+ i = Sse2.add_epi64(i, ONE);
+ v128 q = divrem_epu64(c, i, out v128 r);
+ n = Sse2.sub_epi64(n, ONE);
+ c = Sse2.add_epi64(mullo_epi64(q, n), div_epu64(mullo_epi64(r, n), i));
+
+ results = blendv_si128(results, c, cmp);
+ cmp = cmpgt_epu64(k, i);
+ }
+
+ return results;
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_comb_epu64(v256 n, v256 k, byte unsafeLevels = 0, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNotGreater(k.ULong0, n.ULong0);
+Assert.IsNotGreater(k.ULong1, n.ULong1);
+Assert.IsNotGreater(k.ULong2, n.ULong2);
+if (elements > 3)
+{
+ Assert.IsNotGreater(k.ULong3, n.ULong3);
+}
+ if (unsafeLevels != 0 || constexpr.ALL_LE_EPU64(n, long.MaxValue, elements))
+ {
+ return mm256_comb_epi64(n, k, unsafeLevels, elements);
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+
+ k = mm256_min_epu64(k, Avx2.mm256_sub_epi64(n, k));
+
+ v256 n2 = n;
+ n = Avx2.mm256_sub_epi64(n, ONE);
+ v256 c = Avx2.mm256_add_epi64(mm256_mullo_epi64(Avx2.mm256_srli_epi64(n2, 1), n, elements), Avx2.mm256_and_si256(mm256_neg_epi64(Avx2.mm256_and_si256(n2, ONE)), Avx2.mm256_srli_epi64(n, 1)));
+ v256 results = mm256_blendv_si256(mm256_blendv_si256(c, n2, Avx2.mm256_cmpeq_epi64(k, ONE)), ONE, Avx2.mm256_cmpeq_epi64(k, Avx.mm256_setzero_si256()));
+ v256 i = Avx2.mm256_add_epi64(ONE, ONE);
+ v256 cmp = mm256_cmpgt_epu64(k, i, elements);
+
+ while (Hint.Likely(mm256_notallfalse_epi256(cmp, elements)))
+ {
+ i = Avx2.mm256_add_epi64(i, ONE);
+ v256 q = mm256_divrem_epu64(c, i, out v256 r, elements);
+ n = Avx2.mm256_sub_epi64(n, ONE);
+                    c = Avx2.mm256_add_epi64(mm256_mullo_epi64(q, n, elements), mm256_div_epu64(mm256_mullo_epi64(r, n, elements), i, elements));
+
+ results = mm256_blendv_si256(results, c, cmp);
+ cmp = mm256_cmpgt_epu64(k, i, elements);
+ }
+
+ return results;
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+ }
+ }
+
+
+ unsafe public static partial class maxmath
+ {
+        /// Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 128 bit overflow are undefined.
+        ///
+        /// A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 128 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static UInt128 comb(UInt128 n, UInt128 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U128))
+ {
+ return factorial(n, Promise.NoOverflow) / (factorial(k, Promise.NoOverflow) * factorial(n - k, Promise.NoOverflow));
+ }
+
+
+ k = min(k, n - k);
+ if (Hint.Unlikely(k.IsZero))
+ {
+ return 1;
+ }
+
+ UInt128 c = n--;
+
+ if (Hint.Likely(k > 1))
+ {
+ c = ((c >> 1) * n) + (((UInt128)(-(Int128)(c & 1))) & (n >> 1));
+
+ UInt128 i = 2;
+ while (Hint.Likely(k > i++))
+ {
+ UInt128 q = divrem(c, i, out UInt128 r);
+ n--;
+ c = (q * n) + ((r * n) / i);
+ }
+ }
+
+ return c;
+ }
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 128 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 128 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static UInt128 comb(Int128 n, Int128 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsTrue(k >= 0);
+Assert.IsTrue(n >= 0);
+
+ return comb((UInt128)n, (UInt128)k, useFactorial);
+ }
+
+
+        /// Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.
+        ///
+        /// A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+        /// A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+        /// A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.
+        /// A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte comb(byte n, byte k, Promise useFactorial = Promise.Nothing)
+ {
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+ {
+ if (useFactorial.CountUnsafeLevels() > 2 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U16))
+ {
+ if (useFactorial.CountUnsafeLevels() > 3 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U8))
+ {
+ return (byte)(factorial(n, Promise.NoOverflow) / (factorial(k, Promise.NoOverflow) * factorial((byte)(n - k), Promise.NoOverflow)));
+ }
+ else
+ {
+ return (byte)(factorial((ushort)n, Promise.NoOverflow) / (factorial((ushort)k, Promise.NoOverflow) * factorial((ushort)(n - k), Promise.NoOverflow)));
+ }
+ }
+ else
+ {
+ return (byte)(factorial((uint)n, Promise.NoOverflow) / (factorial((uint)k, Promise.NoOverflow) * factorial((uint)(n - k), Promise.NoOverflow)));
+ }
+ }
+ else
+ {
+ return (byte)(factorial((ulong)n, Promise.NoOverflow) / (factorial((ulong)k, Promise.NoOverflow) * factorial((ulong)(n - k), Promise.NoOverflow)));
+ }
+ }
+
+
+ k = min(k, (byte)(n - k));
+ if (Hint.Unlikely(k == 0))
+ {
+ return 1;
+ }
+
+ byte c = n--;
+
+ if (Hint.Likely(k > 1))
+ {
+ c = (byte)((byte)((c >> 1) * n) + (byte)((-(c & 1)) & (n >> 1)));
+
+ byte i = 2;
+ while (Hint.Likely(k > i++))
+ {
+ byte q = divrem(c, i, out byte r);
+ n--;
+ c = (byte)((byte)(q * n) + (byte)((byte)(r * n) / i));
+ }
+ }
+
+ return c;
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 comb(byte2 n, byte2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu8(n, k, useFactorial.CountUnsafeLevels(), 2);
+ }
+ else
+ {
+ return new byte2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 comb(byte3 n, byte3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu8(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new byte3(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 comb(byte4 n, byte4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu8(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new byte4(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial),
+ comb(n.w, k.w, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 comb(byte8 n, byte8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu8(n, k, useFactorial.CountUnsafeLevels(), 8);
+ }
+ else
+ {
+ return new byte8(comb(n.x0, k.x0, useFactorial),
+ comb(n.x1, k.x1, useFactorial),
+ comb(n.x2, k.x2, useFactorial),
+ comb(n.x3, k.x3, useFactorial),
+ comb(n.x4, k.x4, useFactorial),
+ comb(n.x5, k.x5, useFactorial),
+ comb(n.x6, k.x6, useFactorial),
+ comb(n.x7, k.x7, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte16 comb(byte16 n, byte16 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu8(n, k, useFactorial.CountUnsafeLevels(), 16);
+ }
+ else
+ {
+ return new byte16(comb(n.x0, k.x0, useFactorial),
+ comb(n.x1, k.x1, useFactorial),
+ comb(n.x2, k.x2, useFactorial),
+ comb(n.x3, k.x3, useFactorial),
+ comb(n.x4, k.x4, useFactorial),
+ comb(n.x5, k.x5, useFactorial),
+ comb(n.x6, k.x6, useFactorial),
+ comb(n.x7, k.x7, useFactorial),
+ comb(n.x8, k.x8, useFactorial),
+ comb(n.x9, k.x9, useFactorial),
+ comb(n.x10, k.x10, useFactorial),
+ comb(n.x11, k.x11, useFactorial),
+ comb(n.x12, k.x12, useFactorial),
+ comb(n.x13, k.x13, useFactorial),
+ comb(n.x14, k.x14, useFactorial),
+ comb(n.x15, k.x15, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 comb(byte32 n, byte32 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epu8(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new byte32(comb(n.v16_0, k.v16_0, useFactorial),
+ comb(n.v16_16, k.v16_16, useFactorial));
+ }
+ }
+
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 64 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte comb(sbyte n, sbyte k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+ return comb((byte)n, (byte)k, useFactorial);
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 comb(sbyte2 n, sbyte2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi8(n, k, useFactorial.CountUnsafeLevels(), 2);
+ }
+ else
+ {
+ return new byte2((byte)comb((int)n.x, (int)k.x, useFactorial),
+ (byte)comb((int)n.y, (int)k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 comb(sbyte3 n, sbyte3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi8(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new byte3((byte)comb((int)n.x, (int)k.x, useFactorial),
+ (byte)comb((int)n.y, (int)k.y, useFactorial),
+ (byte)comb((int)n.z, (int)k.z, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 comb(sbyte4 n, sbyte4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi8(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new byte4((byte)comb((int)n.x, (int)k.x, useFactorial),
+ (byte)comb((int)n.y, (int)k.y, useFactorial),
+ (byte)comb((int)n.z, (int)k.z, useFactorial),
+ (byte)comb((int)n.w, (int)k.w, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 comb(sbyte8 n, sbyte8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi8(n, k, useFactorial.CountUnsafeLevels(), 8);
+ }
+ else
+ {
+ return new byte8((byte)comb((int)n.x0, (int)k.x0, useFactorial),
+ (byte)comb((int)n.x1, (int)k.x1, useFactorial),
+ (byte)comb((int)n.x2, (int)k.x2, useFactorial),
+ (byte)comb((int)n.x3, (int)k.x3, useFactorial),
+ (byte)comb((int)n.x4, (int)k.x4, useFactorial),
+ (byte)comb((int)n.x5, (int)k.x5, useFactorial),
+ (byte)comb((int)n.x6, (int)k.x6, useFactorial),
+ (byte)comb((int)n.x7, (int)k.x7, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte16 comb(sbyte16 n, sbyte16 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.comb_epi8(n, k, useFactorial.CountUnsafeLevels(), 16); // FIX: args were swapped — unsafe-level count is the 3rd parameter, element count (16) the 4th, matching every other comb_epi8/comb_epu8 call site
+            }
+            else
+            {
+                return new byte16((byte)comb((int)n.x0,  (int)k.x0,  useFactorial),
+                                  (byte)comb((int)n.x1,  (int)k.x1,  useFactorial),
+                                  (byte)comb((int)n.x2,  (int)k.x2,  useFactorial),
+                                  (byte)comb((int)n.x3,  (int)k.x3,  useFactorial),
+                                  (byte)comb((int)n.x4,  (int)k.x4,  useFactorial),
+                                  (byte)comb((int)n.x5,  (int)k.x5,  useFactorial),
+                                  (byte)comb((int)n.x6,  (int)k.x6,  useFactorial),
+                                  (byte)comb((int)n.x7,  (int)k.x7,  useFactorial),
+                                  (byte)comb((int)n.x8,  (int)k.x8,  useFactorial),
+                                  (byte)comb((int)n.x9,  (int)k.x9,  useFactorial),
+                                  (byte)comb((int)n.x10, (int)k.x10, useFactorial),
+                                  (byte)comb((int)n.x11, (int)k.x11, useFactorial),
+                                  (byte)comb((int)n.x12, (int)k.x12, useFactorial),
+                                  (byte)comb((int)n.x13, (int)k.x13, useFactorial),
+                                  (byte)comb((int)n.x14, (int)k.x14, useFactorial),
+                                  (byte)comb((int)n.x15, (int)k.x15, useFactorial));
+            }
+        }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 8 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 comb(sbyte32 n, sbyte32 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epi8(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new byte32(comb(n.v16_0, k.v16_0, useFactorial),
+ comb(n.v16_16, k.v16_16, useFactorial));
+ }
+ }
+
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 64 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort comb(ushort n, ushort k, Promise useFactorial = Promise.Nothing)
+ {
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+ {
+ if (useFactorial.CountUnsafeLevels() > 2 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U16))
+ {
+ if (useFactorial.CountUnsafeLevels() > 3 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U8))
+ {
+ return (ushort)(factorial((byte)n, Promise.NoOverflow) / (factorial((byte)k, Promise.NoOverflow) * factorial((byte)(n - k), Promise.NoOverflow)));
+ }
+ else
+ {
+ return (ushort)(factorial(n, Promise.NoOverflow) / (factorial(k, Promise.NoOverflow) * factorial((ushort)(n - k), Promise.NoOverflow)));
+ }
+ }
+ else
+ {
+ return (ushort)(factorial((uint)n, Promise.NoOverflow) / (factorial((uint)k, Promise.NoOverflow) * factorial((uint)(n - k), Promise.NoOverflow)));
+ }
+ }
+ else
+ {
+ return (ushort)(factorial((ulong)n, Promise.NoOverflow) / (factorial((ulong)k, Promise.NoOverflow) * factorial((ulong)(n - k), Promise.NoOverflow)));
+ }
+ }
+
+
+ k = min(k, (ushort)(n - k));
+ if (Hint.Unlikely(k == 0))
+ {
+ return 1;
+ }
+
+ ushort c = n--;
+
+ if (Hint.Likely(k > 1))
+ {
+ c = (ushort)((ushort)((c >> 1) * n) + (ushort)((-(c & 1)) & (n >> 1)));
+
+ ushort i = 2;
+ while (Hint.Likely(k > i++))
+ {
+ ushort q = divrem(c, i, out ushort r);
+ n--;
+ c = (ushort)((ushort)(q * n) + (ushort)((ushort)(r * n) / i));
+ }
+ }
+
+ return c;
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 comb(ushort2 n, ushort2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu16(n, k, useFactorial.CountUnsafeLevels(), 2);
+ }
+ else
+ {
+ return new ushort2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 comb(ushort3 n, ushort3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu16(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new ushort3(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 comb(ushort4 n, ushort4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu16(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new ushort4(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial),
+ comb(n.w, k.w, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 comb(ushort8 n, ushort8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu16(n, k, useFactorial.CountUnsafeLevels(), 8);
+ }
+ else
+ {
+ return new ushort8(comb(n.x0, k.x0, useFactorial),
+ comb(n.x1, k.x1, useFactorial),
+ comb(n.x2, k.x2, useFactorial),
+ comb(n.x3, k.x3, useFactorial),
+ comb(n.x4, k.x4, useFactorial),
+ comb(n.x5, k.x5, useFactorial),
+ comb(n.x6, k.x6, useFactorial),
+ comb(n.x7, k.x7, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 comb(ushort16 n, ushort16 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epu16(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ushort16(comb(n.v8_0, k.v8_0, useFactorial),
+ comb(n.v8_8, k.v8_8, useFactorial));
+ }
+ }
+
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 64 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ushort comb(short n, short k, Promise useFactorial = Promise.Nothing)
+        {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+            return comb((ushort)n, (ushort)k, useFactorial); // FIX: cast to ushort — casting to short re-invoked this same overload (infinite recursion / stack overflow); mirrors the sbyte->byte and int->uint overloads
+        }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 comb(short2 n, short2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi16(n, k, useFactorial.CountUnsafeLevels(), 2);
+ }
+ else
+ {
+ return new ushort2((ushort)comb((int)n.x, (int)k.x, useFactorial),
+ (ushort)comb((int)n.y, (int)k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 comb(short3 n, short3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi16(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new ushort3((ushort)comb((int)n.x, (int)k.x, useFactorial),
+ (ushort)comb((int)n.y, (int)k.y, useFactorial),
+ (ushort)comb((int)n.z, (int)k.z, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 comb(short4 n, short4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi16(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new ushort4((ushort)comb((int)n.x, (int)k.x, useFactorial),
+ (ushort)comb((int)n.y, (int)k.y, useFactorial),
+ (ushort)comb((int)n.z, (int)k.z, useFactorial),
+ (ushort)comb((int)n.w, (int)k.w, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 comb(short8 n, short8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi16(n, k, useFactorial.CountUnsafeLevels(), 8);
+ }
+ else
+ {
+ return new ushort8((ushort)comb((int)n.x0, (int)k.x0, useFactorial),
+ (ushort)comb((int)n.x1, (int)k.x1, useFactorial),
+ (ushort)comb((int)n.x2, (int)k.x2, useFactorial),
+ (ushort)comb((int)n.x3, (int)k.x3, useFactorial),
+ (ushort)comb((int)n.x4, (int)k.x4, useFactorial),
+ (ushort)comb((int)n.x5, (int)k.x5, useFactorial),
+ (ushort)comb((int)n.x6, (int)k.x6, useFactorial),
+ (ushort)comb((int)n.x7, (int)k.x7, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ". Arguments that produce an unsigned 16 bit overflow are undefined.
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 16 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 8 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 comb(short16 n, short16 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epi16(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ushort16(comb(n.v8_0, k.v8_0, useFactorial),
+ comb(n.v8_8, k.v8_8, useFactorial));
+ }
+ }
+
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint comb(uint n, uint k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+ {
+ return factorial(n, Promise.NoOverflow) / (factorial(k, Promise.NoOverflow) * factorial(n - k, Promise.NoOverflow));
+ }
+ else
+ {
+ return (uint)(factorial((ulong)n, Promise.NoOverflow) / (factorial((ulong)k, Promise.NoOverflow) * factorial((ulong)n - (ulong)k, Promise.NoOverflow)));
+ }
+ }
+
+
+ k = math.min(k, n - k);
+ if (Hint.Unlikely(k == 0))
+ {
+ return 1;
+ }
+
+ uint c = n--;
+
+ if (Hint.Likely(k > 1))
+ {
+ c = ((c >> 1) * n) + (((uint)-(int)(c & 1)) & (n >> 1));
+
+ uint i = 2;
+ while (Hint.Likely(k > i++))
+ {
+ uint q = divrem(c, i, out uint r);
+ n--;
+ c = (q * n) + ((r * n) / i);
+ }
+ }
+
+ return c;
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 comb(uint2 n, uint2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.comb_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 2));
+ }
+ else
+ {
+ return new uint2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 comb(uint3 n, uint3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.comb_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 3));
+ }
+ else
+ {
+ return new uint3(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 comb(uint4 n, uint4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.comb_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 4));
+ }
+ else
+ {
+ return new uint4(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial),
+ comb(n.w, k.w, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 comb(uint8 n, uint8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epu32(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new uint8(comb(n.v4_0, k.v4_0, useFactorial),
+ comb(n.v4_4, k.v4_4, useFactorial));
+ }
+ }
+
+
+ /// Returns the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint comb(int n, int k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+ return comb((uint)n, (uint)k, useFactorial);
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 comb(int2 n, int2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.comb_epi32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 2));
+ }
+ else
+ {
+ return new uint2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// Returns for each pair of corresponding components the number of ways to choose items from items without repetition and without order. Also known as the binomial coefficient or " choose ".
+ ///
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 64 bit overflow.
+ /// A '' with its flag set may cause a memory access violation for any ! that result in an unsigned 32 bit overflow.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 comb(int3 n, int3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.comb_epi32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 3));
+ }
+ else
+ {
+ return new uint3(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>".       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 comb(int4 n, int4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.comb_epi32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), useFactorial.CountUnsafeLevels(), 4));
+ }
+ else
+ {
+ return new uint4(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial),
+ comb(n.z, k.z, useFactorial),
+ comb(n.w, k.w, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>".       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 comb(int8 n, int8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epi32(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new uint8(comb(n.v4_0, k.v4_0, useFactorial),
+ comb(n.v4_4, k.v4_4, useFactorial));
+ }
+ }
+
+
+ /// <summary>       Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong comb(ulong n, ulong k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+ {
+ return (ulong)factorial((uint)n, Promise.NoOverflow) / (ulong)(factorial((uint)k, Promise.NoOverflow) * factorial((uint)n - (uint)k, Promise.NoOverflow));
+ }
+ else
+ {
+ return factorial(n, Promise.NoOverflow) / (factorial(k, Promise.NoOverflow) * factorial(n - k, Promise.NoOverflow));
+ }
+ }
+
+
+ k = math.min(k, n - k);
+ if (Hint.Unlikely(k == 0))
+ {
+ return 1;
+ }
+
+ ulong c = n--;
+
+ if (Hint.Likely(k > 1))
+ {
+ c = ((c >> 1) * n) + (((ulong)-(long)(c & 1)) & (n >> 1));
+
+ ulong i = 2;
+ while (Hint.Likely(k > i++))
+ {
+ ulong q = divrem(c, i, out ulong r);
+ n--;
+ c = (q * n) + ((r * n) / i);
+ }
+ }
+
+ return c;
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 comb(ulong2 n, ulong2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epu64(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ulong2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 comb(ulong3 n, ulong3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epu64(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new ulong3(comb(n.xy, k.xy, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 comb(ulong4 n, ulong4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epu64(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new ulong4(comb(n.xy, k.xy, useFactorial),
+ comb(n.zw, k.zw, useFactorial));
+ }
+ }
+
+
+ /// <summary>       Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong comb(long n, long k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+ return comb((ulong)n, (ulong)k, useFactorial);
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 comb(long2 n, long2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.comb_epi64(n, k, useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ulong2(comb(n.x, k.x, useFactorial),
+ comb(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 comb(long3 n, long3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epi64(n, k, useFactorial.CountUnsafeLevels(), 3);
+ }
+ else
+ {
+ return new ulong3(comb(n.xy, k.xy, useFactorial),
+ comb(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and without order. Also known as the binomial coefficient or "<paramref name="n"/> choose <paramref name="k"/>". Arguments that produce an unsigned 64 bit overflow are undefined.       </summary>
+ /// <remarks>
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.
+ /// A <see cref="Promise"/> '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 comb(long4 n, long4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_comb_epi64(n, k, useFactorial.CountUnsafeLevels(), 4);
+ }
+ else
+ {
+ return new ulong4(comb(n.xy, k.xy, useFactorial),
+ comb(n.zw, k.zw, useFactorial));
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/Runtime/XSE Core/Exceptions/ConstantException.cs.meta b/Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs.meta
similarity index 83%
rename from Runtime/XSE Core/Exceptions/ConstantException.cs.meta
rename to Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs.meta
index 12046d1..fefc02c 100644
--- a/Runtime/XSE Core/Exceptions/ConstantException.cs.meta
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Binomial Coefficient.cs.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: a84bb5664b910d24bb32f94e36ed467b
+guid: c4d25c32de54fac4f949642b84a6f54a
MonoImporter:
externalObjects: {}
serializedVersion: 2
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Cube Root.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Cube Root.cs
index 4d07443..ebc0152 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Cube Root.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Cube Root.cs
@@ -425,13 +425,13 @@ public static v256 mm256_cbrt_epi16(v256 a, bool promiseAbsolute = false, bool p
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 cbrt_epu32(v128 a, byte rangeLevelPromise = 0, byte elements = 4)
+ public static v128 cbrt_epu32(v128 a, byte rangePromiseLevel = 0, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
- if (rangeLevelPromise > 0 || constexpr.ALL_LE_EPU32(a, ushort.MaxValue, elements))
+ if (rangePromiseLevel > 0 || constexpr.ALL_LE_EPU32(a, ushort.MaxValue, elements))
{
- if (rangeLevelPromise > 1 || constexpr.ALL_LE_EPU32(a, byte.MaxValue, elements))
+ if (rangePromiseLevel > 1 || constexpr.ALL_LE_EPU32(a, byte.MaxValue, elements))
{
v128 ONE = Sse2.set1_epi32(1);
@@ -619,13 +619,13 @@ public static v128 cbrt_epu32(v128 a, byte rangeLevelPromise = 0, byte elements
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_cbrt_epu32(v256 a, byte rangeLevelPromise = 0)
+ public static v256 mm256_cbrt_epu32(v256 a, byte rangePromiseLevel = 0)
{
if (Avx2.IsAvx2Supported)
{
- if (rangeLevelPromise > 0 || constexpr.ALL_LE_EPU32(a, ushort.MaxValue))
+ if (rangePromiseLevel > 0 || constexpr.ALL_LE_EPU32(a, ushort.MaxValue))
{
- if (rangeLevelPromise > 1 || constexpr.ALL_LE_EPU32(a, byte.MaxValue))
+ if (rangePromiseLevel > 1 || constexpr.ALL_LE_EPU32(a, byte.MaxValue))
{
v256 ONE = Avx.mm256_set1_epi32(1);
@@ -814,24 +814,24 @@ public static v256 mm256_cbrt_epu32(v256 a, byte rangeLevelPromise = 0)
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 cbrt_epi32(v128 a, bool promiseAbsolute = false, byte rangeLevelPromise = 0, byte elements = 4)
+ public static v128 cbrt_epi32(v128 a, bool promiseAbsolute = false, byte rangePromiseLevel = 0, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
if (promiseAbsolute || constexpr.ALL_GE_EPI32(a, 0, elements))
{
- return cbrt_epu32(a, rangeLevelPromise, elements);
+ return cbrt_epu32(a, rangePromiseLevel, elements);
}
else
{
if (Ssse3.IsSsse3Supported)
{
- return Ssse3.sign_epi32(cbrt_epu32(Ssse3.abs_epi32(a), rangeLevelPromise, elements), a);
+ return Ssse3.sign_epi32(cbrt_epu32(Ssse3.abs_epi32(a), rangePromiseLevel, elements), a);
}
else
{
v128 negative = Sse2.srai_epi32(a, 31);
- v128 cbrtAbs = cbrt_epu32(Sse2.xor_si128(Sse2.add_epi32(a, negative), negative), rangeLevelPromise, elements);
+ v128 cbrtAbs = cbrt_epu32(Sse2.xor_si128(Sse2.add_epi32(a, negative), negative), rangePromiseLevel, elements);
return Sse2.xor_si128(Sse2.add_epi32(cbrtAbs, negative), negative);
}
@@ -841,7 +841,7 @@ public static v128 cbrt_epi32(v128 a, bool promiseAbsolute = false, byte rangeLe
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_cbrt_epi32(v256 a, bool promiseAbsolute = false, byte rangeLevelPromise = 0)
+ public static v256 mm256_cbrt_epi32(v256 a, bool promiseAbsolute = false, byte rangePromiseLevel = 0)
{
if (Avx2.IsAvx2Supported)
{
@@ -851,7 +851,7 @@ public static v256 mm256_cbrt_epi32(v256 a, bool promiseAbsolute = false, byte r
}
else
{
- return Avx2.mm256_sign_epi32(mm256_cbrt_epu32(Avx2.mm256_abs_epi32(a), rangeLevelPromise), a);
+ return Avx2.mm256_sign_epi32(mm256_cbrt_epu32(Avx2.mm256_abs_epi32(a), rangePromiseLevel), a);
}
}
else throw new IllegalInstructionException();
@@ -859,17 +859,17 @@ public static v256 mm256_cbrt_epi32(v256 a, bool promiseAbsolute = false, byte r
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 cbrt_epu64(v128 a, byte rangeLevelPromise = 0)
+ public static v128 cbrt_epu64(v128 a, byte rangePromiseLevel = 0)
{
if (Sse2.IsSse2Supported)
{
- if (rangeLevelPromise > 0 || constexpr.ALL_LE_EPU64(a, 1ul << 40))
+ if (rangePromiseLevel > 0 || constexpr.ALL_LE_EPU64(a, 1ul << 46))
{
- if (rangeLevelPromise > 1 || constexpr.ALL_LE_EPU64(a, uint.MaxValue))
+ if (rangePromiseLevel > 1 || constexpr.ALL_LE_EPU64(a, uint.MaxValue))
{
- if (rangeLevelPromise > 2 || constexpr.ALL_LE_EPU64(a, ushort.MaxValue))
+ if (rangePromiseLevel > 2 || constexpr.ALL_LE_EPU64(a, ushort.MaxValue))
{
- if (rangeLevelPromise > 3 || constexpr.ALL_LE_EPU64(a, byte.MaxValue))
+ if (rangePromiseLevel > 3 || constexpr.ALL_LE_EPU64(a, byte.MaxValue))
{
return cbrt_epu8_takingAndReturning_epu16(a, 8);
}
@@ -885,7 +885,7 @@ public static v128 cbrt_epu64(v128 a, byte rangeLevelPromise = 0)
}
else
{
- // results within [0, 1ul << 40] have been proven to be correct empirically both with and without FMA instructions
+ // results within [0, 1ul << 46] have been proven to be correct empirically both with and without FMA instructions
return usfcvttpd_epu64(cbrt_pd(usfcvtepu64_pd(a)));
}
}
@@ -1092,17 +1092,17 @@ public static v128 cbrt_epu64(v128 a, byte rangeLevelPromise = 0)
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_cbrt_epu64(v256 a, byte rangeLevelPromise = 0, byte elements = 4)
+ public static v256 mm256_cbrt_epu64(v256 a, byte rangePromiseLevel = 0, byte elements = 4)
{
if (Avx2.IsAvx2Supported)
{
- if (rangeLevelPromise > 0 || constexpr.ALL_LE_EPU64(a, 1ul << 40, elements))
+ if (rangePromiseLevel > 0 || constexpr.ALL_LE_EPU64(a, 1ul << 46, elements))
{
- if (rangeLevelPromise > 1 || constexpr.ALL_LE_EPU64(a, uint.MaxValue, elements))
+ if (rangePromiseLevel > 1 || constexpr.ALL_LE_EPU64(a, uint.MaxValue, elements))
{
- if (rangeLevelPromise > 2 || constexpr.ALL_LE_EPU64(a, ushort.MaxValue, elements))
+ if (rangePromiseLevel > 2 || constexpr.ALL_LE_EPU64(a, ushort.MaxValue, elements))
{
- if (rangeLevelPromise > 3 || constexpr.ALL_LE_EPU64(a, byte.MaxValue, elements))
+ if (rangePromiseLevel > 3 || constexpr.ALL_LE_EPU64(a, byte.MaxValue, elements))
{
return cbrt_epu8_takingAndReturning_epu16(a);
}
@@ -1118,7 +1118,7 @@ public static v256 mm256_cbrt_epu64(v256 a, byte rangeLevelPromise = 0, byte ele
}
else
{
- // results within [0, 1ul << 40] have been proven to be correct empirically both with and without FMA instructions
+ // results within [0, 1ul << 46] have been proven to be correct empirically both with and without FMA instructions
return mm256_usfcvttpd_epu64(mm256_cbrt_pd(mm256_usfcvtepu64_pd(a)));
}
}
@@ -1325,18 +1325,18 @@ public static v256 mm256_cbrt_epu64(v256 a, byte rangeLevelPromise = 0, byte ele
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 cbrt_epi64(v128 a, bool promiseAbs = false, byte rangeLevelPromise = 0)
+ public static v128 cbrt_epi64(v128 a, bool promiseAbs = false, byte rangePromiseLevel = 0)
{
if (Sse2.IsSse2Supported)
{
if (promiseAbs || constexpr.ALL_GE_EPI64(a, 0))
{
- return cbrt_epu64(a, rangeLevelPromise);
+ return cbrt_epu64(a, rangePromiseLevel);
}
else
{
v128 negative = srai_epi64(a, 63);
- v128 cbrtAbs = cbrt_epu64(Sse2.xor_si128(Sse2.add_epi64(a, negative), negative), rangeLevelPromise);
+ v128 cbrtAbs = cbrt_epu64(Sse2.xor_si128(Sse2.add_epi64(a, negative), negative), rangePromiseLevel);
return Sse2.xor_si128(Sse2.add_epi64(cbrtAbs, negative), negative);
}
@@ -1345,18 +1345,18 @@ public static v128 cbrt_epi64(v128 a, bool promiseAbs = false, byte rangeLevelPr
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_cbrt_epi64(v256 a, bool promiseAbs = false, byte rangeLevelPromise = 0, byte elements = 4)
+ public static v256 mm256_cbrt_epi64(v256 a, bool promiseAbs = false, byte rangePromiseLevel = 0, byte elements = 4)
{
if (Avx2.IsAvx2Supported)
{
if (promiseAbs || constexpr.ALL_GE_EPI64(a, 0, elements))
{
- return mm256_cbrt_epu64(a, rangeLevelPromise, elements);
+ return mm256_cbrt_epu64(a, rangePromiseLevel, elements);
}
else
{
v256 negative = mm256_srai_epi64(a, 63);
- v256 cbrtAbs = mm256_cbrt_epu64(Avx2.mm256_xor_si256(Avx2.mm256_add_epi64(a, negative), negative), rangeLevelPromise, elements);
+ v256 cbrtAbs = mm256_cbrt_epu64(Avx2.mm256_xor_si256(Avx2.mm256_add_epi64(a, negative), negative), rangePromiseLevel, elements);
return Avx2.mm256_xor_si256(Avx2.mm256_add_epi64(cbrtAbs, negative), negative);
}
@@ -2475,7 +2475,7 @@ public static uint2 intcbrt(uint2 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 2));
+ return RegisterConversion.ToUInt2(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 2));
}
else
{
@@ -2493,7 +2493,7 @@ public static uint3 intcbrt(uint3 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 3));
+ return RegisterConversion.ToUInt3(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 3));
}
else
{
@@ -2511,7 +2511,7 @@ public static uint4 intcbrt(uint4 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 4));
+ return RegisterConversion.ToUInt4(Xse.cbrt_epu32(RegisterConversion.ToV128(x), optimizations.CountUnsafeLevels(), 4));
}
else
{
@@ -2572,7 +2572,7 @@ public static int2 intcbrt(int2 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 2));
+ return RegisterConversion.ToInt2(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 2));
}
else
{
@@ -2591,7 +2591,7 @@ public static int3 intcbrt(int3 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 3));
+ return RegisterConversion.ToInt3(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 3));
}
else
{
@@ -2610,7 +2610,7 @@ public static int4 intcbrt(int4 x, Promise optimizations = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 4));
+ return RegisterConversion.ToInt4(Xse.cbrt_epi32(RegisterConversion.ToV128(x), optimizations.Promises(Promise.ZeroOrGreater), optimizations.CountUnsafeLevels(), 4));
}
else
{
@@ -2847,7 +2847,7 @@ public static ulong intcbrt(ulong x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root ⌊∛⌋ of a .
///
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40].
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
@@ -2869,7 +2869,7 @@ public static ulong2 intcbrt(ulong2 x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root ⌊∛⌋ of a .
///
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40].
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
@@ -2895,7 +2895,7 @@ public static ulong3 intcbrt(ulong3 x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root ⌊∛⌋ of a .
///
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40].
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
/// A '' with its flag set returns undefined results for input values outside the interval [0, ].
@@ -2941,7 +2941,7 @@ public static long intcbrt(long x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root sgn() * ⌊|∛|⌋ of a .
///
/// A '' with its flag set returns undefined results for negative input values.
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40] if the flag is also set, [-(1ul << 40), 1ul << 40] otherwise.
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46] if the flag is also set, [-(1ul << 46), 1ul << 46] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
@@ -2964,7 +2964,7 @@ public static long2 intcbrt(long2 x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root sgn() * ⌊|∛|⌋ of a .
///
/// A '' with its flag set returns undefined results for negative input values.
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40] if the flag is also set, [-(1ul << 40), 1ul << 40] otherwise.
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46] if the flag is also set, [-(1ul << 46), 1ul << 46] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
@@ -2991,7 +2991,7 @@ public static long3 intcbrt(long3 x, Promise optimizations = Promise.Nothing)
/// Computes the componentwise integer cube root sgn() * ⌊|∛|⌋ of a .
///
/// A '' with its flag set returns undefined results for negative input values.
- /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 40] if the flag is also set, [-(1ul << 40), 1ul << 40] otherwise.
+ /// A '' with its flag set returns undefined results for input values outside the interval [0, 1ul << 46] if the flag is also set, [-(1ul << 46), 1ul << 46] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
/// A '' with its flag set returns undefined results for input values outside the interval [0, ] if the flag is also set, [-, ] otherwise.
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Exponentiation.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Exponentiation.cs
index 7a96155..40cecd9 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Exponentiation.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Exponentiation.cs
@@ -1281,7 +1281,7 @@ public static int2 intpow(int2 x, uint2 n)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 2));
+ return RegisterConversion.ToInt2(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 2));
}
else
{
@@ -1295,7 +1295,7 @@ public static int3 intpow(int3 x, uint3 n)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 3));
+ return RegisterConversion.ToInt3(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 3));
}
else
{
@@ -1310,7 +1310,7 @@ public static int4 intpow(int4 x, uint4 n)
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 4));
+ return RegisterConversion.ToInt4(Xse.pow_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(n), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Factorial.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Factorial.cs
index 1427e9a..07b1b64 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Factorial.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Factorial.cs
@@ -5,6 +5,7 @@
using MaxMath.Intrinsics;
using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FACTORIAL;
namespace MaxMath
{
@@ -14,6 +15,7 @@ unsafe public static partial class Xse
{
private static v128 FACTORIALS_EPU8
{
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
if (Sse2.IsSse2Supported)
@@ -24,16 +26,43 @@ private static v128 FACTORIALS_EPU8
}
}
-
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 gamma_epu16_epu8range(v128 a)
+ {
+ if (Ssse3.IsSsse3Supported)
+ {
+ a = Sse2.or_si128(a, Sse2.set1_epi16(unchecked((short)0xFF00)));
+
+ return Ssse3.shuffle_epi8(FACTORIALS_EPU8, a);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_gamma_epu16_epu8range(v256 a)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi16(unchecked((short)0xFF00)));
+
+ return Avx2.mm256_shuffle_epi8(new v256(FACTORIALS_EPU8, FACTORIALS_EPU8), a);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
[SkipLocalsInit]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 gamma_epu8(v128 a, bool promiseNoOverflow = false, byte elements = 16)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU8(a, MAX_INVERSE_FACTORIAL_U8, elements);
+
if (Ssse3.IsSsse3Supported)
{
if (!promiseNoOverflow)
{
- a = Sse2.min_epu8(a, Sse2.set1_epi8(7));
+ a = Sse2.min_epu8(a, Sse2.set1_epi8(MAX_INVERSE_FACTORIAL_U8 + 1));
}
return Ssse3.shuffle_epi8(FACTORIALS_EPU8, a);
@@ -82,9 +111,11 @@ public static v256 mm256_gamma_epu8(v256 a, bool promiseNoOverflow = false)
{
if (Avx2.IsAvx2Supported)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU8(a, MAX_INVERSE_FACTORIAL_U8);
+
if (!promiseNoOverflow)
{
- a = Avx2.mm256_min_epu8(a, Avx.mm256_set1_epi8(7));
+ a = Avx2.mm256_min_epu8(a, Avx.mm256_set1_epi8(MAX_INVERSE_FACTORIAL_U8 + 1));
}
return Avx2.mm256_shuffle_epi8(new v256(FACTORIALS_EPU8, FACTORIALS_EPU8), a);
@@ -97,13 +128,13 @@ public static v256 mm256_gamma_epu8(v256 a, bool promiseNoOverflow = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 gamma_epu16(v128 a, bool promiseNoOverflow = false, byte elements = 8)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U16, elements);
+
if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_LE_EPU16(a, 5))
+ if (constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U8, elements))
{
- a = Sse2.or_si128(a, Sse2.set1_epi16(unchecked((short)0xFF00)));
-
- return Ssse3.shuffle_epi8(FACTORIALS_EPU8, a);
+ return gamma_epu16_epu8range(a);
}
}
@@ -111,7 +142,7 @@ public static v128 gamma_epu16(v128 a, bool promiseNoOverflow = false, byte elem
{
if (!promiseNoOverflow)
{
- a = min_epu16(a, Sse2.set1_epi16(9));
+ a = min_epu16(a, Sse2.set1_epi16(MAX_INVERSE_FACTORIAL_U16 + 1));
}
if (Avx2.IsAvx2Supported)
@@ -159,18 +190,18 @@ public static v128 gamma_epu16(v128 a, bool promiseNoOverflow = false, byte elem
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v256 mm256_gamma_epu16(v256 a, bool promiseNoOverflow = false)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U16);
+
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_LE_EPU16(a, 5))
+ if (constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U8))
{
- a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi16(unchecked((short)0xFF00)));
-
- return Avx2.mm256_shuffle_epi8(new v256(FACTORIALS_EPU8, FACTORIALS_EPU8), a);
+ return mm256_gamma_epu16_epu8range(a);
}
if (!promiseNoOverflow)
{
- a = Avx2.mm256_min_epu16(a, Avx.mm256_set1_epi16(9));
+ a = Avx2.mm256_min_epu16(a, Avx.mm256_set1_epi16(MAX_INVERSE_FACTORIAL_U16 + 1));
}
uint* TABLE = stackalloc uint[10] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, ushort.MaxValue };
@@ -190,9 +221,11 @@ public static v256 mm256_gamma_epu16(v256 a, bool promiseNoOverflow = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 gamma_epi16(v128 a, bool promiseNoOverflow = false, byte elements = 8)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPI16(a, MAX_INVERSE_FACTORIAL_S16);
+
if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_LE_EPU16(a, 5))
+ if (constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U8, elements))
{
a = Sse2.or_si128(a, Sse2.set1_epi16(unchecked((short)0xFF00)));
@@ -204,7 +237,7 @@ public static v128 gamma_epi16(v128 a, bool promiseNoOverflow = false, byte elem
{
if (!promiseNoOverflow)
{
- a = min_epu16(a, Sse2.set1_epi16(8));
+ a = min_epu16(a, Sse2.set1_epi16(MAX_INVERSE_FACTORIAL_S16 + 1));
}
if (Avx2.IsAvx2Supported)
@@ -252,9 +285,11 @@ public static v128 gamma_epi16(v128 a, bool promiseNoOverflow = false, byte elem
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v256 mm256_gamma_epi16(v256 a, bool promiseNoOverflow = false)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPI16(a, MAX_INVERSE_FACTORIAL_S16);
+
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_LE_EPU16(a, 5))
+ if (constexpr.ALL_LE_EPU16(a, MAX_INVERSE_FACTORIAL_U8))
{
a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi16(unchecked((short)0xFF00)));
@@ -263,7 +298,7 @@ public static v256 mm256_gamma_epi16(v256 a, bool promiseNoOverflow = false)
if (!promiseNoOverflow)
{
- a = Avx2.mm256_min_epu16(a, Avx.mm256_set1_epi16(9));
+ a = Avx2.mm256_min_epu16(a, Avx.mm256_set1_epi16(MAX_INVERSE_FACTORIAL_S16 + 1));
}
uint* TABLE = stackalloc uint[9] { 1, 1, 2, 6, 24, 120, 720, 5_040, (uint)short.MaxValue };
@@ -283,9 +318,11 @@ public static v256 mm256_gamma_epi16(v256 a, bool promiseNoOverflow = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 gamma_epu32(v128 a, bool promiseNoOverflow = false, byte elements = 4)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU32(a, MAX_INVERSE_FACTORIAL_U32, elements);
+
if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_LE_EPU32(a, 5))
+ if (constexpr.ALL_LE_EPU32(a, MAX_INVERSE_FACTORIAL_U8))
{
a = Sse2.or_si128(a, Sse2.set1_epi32(unchecked((int)0xFFFF_FF00u)));
@@ -297,7 +334,7 @@ public static v128 gamma_epu32(v128 a, bool promiseNoOverflow = false, byte elem
{
if (!promiseNoOverflow)
{
- a = min_epu32(a, Sse2.set1_epi32(13));
+ a = min_epu32(a, Sse2.set1_epi32(MAX_INVERSE_FACTORIAL_U32 + 1));
}
uint* TABLE = stackalloc uint[14] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, uint.MaxValue };
@@ -325,9 +362,11 @@ public static v128 gamma_epu32(v128 a, bool promiseNoOverflow = false, byte elem
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v256 mm256_gamma_epu32(v256 a, bool promiseNoOverflow = false)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU32(a, MAX_INVERSE_FACTORIAL_U32);
+
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_LE_EPU32(a, 5))
+ if (constexpr.ALL_LE_EPU32(a, MAX_INVERSE_FACTORIAL_U8))
{
a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi32(unchecked((int)0xFFFF_FF00u)));
@@ -336,7 +375,7 @@ public static v256 mm256_gamma_epu32(v256 a, bool promiseNoOverflow = false)
if (!promiseNoOverflow)
{
- a = Avx2.mm256_min_epu32(a, Avx.mm256_set1_epi32(13));
+ a = Avx2.mm256_min_epu32(a, Avx.mm256_set1_epi32(MAX_INVERSE_FACTORIAL_U32 + 1));
}
uint* TABLE = stackalloc uint[14] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, uint.MaxValue };
@@ -351,9 +390,11 @@ public static v256 mm256_gamma_epu32(v256 a, bool promiseNoOverflow = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 gamma_epu64(v128 a, bool promiseNoOverflow = false)
{
+ promiseNoOverflow |= constexpr.ALL_LE_EPU64(a, MAX_INVERSE_FACTORIAL_U64);
+
if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_LE_EPU64(a, 5))
+ if (constexpr.ALL_LE_EPU64(a, MAX_INVERSE_FACTORIAL_U8))
{
a = Sse2.or_si128(a, Sse2.set1_epi64x(unchecked((long)0xFFFF_FFFF_FFFF_FF00ul)));
@@ -365,7 +406,7 @@ public static v128 gamma_epu64(v128 a, bool promiseNoOverflow = false)
{
if (!promiseNoOverflow)
{
- a = min_epu64(a, Sse2.set1_epi64x(21));
+ a = min_epu64(a, Sse2.set1_epi64x(MAX_INVERSE_FACTORIAL_U64 + 1));
}
ulong* TABLE = stackalloc ulong[22] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, 6_227_020_800ul, 87_178_291_200ul, 1_307_674_368_000ul, 20_922_789_888_000ul, 355_687_428_096_000ul, 6_402_373_705_728_000ul, 121_645_100_408_832_000ul, 2_432_902_008_176_640_000ul, ulong.MaxValue };
@@ -388,75 +429,11 @@ public static v128 gamma_epu64(v128 a, bool promiseNoOverflow = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v256 mm256_gamma_epu64(v256 a, bool promiseNoOverflow = false, byte elements = 4)
{
- if (Avx2.IsAvx2Supported)
- {
- if (constexpr.ALL_LE_EPU32(a, 5))
- {
- a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi64x(unchecked((long)0xFFFF_FFFF_FFFF_FF00ul)));
-
- return Avx2.mm256_shuffle_epi8(new v256(FACTORIALS_EPU8, FACTORIALS_EPU8), a);
- }
-
- if (!promiseNoOverflow)
- {
- a = mm256_min_epu64(a, Avx.mm256_set1_epi64x(21));
- }
- else if (elements == 3)
- {
- a.SLong3 = 0;
- }
-
- ulong* TABLE = stackalloc ulong[22] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, 6_227_020_800ul, 87_178_291_200ul, 1_307_674_368_000ul, 20_922_789_888_000ul, 355_687_428_096_000ul, 6_402_373_705_728_000ul, 121_645_100_408_832_000ul, 2_432_902_008_176_640_000ul, ulong.MaxValue };
-
- return Avx2.mm256_i64gather_epi64(TABLE, a, sizeof(ulong));
- }
- else throw new IllegalInstructionException();
- }
-
- [SkipLocalsInit]
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 gamma_epi64(v128 a, bool promiseNoOverflow = false)
- {
- if (Ssse3.IsSsse3Supported)
- {
- if (constexpr.ALL_LE_EPU64(a, 5))
- {
- a = Sse2.or_si128(a, Sse2.set1_epi64x(unchecked((long)0xFFFF_FFFF_FFFF_FF00ul)));
-
- return Ssse3.shuffle_epi8(FACTORIALS_EPU8, a);
- }
- }
-
- if (Sse2.IsSse2Supported)
- {
- if (!promiseNoOverflow)
- {
- a = min_epu64(a, Sse2.set1_epi64x(21));
- }
-
- long* TABLE = stackalloc long[22] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, 6_227_020_800L, 87_178_291_200L, 1_307_674_368_000L, 20_922_789_888_000L, 355_687_428_096_000L, 6_402_373_705_728_000L, 121_645_100_408_832_000L, 2_432_902_008_176_640_000L, long.MaxValue };
-
- if (Avx2.IsAvx2Supported)
- {
- return Avx2.i64gather_epi64(TABLE, a, sizeof(ulong));
- }
- else
- {
- a = Sse2.slli_epi64(a, 3);
+ promiseNoOverflow |= constexpr.ALL_LE_EPU64(a, MAX_INVERSE_FACTORIAL_U64, elements);
- return new v128(*(ulong*)((byte*)TABLE + a.ULong0), *(ulong*)((byte*)TABLE + a.ULong1));
- }
- }
- else throw new IllegalInstructionException();
- }
-
- [SkipLocalsInit]
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_gamma_epi64(v256 a, bool promiseNoOverflow = false, byte elements = 4)
- {
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_LE_EPU32(a, 5))
+ if (constexpr.ALL_LE_EPU64(a, MAX_INVERSE_FACTORIAL_U8, elements))
{
a = Avx2.mm256_or_si256(a, Avx.mm256_set1_epi64x(unchecked((long)0xFFFF_FFFF_FFFF_FF00ul)));
@@ -465,14 +442,14 @@ public static v256 mm256_gamma_epi64(v256 a, bool promiseNoOverflow = false, byt
if (!promiseNoOverflow)
{
- a = mm256_min_epu64(a, Avx.mm256_set1_epi64x(21));
+ a = mm256_min_epu64(a, Avx.mm256_set1_epi64x(MAX_INVERSE_FACTORIAL_U64 + 1), elements);
}
else if (elements == 3)
{
a.SLong3 = 0;
}
- long* TABLE = stackalloc long[22] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, 6_227_020_800L, 87_178_291_200L, 1_307_674_368_000L, 20_922_789_888_000L, 355_687_428_096_000L, 6_402_373_705_728_000L, 121_645_100_408_832_000L, 2_432_902_008_176_640_000L, long.MaxValue };
+ ulong* TABLE = stackalloc ulong[22] { 1, 1, 2, 6, 24, 120, 720, 5_040, 40_320, 362_880, 3_628_800, 39_916_800, 479_001_600, 6_227_020_800ul, 87_178_291_200ul, 1_307_674_368_000ul, 20_922_789_888_000ul, 355_687_428_096_000ul, 6_402_373_705_728_000ul, 121_645_100_408_832_000ul, 2_432_902_008_176_640_000ul, ulong.MaxValue };
return Avx2.mm256_i64gather_epi64(TABLE, a, sizeof(ulong));
}
@@ -490,12 +467,12 @@ unsafe public static partial class maxmath
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static UInt128 factorial(UInt128 x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x <= 20))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U64))
{
return factorial(x.lo64, Promise.NoOverflow);
}
- if (Xse.constexpr.IS_TRUE(x < 35))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U128))
{
noOverflow |= Promise.NoOverflow;
}
@@ -553,12 +530,12 @@ public static UInt128 factorial(UInt128 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Int128 factorial(Int128 x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x >= 0 & x <= 33))
+ if (Xse.constexpr.IS_TRUE(x >= 0 & x <= MAX_INVERSE_FACTORIAL_S128))
{
noOverflow |= Promise.NoOverflow;
}
- if (Xse.constexpr.IS_TRUE(x <= 20))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U64))
{
if (Xse.constexpr.IS_TRUE(x >= 0))
{
@@ -593,19 +570,19 @@ public static Int128 factorial(Int128 x, Promise noOverflow = Promise.Nothing)
6_402_373_705_728_000ul,
121_645_100_408_832_000ul,
2_432_902_008_176_640_000ul,
- /*51090942171709440000*/new Int128(0xC507_7D36_B8C4_0000, 0x0000_0000_0000_0002),
- /*1124000727777607680000*/new Int128(0xEEA4_C2B3_E0D8_0000, 0x0000_0000_0000_003C),
- /*25852016738884976640000*/new Int128(0x70CD_7E29_3368_0000, 0x0000_0000_0000_0579),
- /*620448401733239439360000*/new Int128(0x9343_D3DC_D1C0_0000, 0x0000_0000_0000_8362),
- /*15511210043330985984000000*/new Int128(0x619F_B090_7BC0_0000, 0x0000_0000_000C_D4A0),
- /*403291461126605635584000000*/new Int128(0xEA37_EEAC_9180_0000, 0x0000_0000_014D_9849),
- /*10888869450418352160768000000*/new Int128(0xB3E6_2C33_5880_0000, 0x0000_0000_232F_0FCB),
- /*304888344611713860501504000000*/new Int128(0xAD2C_D59D_AE00_0000, 0x0000_0003_D925_BA47),
- /*8841761993739701954543616000000*/new Int128(0x9E14_32DC_B600_0000, 0x0000_006F_9946_1A1E),
- /*265252859812191058636308480000000*/new Int128(0x865D_F5DD_5400_0000, 0x0000_0D13_F637_0F96),
- /*8222838654177922817725562880000000*/new Int128(0x4560_C5CD_2C00_0000, 0x0001_956A_D0AA_E33A),
- /*263130836933693530167218012160000000*/new Int128(0xAC18_B9A5_8000_0000, 0x0032_AD5A_155C_6748),
- /*8683317618811886495518194401280000000*/new Int128(0x2F2F_EE55_8000_0000, 0x0688_589C_C0E9_505E),
+ /*51_090_942_171_709_440_000*/new Int128(0xC507_7D36_B8C4_0000, 0x0000_0000_0000_0002),
+ /*1_124_000_727_777_607_680_000*/new Int128(0xEEA4_C2B3_E0D8_0000, 0x0000_0000_0000_003C),
+ /*25_852_016_738_884_976_640_000*/new Int128(0x70CD_7E29_3368_0000, 0x0000_0000_0000_0579),
+ /*620_448_401_733_239_439_360_000*/new Int128(0x9343_D3DC_D1C0_0000, 0x0000_0000_0000_8362),
+ /*15_511_210_043_330_985_984_000_000*/new Int128(0x619F_B090_7BC0_0000, 0x0000_0000_000C_D4A0),
+ /*403_291_461_126_605_635_584_000_000*/new Int128(0xEA37_EEAC_9180_0000, 0x0000_0000_014D_9849),
+ /*10_888_869_450_418_352_160_768_000_000*/new Int128(0xB3E6_2C33_5880_0000, 0x0000_0000_232F_0FCB),
+ /*304_888_344_611_713_860_501_504_000_000*/new Int128(0xAD2C_D59D_AE00_0000, 0x0000_0003_D925_BA47),
+ /*8_841_761_993_739_701_954_543_616_000_000*/new Int128(0x9E14_32DC_B600_0000, 0x0000_006F_9946_1A1E),
+ /*265_252_859_812_191_058_636_308_480_000_000*/new Int128(0x865D_F5DD_5400_0000, 0x0000_0D13_F637_0F96),
+ /*8_222_838_654_177_922_817_725_562_880_000_000*/new Int128(0x4560_C5CD_2C00_0000, 0x0001_956A_D0AA_E33A),
+ /*263_130_836_933_693_530_167_218_012_160_000_000*/new Int128(0xAC18_B9A5_8000_0000, 0x0032_AD5A_155C_6748),
+ /*8_683_317_618_811_886_495_518_194_401_280_000_000*/new Int128(0x2F2F_EE55_8000_0000, 0x0688_589C_C0E9_505E),
Int128.MaxValue
};
@@ -622,7 +599,7 @@ public static Int128 factorial(Int128 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte factorial(byte x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x < 6))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U8))
{
noOverflow |= Promise.NoOverflow;
}
@@ -732,7 +709,7 @@ public static byte32 factorial(byte32 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte factorial(sbyte x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x >= 0 & x <= 5))
+ if (Xse.constexpr.IS_TRUE(x >= 0 & x <= MAX_INVERSE_FACTORIAL_S8))
{
noOverflow |= Promise.NoOverflow;
}
@@ -844,12 +821,12 @@ public static sbyte32 factorial(sbyte32 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort factorial(ushort x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x <= 5))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U8))
{
return factorial((byte)x, Promise.NoOverflow);
}
- if (Xse.constexpr.IS_TRUE(x < 9))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U16))
{
noOverflow |= Promise.NoOverflow;
}
@@ -945,12 +922,12 @@ public static ushort16 factorial(ushort16 x, Promise noOverflow = Promise.Nothin
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short factorial(short x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x >= 0 & x <= 7))
+ if (Xse.constexpr.IS_TRUE(x >= 0 & x <= MAX_INVERSE_FACTORIAL_S16))
{
noOverflow |= Promise.NoOverflow;
}
- if (Xse.constexpr.IS_TRUE(x <= 5))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U8))
{
return factorial((byte)x, noOverflow);
}
@@ -1046,12 +1023,12 @@ public static short16 factorial(short16 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint factorial(uint x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x <= 7))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U16))
{
return factorial((ushort)x, Promise.NoOverflow);
}
- if (Xse.constexpr.IS_TRUE(x < 13))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U32))
{
noOverflow |= Promise.NoOverflow;
}
@@ -1072,7 +1049,7 @@ public static uint2 factorial(uint2 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 2));
+ return RegisterConversion.ToUInt2(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 2));
}
else
{
@@ -1087,7 +1064,7 @@ public static uint3 factorial(uint3 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 3));
+ return RegisterConversion.ToUInt3(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 3));
}
else
{
@@ -1102,7 +1079,7 @@ public static uint4 factorial(uint4 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 4));
+ return RegisterConversion.ToUInt4(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 4));
}
else
{
@@ -1132,18 +1109,18 @@ public static uint8 factorial(uint8 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int factorial(int x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x >= 0 & x <= 12))
+ if (Xse.constexpr.IS_TRUE(x >= 0 & x <= MAX_INVERSE_FACTORIAL_S32))
{
noOverflow |= Promise.NoOverflow;
}
- if (Xse.constexpr.IS_TRUE(x <= 8))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U16))
{
if (Xse.constexpr.IS_TRUE(x >= 0))
{
return factorial((ushort)x, Promise.NoOverflow);
}
- if (Xse.constexpr.IS_TRUE(x <= 7))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_S16))
{
return factorial((short)x, noOverflow);
}
@@ -1165,7 +1142,7 @@ public static int2 factorial(int2 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 2));
+ return RegisterConversion.ToInt2(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 2));
}
else
{
@@ -1180,7 +1157,7 @@ public static int3 factorial(int3 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 3));
+ return RegisterConversion.ToInt3(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 3));
}
else
{
@@ -1195,7 +1172,7 @@ public static int4 factorial(int4 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 4));
+ return RegisterConversion.ToInt4(Xse.gamma_epu32(RegisterConversion.ToV128(x), noOverflow.Promises(Promise.NoOverflow), 4));
}
else
{
@@ -1225,12 +1202,12 @@ public static int8 factorial(int8 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong factorial(ulong x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x <= 12))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U32))
{
return factorial((uint)x, Promise.NoOverflow);
}
- if (Xse.constexpr.IS_TRUE(x < 21))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U64))
{
noOverflow |= Promise.NoOverflow;
}
@@ -1296,12 +1273,12 @@ public static ulong4 factorial(ulong4 x, Promise noOverflow = Promise.Nothing)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long factorial(long x, Promise noOverflow = Promise.Nothing)
{
- if (Xse.constexpr.IS_TRUE(x >= 0 & x <= 20))
+ if (Xse.constexpr.IS_TRUE(x >= 0 & x <= MAX_INVERSE_FACTORIAL_U64))
{
noOverflow |= Promise.NoOverflow;
}
- if (Xse.constexpr.IS_TRUE(x <= 12))
+ if (Xse.constexpr.IS_TRUE(x <= MAX_INVERSE_FACTORIAL_U32))
{
if (Xse.constexpr.IS_TRUE(x >= 0))
{
@@ -1329,7 +1306,7 @@ public static long2 factorial(long2 x, Promise noOverflow = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return Xse.gamma_epi64(x, noOverflow.Promises(Promise.NoOverflow));
+ return Xse.gamma_epu64(x, noOverflow.Promises(Promise.NoOverflow));
}
else
{
@@ -1344,7 +1321,7 @@ public static long3 factorial(long3 x, Promise noOverflow = Promise.Nothing)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_gamma_epi64(x, noOverflow.Promises(Promise.NoOverflow), 3);
+ return Xse.mm256_gamma_epu64(x, noOverflow.Promises(Promise.NoOverflow), 3);
}
else
{
@@ -1359,7 +1336,7 @@ public static long4 factorial(long4 x, Promise noOverflow = Promise.Nothing)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_gamma_epi64(x, noOverflow.Promises(Promise.NoOverflow), 4);
+ return Xse.mm256_gamma_epu64(x, noOverflow.Promises(Promise.NoOverflow), 4);
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Greatest Common Divisor.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Greatest Common Divisor.cs
index 0dea9cb..38f962a 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Greatest Common Divisor.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Greatest Common Divisor.cs
@@ -834,7 +834,7 @@ public static uint2 gcd(uint2 x, uint2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
+ return RegisterConversion.ToUInt2(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -848,7 +848,7 @@ public static uint3 gcd(uint3 x, uint3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
+ return RegisterConversion.ToUInt3(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -862,7 +862,7 @@ public static uint4 gcd(uint4 x, uint4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
+ return RegisterConversion.ToUInt4(Xse.gcd_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Least Common Multiple.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Least Common Multiple.cs
index 9670d07..33eaf01 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Least Common Multiple.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Least Common Multiple.cs
@@ -2,9 +2,231 @@
using Unity.Mathematics;
using Unity.Burst.CompilerServices;
using MaxMath.Intrinsics;
+using Unity.Burst.Intrinsics;
+
+using static Unity.Burst.Intrinsics.X86;
namespace MaxMath
{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 lcm_epu8(v128 a, v128 b, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ switch (elements)
+ {
+ case 2:
+ case 3:
+ case 4:
+ {
+ v128 left;
+ if (Constant.IsConstantExpression(a))
+ {
+ left = cvtepu8_ps(b); // counter-intuitive but this is free (ILP during gcd); whereas multiplication at the very end could be up to 4 cycles faster this way
+ }
+ else
+ {
+ left = cvtepu8_ps(a);
+ }
+
+ v128 right = cvtepu8_ps(gcd_epu8(a, b));
+ v128 ints = DIV_FLOATV_SIGNED_USHORT_RANGE_RET_INT(left, right);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return cvtepi32_epi8(Sse4_1.mullo_epi32(ints, a), elements);
+ }
+ else
+ {
+ return cvtepi32_epi8(Sse4_1.mullo_epi32(ints, b), elements);
+ }
+ }
+ else
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return cvtepi16_epi8(Sse2.mullo_epi16(Sse2.packs_epi32(ints, ints), a), elements);
+ }
+ else
+ {
+ return cvtepi16_epi8(Sse2.mullo_epi16(Sse2.packs_epi32(ints, ints), b), elements);
+ }
+ }
+ }
+ case 8:
+ {
+ v128 leftLo;
+ v128 leftHi;
+ if (Constant.IsConstantExpression(a))
+ {
+ leftLo = cvt2x2epu16_ps(cvtepu8_epi16(b), out leftHi); // counter-intuitive but this is free (ILP during gcd); whereas multiplication at the very end could be up to 4 cycles faster this way
+ }
+ else
+ {
+ leftLo = cvt2x2epu16_ps(cvtepu8_epi16(a), out leftHi);
+ }
+
+ v128 rightLo = cvt2x2epu16_ps(cvtepu8_epi16(gcd_epu8(a, b)), out v128 rightHi);
+
+ v128 intsLo = DIV_FLOATV_SIGNED_USHORT_RANGE_RET_INT(leftLo, rightLo);
+ v128 intsHi = DIV_FLOATV_SIGNED_USHORT_RANGE_RET_INT(leftHi, rightHi);
+
+ if (Constant.IsConstantExpression(a))
+ {
+ return cvtepi16_epi8(Sse2.mullo_epi16(Sse2.packs_epi32(intsLo, intsHi), a), 8);
+ }
+ else
+ {
+ return cvtepi16_epi8(Sse2.mullo_epi16(Sse2.packs_epi32(intsLo, intsHi), b), 8);
+ }
+ }
+ default:
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return mullo_epi8(div_epu8(b, gcd_epu8(a, b)), a);
+ }
+ else
+ {
+ return mullo_epi8(div_epu8(a, gcd_epu8(a, b)), b);
+ }
+ }
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_lcm_epu8(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return mm256_mullo_epi8(mm256_div_epu8(b, mm256_gcd_epu8(a, b)), a);
+ }
+ else
+ {
+ return mm256_mullo_epi8(mm256_div_epu8(a, mm256_gcd_epu8(a, b)), b);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 lcm_epu16(v128 a, v128 b, byte elements = 8)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return Sse2.mullo_epi16(div_epu16(b, gcd_epu16(a, b), elements), a);
+ }
+ else
+ {
+ return Sse2.mullo_epi16(div_epu16(a, gcd_epu16(a, b), elements), b);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_lcm_epu16(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return Avx2.mm256_mullo_epi16(mm256_div_epu16(b, mm256_gcd_epu16(a, b)), a);
+ }
+ else
+ {
+ return Avx2.mm256_mullo_epi16(mm256_div_epu16(a, mm256_gcd_epu16(a, b)), b);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 lcm_epu32(v128 a, v128 b, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return mullo_epi32(div_epu32(b, gcd_epu32(a, b, elements), elements), a, elements);
+ }
+ else
+ {
+ return mullo_epi32(div_epu32(a, gcd_epu32(a, b, elements), elements), b, elements);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_lcm_epu32(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return Avx2.mm256_mullo_epi32(mm256_div_epu32(b, mm256_gcd_epu32(a, b)), a);
+ }
+ else
+ {
+ return Avx2.mm256_mullo_epi32(mm256_div_epu32(a, mm256_gcd_epu32(a, b)), b);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 lcm_epu64(v128 a, v128 b)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return mullo_epi64(div_epu64(b, gcd_epu64(a, b)), a);
+ }
+ else
+ {
+ return mullo_epi64(div_epu64(a, gcd_epu64(a, b)), b);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_lcm_epu64(v256 a, v256 b, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (Constant.IsConstantExpression(a))
+ {
+ return mm256_mullo_epi64(mm256_div_epu64(b, mm256_gcd_epu64(a, b, elements), elements), a, elements);
+ }
+ else
+ {
+ return mm256_mullo_epi64(mm256_div_epu64(a, mm256_gcd_epu64(a, b, elements), elements), b, elements);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+ }
+ }
+
+
unsafe public static partial class maxmath
{
/// Returns the least common multiple of two s.
@@ -25,8 +247,8 @@ public static UInt128 lcm(UInt128 x, UInt128 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static UInt128 lcm(Int128 x, Int128 y)
{
- UInt128 absX = Xse.constexpr.IS_TRUE(x >= 0) ? (UInt128)x : (UInt128)abs(x);
- UInt128 absY = Xse.constexpr.IS_TRUE(x >= 0) ? (UInt128)y : (UInt128)abs(y);
+ UInt128 absX = (UInt128)abs(x);
+ UInt128 absY = (UInt128)abs(y);
if (Constant.IsConstantExpression(absX))
{
@@ -51,13 +273,20 @@ public static uint lcm(byte x, byte y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte2 lcm(byte2 x, byte2 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu8(x, y, 2);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -65,13 +294,20 @@ public static byte2 lcm(byte2 x, byte2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte3 lcm(byte3 x, byte3 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu8(x, y, 3);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -79,13 +315,20 @@ public static byte3 lcm(byte3 x, byte3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte4 lcm(byte4 x, byte4 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu8(x, y, 4);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -93,13 +336,20 @@ public static byte4 lcm(byte4 x, byte4 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte8 lcm(byte8 x, byte8 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu8(x, y, 8);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -107,13 +357,20 @@ public static byte8 lcm(byte8 x, byte8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte16 lcm(byte16 x, byte16 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu8(x, y, 16);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -121,13 +378,20 @@ public static byte16 lcm(byte16 x, byte16 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte32 lcm(byte32 x, byte32 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Avx2.IsAvx2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.mm256_lcm_epu8(x, y);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -144,16 +408,23 @@ public static uint lcm(sbyte x, sbyte y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte2 lcm(sbyte2 x, sbyte2 y)
{
- byte2 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte2)x : (byte2)abs(x);
- byte2 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte2)y : (byte2)abs(y);
+ byte2 absX = (byte2)abs(x);
+ byte2 absY = (byte2)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu8(absX, absY, 2);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -161,16 +432,23 @@ public static byte2 lcm(sbyte2 x, sbyte2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte3 lcm(sbyte3 x, sbyte3 y)
{
- byte3 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte3)x : (byte3)abs(x);
- byte3 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte3)y : (byte3)abs(y);
+ byte3 absX = (byte3)abs(x);
+ byte3 absY = (byte3)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu8(absX, absY, 3);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -178,16 +456,23 @@ public static byte3 lcm(sbyte3 x, sbyte3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte4 lcm(sbyte4 x, sbyte4 y)
{
- byte4 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte4)x : (byte4)abs(x);
- byte4 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (byte4)y : (byte4)abs(y);
+ byte4 absX = (byte4)abs(x);
+ byte4 absY = (byte4)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu8(absX, absY, 4);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -195,16 +480,23 @@ public static byte4 lcm(sbyte4 x, sbyte4 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte8 lcm(sbyte8 x, sbyte8 y)
{
- byte8 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte8)x : (byte8)abs(x);
- byte8 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte8)y : (byte8)abs(y);
+ byte8 absX = (byte8)abs(x);
+ byte8 absY = (byte8)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu8(absX, absY, 8);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -212,16 +504,23 @@ public static byte8 lcm(sbyte8 x, sbyte8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte16 lcm(sbyte16 x, sbyte16 y)
{
- byte16 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte16)x : (byte16)abs(x);
- byte16 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte16)y : (byte16)abs(y);
+ byte16 absX = (byte16)abs(x);
+ byte16 absY = (byte16)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu8(absX, absY, 16);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -229,16 +528,23 @@ public static byte16 lcm(sbyte16 x, sbyte16 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte32 lcm(sbyte32 x, sbyte32 y)
{
- byte32 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte32)x : (byte32)abs(x);
- byte32 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (byte32)y : (byte32)abs(y);
+ byte32 absX = (byte32)abs(x);
+ byte32 absY = (byte32)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Avx2.IsAvx2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.mm256_lcm_epu8(absX, absY);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -254,13 +560,20 @@ public static uint lcm(ushort x, ushort y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort2 lcm(ushort2 x, ushort2 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu16(x, y, 2);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -268,13 +581,20 @@ public static ushort2 lcm(ushort2 x, ushort2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort3 lcm(ushort3 x, ushort3 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu16(x, y, 3);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -282,13 +602,20 @@ public static ushort3 lcm(ushort3 x, ushort3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort4 lcm(ushort4 x, ushort4 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu16(x, y, 4);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -296,13 +623,20 @@ public static ushort4 lcm(ushort4 x, ushort4 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort8 lcm(ushort8 x, ushort8 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu16(x, y, 8);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -310,13 +644,20 @@ public static ushort8 lcm(ushort8 x, ushort8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort16 lcm(ushort16 x, ushort16 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Avx2.IsAvx2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.mm256_lcm_epu16(x, y);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -332,16 +673,23 @@ public static uint lcm(short x, short y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort2 lcm(short2 x, short2 y)
{
- ushort2 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort2)x : (ushort2)abs(x);
- ushort2 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort2)y : (ushort2)abs(y);
+ ushort2 absX = (ushort2)abs(x);
+ ushort2 absY = (ushort2)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu16(absX, absY, 2);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -349,16 +697,23 @@ public static ushort2 lcm(short2 x, short2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort3 lcm(short3 x, short3 y)
{
- ushort3 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort3)x : (ushort3)abs(x);
- ushort3 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort3)y : (ushort3)abs(y);
+ ushort3 absX = (ushort3)abs(x);
+ ushort3 absY = (ushort3)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu16(absX, absY, 3);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -366,16 +721,23 @@ public static ushort3 lcm(short3 x, short3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort4 lcm(short4 x, short4 y)
{
- ushort4 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort4)x : (ushort4)abs(x);
- ushort4 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ushort4)y : (ushort4)abs(y);
+ ushort4 absX = (ushort4)abs(x);
+ ushort4 absY = (ushort4)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu16(absX, absY, 4);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -383,16 +745,23 @@ public static ushort4 lcm(short4 x, short4 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort8 lcm(short8 x, short8 y)
{
- ushort8 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (ushort8)x : (ushort8)abs(x);
- ushort8 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (ushort8)y : (ushort8)abs(y);
+ ushort8 absX = (ushort8)abs(x);
+ ushort8 absY = (ushort8)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu16(absX, absY, 8);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -400,16 +769,23 @@ public static ushort8 lcm(short8 x, short8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort16 lcm(short16 x, short16 y)
{
- ushort16 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (ushort16)x : (ushort16)abs(x);
- ushort16 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (ushort16)y : (ushort16)abs(y);
+ ushort16 absX = (ushort16)abs(x);
+ ushort16 absY = (ushort16)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Avx2.IsAvx2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.mm256_lcm_epu16(absX, absY);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -438,13 +814,20 @@ public static uint2 lcm(int2 x, int2 y)
uint2 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint2)x : (uint2)abs(x);
uint2 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint2)y : (uint2)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return div(absY, gcd(absX, absY)) * absX;
+ return RegisterConversion.ToUInt2(Xse.lcm_epu32(RegisterConversion.ToV128(absX), RegisterConversion.ToV128(absY), 2));
}
else
{
- return div(absX, gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -455,13 +838,20 @@ public static uint3 lcm(int3 x, int3 y)
uint3 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint3)x : (uint3)abs(x);
uint3 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint3)y : (uint3)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return div(absY, gcd(absX, absY)) * absX;
+ return RegisterConversion.ToUInt3(Xse.lcm_epu32(RegisterConversion.ToV128(absX), RegisterConversion.ToV128(absY), 3));
}
else
{
- return div(absX, gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -472,13 +862,20 @@ public static uint4 lcm(int4 x, int4 y)
uint4 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint4)x : (uint4)abs(x);
uint4 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (uint4)y : (uint4)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return div(absY, gcd(absX, absY)) * absX;
+ return RegisterConversion.ToUInt4(Xse.lcm_epu32(RegisterConversion.ToV128(absX), RegisterConversion.ToV128(absY), 4));
}
else
{
- return div(absX, gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -489,13 +886,20 @@ public static uint8 lcm(int8 x, int8 y)
uint8 absX = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (uint8)x : (uint8)abs(x);
uint8 absY = Xse.constexpr.IS_TRUE(all(x >= 0)) ? (uint8)y : (uint8)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Avx2.IsAvx2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.mm256_lcm_epu32(absX, absY);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -519,13 +923,20 @@ public static uint lcm(uint x, uint y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint2 lcm(uint2 x, uint2 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return div(y, gcd(x, y)) * x;
+ return RegisterConversion.ToUInt2(Xse.lcm_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
- return div(x, gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -533,13 +944,20 @@ public static uint2 lcm(uint2 x, uint2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint3 lcm(uint3 x, uint3 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return div(y, gcd(x, y)) * x;
+ return RegisterConversion.ToUInt3(Xse.lcm_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
- return div(x, gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -547,13 +965,20 @@ public static uint3 lcm(uint3 x, uint3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint4 lcm(uint4 x, uint4 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return div(y, gcd(x, y)) * x;
+ return RegisterConversion.ToUInt4(Xse.lcm_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
- return div(x, gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -561,13 +986,20 @@ public static uint4 lcm(uint4 x, uint4 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint8 lcm(uint8 x, uint8 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Avx2.IsAvx2Supported)
{
- return div(y, gcd(x, y)) * x;
+ return Xse.mm256_lcm_epu32(x, y);
}
else
{
- return div(x, gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -579,7 +1011,7 @@ public static ulong lcm(long x, long y)
ulong absX = Xse.constexpr.IS_TRUE(x >= 0) ? (ulong)x : (ulong)math.abs(x);
ulong absY = Xse.constexpr.IS_TRUE(x >= 0) ? (ulong)y : (ulong)math.abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Constant.IsConstantExpression(x))
{
return (absY / gcd(absX, absY)) * absX;
}
@@ -593,16 +1025,23 @@ public static ulong lcm(long x, long y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong2 lcm(long2 x, long2 y)
{
- ulong2 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong2)x : (ulong2)abs(x);
- ulong2 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong2)y : (ulong2)abs(y);
+ ulong2 absX = (ulong2)abs(x);
+ ulong2 absY = (ulong2)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Sse2.IsSse2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.lcm_epu64(absX, absY);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -610,16 +1049,23 @@ public static ulong2 lcm(long2 x, long2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong3 lcm(long3 x, long3 y)
{
- ulong3 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong3)x : (ulong3)abs(x);
- ulong3 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong3)y : (ulong3)abs(y);
+ ulong3 absX = (ulong3)abs(x);
+ ulong3 absY = (ulong3)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Avx2.IsAvx2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.mm256_lcm_epu64(absX, absY, 3);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -627,16 +1073,23 @@ public static ulong3 lcm(long3 x, long3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong4 lcm(long4 x, long4 y)
{
- ulong4 absX = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong4)x : (ulong4)abs(x);
- ulong4 absY = Xse.constexpr.IS_TRUE(math.all(x >= 0)) ? (ulong4)y : (ulong4)abs(y);
+ ulong4 absX = (ulong4)abs(x);
+ ulong4 absY = (ulong4)abs(y);
- if (Constant.IsConstantExpression(absX))
+ if (Avx2.IsAvx2Supported)
{
- return (absY / gcd(absX, absY)) * absX;
+ return Xse.mm256_lcm_epu64(absX, absY, 4);
}
else
{
- return (absX / gcd(absX, absY)) * absY;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (absY / gcd(absX, absY)) * absX;
+ }
+ else
+ {
+ return (absX / gcd(absX, absY)) * absY;
+ }
}
}
@@ -659,13 +1112,20 @@ public static ulong lcm(ulong x, ulong y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong2 lcm(ulong2 x, ulong2 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Sse2.IsSse2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.lcm_epu64(x, y);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -673,13 +1133,20 @@ public static ulong2 lcm(ulong2 x, ulong2 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong3 lcm(ulong3 x, ulong3 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Avx2.IsAvx2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.mm256_lcm_epu64(x, y, 3);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
@@ -687,13 +1154,20 @@ public static ulong3 lcm(ulong3 x, ulong3 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong4 lcm(ulong4 x, ulong4 y)
{
- if (Constant.IsConstantExpression(x))
+ if (Avx2.IsAvx2Supported)
{
- return (y / gcd(x, y)) * x;
+ return Xse.mm256_lcm_epu64(x, y, 4);
}
else
{
- return (x / gcd(x, y)) * y;
+ if (Constant.IsConstantExpression(x))
+ {
+ return (y / gcd(x, y)) * x;
+ }
+ else
+ {
+ return (x / gcd(x, y)) * y;
+ }
}
}
}
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/Square Root.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/Square Root.cs
index 647e31d..cbe29c6 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Integer/Square Root.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/Square Root.cs
@@ -20,11 +20,11 @@ public static v128 sqrt_epi8(v128 a, byte elements = 16)
{
if (elements <= 4)
{
- v128 toFloat = Xse.cvtepu8_ps(a);
+ v128 toFloat = cvtepu8_ps(a);
v128 sqrt = Sse.rcp_ps(Sse.rsqrt_ps(toFloat));
v128 toInt = Sse2.cvttps_epi32(sqrt);
- a = Xse.cvtepi32_epi8(toInt, elements);
+ a = cvtepi32_epi8(toInt, elements);
}
else if (elements <= 8)
{
@@ -1341,7 +1341,7 @@ public static uint2 intsqrt(uint2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToUInt2(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -1355,7 +1355,7 @@ public static uint3 intsqrt(uint3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToUInt3(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -1369,7 +1369,7 @@ public static uint4 intsqrt(uint4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToUInt4(Xse.sqrt_epu32(RegisterConversion.ToV128(x), 4));
}
else
{
@@ -1418,7 +1418,7 @@ public static int2 intsqrt(int2 x)
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToInt2(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -1436,7 +1436,7 @@ public static int3 intsqrt(int3 x)
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToInt3(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -1455,7 +1455,7 @@ public static int4 intsqrt(int4 x)
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToInt4(Xse.sqrt_epi32(RegisterConversion.ToV128(x), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs b/Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs
new file mode 100644
index 0000000..9d8d48a
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs
@@ -0,0 +1,2268 @@
+using System.Runtime.CompilerServices;
+using Unity.Burst.Intrinsics;
+using Unity.Burst.CompilerServices;
+using Unity.Mathematics;
+using MaxMath.Intrinsics;
+using DevTools;
+
+using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FACTORIAL;
+
+namespace MaxMath
+{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naiveperm_epu8(v128 n, v128 k, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu8(n, true, elements);
+ v128 denom = gamma_epu8(Sse2.sub_epi8(n, k), true, elements);
+
+ return div_epu8(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naiveperm_epu8(v256 n, v256 k)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu8(n, true);
+ v256 denom = mm256_gamma_epu8(Avx2.mm256_sub_epi8(n, k), true);
+
+ return mm256_div_epu8(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naiveperm_epu16(v128 n, v128 k, bool epu8range = false, byte elements = 8)
+ {
+ v128 nom;
+ v128 denom;
+ if (Sse2.IsSse2Supported)
+ {
+ if (epu8range || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8, elements))
+ {
+ if (Ssse3.IsSsse3Supported)
+ {
+ nom = gamma_epu16_epu8range(n);
+ denom = gamma_epu16_epu8range(Sse2.sub_epi16(n, k));
+
+ return div_epu16(nom, denom, elements);
+ }
+ }
+
+ nom = gamma_epu16(n, true, elements);
+ denom = gamma_epu16(Sse2.sub_epi16(n, k), true, elements);
+
+ return div_epu16(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naiveperm_epu16(v256 n, v256 k, bool epu8range = false)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (epu8range || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8))
+ {
+ v256 nom = mm256_gamma_epu16_epu8range(n);
+ v256 denom = mm256_gamma_epu16_epu8range(Avx2.mm256_sub_epi16(n, k));
+
+ return mm256_div_epu16(nom, denom);
+ }
+ else
+ {
+ v256 nom = mm256_gamma_epu16(n, true);
+ v256 denom = mm256_gamma_epu16(Avx2.mm256_sub_epi16(n, k), true);
+
+ return mm256_div_epu16(nom, denom);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naiveperm_epu32(v128 n, v128 k, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu32(n, true, elements);
+ v128 denom = gamma_epu32(Sse2.sub_epi32(n, k), true, elements);
+
+ return div_epu32(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naiveperm_epu32(v256 n, v256 k)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu32(n, true);
+ v256 denom = mm256_gamma_epu32(Avx2.mm256_sub_epi32(n, k), true);
+
+ return mm256_div_epu32(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v128 naiveperm_epu64(v128 n, v128 k)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ v128 nom = gamma_epu64(n, true);
+ v128 denom = gamma_epu64(Sse2.sub_epi64(n, k), true);
+
+ return div_epu64(nom, denom);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static v256 mm256_naiveperm_epu64(v256 n, v256 k, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nom = mm256_gamma_epu64(n, true, elements);
+ v256 denom = mm256_gamma_epu64(Avx2.mm256_sub_epi64(n, k), true, elements);
+
+ return mm256_div_epu64(nom, denom, elements);
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 perm_epu8(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNotGreater(k.Byte0, n.Byte0);
+Assert.IsNotGreater(k.Byte1, n.Byte1);
+if (elements > 2)
+{
+ Assert.IsNotGreater(k.Byte2, n.Byte2);
+}
+if (elements > 3)
+{
+ Assert.IsNotGreater(k.Byte3, n.Byte3);
+}
+if (elements > 4)
+{
+ Assert.IsNotGreater(k.Byte4, n.Byte4);
+ Assert.IsNotGreater(k.Byte5, n.Byte5);
+ Assert.IsNotGreater(k.Byte6, n.Byte6);
+ Assert.IsNotGreater(k.Byte7, n.Byte7);
+}
+if (elements > 8)
+{
+ Assert.IsNotGreater(k.Byte8, n.Byte8);
+ Assert.IsNotGreater(k.Byte9, n.Byte9);
+ Assert.IsNotGreater(k.Byte10, n.Byte10);
+ Assert.IsNotGreater(k.Byte11, n.Byte11);
+ Assert.IsNotGreater(k.Byte12, n.Byte12);
+ Assert.IsNotGreater(k.Byte13, n.Byte13);
+ Assert.IsNotGreater(k.Byte14, n.Byte14);
+ Assert.IsNotGreater(k.Byte15, n.Byte15);
+}
+ if (unsafeLevels > 0 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U64, elements))
+ {
+ if (unsafeLevels > 1 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U32, elements))
+ {
+ if (unsafeLevels > 2 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U16, elements))
+ {
+ if (unsafeLevels > 3 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U8, elements))
+ {
+ return naiveperm_epu8(n, k, elements);
+ }
+ else
+ {
+ if (elements <= 8)
+ {
+ return cvtepi16_epi8(naiveperm_epu16(cvtepu8_epi16(n), cvtepu8_epi16(k), false, elements), elements);
+ }
+ else
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return mm256_cvtepi16_epi8(mm256_naiveperm_epu16(Avx2.mm256_cvtepu8_epi16(n), Avx2.mm256_cvtepu8_epi16(k), false));
+ }
+ else
+ {
+ v128 nLo16 = cvt2x2epu8_epi16(n, out v128 nHi16);
+ v128 kLo16 = cvt2x2epu8_epi16(k, out v128 kHi16);
+
+ v128 resultLo = naiveperm_epu16(nLo16, kLo16, false);
+ v128 resultHi = naiveperm_epu16(nHi16, kHi16, false);
+
+ return cvt2x2epi16_epi8(resultLo, resultHi);
+ }
+ }
+ }
+ }
+ else
+ {
+ if (elements <= 4)
+ {
+ return cvtepi32_epi8(naiveperm_epu32(cvtepu8_epi32(n), cvtepu8_epi32(k), elements), elements);
+ }
+ else
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (elements == 8)
+ {
+ return mm256_cvtepi32_epi8(mm256_naiveperm_epu32(Avx2.mm256_cvtepu8_epi32(n), Avx2.mm256_cvtepu8_epi32(k)));
+ }
+ else
+ {
+ v256 loN32 = Avx2.mm256_cvtepu8_epi32(n);
+ v256 loK32 = Avx2.mm256_cvtepu8_epi32(k);
+ v256 hiN32 = Avx2.mm256_cvtepu8_epi32(Sse2.bsrli_si128(n, 8 * sizeof(byte)));
+ v256 hiK32 = Avx2.mm256_cvtepu8_epi32(Sse2.bsrli_si128(k, 8 * sizeof(byte)));
+
+ v128 resultLo = mm256_cvtepi32_epi8(mm256_naiveperm_epu32(loN32, loK32));
+ v128 resultHi = mm256_cvtepi32_epi8(mm256_naiveperm_epu32(hiN32, hiK32));
+
+ return Sse2.unpacklo_epi64(resultLo, resultHi);
+ }
+ }
+ else
+ {
+ if (elements == 8)
+ {
+ v128 loN32 = cvtepu8_epi32(n);
+ v128 loK32 = cvtepu8_epi32(k);
+ v128 hiN32 = cvtepu8_epi32(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+ v128 hiK32 = cvtepu8_epi32(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+
+ v128 resultLo = cvtepi32_epi8(naiveperm_epu32(loN32, loK32));
+ v128 resultHi = cvtepi32_epi8(naiveperm_epu32(hiN32, hiK32));
+
+ return Sse2.unpacklo_epi32(resultLo, resultHi);
+ }
+ else
+ {
+ v128 loN16 = cvt2x2epu8_epi16(n, out v128 hiN16);
+ v128 loK16 = cvt2x2epu8_epi16(k, out v128 hiK16);
+
+ v128 n32_0 = cvt2x2epu16_epi32(loN16, out v128 n32_1);
+ v128 n32_2 = cvt2x2epu16_epi32(hiN16, out v128 n32_3);
+ v128 k32_0 = cvt2x2epu16_epi32(loK16, out v128 k32_1);
+ v128 k32_2 = cvt2x2epu16_epi32(hiK16, out v128 k32_3);
+
+ v128 result32_0 = naiveperm_epu32(n32_0, k32_0);
+ v128 result32_1 = naiveperm_epu32(n32_1, k32_1);
+ v128 result32_2 = naiveperm_epu32(n32_2, k32_2);
+ v128 result32_3 = naiveperm_epu32(n32_3, k32_3);
+
+ v128 result16_0 = cvt2x2epi32_epi16(result32_0, result32_1);
+ v128 result16_1 = cvt2x2epi32_epi16(result32_2, result32_3);
+
+ return cvt2x2epi16_epi8(result16_0, result16_1);
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ switch (elements)
+ {
+ case 2:
+ {
+ return Sse2.unpacklo_epi8(Sse2.cvtsi64x_si128((long)maxmath.perm((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe1)),
+ Sse2.cvtsi64x_si128((long)maxmath.perm((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe1)));
+ }
+
+ case 3:
+ case 4:
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return mm256_cvtepi64_epi8(mm256_div_epu64(mm256_gamma_epu64(Avx2.mm256_cvtepu8_epi64(n), true, elements), mm256_gamma_epu64(Avx2.mm256_sub_epi64(Avx2.mm256_cvtepu8_epi64(n), Avx2.mm256_cvtepu8_epi64(k)), true, elements), elements));
+ }
+ else
+ {
+ return new v128((byte)maxmath.perm((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe1),
+ (byte)(elements == 4 ? maxmath.perm((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe1) : 0),
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0);
+ }
+ }
+
+ case 8:
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 nLo = Avx2.mm256_cvtepu8_epi64(n);
+ v256 kLo = Avx2.mm256_cvtepu8_epi64(k);
+ v256 nHi = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+ v256 kHi = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+
+ v128 lo = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(nLo, kLo));
+ v128 hi = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(nHi, kHi));
+
+ return Sse2.unpacklo_epi32(lo, hi);
+ }
+ else
+ {
+ return new v128((byte)maxmath.perm((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 4), (ulong)extract_epi8(k, 4), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 5), (ulong)extract_epi8(k, 5), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 6), (ulong)extract_epi8(k, 6), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 7), (ulong)extract_epi8(k, 7), maxmath.Promise.Unsafe1),
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0);
+ }
+ }
+
+ default:
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ v256 n0 = Avx2.mm256_cvtepu8_epi64(n);
+ v256 k0 = Avx2.mm256_cvtepu8_epi64(k);
+ v256 n1 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 4 * sizeof(byte)));
+ v256 k1 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 4 * sizeof(byte)));
+ v256 n2 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 8 * sizeof(byte)));
+ v256 k2 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 8 * sizeof(byte)));
+ v256 n3 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(n, 12 * sizeof(byte)));
+ v256 k3 = Avx2.mm256_cvtepu8_epi64(Sse2.bsrli_si128(k, 12 * sizeof(byte)));
+
+ v128 result0 = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(n0, k0));
+ v128 result1 = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(n1, k1));
+ v128 result2 = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(n2, k2));
+ v128 result3 = mm256_cvtepi64_epi8(mm256_naiveperm_epu64(n3, k3));
+
+ return Sse2.unpacklo_epi64(Sse2.unpacklo_epi32(result0, result1), Sse2.unpacklo_epi32(result2, result3));
+ }
+ else
+ {
+ return new v128((byte)maxmath.perm((ulong)extract_epi8(n, 0), (ulong)extract_epi8(k, 0), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 1), (ulong)extract_epi8(k, 1), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 2), (ulong)extract_epi8(k, 2), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 3), (ulong)extract_epi8(k, 3), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 4), (ulong)extract_epi8(k, 4), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 5), (ulong)extract_epi8(k, 5), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 6), (ulong)extract_epi8(k, 6), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 7), (ulong)extract_epi8(k, 7), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 8), (ulong)extract_epi8(k, 8), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 9), (ulong)extract_epi8(k, 9), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 10), (ulong)extract_epi8(k, 10), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 11), (ulong)extract_epi8(k, 11), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 12), (ulong)extract_epi8(k, 12), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 13), (ulong)extract_epi8(k, 13), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 14), (ulong)extract_epi8(k, 14), maxmath.Promise.Unsafe1),
+ (byte)maxmath.perm((ulong)extract_epi8(n, 15), (ulong)extract_epi8(k, 15), maxmath.Promise.Unsafe1));
+ }
+ }
+ }
+ }
+ }
+
+
+ if (elements <= 8)
+ {
+ return cvtepi16_epi8(perm_epu16(cvtepu8_epi16(n), cvtepu8_epi16(k), elements: elements), elements);
+ }
+ else
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return mm256_cvtepi16_epi8(mm256_perm_epu16(Avx2.mm256_cvtepu8_epi16(n), Avx2.mm256_cvtepu8_epi16(k)));
+ }
+ else
+ {
+ v128 ZERO = Sse2.setzero_si128();
+
+ v128 resultsFoundMask = Sse2.cmpeq_epi8(k, ZERO);
+ v128 results = Sse2.sub_epi8(ZERO, resultsFoundMask);
+
+ k = dec_epi8(k);
+ v128 p = n;
+ n = dec_epi8(n);
+
+ v128 cmp = Sse2.cmpeq_epi8(ZERO, k);
+ results = blendv_si128(results, p, cmp);
+ resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+
+ while (Hint.Likely(notalltrue_epi128(resultsFoundMask, elements)))
+ {
+ k = dec_epi8(k);
+ p = mullo_epi8(p, n, elements);
+ n = dec_epi8(n);
+
+ cmp = Sse2.cmpeq_epi8(ZERO, k);
+ results = blendv_si128(results, p, cmp);
+ resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+ }
+
+ return results;
+ }
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk = n! / (n - k)!) for 32 unsigned bytes.
+        // 'unsafeLevels' (or constexpr range proofs on n) promises that every n! fits in a
+        // progressively narrower unsigned type, unlocking cheaper factorial-based paths;
+        // otherwise an iterative falling-product loop runs.
+        public static v256 mm256_perm_epu8(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+Assert.IsNotGreater(k.Byte0, n.Byte0);
+Assert.IsNotGreater(k.Byte1, n.Byte1);
+Assert.IsNotGreater(k.Byte2, n.Byte2);
+Assert.IsNotGreater(k.Byte3, n.Byte3);
+Assert.IsNotGreater(k.Byte4, n.Byte4);
+Assert.IsNotGreater(k.Byte5, n.Byte5);
+Assert.IsNotGreater(k.Byte6, n.Byte6);
+Assert.IsNotGreater(k.Byte7, n.Byte7);
+Assert.IsNotGreater(k.Byte8, n.Byte8);
+Assert.IsNotGreater(k.Byte9, n.Byte9);
+Assert.IsNotGreater(k.Byte10, n.Byte10);
+Assert.IsNotGreater(k.Byte11, n.Byte11);
+Assert.IsNotGreater(k.Byte12, n.Byte12);
+Assert.IsNotGreater(k.Byte13, n.Byte13);
+Assert.IsNotGreater(k.Byte14, n.Byte14);
+Assert.IsNotGreater(k.Byte15, n.Byte15);
+Assert.IsNotGreater(k.Byte16, n.Byte16);
+Assert.IsNotGreater(k.Byte17, n.Byte17);
+Assert.IsNotGreater(k.Byte18, n.Byte18);
+Assert.IsNotGreater(k.Byte19, n.Byte19);
+Assert.IsNotGreater(k.Byte20, n.Byte20);
+Assert.IsNotGreater(k.Byte21, n.Byte21);
+Assert.IsNotGreater(k.Byte22, n.Byte22);
+Assert.IsNotGreater(k.Byte23, n.Byte23);
+Assert.IsNotGreater(k.Byte24, n.Byte24);
+Assert.IsNotGreater(k.Byte25, n.Byte25);
+Assert.IsNotGreater(k.Byte26, n.Byte26);
+Assert.IsNotGreater(k.Byte27, n.Byte27);
+Assert.IsNotGreater(k.Byte28, n.Byte28);
+Assert.IsNotGreater(k.Byte29, n.Byte29);
+Assert.IsNotGreater(k.Byte30, n.Byte30);
+Assert.IsNotGreater(k.Byte31, n.Byte31);
+
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U64))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U32))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U16))
+                        {
+                            if (unsafeLevels > 3 || constexpr.ALL_LE_EPU8(n, MAX_INVERSE_FACTORIAL_U8))
+                            {
+                                // n! fits in 8 bits per lane; compute directly.
+                                return mm256_naiveperm_epu8(n, k);
+                            }
+                            else
+                            {
+                                // Widen to 16 bits per lane, compute, narrow back.
+                                v256 nLo16 = mm256_cvt2x2epu8_epi16(n, out v256 nHi16);
+                                v256 kLo16 = mm256_cvt2x2epu8_epi16(k, out v256 kHi16);
+
+                                return mm256_cvt2x2epi16_epi8(mm256_naiveperm_epu16(nLo16, kLo16), mm256_naiveperm_epu16(nHi16, kHi16));
+                            }
+                        }
+                        else
+                        {
+                            // Widen to 32 bits per lane (8 -> 16 -> 32), compute, narrow back down.
+                            v256 loN16 = mm256_cvt2x2epu8_epi16(n, out v256 hiN16);
+                            v256 loK16 = mm256_cvt2x2epu8_epi16(k, out v256 hiK16);
+
+                            v256 n32_0 = mm256_cvt2x2epu16_epi32(loN16, out v256 n32_1);
+                            v256 n32_2 = mm256_cvt2x2epu16_epi32(hiN16, out v256 n32_3);
+                            v256 k32_0 = mm256_cvt2x2epu16_epi32(loK16, out v256 k32_1);
+                            v256 k32_2 = mm256_cvt2x2epu16_epi32(hiK16, out v256 k32_3);
+
+                            v256 result32_0 = mm256_naiveperm_epu32(n32_0, k32_0);
+                            v256 result32_1 = mm256_naiveperm_epu32(n32_1, k32_1);
+                            v256 result32_2 = mm256_naiveperm_epu32(n32_2, k32_2);
+                            v256 result32_3 = mm256_naiveperm_epu32(n32_3, k32_3);
+
+                            v256 result16_0 = mm256_cvt2x2epi32_epi16(result32_0, result32_1);
+                            v256 result16_1 = mm256_cvt2x2epi32_epi16(result32_2, result32_3);
+
+                            return mm256_cvt2x2epi16_epi8(result16_0, result16_1);
+                        }
+                    }
+                    else
+                    {
+                        // Widen to 64 bits per lane (8 -> 16 -> 32 -> 64), compute, narrow back down.
+                        v256 loN16 = mm256_cvt2x2epu8_epi16(n, out v256 hiN16);
+                        v256 loK16 = mm256_cvt2x2epu8_epi16(k, out v256 hiK16);
+
+                        v256 n32_0 = mm256_cvt2x2epu16_epi32(loN16, out v256 n32_1);
+                        v256 n32_2 = mm256_cvt2x2epu16_epi32(hiN16, out v256 n32_3);
+                        v256 k32_0 = mm256_cvt2x2epu16_epi32(loK16, out v256 k32_1);
+                        v256 k32_2 = mm256_cvt2x2epu16_epi32(hiK16, out v256 k32_3);
+
+                        v256 n64_0 = mm256_cvt2x2epu32_epi64(n32_0, out v256 n64_1);
+                        v256 n64_2 = mm256_cvt2x2epu32_epi64(n32_1, out v256 n64_3);
+                        v256 n64_4 = mm256_cvt2x2epu32_epi64(n32_2, out v256 n64_5);
+                        v256 n64_6 = mm256_cvt2x2epu32_epi64(n32_3, out v256 n64_7);
+                        v256 k64_0 = mm256_cvt2x2epu32_epi64(k32_0, out v256 k64_1);
+                        v256 k64_2 = mm256_cvt2x2epu32_epi64(k32_1, out v256 k64_3);
+                        v256 k64_4 = mm256_cvt2x2epu32_epi64(k32_2, out v256 k64_5);
+                        v256 k64_6 = mm256_cvt2x2epu32_epi64(k32_3, out v256 k64_7);
+
+                        v256 result64_0 = mm256_naiveperm_epu64(n64_0, k64_0);
+                        v256 result64_1 = mm256_naiveperm_epu64(n64_1, k64_1);
+                        v256 result64_2 = mm256_naiveperm_epu64(n64_2, k64_2);
+                        v256 result64_3 = mm256_naiveperm_epu64(n64_3, k64_3);
+                        v256 result64_4 = mm256_naiveperm_epu64(n64_4, k64_4);
+                        v256 result64_5 = mm256_naiveperm_epu64(n64_5, k64_5);
+                        v256 result64_6 = mm256_naiveperm_epu64(n64_6, k64_6);
+                        v256 result64_7 = mm256_naiveperm_epu64(n64_7, k64_7);
+
+                        v256 result32_0 = mm256_cvt2x2epi64_epi32(result64_0, result64_1);
+                        v256 result32_1 = mm256_cvt2x2epi64_epi32(result64_2, result64_3);
+                        v256 result32_2 = mm256_cvt2x2epi64_epi32(result64_4, result64_5);
+                        v256 result32_3 = mm256_cvt2x2epi64_epi32(result64_6, result64_7);
+
+                        v256 result16_0 = mm256_cvt2x2epi32_epi16(result32_0, result32_1);
+                        v256 result16_1 = mm256_cvt2x2epi32_epi16(result32_2, result32_3);
+
+                        return mm256_cvt2x2epi16_epi8(result16_0, result16_1);
+                    }
+                }
+
+
+                // Fallback without factorials: iterative product n * (n-1) * ... * (n-k+1).
+                // Lanes where k == 0 start at 1 (0 - 0xFF mask) and freeze via the blend on
+                // resultsFoundMask; the loop ends when every lane has finished counting down.
+                v256 ZERO = Avx.mm256_setzero_si256();
+
+                v256 resultsFoundMask = Avx2.mm256_cmpeq_epi8(k, ZERO);
+                v256 results = Avx2.mm256_sub_epi8(ZERO, resultsFoundMask);
+
+                k = mm256_dec_epi8(k);
+                v256 p = n;
+                n = mm256_dec_epi8(n);
+
+                v256 cmp = Avx2.mm256_cmpeq_epi8(ZERO, k);
+                results = mm256_blendv_si256(results, p, cmp);
+                resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+
+                while (Hint.Likely(mm256_notalltrue_epi256(resultsFoundMask, 32)))
+                {
+                    k = mm256_dec_epi8(k);
+                    p = mm256_mullo_epi8(p, n);
+                    n = mm256_dec_epi8(n);
+
+                    cmp = Avx2.mm256_cmpeq_epi8(ZERO, k);
+                    results = mm256_blendv_si256(results, p, cmp);
+                    resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for up to 8 unsigned shorts.
+        // 'unsafeLevels' (or constexpr range proofs on n) selects a factorial-based
+        // fast path at the narrowest safe width; otherwise an iterative product loop runs.
+        public static v128 perm_epu16(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 8)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+Assert.IsNotGreater(k.UShort0, n.UShort0);
+Assert.IsNotGreater(k.UShort1, n.UShort1);
+if (elements > 2)
+{
+    Assert.IsNotGreater(k.UShort2, n.UShort2);
+}
+if (elements > 3)
+{
+    Assert.IsNotGreater(k.UShort3, n.UShort3);
+}
+if (elements > 4)
+{
+    Assert.IsNotGreater(k.UShort4, n.UShort4);
+    Assert.IsNotGreater(k.UShort5, n.UShort5);
+    Assert.IsNotGreater(k.UShort6, n.UShort6);
+    Assert.IsNotGreater(k.UShort7, n.UShort7);
+}
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U64, elements))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U32, elements))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U16, elements))
+                        {
+                            return naiveperm_epu16(n, k, unsafeLevels > 3 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8, elements), elements);
+                        }
+                        else
+                        {
+                            // Widen to 32 bits per lane, compute, narrow back.
+                            if (elements <= 4)
+                            {
+                                return cvtepi32_epi16(naiveperm_epu32(cvtepu16_epi32(n), cvtepu16_epi32(k), elements), elements);
+                            }
+                            else
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    return mm256_cvtepi32_epi16(mm256_naiveperm_epu32(Avx2.mm256_cvtepu16_epi32(n), Avx2.mm256_cvtepu16_epi32(k)));
+                                }
+                                else
+                                {
+                                    v128 nLo32 = cvt2x2epu16_epi32(n, out v128 nHi32);
+                                    v128 kLo32 = cvt2x2epu16_epi32(k, out v128 kHi32);
+
+                                    return cvt2x2epi32_epi16(naiveperm_epu32(nLo32, kLo32, elements), naiveperm_epu32(nHi32, kHi32, elements));
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // n! needs 64 bits: dispatch on element count, computing via 64 bit
+                        // vectors (AVX2) or scalar u64 perms and repacking the results.
+                        switch (elements)
+                        {
+                            case 2:
+                            {
+                                return Sse2.unpacklo_epi16(Sse2.cvtsi32_si128((ushort)maxmath.perm((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe1)),
+                                                           Sse2.cvtsi32_si128((ushort)maxmath.perm((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe1)));
+                            }
+
+                            case 3:
+                            case 4:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    return mm256_cvtepi64_epi16(mm256_naiveperm_epu64(Avx2.mm256_cvtepu16_epi64(n), Avx2.mm256_cvtepu16_epi64(k), elements));
+                                }
+                                else
+                                {
+                                    return new v128((ushort)maxmath.perm((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 2), (ulong)extract_epi16(k, 2), maxmath.Promise.Unsafe1),
+                                                    (ushort)(elements == 4 ? maxmath.perm((ulong)extract_epi16(n, 3), (ulong)extract_epi16(k, 3), maxmath.Promise.Unsafe1) : 0),
+                                                    0,
+                                                    0,
+                                                    0,
+                                                    0);
+                                }
+                            }
+
+                            default:
+                            {
+                                if (Avx2.IsAvx2Supported)
+                                {
+                                    v256 n64Lo = Avx2.mm256_cvtepu16_epi64(n);
+                                    v256 k64Lo = Avx2.mm256_cvtepu16_epi64(k);
+                                    v256 n64Hi = Avx2.mm256_cvtepu16_epi64(Sse2.bsrli_si128(n, 4 * sizeof(ushort)));
+                                    v256 k64Hi = Avx2.mm256_cvtepu16_epi64(Sse2.bsrli_si128(k, 4 * sizeof(ushort)));
+
+                                    v256 result64Lo = mm256_naiveperm_epu64(n64Lo, k64Lo);
+                                    v256 result64Hi = mm256_naiveperm_epu64(n64Hi, k64Hi);
+
+                                    return Sse2.unpacklo_epi64(mm256_cvtepi64_epi16(result64Lo), mm256_cvtepi64_epi16(result64Hi));
+                                }
+                                else
+                                {
+                                    return new v128((ushort)maxmath.perm((ulong)extract_epi16(n, 0), (ulong)extract_epi16(k, 0), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 1), (ulong)extract_epi16(k, 1), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 2), (ulong)extract_epi16(k, 2), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 3), (ulong)extract_epi16(k, 3), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 4), (ulong)extract_epi16(k, 4), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 5), (ulong)extract_epi16(k, 5), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 6), (ulong)extract_epi16(k, 6), maxmath.Promise.Unsafe1),
+                                                    (ushort)maxmath.perm((ulong)extract_epi16(n, 7), (ulong)extract_epi16(k, 7), maxmath.Promise.Unsafe1));
+                                }
+                            }
+                        }
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v128 ZERO = Sse2.setzero_si128();
+
+                v128 resultsFoundMask = Sse2.cmpeq_epi16(k, ZERO);
+                v128 results = Sse2.sub_epi16(ZERO, resultsFoundMask);
+
+                k = dec_epi16(k);
+                v128 p = n;
+                n = dec_epi16(n);
+
+                v128 cmp = Sse2.cmpeq_epi16(ZERO, k);
+                results = blendv_si128(results, p, cmp);
+                resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+
+                while (Hint.Likely(notalltrue_epi128(resultsFoundMask, elements)))
+                {
+                    k = dec_epi16(k);
+                    p = Sse2.mullo_epi16(p, n);
+                    n = dec_epi16(n);
+
+                    cmp = Sse2.cmpeq_epi16(ZERO, k);
+                    results = blendv_si128(results, p, cmp);
+                    resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for 16 unsigned shorts (AVX2).
+        // Mirrors perm_epu16 at 256 bit width: factorial fast paths at the narrowest
+        // safe lane width, otherwise an iterative falling-product loop.
+        public static v256 mm256_perm_epu16(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+Assert.IsNotGreater(k.UShort0, n.UShort0);
+Assert.IsNotGreater(k.UShort1, n.UShort1);
+Assert.IsNotGreater(k.UShort2, n.UShort2);
+Assert.IsNotGreater(k.UShort3, n.UShort3);
+Assert.IsNotGreater(k.UShort4, n.UShort4);
+Assert.IsNotGreater(k.UShort5, n.UShort5);
+Assert.IsNotGreater(k.UShort6, n.UShort6);
+Assert.IsNotGreater(k.UShort7, n.UShort7);
+Assert.IsNotGreater(k.UShort8, n.UShort8);
+Assert.IsNotGreater(k.UShort9, n.UShort9);
+Assert.IsNotGreater(k.UShort10, n.UShort10);
+Assert.IsNotGreater(k.UShort11, n.UShort11);
+Assert.IsNotGreater(k.UShort12, n.UShort12);
+Assert.IsNotGreater(k.UShort13, n.UShort13);
+Assert.IsNotGreater(k.UShort14, n.UShort14);
+Assert.IsNotGreater(k.UShort15, n.UShort15);
+
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U64))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U32))
+                    {
+                        if (unsafeLevels > 2 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U16))
+                        {
+                            return mm256_naiveperm_epu16(n, k, unsafeLevels > 3 || constexpr.ALL_LE_EPU16(n, MAX_INVERSE_FACTORIAL_U8));
+                        }
+                        else
+                        {
+                            // Widen to 32 bits per lane, compute, narrow back.
+                            v256 nLo32 = mm256_cvt2x2epu16_epi32(n, out v256 nHi32);
+                            v256 kLo32 = mm256_cvt2x2epu16_epi32(k, out v256 kHi32);
+
+                            v256 resultLo = mm256_naiveperm_epu32(nLo32, kLo32);
+                            v256 resultHi = mm256_naiveperm_epu32(nHi32, kHi32);
+
+                            return mm256_cvt2x2epi32_epi16(resultLo, resultHi);
+                        }
+                    }
+                    else
+                    {
+                        // Widen to 64 bits per lane (16 -> 32 -> 64), compute, narrow back down.
+                        v256 nLo32 = mm256_cvt2x2epu16_epi32(n, out v256 nHi32);
+                        v256 kLo32 = mm256_cvt2x2epu16_epi32(k, out v256 kHi32);
+                        v256 n64LoLo = mm256_cvt2x2epu32_epi64(nLo32, out v256 n64LoHi);
+                        v256 n64HiLo = mm256_cvt2x2epu32_epi64(nHi32, out v256 n64HiHi);
+                        v256 k64LoLo = mm256_cvt2x2epu32_epi64(kLo32, out v256 k64LoHi);
+                        v256 k64HiLo = mm256_cvt2x2epu32_epi64(kHi32, out v256 k64HiHi);
+
+                        v256 resultLoLo = mm256_naiveperm_epu64(n64LoLo, k64LoLo);
+                        v256 resultLoHi = mm256_naiveperm_epu64(n64LoHi, k64LoHi);
+                        v256 resultHiLo = mm256_naiveperm_epu64(n64HiLo, k64HiLo);
+                        v256 resultHiHi = mm256_naiveperm_epu64(n64HiHi, k64HiHi);
+
+                        v256 result32Lo = mm256_cvt2x2epi64_epi32(resultLoLo, resultLoHi);
+                        v256 result32Hi = mm256_cvt2x2epi64_epi32(resultHiLo, resultHiHi);
+
+                        return mm256_cvt2x2epi32_epi16(result32Lo, result32Hi);
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v256 ZERO = Avx.mm256_setzero_si256();
+
+                v256 resultsFoundMask = Avx2.mm256_cmpeq_epi16(k, ZERO);
+                v256 results = Avx2.mm256_sub_epi16(ZERO, resultsFoundMask);
+
+                k = mm256_dec_epi16(k);
+                v256 p = n;
+                n = mm256_dec_epi16(n);
+
+                v256 cmp = Avx2.mm256_cmpeq_epi16(ZERO, k);
+                results = mm256_blendv_si256(results, p, cmp);
+                resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+
+                while (Hint.Likely(mm256_notalltrue_epi256(resultsFoundMask, 16)))
+                {
+                    k = mm256_dec_epi16(k);
+                    p = Avx2.mm256_mullo_epi16(p, n);
+                    n = mm256_dec_epi16(n);
+
+                    cmp = Avx2.mm256_cmpeq_epi16(ZERO, k);
+                    results = mm256_blendv_si256(results, p, cmp);
+                    resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for up to 4 unsigned ints.
+        public static v128 perm_epu32(v128 n, v128 k, byte unsafeLevels = 0, byte elements = 4)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+Assert.IsNotGreater(k.UInt0, n.UInt0);
+Assert.IsNotGreater(k.UInt1, n.UInt1);
+if (elements > 2)
+{
+    Assert.IsNotGreater(k.UInt2, n.UInt2);
+}
+if (elements > 3)
+{
+    Assert.IsNotGreater(k.UInt3, n.UInt3);
+}
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U64, elements))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U32, elements))
+                    {
+                        return naiveperm_epu32(n, k, elements);
+                    }
+                    else
+                    {
+                        // n! needs 64 bits: compute per element as scalar u64 perms (or via
+                        // AVX2 64 bit vectors when available) and repack into 32 bit lanes.
+                        if (elements > 2)
+                        {
+                            if (Avx2.IsAvx2Supported)
+                            {
+                                return mm256_cvtepi64_epi32(mm256_naiveperm_epu64(Avx2.mm256_cvtepu32_epi64(n), Avx2.mm256_cvtepu32_epi64(k), elements));
+                            }
+                            else
+                            {
+                                v128 lo = Sse2.unpacklo_epi32(Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 0), (ulong)extract_epi32(k, 0), maxmath.Promise.Unsafe1)),
+                                                              Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 1), (ulong)extract_epi32(k, 1), maxmath.Promise.Unsafe1)));
+                                v128 hi = Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 2), (ulong)extract_epi32(k, 2), maxmath.Promise.Unsafe1));
+
+                                if (elements == 4)
+                                {
+                                    hi = Sse2.unpacklo_epi32(hi, Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 3), (ulong)extract_epi32(k, 3), maxmath.Promise.Unsafe1)));
+                                }
+
+                                return Sse2.unpacklo_epi64(lo, hi);
+                            }
+                        }
+                        else
+                        {
+                            return Sse2.unpacklo_epi32(Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 0), (ulong)extract_epi32(k, 0), maxmath.Promise.Unsafe1)),
+                                                       Sse2.cvtsi32_si128((int)maxmath.perm((ulong)extract_epi32(n, 1), (ulong)extract_epi32(k, 1), maxmath.Promise.Unsafe1)));
+                        }
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v128 ZERO = Sse2.setzero_si128();
+
+                v128 resultsFoundMask = Sse2.cmpeq_epi32(k, ZERO);
+                v128 results = Sse2.sub_epi32(ZERO, resultsFoundMask);
+
+                k = dec_epi32(k);
+                v128 p = n;
+                n = dec_epi32(n);
+
+                v128 cmp = Sse2.cmpeq_epi32(ZERO, k);
+                results = blendv_si128(results, p, cmp);
+                resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+
+                while (Hint.Likely(notalltrue_epi128(resultsFoundMask, elements)))
+                {
+                    k = dec_epi32(k);
+                    p = mullo_epi32(p, n, elements);
+                    n = dec_epi32(n);
+
+                    cmp = Sse2.cmpeq_epi32(ZERO, k);
+                    results = blendv_si128(results, p, cmp);
+                    resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for 8 unsigned ints (AVX2).
+        public static v256 mm256_perm_epu32(v256 n, v256 k, byte unsafeLevels = 0)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+Assert.IsNotGreater(k.UInt0, n.UInt0);
+Assert.IsNotGreater(k.UInt1, n.UInt1);
+Assert.IsNotGreater(k.UInt2, n.UInt2);
+Assert.IsNotGreater(k.UInt3, n.UInt3);
+Assert.IsNotGreater(k.UInt4, n.UInt4);
+Assert.IsNotGreater(k.UInt5, n.UInt5);
+Assert.IsNotGreater(k.UInt6, n.UInt6);
+Assert.IsNotGreater(k.UInt7, n.UInt7);
+
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U64))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU32(n, MAX_INVERSE_FACTORIAL_U32))
+                    {
+                        return mm256_naiveperm_epu32(n, k);
+                    }
+                    else
+                    {
+                        // Widen to 64 bits per lane, compute, narrow back.
+                        v256 n64Lo = mm256_cvt2x2epu32_epi64(n, out v256 n64Hi);
+                        v256 k64Lo = mm256_cvt2x2epu32_epi64(k, out v256 k64Hi);
+
+                        v256 resultLo = mm256_naiveperm_epu64(n64Lo, k64Lo);
+                        v256 resultHi = mm256_naiveperm_epu64(n64Hi, k64Hi);
+
+                        return mm256_cvt2x2epi64_epi32(resultLo, resultHi);
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v256 ZERO = Avx.mm256_setzero_si256();
+
+                v256 resultsFoundMask = Avx2.mm256_cmpeq_epi32(k, ZERO);
+                v256 results = Avx2.mm256_sub_epi32(ZERO, resultsFoundMask);
+
+                k = mm256_dec_epi32(k);
+                v256 p = n;
+                n = mm256_dec_epi32(n);
+
+                v256 cmp = Avx2.mm256_cmpeq_epi32(ZERO, k);
+                results = mm256_blendv_si256(results, p, cmp);
+                resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+
+                while (Hint.Likely(mm256_notalltrue_epi256(resultsFoundMask, 8)))
+                {
+                    k = mm256_dec_epi32(k);
+                    p = Avx2.mm256_mullo_epi32(p, n);
+                    n = mm256_dec_epi32(n);
+
+                    cmp = Avx2.mm256_cmpeq_epi32(ZERO, k);
+                    results = mm256_blendv_si256(results, p, cmp);
+                    resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for 2 unsigned longs.
+        public static v128 perm_epu64(v128 n, v128 k, byte unsafeLevels = 0)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+Assert.IsNotGreater(k.ULong0, n.ULong0);
+Assert.IsNotGreater(k.ULong1, n.ULong1);
+
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U64))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U32))
+                    {
+                        // nPk = n! / (n - k)!: factorials via the gamma helper, divided in
+                        // double precision and truncated back to u64 lanes.
+                        v128 nFactorial = gamma_epu64(n, true);
+                        v128 nkFactorial = gamma_epu64(Sse2.sub_epi64(n, k), true);
+
+                        return usfcvttpd_epu64(Sse2.div_pd(usfcvtepu64_pd(nFactorial), usfcvtepu64_pd(nkFactorial)));
+                    }
+                    else
+                    {
+                        // Per-element scalar u64 perms, repacked into the two lanes.
+                        return Sse2.unpacklo_epi64(Sse2.cvtsi64x_si128((long)maxmath.perm(extract_epi64(n, 0), extract_epi64(k, 0), maxmath.Promise.Unsafe1)),
+                                                   Sse2.cvtsi64x_si128((long)maxmath.perm(extract_epi64(n, 1), extract_epi64(k, 1), maxmath.Promise.Unsafe1)));
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v128 ZERO = Sse2.setzero_si128();
+
+                v128 resultsFoundMask = cmpeq_epi64(k, ZERO);
+                v128 results = Sse2.sub_epi64(ZERO, resultsFoundMask);
+
+                k = dec_epi64(k);
+                v128 p = n;
+                n = dec_epi64(n);
+
+                v128 cmp = cmpeq_epi64(ZERO, k);
+                results = blendv_si128(results, p, cmp);
+                resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+
+                while (Hint.Likely(notalltrue_epi128(resultsFoundMask, 2)))
+                {
+                    k = dec_epi64(k);
+                    p = mullo_epi64(p, n);
+                    n = dec_epi64(n);
+
+                    cmp = cmpeq_epi64(ZERO, k);
+                    results = blendv_si128(results, p, cmp);
+                    resultsFoundMask = Sse2.or_si128(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        // Per-lane number of k-permutations of n (nPk) for up to 4 unsigned longs (AVX2).
+        public static v256 mm256_perm_epu64(v256 n, v256 k, byte unsafeLevels = 0, byte elements = 4)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+Assert.IsNotGreater(k.ULong0, n.ULong0);
+Assert.IsNotGreater(k.ULong1, n.ULong1);
+Assert.IsNotGreater(k.ULong2, n.ULong2);
+if (elements > 3)
+{
+    Assert.IsNotGreater(k.ULong3, n.ULong3);
+}
+                if (unsafeLevels > 0 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U64, elements))
+                {
+                    if (unsafeLevels > 1 || constexpr.ALL_LE_EPU64(n, MAX_INVERSE_FACTORIAL_U32, elements))
+                    {
+                        // nPk = n! / (n - k)!: factorials via the gamma helper, divided in
+                        // double precision and truncated back to u64 lanes.
+                        v256 nFactorial = mm256_gamma_epu64(n, true);
+                        v256 nkFactorial = mm256_gamma_epu64(Avx2.mm256_sub_epi64(n, k), true);
+
+                        return mm256_usfcvttpd_epu64(Avx.mm256_div_pd(mm256_usfcvtepu64_pd(nFactorial), mm256_usfcvtepu64_pd(nkFactorial)));
+                    }
+                    else
+                    {
+                        return mm256_naiveperm_epu64(n, k, elements);
+                    }
+                }
+
+
+                // Fallback: iterative falling product; lanes freeze once their k hits 0.
+                v256 ZERO = Avx.mm256_setzero_si256();
+
+                v256 resultsFoundMask = Avx2.mm256_cmpeq_epi64(k, ZERO);
+                v256 results = Avx2.mm256_sub_epi64(ZERO, resultsFoundMask);
+
+                k = mm256_dec_epi64(k);
+                v256 p = n;
+                n = mm256_dec_epi64(n);
+
+                v256 cmp = Avx2.mm256_cmpeq_epi64(ZERO, k);
+                results = mm256_blendv_si256(results, p, cmp);
+                resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+
+                while (Hint.Likely(mm256_notalltrue_epi256(resultsFoundMask, elements)))
+                {
+                    k = mm256_dec_epi64(k);
+                    p = mm256_mullo_epi64(p, n, elements);
+                    n = mm256_dec_epi64(n);
+
+                    cmp = Avx2.mm256_cmpeq_epi64(ZERO, k);
+                    results = mm256_blendv_si256(results, p, cmp);
+                    resultsFoundMask = Avx2.mm256_or_si256(resultsFoundMask, cmp);
+                }
+
+                return results;
+            }
+            else throw new IllegalInstructionException();
+        }
+ }
+ }
+
+
+ unsafe public static partial class maxmath
+ {
+        /// <summary>       Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 128 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static UInt128 perm(UInt128 n, UInt128 k, Promise useFactorial = Promise.Nothing)
+        {
+Assert.IsNotGreater(k, n);
+
+            // With a factorial promise (or a provably small n), compute n! / (n - k)!,
+            // at 64 bit width when an additional promise allows it.
+            if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U128))
+            {
+                if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+                {
+                    return factorial(n.lo64, Promise.NoOverflow) / factorial(n.lo64 - k.lo64, Promise.NoOverflow);
+                }
+                else
+                {
+                    return factorial(n, Promise.NoOverflow) / factorial(n - k, Promise.NoOverflow);
+                }
+            }
+
+
+            // Otherwise compute the falling product n * (n-1) * ... * (n-k+1) directly;
+            // nP0 == 1 by definition.
+            if (Hint.Unlikely(k-- == 0))
+            {
+                return 1;
+            }
+            else
+            {
+                UInt128 p = n--;
+
+                while (Hint.Likely(k-- != 0))
+                {
+                    p *= n--;
+                }
+
+                return p;
+            }
+        }
+
+        /// <summary>       Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 128 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static UInt128 perm(Int128 n, Int128 k, Promise useFactorial = Promise.Nothing)
+        {
+Assert.IsTrue(k >= 0);
+Assert.IsTrue(n >= 0);
+
+            // Delegates to the unsigned overload; negative inputs fail the debug asserts above.
+            return perm((UInt128)n, (UInt128)k, useFactorial);
+        }
+
+
+        /// <summary>       Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte perm(byte n, byte k, Promise useFactorial = Promise.Nothing)
+        {
+Assert.IsNotGreater(k, n);
+
+            // Promise ladder: compute n! / (n - k)! at the narrowest width that the
+            // caller's promise (or a constexpr bound on n) guarantees cannot overflow.
+            if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+            {
+                if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+                {
+                    if (useFactorial.CountUnsafeLevels() > 2 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U16))
+                    {
+                        if (useFactorial.CountUnsafeLevels() > 3 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U8))
+                        {
+                            return (byte)(factorial(n, Promise.NoOverflow) / factorial((byte)(n - k), Promise.NoOverflow));
+                        }
+                        else
+                        {
+                            return (byte)(factorial((ushort)n, Promise.NoOverflow) / factorial((ushort)(n - k), Promise.NoOverflow));
+                        }
+                    }
+                    else
+                    {
+                        return (byte)(factorial((uint)n, Promise.NoOverflow) / factorial((uint)(n - k), Promise.NoOverflow));
+                    }
+                }
+                else
+                {
+                    return (byte)(factorial((ulong)n, Promise.NoOverflow) / factorial((ulong)(n - k), Promise.NoOverflow));
+                }
+            }
+
+
+            // No promise: fall back to the iterative 32 bit implementation.
+            return (byte)perm((uint)n, (uint)k);
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte2 perm(byte2 n, byte2 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 2);
+            }
+            else
+            {
+                // Scalar fallback when SSE2 is unavailable.
+                return new byte2(perm(n.x, k.x, useFactorial),
+                                 perm(n.y, k.y, useFactorial));
+            }
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte3 perm(byte3 n, byte3 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 3);
+            }
+            else
+            {
+                // Scalar fallback when SSE2 is unavailable.
+                return new byte3(perm(n.x, k.x, useFactorial),
+                                 perm(n.y, k.y, useFactorial),
+                                 perm(n.z, k.z, useFactorial));
+            }
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte4 perm(byte4 n, byte4 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 4);
+            }
+            else
+            {
+                // Scalar fallback when SSE2 is unavailable.
+                return new byte4(perm(n.x, k.x, useFactorial),
+                                 perm(n.y, k.y, useFactorial),
+                                 perm(n.z, k.z, useFactorial),
+                                 perm(n.w, k.w, useFactorial));
+            }
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte8 perm(byte8 n, byte8 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 8);
+            }
+            else
+            {
+                // Scalar fallback when SSE2 is unavailable.
+                return new byte8(perm(n.x0, k.x0, useFactorial),
+                                 perm(n.x1, k.x1, useFactorial),
+                                 perm(n.x2, k.x2, useFactorial),
+                                 perm(n.x3, k.x3, useFactorial),
+                                 perm(n.x4, k.x4, useFactorial),
+                                 perm(n.x5, k.x5, useFactorial),
+                                 perm(n.x6, k.x6, useFactorial),
+                                 perm(n.x7, k.x7, useFactorial));
+            }
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte16 perm(byte16 n, byte16 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Sse2.IsSse2Supported)
+            {
+                return Xse.perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 16);
+            }
+            else
+            {
+                // Scalar fallback when SSE2 is unavailable.
+                return new byte16(perm(n.x0, k.x0, useFactorial),
+                                  perm(n.x1, k.x1, useFactorial),
+                                  perm(n.x2, k.x2, useFactorial),
+                                  perm(n.x3, k.x3, useFactorial),
+                                  perm(n.x4, k.x4, useFactorial),
+                                  perm(n.x5, k.x5, useFactorial),
+                                  perm(n.x6, k.x6, useFactorial),
+                                  perm(n.x7, k.x7, useFactorial),
+                                  perm(n.x8, k.x8, useFactorial),
+                                  perm(n.x9, k.x9, useFactorial),
+                                  perm(n.x10, k.x10, useFactorial),
+                                  perm(n.x11, k.x11, useFactorial),
+                                  perm(n.x12, k.x12, useFactorial),
+                                  perm(n.x13, k.x13, useFactorial),
+                                  perm(n.x14, k.x14, useFactorial),
+                                  perm(n.x15, k.x15, useFactorial));
+            }
+        }
+
+        /// <summary>       Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".       </summary>
+        /// <remarks>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 16 bit overflow.          </para>
+        /// <para>          A '<see cref="Promise"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 8 bit overflow.          </para>
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte32 perm(byte32 n, byte32 k, Promise useFactorial = Promise.Nothing)
+        {
+            if (Avx2.IsAvx2Supported)
+            {
+                return Xse.mm256_perm_epu8(n, k, unsafeLevels: useFactorial.CountUnsafeLevels());
+            }
+            else
+            {
+                // Split into two 16 byte halves when AVX2 is unavailable.
+                return new byte32(perm(n.v16_0, k.v16_0, useFactorial), perm(n.v16_16, k.v16_16, useFactorial));
+            }
+        }
+
+
+ /// <summary> Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte perm(sbyte n, sbyte k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n);
+Assert.IsNonNegative(k);
+
+ return perm((byte)n, (byte)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 perm(sbyte2 n, sbyte2 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+
+ return perm((byte2)n, (byte2)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 perm(sbyte3 n, sbyte3 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+
+ return perm((byte3)n, (byte3)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 perm(sbyte4 n, sbyte4 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(n.w);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(k.w);
+
+ return perm((byte4)n, (byte4)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 perm(sbyte8 n, sbyte8 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+
+ return perm((byte8)n, (byte8)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte16 perm(sbyte16 n, sbyte16 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+Assert.IsNonNegative(n.x8);
+Assert.IsNonNegative(n.x9);
+Assert.IsNonNegative(n.x10);
+Assert.IsNonNegative(n.x11);
+Assert.IsNonNegative(n.x12);
+Assert.IsNonNegative(n.x13);
+Assert.IsNonNegative(n.x14);
+Assert.IsNonNegative(n.x15);
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+Assert.IsNonNegative(k.x8);
+Assert.IsNonNegative(k.x9);
+Assert.IsNonNegative(k.x10);
+Assert.IsNonNegative(k.x11);
+Assert.IsNonNegative(k.x12);
+Assert.IsNonNegative(k.x13);
+Assert.IsNonNegative(k.x14);
+Assert.IsNonNegative(k.x15);
+
+ return perm((byte16)n, (byte16)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 perm(sbyte32 n, sbyte32 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+Assert.IsNonNegative(n.x8);
+Assert.IsNonNegative(n.x9);
+Assert.IsNonNegative(n.x10);
+Assert.IsNonNegative(n.x11);
+Assert.IsNonNegative(n.x12);
+Assert.IsNonNegative(n.x13);
+Assert.IsNonNegative(n.x14);
+Assert.IsNonNegative(n.x15);
+Assert.IsNonNegative(n.x16);
+Assert.IsNonNegative(n.x17);
+Assert.IsNonNegative(n.x18);
+Assert.IsNonNegative(n.x19);
+Assert.IsNonNegative(n.x20);
+Assert.IsNonNegative(n.x21);
+Assert.IsNonNegative(n.x22);
+Assert.IsNonNegative(n.x23);
+Assert.IsNonNegative(n.x24);
+Assert.IsNonNegative(n.x25);
+Assert.IsNonNegative(n.x26);
+Assert.IsNonNegative(n.x27);
+Assert.IsNonNegative(n.x28);
+Assert.IsNonNegative(n.x29);
+Assert.IsNonNegative(n.x30);
+Assert.IsNonNegative(n.x31);
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+Assert.IsNonNegative(k.x8);
+Assert.IsNonNegative(k.x9);
+Assert.IsNonNegative(k.x10);
+Assert.IsNonNegative(k.x11);
+Assert.IsNonNegative(k.x12);
+Assert.IsNonNegative(k.x13);
+Assert.IsNonNegative(k.x14);
+Assert.IsNonNegative(k.x15);
+Assert.IsNonNegative(k.x16);
+Assert.IsNonNegative(k.x17);
+Assert.IsNonNegative(k.x18);
+Assert.IsNonNegative(k.x19);
+Assert.IsNonNegative(k.x20);
+Assert.IsNonNegative(k.x21);
+Assert.IsNonNegative(k.x22);
+Assert.IsNonNegative(k.x23);
+Assert.IsNonNegative(k.x24);
+Assert.IsNonNegative(k.x25);
+Assert.IsNonNegative(k.x26);
+Assert.IsNonNegative(k.x27);
+Assert.IsNonNegative(k.x28);
+Assert.IsNonNegative(k.x29);
+Assert.IsNonNegative(k.x30);
+Assert.IsNonNegative(k.x31);
+
+ return perm((byte32)n, (byte32)k, useFactorial);
+ }
+
+
+ /// <summary> Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort perm(ushort n, ushort k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U32))
+ {
+ if (useFactorial.CountUnsafeLevels() > 2 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U16))
+ {
+ return (ushort)(factorial(n, Promise.NoOverflow) / factorial((ushort)(n - k), Promise.NoOverflow));
+ }
+ else
+ {
+ return (ushort)(factorial((uint)n, Promise.NoOverflow) / factorial((uint)(n - k), Promise.NoOverflow));
+ }
+ }
+ else
+ {
+ return (ushort)(factorial((ulong)n, Promise.NoOverflow) / factorial((ulong)(n - k), Promise.NoOverflow));
+ }
+ }
+
+
+ return (ushort)perm((uint)n, (uint)k);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 perm(ushort2 n, ushort2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.perm_epu16(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 2);
+ }
+ else
+ {
+ return new ushort2(perm(n.x, k.x, useFactorial),
+ perm(n.y, k.y, useFactorial));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 perm(ushort3 n, ushort3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.perm_epu16(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 3);
+ }
+ else
+ {
+ return new ushort3(perm(n.x, k.x, useFactorial),
+ perm(n.y, k.y, useFactorial),
+ perm(n.z, k.z, useFactorial));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 perm(ushort4 n, ushort4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.perm_epu16(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 4);
+ }
+ else
+ {
+ return new ushort4(perm(n.x, k.x, useFactorial),
+ perm(n.y, k.y, useFactorial),
+ perm(n.z, k.z, useFactorial),
+ perm(n.w, k.w, useFactorial));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 perm(ushort8 n, ushort8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.perm_epu16(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 8);
+ }
+ else
+ {
+ return new ushort8(perm(n.x0, k.x0, useFactorial),
+ perm(n.x1, k.x1, useFactorial),
+ perm(n.x2, k.x2, useFactorial),
+ perm(n.x3, k.x3, useFactorial),
+ perm(n.x4, k.x4, useFactorial),
+ perm(n.x5, k.x5, useFactorial),
+ perm(n.x6, k.x6, useFactorial),
+ perm(n.x7, k.x7, useFactorial));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 perm(ushort16 n, ushort16 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_perm_epu16(n, k, unsafeLevels: useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ushort16(perm(n.v8_0, k.v8_0, useFactorial), perm(n.v8_8, k.v8_8, useFactorial));
+ }
+ }
+
+
+ /// <summary> Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort perm(short n, short k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n);
+Assert.IsNonNegative(k);
+
+ return perm((ushort)n, (ushort)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 perm(short2 n, short2 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+
+ return perm((ushort2)n, (ushort2)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 perm(short3 n, short3 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+
+ return perm((ushort3)n, (ushort3)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 perm(short4 n, short4 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(n.w);
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(k.w);
+
+ return perm((ushort4)n, (ushort4)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 perm(short8 n, short8 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+
+ return perm((ushort8)n, (ushort8)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe2"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 16 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe3"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 8 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 perm(short16 n, short16 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+Assert.IsNonNegative(n.x8);
+Assert.IsNonNegative(n.x9);
+Assert.IsNonNegative(n.x10);
+Assert.IsNonNegative(n.x11);
+Assert.IsNonNegative(n.x12);
+Assert.IsNonNegative(n.x13);
+Assert.IsNonNegative(n.x14);
+Assert.IsNonNegative(n.x15);
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+Assert.IsNonNegative(k.x8);
+Assert.IsNonNegative(k.x9);
+Assert.IsNonNegative(k.x10);
+Assert.IsNonNegative(k.x11);
+Assert.IsNonNegative(k.x12);
+Assert.IsNonNegative(k.x13);
+Assert.IsNonNegative(k.x14);
+Assert.IsNonNegative(k.x15);
+
+ return perm((ushort16)n, (ushort16)k, useFactorial);
+ }
+
+
+ /// <summary> Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 128 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint perm(uint n, uint k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U128))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ return (uint)(factorial((ulong)n, Promise.NoOverflow) / factorial((ulong)(n - k), Promise.NoOverflow));
+ }
+ else
+ {
+ return (uint)(factorial((UInt128)n, Promise.NoOverflow) / factorial((UInt128)(n - k), Promise.NoOverflow));
+ }
+ }
+
+
+ if (Hint.Unlikely(k-- == 0))
+ {
+ return 1;
+ }
+ else
+ {
+ uint p = n--;
+
+ while (Hint.Likely(k-- != 0))
+ {
+ p *= n--;
+ }
+
+ return p;
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 perm(uint2 n, uint2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.perm_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 2));
+ }
+ else
+ {
+ return new uint2(perm(n.x, k.x, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.y, k.y, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 perm(uint3 n, uint3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.perm_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 3));
+ }
+ else
+ {
+ return new uint3(perm(n.x, k.x, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.y, k.y, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.z, k.z, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 perm(uint4 n, uint4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.perm_epu32(RegisterConversion.ToV128(n), RegisterConversion.ToV128(k), unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 4));
+ }
+ else
+ {
+ return new uint4(perm(n.x, k.x, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.y, k.y, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.z, k.z, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.w, k.w, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 perm(uint8 n, uint8 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_perm_epu32(n, k, unsafeLevels: useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new uint8(perm(n.v4_0, k.v4_0, useFactorial), perm(n.v4_4, k.v4_4, useFactorial));
+ }
+ }
+
+
+ /// <summary> Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 128 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint perm(int n, int k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+ return perm((uint)n, (uint)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 perm(int2 n, int2 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+
+ return perm((uint2)n, (uint2)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 perm(int3 n, int3 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+
+ return perm((uint3)n, (uint3)k, useFactorial);
+ }
+
+ /// <summary> Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>". </summary>
+ /// <remarks>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 64 bit overflow. </para>
+ /// <para> A '<paramref name="useFactorial"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! and (<paramref name="n"/> - <paramref name="k"/>)! that result in an unsigned 32 bit overflow. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 perm(int4 n, int4 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(k.w);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(n.w);
+
+ return perm((uint4)n, (uint4)k, useFactorial);
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 perm(int8 n, int8 k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k.x0);
+Assert.IsNonNegative(k.x1);
+Assert.IsNonNegative(k.x2);
+Assert.IsNonNegative(k.x3);
+Assert.IsNonNegative(k.x4);
+Assert.IsNonNegative(k.x5);
+Assert.IsNonNegative(k.x6);
+Assert.IsNonNegative(k.x7);
+Assert.IsNonNegative(n.x0);
+Assert.IsNonNegative(n.x1);
+Assert.IsNonNegative(n.x2);
+Assert.IsNonNegative(n.x3);
+Assert.IsNonNegative(n.x4);
+Assert.IsNonNegative(n.x5);
+Assert.IsNonNegative(n.x6);
+Assert.IsNonNegative(n.x7);
+
+ return perm((uint8)n, (uint8)k, useFactorial);
+ }
+
+
+        /// Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 128 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong perm(ulong n, ulong k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNotGreater(k, n);
+
+ if (useFactorial.CountUnsafeLevels() > 0 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U128))
+ {
+ if (useFactorial.CountUnsafeLevels() > 1 || Xse.constexpr.IS_TRUE(n <= MAX_INVERSE_FACTORIAL_U64))
+ {
+ return factorial(n, Promise.NoOverflow) / factorial(n - k, Promise.NoOverflow);
+ }
+ else
+ {
+ return (ulong)(factorial((UInt128)n, Promise.NoOverflow) / factorial((UInt128)(n - k), Promise.NoOverflow));
+ }
+ }
+
+
+ if (Hint.Unlikely(k-- == 0))
+ {
+ return 1;
+ }
+ else
+ {
+ ulong p = n--;
+
+ while (Hint.Likely(k-- != 0))
+ {
+ p *= n--;
+ }
+
+ return p;
+ }
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 perm(ulong2 n, ulong2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ulong2(perm(n.x, k.x, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.y, k.y, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 perm(ulong3 n, ulong3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 3);
+ }
+ else
+ {
+ return new ulong3(perm(n.xy, k.xy, useFactorial),
+ perm(n.z, k.z, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 perm(ulong4 n, ulong4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 4);
+ }
+ else
+ {
+ return new ulong4(perm(n.xy, k.xy, useFactorial),
+ perm(n.zw, k.zw, useFactorial));
+ }
+ }
+
+
+        /// Returns the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 128 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong perm(long n, long k, Promise useFactorial = Promise.Nothing)
+ {
+Assert.IsNonNegative(k);
+Assert.IsNonNegative(n);
+
+ return perm((ulong)n, (ulong)k, useFactorial);
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 perm(long2 n, long2 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+
+ return Xse.perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels());
+ }
+ else
+ {
+ return new ulong2(perm(n.x, k.x, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing),
+ perm(n.y, k.y, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 perm(long3 n, long3 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+
+ return Xse.mm256_perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 3);
+ }
+ else
+ {
+ return new ulong3(perm(n.xy, k.xy, useFactorial),
+ perm(n.z, k.z, useFactorial.CountUnsafeLevels() != 0 ? Promise.Unsafe1 : Promise.Nothing));
+ }
+ }
+
+        /// Returns for each pair of corresponding components the number of ways to choose <paramref name="k"/> items from <paramref name="n"/> items without repetition and with order. Also known as "<paramref name="k"/>-permutations of <paramref name="n"/>".
+        /// <remarks>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe0"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 64 bit overflow. </para>
+        /// <para> A <paramref name="useFactorial"/> '<see cref="Promise"/>' with its <see cref="Promise.Unsafe1"/> flag set may cause a memory access violation for any <paramref name="n"/>! that result in an unsigned 32 bit overflow. </para>
+        /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 perm(long4 n, long4 k, Promise useFactorial = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+Assert.IsNonNegative(k.x);
+Assert.IsNonNegative(k.y);
+Assert.IsNonNegative(k.z);
+Assert.IsNonNegative(k.w);
+Assert.IsNonNegative(n.x);
+Assert.IsNonNegative(n.y);
+Assert.IsNonNegative(n.z);
+Assert.IsNonNegative(n.w);
+
+ return Xse.mm256_perm_epu64(n, k, unsafeLevels: useFactorial.CountUnsafeLevels(), elements: 4);
+ }
+ else
+ {
+ return new ulong4(perm(n.xy, k.xy, useFactorial),
+ perm(n.zw, k.zw, useFactorial));
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/Tests/Editor/Testing Tools.cs.meta b/Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs.meta
similarity index 83%
rename from Tests/Editor/Testing Tools.cs.meta
rename to Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs.meta
index 352ae19..8dc180b 100644
--- a/Tests/Editor/Testing Tools.cs.meta
+++ b/Runtime/Math Lib/Functions/Arithmetic/Integer/k-permutations of n.cs.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: b9da01bff00a69e47953de66b0c760cb
+guid: 126706513ceea4640acc7beced516d77
MonoImporter:
externalObjects: {}
serializedVersion: 2
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs b/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs
new file mode 100644
index 0000000..56ec156
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs
@@ -0,0 +1,1714 @@
+using System.Runtime.CompilerServices;
+using Unity.Burst.Intrinsics;
+using Unity.Mathematics;
+using MaxMath.Intrinsics;
+
+using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FLOATING_POINT;
+
+namespace MaxMath
+{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 inc_pq(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PQ(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PQ(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PQ(a, 0f, elements);
+ //promiseNotNanInf |= constexpr.ALL_NOTNAN_PQ(a, elements) && constexpr.ALL_NEQ_PQ(a, quarter.PositiveInfinity, elements) && constexpr.ALL_NEQ_PH(a, quarter.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi8(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_epi8(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi8(a, 7), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi8(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi8(unchecked((sbyte)(1 << 7)));
+
+ v128 negative0 = Sse2.cmpeq_epi8(a, NEGATIVE_ZERO);
+ summand = ternarylogic_si128(negative0, srai_epi8(a, 7), ONE, TernaryOperation.OxAE);
+ a = Sse2.andnot_si128(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi8(quarter.SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi8(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi8(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi8(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 inc_ph(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 8)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PH(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PH(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PH(a, 0f, elements);
+ //promiseNotNanInf |= constexpr.ALL_NOTNAN_PH(a, elements) && constexpr.ALL_NEQ_PH(a, (half)float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PH(a, (half)float.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi16(1);
+
+ summand = Sse2.or_si128(Sse2.srai_epi16(a, 15), ONE);
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi16(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi16(unchecked((short)(1 << 15)));
+
+ v128 negative0 = Sse2.cmpeq_epi16(a, NEGATIVE_ZERO);
+ summand = ternarylogic_si128(negative0, Sse2.srai_epi16(a, 15), ONE, TernaryOperation.OxAE);
+ a = Sse2.andnot_si128(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi16((short)F16_SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi16(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi16(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi16(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 inc_ps(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f, elements);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f, elements);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a, elements) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi32(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_ps(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(Sse2.srai_epi32(a, 31), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi32(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi32(1 << 31);
+
+ v128 negative0 = Sse2.cmpeq_epi32(a, NEGATIVE_ZERO);
+ summand = ternarylogic_si128(negative0, Sse2.srai_epi32(a, 31), ONE, TernaryOperation.OxAE);
+ a = Sse2.andnot_si128(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi32(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_inc_ps(v256 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity);
+
+ v256 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi32(1);
+
+ summand = Avx.mm256_blendv_ps(ONE, mm256_setall_si256(), a);
+ }
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi32(1);
+ v256 NEGATIVE_ZERO = Avx.mm256_set1_epi32(1 << 31);
+
+ v256 negative0 = Avx2.mm256_cmpeq_epi32(a, NEGATIVE_ZERO);
+ summand = mm256_ternarylogic_si256(negative0, Avx2.mm256_srai_epi32(a, 31), ONE, TernaryOperation.OxAE);
+ a = Avx2.mm256_andnot_si256(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v256 nanInf = Avx2.mm256_cmpeq_epi32(SIGNALING_EXPONENT, Avx2.mm256_and_si256(a, SIGNALING_EXPONENT));
+ summand = Avx2.mm256_andnot_si256(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_sub_epi32(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_add_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 inc_pd(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0d);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0d);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0d);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi64x(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_pd(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi64(a, 63), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi64x(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi64x(1L << 63);
+
+ v128 negative0 = cmpeq_epi64(a, NEGATIVE_ZERO);
+ summand = ternarylogic_si128(negative0, srai_epi64(a, 63), ONE, TernaryOperation.OxAE);
+ a = Sse2.andnot_si128(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v128 nanInf;
+ if (Sse4_1.IsSse41Supported)
+ {
+ nanInf = Sse4_1.cmpeq_epi64(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ }
+ else
+ {
+ nanInf = Sse2.shuffle_epi32(Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT)), Sse.SHUFFLE(3, 3, 1, 1));
+ }
+
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi64(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_inc_pd(v256 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0d, elements);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0d, elements);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0d, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a, elements) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity, elements) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity, elements);
+
+ v256 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+
+ summand = Avx.mm256_blendv_pd(ONE, mm256_setall_si256(), a);
+ }
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+ v256 NEGATIVE_ZERO = Avx.mm256_set1_epi64x(1L << 63);
+
+ v256 negative0 = Avx2.mm256_cmpeq_epi64(a, NEGATIVE_ZERO);
+ summand = mm256_ternarylogic_si256(negative0, mm256_srai_epi64(a, 63), ONE, TernaryOperation.OxAE);
+ a = Avx2.mm256_andnot_si256(negative0, a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v256 nanInf = Avx2.mm256_cmpeq_epi64(SIGNALING_EXPONENT, Avx2.mm256_and_si256(a, SIGNALING_EXPONENT));
+ summand = Avx2.mm256_andnot_si256(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_sub_epi64(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_add_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+ }
+ }
+
+
+ unsafe public static partial class maxmath
+ {
+        /// Returns the next closest <see cref="UInt128"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static UInt128 nextgreater(UInt128 x)
+ {
+ return x + tobyte(x != UInt128.MaxValue);
+ }
+
+
+        /// Returns the next closest <see cref="Int128"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Int128 nextgreater(Int128 x)
+ {
+ return x + tobyte(x != Int128.MaxValue);
+ }
+
+
+        /// Returns the next closest <see cref="byte"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte nextgreater(byte x)
+ {
+ return (byte)(x + tobyte(x != byte.MaxValue));
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 nextgreater(byte2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu8(x);
+ }
+ else
+ {
+ return new byte2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 nextgreater(byte3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu8(x);
+ }
+ else
+ {
+ return new byte3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 nextgreater(byte4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu8(x);
+ }
+ else
+ {
+ return new byte4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 nextgreater(byte8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu8(x);
+ }
+ else
+ {
+ return new byte8(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte16 nextgreater(byte16 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu8(x);
+ }
+ else
+ {
+ return new byte16(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7),
+ nextgreater(x.x8),
+ nextgreater(x.x9),
+ nextgreater(x.x10),
+ nextgreater(x.x11),
+ nextgreater(x.x12),
+ nextgreater(x.x13),
+ nextgreater(x.x14),
+ nextgreater(x.x15));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 nextgreater(byte32 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epu8(x);
+ }
+ else
+ {
+ return new byte32(nextgreater(x.v16_0),
+ nextgreater(x.v16_16));
+ }
+ }
+
+
+        /// Returns the next closest <see cref="sbyte"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte nextgreater(sbyte x)
+ {
+ return (sbyte)(x + tosbyte(x != sbyte.MaxValue));
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte2 nextgreater(sbyte2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte3 nextgreater(sbyte3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte4 nextgreater(sbyte4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte8 nextgreater(sbyte8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte8(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte16 nextgreater(sbyte16 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte16(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7),
+ nextgreater(x.x8),
+ nextgreater(x.x9),
+ nextgreater(x.x10),
+ nextgreater(x.x11),
+ nextgreater(x.x12),
+ nextgreater(x.x13),
+ nextgreater(x.x14),
+ nextgreater(x.x15));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte32 nextgreater(sbyte32 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epi8(x);
+ }
+ else
+ {
+ return new sbyte32(nextgreater(x.v16_0),
+ nextgreater(x.v16_16));
+ }
+ }
+
+
+        /// Returns the next closest <see cref="ushort"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort nextgreater(ushort x)
+ {
+ return (ushort)(x + tobyte(x != ushort.MaxValue));
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 nextgreater(ushort2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu16(x);
+ }
+ else
+ {
+ return new ushort2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 nextgreater(ushort3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu16(x);
+ }
+ else
+ {
+ return new ushort3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 nextgreater(ushort4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu16(x);
+ }
+ else
+ {
+ return new ushort4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 nextgreater(ushort8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu16(x);
+ }
+ else
+ {
+ return new ushort8(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 nextgreater(ushort16 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epu16(x);
+ }
+ else
+ {
+ return new ushort16(nextgreater(x.v8_0),
+ nextgreater(x.v8_8));
+ }
+ }
+
+
+        /// Returns the next closest <see cref="short"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short nextgreater(short x)
+ {
+ return (short)(x + tobyte(x != short.MaxValue));
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short2 nextgreater(short2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi16(x);
+ }
+ else
+ {
+ return new short2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short3 nextgreater(short3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi16(x);
+ }
+ else
+ {
+ return new short3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short4 nextgreater(short4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi16(x);
+ }
+ else
+ {
+ return new short4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short8 nextgreater(short8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi16(x);
+ }
+ else
+ {
+ return new short8(nextgreater(x.x0),
+ nextgreater(x.x1),
+ nextgreater(x.x2),
+ nextgreater(x.x3),
+ nextgreater(x.x4),
+ nextgreater(x.x5),
+ nextgreater(x.x6),
+ nextgreater(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short16 nextgreater(short16 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epi16(x);
+ }
+ else
+ {
+ return new short16(nextgreater(x.v8_0),
+ nextgreater(x.v8_8));
+ }
+ }
+
+
+        /// Returns the next closest <see cref="uint"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint nextgreater(uint x)
+ {
+ return x + tobyte(x != uint.MaxValue);
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 nextgreater(uint2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.incs_epu32(RegisterConversion.ToV128(x), 2));
+ }
+ else
+ {
+ return new uint2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 nextgreater(uint3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.incs_epu32(RegisterConversion.ToV128(x), 3));
+ }
+ else
+ {
+ return new uint3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 nextgreater(uint4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.incs_epu32(RegisterConversion.ToV128(x), 4));
+ }
+ else
+ {
+ return new uint4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest greater than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 nextgreater(uint8 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epu32(x);
+ }
+ else
+ {
+ return new uint8(nextgreater(x.v4_0),
+ nextgreater(x.v4_4));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="int"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int nextgreater(int x)
+ {
+ return x + tobyte(x != int.MaxValue);
+ }
+
+ /// Returns a <see cref="int2"/>, where each component is the next closest <see cref="int"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int2 nextgreater(int2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt2(Xse.incs_epi32(RegisterConversion.ToV128(x), 2));
+ }
+ else
+ {
+ return new int2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a <see cref="int3"/>, where each component is the next closest <see cref="int"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int3 nextgreater(int3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt3(Xse.incs_epi32(RegisterConversion.ToV128(x), 3));
+ }
+ else
+ {
+ return new int3(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a <see cref="int4"/>, where each component is the next closest <see cref="int"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int4 nextgreater(int4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt4(Xse.incs_epi32(RegisterConversion.ToV128(x), 4));
+ }
+ else
+ {
+ return new int4(nextgreater(x.x),
+ nextgreater(x.y),
+ nextgreater(x.z),
+ nextgreater(x.w));
+ }
+ }
+
+ /// Returns a <see cref="int8"/>, where each component is the next closest <see cref="int"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int8 nextgreater(int8 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epi32(x);
+ }
+ else
+ {
+ return new int8(nextgreater(x.v4_0),
+ nextgreater(x.v4_4));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="ulong"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong nextgreater(ulong x)
+ {
+ return x + tobyte(x != ulong.MaxValue);
+ }
+
+ /// Returns a <see cref="ulong2"/>, where each component is the next closest <see cref="ulong"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 nextgreater(ulong2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epu64(x);
+ }
+ else
+ {
+ return new ulong2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a <see cref="ulong3"/>, where each component is the next closest <see cref="ulong"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 nextgreater(ulong3 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epu64(x, 3);
+ }
+ else
+ {
+ return new ulong3(nextgreater(x.xy),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a <see cref="ulong4"/>, where each component is the next closest <see cref="ulong"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 nextgreater(ulong4 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epu64(x, 4);
+ }
+ else
+ {
+ return new ulong4(nextgreater(x.xy),
+ nextgreater(x.zw));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="long"/> greater than <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long nextgreater(long x)
+ {
+ return x + tobyte(x != long.MaxValue);
+ }
+
+ /// Returns a <see cref="long2"/>, where each component is the next closest <see cref="long"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long2 nextgreater(long2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.incs_epi64(x);
+ }
+ else
+ {
+ return new long2(nextgreater(x.x),
+ nextgreater(x.y));
+ }
+ }
+
+ /// Returns a <see cref="long3"/>, where each component is the next closest <see cref="long"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long3 nextgreater(long3 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epi64(x, 3);
+ }
+ else
+ {
+ return new long3(nextgreater(x.xy),
+ nextgreater(x.z));
+ }
+ }
+
+ /// Returns a <see cref="long4"/>, where each component is the next closest <see cref="long"/> greater than the corresponding component in <paramref name="x"/>.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long4 nextgreater(long4 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_incs_epi64(x, 4);
+ }
+ else
+ {
+ return new long4(nextgreater(x.xy),
+ nextgreater(x.zw));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="quarter"/> greater than <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter nextgreater(quarter x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return asquarter(Xse.inc_pq(Sse2.cvtsi32_si128(x.value),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Byte0);
+ }
+ else
+ {
+ int __x = assbyte(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int notNegative0 = -tobyte((uint)__x != 0xFFFF_FF80);
+ summand = 1 | ((__x >> 31) & notNegative0);
+ __x = (__x & notNegative0);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & quarter.SIGNALING_EXPONENT) != quarter.SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return asquarter((byte)(__x - summand));
+ }
+ else
+ {
+ return asquarter((byte)(__x + summand));
+ }
+ }
+ }
+
+ /// Returns a <see cref="quarter2"/>, where each component is the next closest <see cref="quarter"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter2 nextgreater(quarter2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2);
+ }
+ else
+ {
+ return new quarter2(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="quarter3"/>, where each component is the next closest <see cref="quarter"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter3 nextgreater(quarter3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3);
+ }
+ else
+ {
+ return new quarter3(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="quarter4"/>, where each component is the next closest <see cref="quarter"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter4 nextgreater(quarter4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4);
+ }
+ else
+ {
+ return new quarter4(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations),
+ nextgreater(x.w, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="quarter8"/>, where each component is the next closest <see cref="quarter"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter8 nextgreater(quarter8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 8);
+ }
+ else
+ {
+ return new quarter8(nextgreater(x.x0, optimizations),
+ nextgreater(x.x1, optimizations),
+ nextgreater(x.x2, optimizations),
+ nextgreater(x.x3, optimizations),
+ nextgreater(x.x4, optimizations),
+ nextgreater(x.x5, optimizations),
+ nextgreater(x.x6, optimizations),
+ nextgreater(x.x7, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="half"/> greater than <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half nextgreater(half x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return ashalf(Xse.inc_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).UShort0);
+ }
+ else
+ {
+ int __x = asshort(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int notNegative0 = -tobyte((uint)__x != 0xFFFF_8000);
+ summand = 1 | ((__x >> 31) & notNegative0);
+ __x = (__x & notNegative0);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & F16_SIGNALING_EXPONENT) != F16_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return ashalf((ushort)(__x - summand));
+ }
+ else
+ {
+ return ashalf((ushort)(__x + summand));
+ }
+ }
+ }
+
+ /// Returns a <see cref="half2"/>, where each component is the next closest <see cref="half"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half2 nextgreater(half2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf2(Xse.inc_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new half2(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="half3"/>, where each component is the next closest <see cref="half"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half3 nextgreater(half3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf3(Xse.inc_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new half3(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="half4"/>, where each component is the next closest <see cref="half"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half4 nextgreater(half4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf4(Xse.inc_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new half4(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations),
+ nextgreater(x.w, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="half8"/>, where each component is the next closest <see cref="half"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half8 nextgreater(half8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_ph(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 8);
+ }
+ else
+ {
+ return new half8(nextgreater(x.x0, optimizations),
+ nextgreater(x.x1, optimizations),
+ nextgreater(x.x2, optimizations),
+ nextgreater(x.x3, optimizations),
+ nextgreater(x.x4, optimizations),
+ nextgreater(x.x5, optimizations),
+ nextgreater(x.x6, optimizations),
+ nextgreater(x.x7, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="float"/> greater than <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float nextgreater(float x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Float0;
+ }
+ else
+ {
+ int __x = math.asint(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int notNegative0 = -tobyte(__x != 1 << 31);
+ summand = 1 | ((__x >> 31) & notNegative0);
+ __x = (__x & notNegative0);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & F32_SIGNALING_EXPONENT) != F32_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asfloat(__x - summand);
+ }
+ else
+ {
+ return math.asfloat(__x + summand);
+ }
+ }
+ }
+
+ /// Returns a <see cref="float2"/>, where each component is the next closest <see cref="float"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float2 nextgreater(float2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat2(Xse.inc_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new float2(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="float3"/>, where each component is the next closest <see cref="float"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float3 nextgreater(float3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat3(Xse.inc_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new float3(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="float4"/>, where each component is the next closest <see cref="float"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float4 nextgreater(float4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat4(Xse.inc_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new float4(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations),
+ nextgreater(x.z, optimizations),
+ nextgreater(x.w, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="float8"/>, where each component is the next closest <see cref="float"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float8 nextgreater(float8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_inc_ps(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0));
+ }
+ else
+ {
+ return new float8(nextgreater(x.v4_0, optimizations),
+ nextgreater(x.v4_4, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest <see cref="double"/> greater than <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double nextgreater(double x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.inc_pd(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)).Double0;
+ }
+ else
+ {
+ long __x = math.aslong(x);
+ long summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 63);
+ }
+ }
+ else
+ {
+ long notNegative0 = -(long)tobyte(__x != 1L << 63);
+ summand = 1 | ((__x >> 63) & notNegative0);
+ __x = (__x & notNegative0);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ long notNanInf = -(long)tobyte((__x & F64_SIGNALING_EXPONENT) != F64_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asdouble(__x - summand);
+ }
+ else
+ {
+ return math.asdouble(__x + summand);
+ }
+ }
+ }
+
+ /// Returns a <see cref="double2"/>, where each component is the next closest <see cref="double"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double2 nextgreater(double2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToDouble2(Xse.inc_pd(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)));
+ }
+ else
+ {
+ return new double2(nextgreater(x.x, optimizations),
+ nextgreater(x.y, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="double3"/>, where each component is the next closest <see cref="double"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double3 nextgreater(double3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble3(Xse.mm256_inc_pd(RegisterConversion.ToV256(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new double3(nextgreater(x.xy, optimizations),
+ nextgreater(x.z, optimizations));
+ }
+ }
+
+ /// Returns a <see cref="double4"/>, where each component is the next closest <see cref="double"/> greater than the corresponding component in <paramref name="x"/>.
+ /// <remarks>
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0.
+ /// A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double4 nextgreater(double4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble4(Xse.mm256_inc_pd(RegisterConversion.ToV256(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new double4(nextgreater(x.xy, optimizations),
+ nextgreater(x.zw, optimizations));
+ }
+ }
+ }
+}
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs.meta b/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs.meta
new file mode 100644
index 0000000..81b427f
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Greater.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 379c3659eef937746a7295bed42bd163
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs b/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs
new file mode 100644
index 0000000..f13d6cc
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs
@@ -0,0 +1,1720 @@
+using System.Runtime.CompilerServices;
+using Unity.Burst.Intrinsics;
+using Unity.Mathematics;
+using MaxMath.Intrinsics;
+
+using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FLOATING_POINT;
+
+namespace MaxMath
+{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 dec_pq(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PQ(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PQ(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PQ(a, 0f, elements);
+                    //promiseNotNanInf |= constexpr.ALL_NOTNAN_PQ(a, elements) && constexpr.ALL_NEQ_PQ(a, quarter.PositiveInfinity, elements) && constexpr.ALL_NEQ_PQ(a, quarter.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi8(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_epi8(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi8(a, 7), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi8(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi8(unchecked((sbyte)(1 << 7)));
+ v128 SUMMAND_IF_ZERO = Sse2.set1_epi8(unchecked((sbyte)0b1000_0010));
+
+ v128 isZero = Sse2.or_si128(Sse2.cmpeq_epi8(a, Sse2.setzero_si128()), Sse2.cmpeq_epi8(a, NEGATIVE_ZERO));
+ summand = ternarylogic_si128(isZero, srai_epi8(a, 7), ONE, TernaryOperation.OxAE);
+ a = ternarylogic_si128(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi8(quarter.SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi8(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.add_epi8(a, summand);
+ }
+ else
+ {
+ return Sse2.sub_epi8(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 dec_ph(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 8)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PH(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PH(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PH(a, 0f, elements);
+ //promiseNotNanInf |= constexpr.ALL_NOTNAN_PH(a, elements) && constexpr.ALL_NEQ_PH(a, (half)float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PH(a, (half)float.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi16(1);
+
+ summand = Sse2.or_si128(Sse2.srai_epi16(a, 15), ONE);
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi16(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi16(unchecked((short)(1 << 15)));
+ v128 SUMMAND_IF_ZERO = Sse2.set1_epi16(unchecked((short)0x8002));
+
+ v128 isZero = Sse2.or_si128(Sse2.cmpeq_epi16(a, Sse2.setzero_si128()), Sse2.cmpeq_epi16(a, NEGATIVE_ZERO));
+ summand = ternarylogic_si128(isZero, Sse2.srai_epi16(a, 15), ONE, TernaryOperation.OxAE);
+ a = ternarylogic_si128(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi16((short)F16_SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi16(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.add_epi16(a, summand);
+ }
+ else
+ {
+ return Sse2.sub_epi16(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 dec_ps(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f, elements);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f, elements);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a, elements) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity, elements);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi32(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_ps(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(Sse2.srai_epi32(a, 31), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi32(1);
+ v128 NEGATIVE_ZERO = Sse2.set1_epi32(unchecked((int)(1 << 31)));
+ v128 SUMMAND_IF_ZERO = Sse2.set1_epi32(unchecked((int)0x8000_0002));
+
+ v128 isZero = Sse2.or_si128(Sse2.cmpeq_epi32(a, Sse2.setzero_si128()), Sse2.cmpeq_epi32(a, NEGATIVE_ZERO));
+ summand = ternarylogic_si128(isZero, Sse2.srai_epi32(a, 31), ONE, TernaryOperation.OxAE);
+ a = ternarylogic_si128(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v128 nanInf = Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.add_epi32(a, summand);
+ }
+ else
+ {
+ return Sse2.sub_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_dec_ps(v256 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity);
+
+ v256 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi32(1);
+
+ summand = Avx.mm256_blendv_ps(ONE, mm256_setall_si256(), a);
+ }
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi32(1);
+ v256 NEGATIVE_ZERO = Avx.mm256_set1_epi32(unchecked((int)(1 << 31)));
+ v256 SUMMAND_IF_ZERO = Avx.mm256_set1_epi32(unchecked((int)0x8000_0002));
+
+ v256 isZero = Avx2.mm256_or_si256(Avx2.mm256_cmpeq_epi32(a, Avx.mm256_setzero_si256()), Avx2.mm256_cmpeq_epi32(a, NEGATIVE_ZERO));
+ summand = mm256_ternarylogic_si256(isZero, Avx2.mm256_srai_epi32(a, 31), ONE, TernaryOperation.OxAE);
+ a = mm256_ternarylogic_si256(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v256 nanInf = Avx2.mm256_cmpeq_epi32(SIGNALING_EXPONENT, Avx2.mm256_and_si256(a, SIGNALING_EXPONENT));
+ summand = Avx2.mm256_andnot_si256(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_add_epi32(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_sub_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 dec_pd(v128 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0d);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0d);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0d);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity);
+
+ v128 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi64x(1);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_pd(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi64(a, 63), ONE);
+ }
+ }
+ }
+ else
+ {
+ v128 ONE = Sse2.set1_epi64x(1);
+                    v128 NEGATIVE_ZERO = Sse2.set1_epi64x(unchecked((long)(1ul << 63)));
+ v128 SUMMAND_IF_ZERO = Sse2.set1_epi64x(unchecked((long)0x8000_0000_0000_0002));
+
+ v128 isZero = Sse2.or_si128(cmpeq_epi64(a, Sse2.setzero_si128()), cmpeq_epi64(a, NEGATIVE_ZERO));
+ summand = ternarylogic_si128(isZero, srai_epi64(a, 63), ONE, TernaryOperation.OxAE);
+ a = ternarylogic_si128(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v128 nanInf;
+ if (Sse4_1.IsSse41Supported)
+ {
+ nanInf = Sse4_1.cmpeq_epi64(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT));
+ }
+ else
+ {
+ nanInf = Sse2.shuffle_epi32(Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(a, SIGNALING_EXPONENT)), Sse.SHUFFLE(3, 3, 1, 1));
+ }
+
+ summand = Sse2.andnot_si128(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Sse2.add_epi64(a, summand);
+ }
+ else
+ {
+ return Sse2.sub_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_dec_pd(v256 a, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0d, elements);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0d, elements);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0d, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a, elements) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity, elements) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity, elements);
+
+ v256 summand;
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+
+ summand = Avx.mm256_blendv_pd(ONE, mm256_setall_si256(), a);
+ }
+ }
+ else
+ {
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+                    v256 NEGATIVE_ZERO = Avx.mm256_set1_epi64x(unchecked((long)(1ul << 63)));
+ v256 SUMMAND_IF_ZERO = Avx.mm256_set1_epi64x(unchecked((long)0x8000_0000_0000_0002));
+
+ v256 isZero = Avx2.mm256_or_si256(Avx2.mm256_cmpeq_epi64(a, Avx.mm256_setzero_si256()), Avx2.mm256_cmpeq_epi64(a, NEGATIVE_ZERO));
+ summand = mm256_ternarylogic_si256(isZero, mm256_srai_epi64(a, 63), ONE, TernaryOperation.OxAE);
+ a = mm256_ternarylogic_si256(a, SUMMAND_IF_ZERO, isZero, TernaryOperation.OxF8);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v256 nanInf = Avx2.mm256_cmpeq_epi64(SIGNALING_EXPONENT, Avx2.mm256_and_si256(a, SIGNALING_EXPONENT));
+ summand = Avx2.mm256_andnot_si256(nanInf, summand);
+ }
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_add_epi64(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_sub_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+ }
+ }
+
+
+ unsafe public static partial class maxmath
+ {
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static UInt128 nextsmaller(UInt128 x)
+ {
+ return x - tobyte(x != UInt128.MinValue);
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Int128 nextsmaller(Int128 x)
+ {
+ return x - tobyte(x != Int128.MinValue);
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte nextsmaller(byte x)
+ {
+ return (byte)(x - tobyte(x != byte.MinValue));
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 nextsmaller(byte2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu8(x);
+ }
+ else
+ {
+ return new byte2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 nextsmaller(byte3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu8(x);
+ }
+ else
+ {
+ return new byte3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 nextsmaller(byte4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu8(x);
+ }
+ else
+ {
+ return new byte4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 nextsmaller(byte8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu8(x);
+ }
+ else
+ {
+ return new byte8(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte16 nextsmaller(byte16 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu8(x);
+ }
+ else
+ {
+ return new byte16(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7),
+ nextsmaller(x.x8),
+ nextsmaller(x.x9),
+ nextsmaller(x.x10),
+ nextsmaller(x.x11),
+ nextsmaller(x.x12),
+ nextsmaller(x.x13),
+ nextsmaller(x.x14),
+ nextsmaller(x.x15));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 nextsmaller(byte32 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epu8(x);
+ }
+ else
+ {
+ return new byte32(nextsmaller(x.v16_0),
+ nextsmaller(x.v16_16));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte nextsmaller(sbyte x)
+ {
+ return (sbyte)(x - tobyte(x != sbyte.MinValue));
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte2 nextsmaller(sbyte2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte3 nextsmaller(sbyte3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte4 nextsmaller(sbyte4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte8 nextsmaller(sbyte8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte8(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte16 nextsmaller(sbyte16 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte16(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7),
+ nextsmaller(x.x8),
+ nextsmaller(x.x9),
+ nextsmaller(x.x10),
+ nextsmaller(x.x11),
+ nextsmaller(x.x12),
+ nextsmaller(x.x13),
+ nextsmaller(x.x14),
+ nextsmaller(x.x15));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte32 nextsmaller(sbyte32 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epi8(x);
+ }
+ else
+ {
+ return new sbyte32(nextsmaller(x.v16_0),
+ nextsmaller(x.v16_16));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort nextsmaller(ushort x)
+ {
+ return (ushort)(x - tobyte(x != ushort.MinValue));
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 nextsmaller(ushort2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu16(x);
+ }
+ else
+ {
+ return new ushort2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 nextsmaller(ushort3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu16(x);
+ }
+ else
+ {
+ return new ushort3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 nextsmaller(ushort4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu16(x);
+ }
+ else
+ {
+ return new ushort4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 nextsmaller(ushort8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu16(x);
+ }
+ else
+ {
+ return new ushort8(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 nextsmaller(ushort16 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epu16(x);
+ }
+ else
+ {
+ return new ushort16(nextsmaller(x.v8_0),
+ nextsmaller(x.v8_8));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short nextsmaller(short x)
+ {
+ return (short)(x - tobyte(x != short.MinValue));
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short2 nextsmaller(short2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi16(x);
+ }
+ else
+ {
+ return new short2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short3 nextsmaller(short3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi16(x);
+ }
+ else
+ {
+ return new short3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short4 nextsmaller(short4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi16(x);
+ }
+ else
+ {
+ return new short4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short8 nextsmaller(short8 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi16(x);
+ }
+ else
+ {
+ return new short8(nextsmaller(x.x0),
+ nextsmaller(x.x1),
+ nextsmaller(x.x2),
+ nextsmaller(x.x3),
+ nextsmaller(x.x4),
+ nextsmaller(x.x5),
+ nextsmaller(x.x6),
+ nextsmaller(x.x7));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short16 nextsmaller(short16 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epi16(x);
+ }
+ else
+ {
+ return new short16(nextsmaller(x.v8_0),
+ nextsmaller(x.v8_8));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint nextsmaller(uint x)
+ {
+ return x - tobyte(x != uint.MinValue);
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 nextsmaller(uint2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.decs_epu32(RegisterConversion.ToV128(x), 2));
+ }
+ else
+ {
+ return new uint2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 nextsmaller(uint3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.decs_epu32(RegisterConversion.ToV128(x), 3));
+ }
+ else
+ {
+ return new uint3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 nextsmaller(uint4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.decs_epu32(RegisterConversion.ToV128(x), 4));
+ }
+ else
+ {
+ return new uint4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 nextsmaller(uint8 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epu32(x);
+ }
+ else
+ {
+ return new uint8(nextsmaller(x.v4_0),
+ nextsmaller(x.v4_4));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int nextsmaller(int x)
+ {
+ return x - tobyte(x != int.MinValue);
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int2 nextsmaller(int2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt2(Xse.decs_epi32(RegisterConversion.ToV128(x), 2));
+ }
+ else
+ {
+ return new int2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int3 nextsmaller(int3 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt3(Xse.decs_epi32(RegisterConversion.ToV128(x), 3));
+ }
+ else
+ {
+ return new int3(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int4 nextsmaller(int4 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt4(Xse.decs_epi32(RegisterConversion.ToV128(x), 4));
+ }
+ else
+ {
+ return new int4(nextsmaller(x.x),
+ nextsmaller(x.y),
+ nextsmaller(x.z),
+ nextsmaller(x.w));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int8 nextsmaller(int8 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epi32(x);
+ }
+ else
+ {
+ return new int8(nextsmaller(x.v4_0),
+ nextsmaller(x.v4_4));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong nextsmaller(ulong x)
+ {
+ return x - tobyte(x != ulong.MinValue);
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 nextsmaller(ulong2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epu64(x);
+ }
+ else
+ {
+ return new ulong2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 nextsmaller(ulong3 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epu64(x, 3);
+ }
+ else
+ {
+ return new ulong3(nextsmaller(x.xy),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 nextsmaller(ulong4 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epu64(x, 4);
+ }
+ else
+ {
+ return new ulong4(nextsmaller(x.xy),
+ nextsmaller(x.zw));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long nextsmaller(long x)
+ {
+ return x - tobyte(x != long.MinValue);
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long2 nextsmaller(long2 x)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.decs_epi64(x);
+ }
+ else
+ {
+ return new long2(nextsmaller(x.x),
+ nextsmaller(x.y));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long3 nextsmaller(long3 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epi64(x, 3);
+ }
+ else
+ {
+ return new long3(nextsmaller(x.xy),
+ nextsmaller(x.z));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long4 nextsmaller(long4 x)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_decs_epi64(x, 4);
+ }
+ else
+ {
+ return new long4(nextsmaller(x.xy),
+ nextsmaller(x.zw));
+ }
+ }
+
+
+ /// Returns the next closest smaller than .
+ ///
+ /// A "" with its flag set returns incorrect results for any that is negative 0.
+ /// A "" with its flag set returns incorrect results for any that is negative or 0.
+ /// A "" with its flag set returns incorrect results for any that is positive or 0.
+ /// A "" with its flag set returns incorrect results for any that is either , or .
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter nextsmaller(quarter x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return asquarter(Xse.dec_pq(Sse2.cvtsi32_si128(x.value),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Byte0);
+ }
+ else
+ {
+ int __x = assbyte(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int isNotZero = -tobyte((__x != unchecked((int)0xFFFF_FF80)) & (__x != 0));
+ summand = 1 | ((__x >> 31) & isNotZero);
+ __x |= (int)andnot(0b1000_0010, (uint)isNotZero);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & quarter.SIGNALING_EXPONENT) != quarter.SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return asquarter((byte)(__x + summand));
+ }
+ else
+ {
+ return asquarter((byte)(__x - summand));
+ }
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ ///
+ /// A "" with its flag set returns incorrect results for any that is negative 0.
+ /// A "" with its flag set returns incorrect results for any that is negative or 0.
+ /// A "" with its flag set returns incorrect results for any that is positive or 0.
+ /// A "" with its flag set returns incorrect results for any that is either , or .
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter2 nextsmaller(quarter2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2);
+ }
+ else
+ {
+ return new quarter2(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations));
+ }
+ }
+
+ /// Returns a , where each component is the next closest smaller than the corresponding component in .
+ ///
+ /// A "" with its flag set returns incorrect results for any that is negative 0.
+ /// A "" with its flag set returns incorrect results for any that is negative or 0.
+ /// A "" with its flag set returns incorrect results for any that is positive or 0.
+ /// A "" with its flag set returns incorrect results for any that is either , or .
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter3 nextsmaller(quarter3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3);
+ }
+ else
+ {
+ return new quarter3(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="quarter4"/>, where each component is the next closest <see cref="quarter"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter4 nextsmaller(quarter4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4);
+ }
+ else
+ {
+ return new quarter4(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations),
+ nextsmaller(x.w, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="MaxMath.quarter8"/>, where each component is the next closest <see cref="quarter"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="quarter.PositiveInfinity"/>, <see cref="quarter.NegativeInfinity"/> or <see cref="quarter.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter8 nextsmaller(quarter8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_pq(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 8);
+ }
+ else
+ {
+ return new quarter8(nextsmaller(x.x0, optimizations),
+ nextsmaller(x.x1, optimizations),
+ nextsmaller(x.x2, optimizations),
+ nextsmaller(x.x3, optimizations),
+ nextsmaller(x.x4, optimizations),
+ nextsmaller(x.x5, optimizations),
+ nextsmaller(x.x6, optimizations),
+ nextsmaller(x.x7, optimizations));
+ }
+ }
+
+
+ /// <summary> Returns the next closest <see cref="half"/> smaller than <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half nextsmaller(half x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return ashalf(Xse.dec_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).UShort0);
+ }
+ else
+ {
+ int __x = asshort(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int isNotZero = -tobyte((__x != unchecked((int)0xFFFF_8000)) & (__x != 0));
+ summand = 1 | ((__x >> 31) & isNotZero);
+ __x |= (int)andnot(0x8002u, (uint)isNotZero);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & F16_SIGNALING_EXPONENT) != F16_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return ashalf((ushort)(__x + summand));
+ }
+ else
+ {
+ return ashalf((ushort)(__x - summand));
+ }
+ }
+ }
+
+ /// <summary> Returns a <see cref="half2"/>, where each component is the next closest <see cref="half"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half2 nextsmaller(half2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf2(Xse.dec_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new half2(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="half3"/>, where each component is the next closest <see cref="half"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half3 nextsmaller(half3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf3(Xse.dec_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new half3(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="half4"/>, where each component is the next closest <see cref="half"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half4 nextsmaller(half4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf4(Xse.dec_ph(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new half4(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations),
+ nextsmaller(x.w, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="MaxMath.half8"/>, where each component is the next closest <see cref="half"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half8 nextsmaller(half8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_ph(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 8);
+ }
+ else
+ {
+ return new half8(nextsmaller(x.x0, optimizations),
+ nextsmaller(x.x1, optimizations),
+ nextsmaller(x.x2, optimizations),
+ nextsmaller(x.x3, optimizations),
+ nextsmaller(x.x4, optimizations),
+ nextsmaller(x.x5, optimizations),
+ nextsmaller(x.x6, optimizations),
+ nextsmaller(x.x7, optimizations));
+ }
+ }
+
+
+ /// <summary> Returns the next closest <see cref="float"/> smaller than <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float nextsmaller(float x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Float0;
+ }
+ else
+ {
+ int __x = math.asint(x);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int isNotZero = -tobyte((__x != 1 << 31) & (__x != 0));
+ summand = 1 | ((__x >> 31) & isNotZero);
+ __x |= (int)andnot(0x8000_0002u, (uint)isNotZero);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int notNanInf = -tobyte((__x & F32_SIGNALING_EXPONENT) != F32_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asfloat(__x + summand);
+ }
+ else
+ {
+ return math.asfloat(__x - summand);
+ }
+ }
+ }
+
+ /// <summary> Returns a <see cref="float2"/>, where each component is the next closest <see cref="float"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float2 nextsmaller(float2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat2(Xse.dec_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new float2(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="float3"/>, where each component is the next closest <see cref="float"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float3 nextsmaller(float3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat3(Xse.dec_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new float3(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="float4"/>, where each component is the next closest <see cref="float"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float4 nextsmaller(float4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat4(Xse.dec_ps(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new float4(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations),
+ nextsmaller(x.z, optimizations),
+ nextsmaller(x.w, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="MaxMath.float8"/>, where each component is the next closest <see cref="float"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="float.PositiveInfinity"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float8 nextsmaller(float8 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_dec_ps(x,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0));
+ }
+ else
+ {
+ return new float8(nextsmaller(x.v4_0, optimizations),
+ nextsmaller(x.v4_4, optimizations));
+ }
+ }
+
+
+ /// <summary> Returns the next closest <see cref="double"/> smaller than <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double nextsmaller(double x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.dec_pd(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)).Double0;
+ }
+ else
+ {
+ long __x = math.aslong(x);
+ long summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 63);
+ }
+ }
+ else
+ {
+ long isNotZero = -tobyte((__x != 1L << 63) & (__x != 0));
+ summand = 1 | ((__x >> 63) & isNotZero);
+ __x |= (long)andnot(0x8000_0000_0000_0002, (ulong)isNotZero);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ long notNanInf = -(long)tobyte((__x & F64_SIGNALING_EXPONENT) != F64_SIGNALING_EXPONENT);
+ summand &= notNanInf;
+ }
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asdouble(__x + summand);
+ }
+ else
+ {
+ return math.asdouble(__x - summand);
+ }
+ }
+ }
+
+ /// <summary> Returns a <see cref="double2"/>, where each component is the next closest <see cref="double"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double2 nextsmaller(double2 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToDouble2(Xse.dec_pd(RegisterConversion.ToV128(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)));
+ }
+ else
+ {
+ return new double2(nextsmaller(x.x, optimizations),
+ nextsmaller(x.y, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="double3"/>, where each component is the next closest <see cref="double"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double3 nextsmaller(double3 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble3(Xse.mm256_dec_pd(RegisterConversion.ToV256(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new double3(nextsmaller(x.xy, optimizations),
+ nextsmaller(x.z, optimizations));
+ }
+ }
+
+ /// <summary> Returns a <see cref="double4"/>, where each component is the next closest <see cref="double"/> smaller than the corresponding component in <paramref name="x"/>. </summary>
+ /// <remarks>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.NonZero"/> flag set returns incorrect results for any <paramref name="x"/> that is negative 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Negative"/> flag set returns incorrect results for any <paramref name="x"/> that is negative or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Positive"/> flag set returns incorrect results for any <paramref name="x"/> that is positive or 0. </para>
+ /// <para> A <see cref="Promise"/> "<paramref name="optimizations"/>" with its <see cref="Promise.Unsafe0"/> flag set returns incorrect results for any <paramref name="x"/> that is either <see cref="double.PositiveInfinity"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.NaN"/>. </para>
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double4 nextsmaller(double4 x, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble4(Xse.mm256_dec_pd(RegisterConversion.ToV256(x),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new double4(nextsmaller(x.xy, optimizations),
+ nextsmaller(x.zw, optimizations));
+ }
+ }
+ }
+}
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs.meta b/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs.meta
new file mode 100644
index 0000000..d635d02
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Smaller.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 9155ad909a8150f47a4c844196b18aed
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs b/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs
new file mode 100644
index 0000000..05d8c64
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs
@@ -0,0 +1,2244 @@
+using System.Runtime.CompilerServices;
+using Unity.Burst.Intrinsics;
+using Unity.Mathematics;
+using MaxMath.Intrinsics;
+
+using static Unity.Burst.Intrinsics.X86;
+using static MaxMath.LUT.FLOATING_POINT;
+
+namespace MaxMath
+{
+ namespace Intrinsics
+ {
+ unsafe public static partial class Xse
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epu8(v128 a, v128 b, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU8(b, byte.MinValue, elements))
+ {
+ return decs_epu8(a);
+ }
+ if (constexpr.ALL_EQ_EPU8(b, byte.MaxValue, elements))
+ {
+ return incs_epu8(a);
+ }
+
+ return Sse2.sub_epi8(a, cmp_epu8(a, b, elements));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epu8(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU8(b, byte.MinValue))
+ {
+ return mm256_decs_epu8(a);
+ }
+ if (constexpr.ALL_EQ_EPU8(b, byte.MaxValue))
+ {
+ return mm256_incs_epu8(a);
+ }
+
+ return Avx2.mm256_sub_epi8(a, mm256_cmp_epu8(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epu16(v128 a, v128 b, byte elements = 8)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU16(b, ushort.MinValue, elements))
+ {
+ return decs_epu16(a);
+ }
+ if (constexpr.ALL_EQ_EPU16(b, ushort.MaxValue, elements))
+ {
+ return incs_epu16(a);
+ }
+
+ return Sse2.sub_epi16(a, cmp_epu16(a, b, elements));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epu16(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU16(b, ushort.MinValue))
+ {
+ return mm256_decs_epu16(a);
+ }
+ if (constexpr.ALL_EQ_EPU16(b, ushort.MaxValue))
+ {
+ return mm256_incs_epu16(a);
+ }
+
+ return Avx2.mm256_sub_epi16(a, mm256_cmp_epu16(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epu32(v128 a, v128 b, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU32(b, uint.MinValue, elements))
+ {
+ return decs_epu32(a, elements);
+ }
+ if (constexpr.ALL_EQ_EPU32(b, uint.MaxValue, elements))
+ {
+ return incs_epu32(a, elements);
+ }
+
+ return Sse2.sub_epi32(a, cmp_epu32(a, b, elements));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epu32(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU32(b, uint.MinValue))
+ {
+ return mm256_decs_epu32(a);
+ }
+ if (constexpr.ALL_EQ_EPU32(b, uint.MaxValue))
+ {
+ return mm256_incs_epu32(a);
+ }
+
+ return Avx2.mm256_sub_epi32(a, mm256_cmp_epu32(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epu64(v128 a, v128 b)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU64(b, ulong.MinValue))
+ {
+ return decs_epu64(a);
+ }
+ if (constexpr.ALL_EQ_EPU64(b, ulong.MaxValue))
+ {
+ return incs_epu64(a);
+ }
+
+ return Sse2.sub_epi64(a, cmp_epu64(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epu64(v256 a, v256 b, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPU64(b, ulong.MinValue, elements))
+ {
+ return mm256_decs_epu64(a, elements);
+ }
+ if (constexpr.ALL_EQ_EPU64(b, ulong.MaxValue, elements))
+ {
+ return mm256_incs_epu64(a, elements);
+ }
+
+ return Avx2.mm256_sub_epi64(a, mm256_cmp_epu64(a, b, elements));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epi8(v128 a, v128 b, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI8(b, sbyte.MinValue, elements))
+ {
+ return decs_epi8(a);
+ }
+ if (constexpr.ALL_EQ_EPI8(b, sbyte.MaxValue, elements))
+ {
+ return incs_epi8(a);
+ }
+
+ return Sse2.sub_epi8(a, cmp_epi8(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epi8(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI8(b, sbyte.MinValue))
+ {
+ return mm256_decs_epi8(a);
+ }
+ if (constexpr.ALL_EQ_EPI8(b, sbyte.MaxValue))
+ {
+ return mm256_incs_epi8(a);
+ }
+
+ return Avx2.mm256_sub_epi8(a, mm256_cmp_epi8(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epi16(v128 a, v128 b, byte elements = 8)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI16(b, short.MinValue, elements))
+ {
+ return decs_epi16(a);
+ }
+ if (constexpr.ALL_EQ_EPI16(b, short.MaxValue, elements))
+ {
+ return incs_epi16(a);
+ }
+
+ return Sse2.sub_epi16(a, cmp_epi16(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epi16(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI16(b, short.MinValue))
+ {
+ return mm256_decs_epi16(a);
+ }
+ if (constexpr.ALL_EQ_EPI16(b, short.MaxValue))
+ {
+ return mm256_incs_epi16(a);
+ }
+
+ return Avx2.mm256_sub_epi16(a, mm256_cmp_epi16(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epi32(v128 a, v128 b, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI32(b, int.MinValue, elements))
+ {
+ return decs_epi32(a, elements);
+ }
+ if (constexpr.ALL_EQ_EPI32(b, int.MaxValue, elements))
+ {
+ return incs_epi32(a, elements);
+ }
+
+ return Sse2.sub_epi32(a, cmp_epi32(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epi32(v256 a, v256 b)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI32(b, int.MinValue))
+ {
+ return mm256_decs_epi32(a);
+ }
+ if (constexpr.ALL_EQ_EPI32(b, int.MaxValue))
+ {
+ return mm256_incs_epi32(a);
+ }
+
+ return Avx2.mm256_sub_epi32(a, mm256_cmp_epi32(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_epi64(v128 a, v128 b)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI64(b, long.MinValue))
+ {
+ return decs_epi64(a);
+ }
+ if (constexpr.ALL_EQ_EPI64(b, long.MaxValue))
+ {
+ return incs_epi64(a);
+ }
+
+ return Sse2.sub_epi64(a, cmp_epi64(a, b));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_epi64(v256 a, v256 b, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI64(b, long.MinValue, elements))
+ {
+ return mm256_decs_epi64(a, elements);
+ }
+ if (constexpr.ALL_EQ_EPI64(b, long.MaxValue, elements))
+ {
+ return mm256_incs_epi64(a, elements);
+ }
+
+ return Avx2.mm256_sub_epi64(a, mm256_cmp_epi64(a, b, elements));
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_pq(v128 a, v128 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 16)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PQ(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PQ(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PQ(a, 0f, elements);
+ //promiseNotNanInf |= constexpr.ALL_NOTNAN_PQ(a, elements) && constexpr.ALL_NOTNAN_PQ(b, elements) && constexpr.ALL_NEQ_PQ(a, float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PQ(a, float.NegativeInfinity, elements);
+
+ v128 ONE = Sse2.set1_epi8(1);
+
+ v128 isGreater = quarter.Vectorized.cmpgt_pq(a, b, promiseNeitherNaN: promiseNotNanInf, promiseNeitherZero: promiseNonZero && constexpr.ALL_NEQ_EPU8(b, 0, elements) && constexpr.ALL_NEQ_EPU8(b, 0b1000_0000, elements), elements: elements);
+ v128 areDifferent = quarter.Vectorized.cmpneq_pq(a, b, promiseNeitherNaN: promiseNotNanInf, promiseNeitherZero: promiseNonZero && constexpr.ALL_NEQ_EPU8(b, 0, elements) && constexpr.ALL_NEQ_EPU8(b, 0b1000_0000, elements), elements: elements);
+ v128 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_epi8(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi8(a, 7), ONE);
+ }
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi8(quarter.SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi8(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = quarter.Vectorized.cmpunord_pq(a, b, elements);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Sse2.or_si128(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v128 NEGATIVE_ZERO = Sse2.set1_epi8(unchecked((sbyte)(1u << 7)));
+ v128 IF_ZERO = Sse2.set1_epi8(unchecked((sbyte)0x82u));
+
+ v128 isNegativeZero = Sse2.cmpeq_epi8(NEGATIVE_ZERO, a);
+ v128 isPositiveZero = Sse2.cmpeq_epi8(Sse2.setzero_si128(), a);
+ v128 zeroMask = ternarylogic_si128(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = ternarylogic_si128(ONE, srai_epi8(a, 7), zeroMask, TernaryOperation.OxF8);
+ v128 aPart0 = ternarylogic_si128(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v128 aPart1 = ternarylogic_si128(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Sse2.or_si128(aPart0, aPart1);
+ }
+ else
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi8(quarter.SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi8(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = quarter.Vectorized.cmpunord_pq(a, b, elements);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = ternarylogic_si128(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = ternarylogic_si128(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Sse2.sub_epi8(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi8(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi8(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_ph(v128 a, v128 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ //promiseNonZero |= constexpr.ALL_NEQ_PH(a, 0f, elements);
+ //promisePositive |= constexpr.ALL_GT_PH(a, 0f, elements);
+ //promiseNegative |= constexpr.ALL_LT_PH(a, 0f, elements);
+ //promiseNotNanInf |= constexpr.ALL_NOTNAN_PH(a, elements) && constexpr.ALL_NOTNAN_PH(b, elements) && constexpr.ALL_NEQ_PH(a, float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PH(a, float.NegativeInfinity, elements);
+
+ v128 ONE = Sse2.set1_epi16(1);
+
+ v128 isGreater = cmpgt_ph(a, b, promiseNeitherNaN: promiseNotNanInf, promiseNeitherZero: promiseNonZero && constexpr.ALL_NEQ_EPU16(b, 0, elements) && constexpr.ALL_NEQ_EPU16(b, 0x8000, elements), elements: elements);
+ v128 areDifferent = cmpneq_ph(a, b, promiseNeitherNaN: promiseNotNanInf, promiseNeitherZero: promiseNonZero && constexpr.ALL_NEQ_EPU16(b, 0, elements) && constexpr.ALL_NEQ_EPU16(b, 0x8000, elements), elements: elements);
+ v128 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ summand = Sse2.or_si128(Sse2.srai_epi16(a, 15), ONE);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi16(F16_SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi16(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = cmpunord_ph(a, b, elements);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Sse2.or_si128(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v128 NEGATIVE_ZERO = Sse2.set1_epi16(unchecked((short)(1u << 15)));
+ v128 IF_ZERO = Sse2.set1_epi16(unchecked((short)0x8002u));
+
+ v128 isNegativeZero = Sse2.cmpeq_epi16(NEGATIVE_ZERO, a);
+ v128 isPositiveZero = Sse2.cmpeq_epi16(Sse2.setzero_si128(), a);
+ v128 zeroMask = ternarylogic_si128(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = ternarylogic_si128(ONE, Sse2.srai_epi16(a, 15), zeroMask, TernaryOperation.OxF8);
+ v128 aPart0 = ternarylogic_si128(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v128 aPart1 = ternarylogic_si128(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Sse2.or_si128(aPart0, aPart1);
+ }
+ else
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi16(F16_SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi16(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = cmpunord_ph(a, b, elements);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = ternarylogic_si128(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = ternarylogic_si128(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Sse2.sub_epi16(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi16(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi16(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_ps(v128 a, v128 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f, elements);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f, elements);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a, elements) && constexpr.ALL_NOTNAN_PS(b, elements) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity, elements) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity, elements);
+
+ v128 ONE = Sse2.set1_epi32(1);
+
+ v128 isGreater = Sse.cmpgt_ps(a, b);
+ v128 areDifferent = Sse.cmpneq_ps(a, b);
+ v128 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_ps(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(Sse2.srai_epi32(a, 31), ONE);
+ }
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = Sse.cmpunord_ps(a, b);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Sse2.or_si128(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v128 NEGATIVE_ZERO = Sse2.set1_epi32(unchecked((int)(1u << 31)));
+ v128 IF_ZERO = Sse2.set1_epi32(unchecked((int)0x8000_0002u));
+
+ v128 isNegativeZero = Sse2.cmpeq_epi32(NEGATIVE_ZERO, a);
+ v128 isPositiveZero = Sse2.cmpeq_epi32(Sse2.setzero_si128(), a);
+ v128 zeroMask = ternarylogic_si128(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = ternarylogic_si128(ONE, Sse2.srai_epi32(a, 31), zeroMask, TernaryOperation.OxF8);
+ v128 aPart0 = ternarylogic_si128(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v128 aPart1 = ternarylogic_si128(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Sse2.or_si128(aPart0, aPart1);
+ }
+ else
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v128 xInfinite = Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ v128 eitherNaN = Sse.cmpunord_ps(a, b);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = ternarylogic_si128(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = ternarylogic_si128(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Sse2.sub_epi32(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi32(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_ps(v256 a, v256 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PS(a, 0f);
+ promisePositive |= constexpr.ALL_GT_PS(a, 0f);
+ promiseNegative |= constexpr.ALL_LT_PS(a, 0f);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PS(a) && constexpr.ALL_NOTNAN_PS(b) && constexpr.ALL_NEQ_PS(a, float.PositiveInfinity) && constexpr.ALL_NEQ_PS(a, float.NegativeInfinity);
+
+ v256 ONE = Avx.mm256_set1_epi32(1);
+
+ v256 isGreater = Avx.mm256_cmp_ps(a, b, (int)Avx.CMP.GT_OQ);
+ v256 areDifferent = Avx.mm256_cmp_ps(a, b, (int)Avx.CMP.NEQ_OQ);
+ v256 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ summand = Avx.mm256_blendv_ps(ONE, mm256_setall_si256(), a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v256 xInfinite = Avx2.mm256_cmpeq_epi32(SIGNALING_EXPONENT, Avx2.mm256_and_si256(SIGNALING_EXPONENT, a));
+ v256 eitherNaN = Avx.mm256_cmp_ps(a, b, (int)Avx.CMP.UNORD_Q);
+
+ summand = mm256_ternarylogic_si256(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Avx2.mm256_or_si256(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v256 NEGATIVE_ZERO = Avx.mm256_set1_epi32(unchecked((int)(1u << 31)));
+ v256 IF_ZERO = Avx.mm256_set1_epi32(unchecked((int)0x8000_0002u));
+
+ v256 isNegativeZero = Avx2.mm256_cmpeq_epi32(NEGATIVE_ZERO, a);
+ v256 isPositiveZero = Avx2.mm256_cmpeq_epi32(Avx.mm256_setzero_si256(), a);
+ v256 zeroMask = mm256_ternarylogic_si256(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = mm256_ternarylogic_si256(ONE, Avx2.mm256_srai_epi32(a, 31), zeroMask, TernaryOperation.OxF8);
+ v256 aPart0 = mm256_ternarylogic_si256(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v256 aPart1 = mm256_ternarylogic_si256(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Avx2.mm256_or_si256(aPart0, aPart1);
+ }
+ else
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi32(F32_SIGNALING_EXPONENT);
+
+ v256 xInfinite = Avx2.mm256_cmpeq_epi32(SIGNALING_EXPONENT, Avx2.mm256_and_si256(SIGNALING_EXPONENT, a));
+ v256 eitherNaN = Avx.mm256_cmp_ps(a, b, (int)Avx.CMP.UNORD_Q);
+
+ summand = mm256_ternarylogic_si256(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = mm256_ternarylogic_si256(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = mm256_ternarylogic_si256(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Avx2.mm256_sub_epi32(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_sub_epi32(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_add_epi32(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v128 toward_pd(v128 a, v128 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0f);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0f);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0f);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a) && constexpr.ALL_NOTNAN_PD(b) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity);
+
+ v128 ONE = Sse2.set1_epi64x(1);
+
+ v128 isGreater = Sse2.cmpgt_pd(a, b);
+ v128 areDifferent = Sse2.cmpneq_pd(a, b);
+ v128 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = setall_si128();
+ }
+ else
+ {
+ if (Sse4_1.IsSse41Supported)
+ {
+ summand = Sse4_1.blendv_pd(ONE, setall_si128(), a);
+ }
+ else
+ {
+ summand = Sse2.or_si128(srai_epi64(a, 63), ONE);
+ }
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v128 xInfinite;
+ if (Sse4_1.IsSse41Supported)
+ {
+ xInfinite = Sse4_1.cmpeq_epi64(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ }
+ else
+ {
+ xInfinite = Sse2.shuffle_epi32(Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a)), Sse.SHUFFLE(3, 3, 1, 1));
+ }
+
+ v128 eitherNaN = Sse2.cmpunord_pd(a, b);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Sse2.or_si128(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v128 NEGATIVE_ZERO = Sse2.set1_epi64x(unchecked((long)(1ul << 63)));
+ v128 IF_ZERO = Sse2.set1_epi64x(unchecked((long)0x8000_0000_0000_0002ul));
+
+ v128 isNegativeZero = cmpeq_epi64(NEGATIVE_ZERO, a);
+ v128 isPositiveZero = cmpeq_epi64(Sse2.setzero_si128(), a);
+ v128 zeroMask = ternarylogic_si128(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = ternarylogic_si128(ONE, srai_epi64(a, 63), zeroMask, TernaryOperation.OxF8);
+ v128 aPart0 = ternarylogic_si128(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v128 aPart1 = ternarylogic_si128(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Sse2.or_si128(aPart0, aPart1);
+ }
+ else
+ {
+ v128 SIGNALING_EXPONENT = Sse2.set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v128 xInfinite;
+ if (Sse4_1.IsSse41Supported)
+ {
+ xInfinite = Sse4_1.cmpeq_epi64(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a));
+ }
+ else
+ {
+ xInfinite = Sse2.shuffle_epi32(Sse2.cmpeq_epi32(SIGNALING_EXPONENT, Sse2.and_si128(SIGNALING_EXPONENT, a)), Sse.SHUFFLE(3, 3, 1, 1));
+ }
+
+ v128 eitherNaN = Sse2.cmpunord_pd(a, b);
+
+ summand = ternarylogic_si128(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = ternarylogic_si128(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = ternarylogic_si128(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Sse2.sub_epi64(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Sse2.sub_epi64(a, summand);
+ }
+ else
+ {
+ return Sse2.add_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static v256 mm256_toward_pd(v256 a, v256 b, bool promiseNonZero = false, bool promisePositive = false, bool promiseNegative = false, bool promiseNotNanInf = false, byte elements = 4)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ promiseNonZero |= constexpr.ALL_NEQ_PD(a, 0f, elements);
+ promisePositive |= constexpr.ALL_GT_PD(a, 0f, elements);
+ promiseNegative |= constexpr.ALL_LT_PD(a, 0f, elements);
+ promiseNotNanInf |= constexpr.ALL_NOTNAN_PD(a, elements) && constexpr.ALL_NOTNAN_PD(b, elements) && constexpr.ALL_NEQ_PD(a, double.PositiveInfinity, elements) && constexpr.ALL_NEQ_PD(a, double.NegativeInfinity, elements);
+
+ v256 ONE = Avx.mm256_set1_epi64x(1);
+
+ v256 isGreater = Avx.mm256_cmp_pd(a, b, (int)Avx.CMP.GT_OQ);
+ v256 areDifferent = Avx.mm256_cmp_pd(a, b, (int)Avx.CMP.NEQ_OQ);
+ v256 summand;
+
+ if (promiseNonZero)
+ {
+ if (promisePositive | promiseNegative)
+ {
+ summand = mm256_setall_si256();
+ }
+ else
+ {
+ summand = Avx.mm256_blendv_pd(ONE, mm256_setall_si256(), a);
+ }
+
+ if (!promiseNotNanInf)
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v256 xInfinite = Avx2.mm256_cmpeq_epi64(SIGNALING_EXPONENT, Avx2.mm256_and_si256(SIGNALING_EXPONENT, a));
+ v256 eitherNaN = Avx.mm256_cmp_pd(a, b, (int)Avx.CMP.UNORD_Q);
+
+ summand = mm256_ternarylogic_si256(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = Avx2.mm256_or_si256(a, eitherNaN);
+ }
+ }
+ else
+ {
+ v256 NEGATIVE_ZERO = Avx.mm256_set1_epi64x(unchecked((long)(1ul << 63)));
+ v256 IF_ZERO = Avx.mm256_set1_epi64x(unchecked((long)0x8000_0000_0000_0002ul));
+
+ v256 isNegativeZero = Avx2.mm256_cmpeq_epi64(NEGATIVE_ZERO, a);
+ v256 isPositiveZero = Avx2.mm256_cmpeq_epi64(Avx.mm256_setzero_si256(), a);
+ v256 zeroMask = mm256_ternarylogic_si256(isNegativeZero, isPositiveZero, isGreater, TernaryOperation.Ox87);
+
+ summand = mm256_ternarylogic_si256(ONE, mm256_srai_epi64(a, 63), zeroMask, TernaryOperation.OxF8);
+ v256 aPart0 = mm256_ternarylogic_si256(a, isGreater, zeroMask, TernaryOperation.OxEO);
+ v256 aPart1 = mm256_ternarylogic_si256(zeroMask, IF_ZERO, isGreater, TernaryOperation.OxO8);
+
+ if (promiseNotNanInf)
+ {
+ a = Avx2.mm256_or_si256(aPart0, aPart1);
+ }
+ else
+ {
+ v256 SIGNALING_EXPONENT = Avx.mm256_set1_epi64x(F64_SIGNALING_EXPONENT);
+
+ v256 xInfinite = Avx2.mm256_cmpeq_epi64(SIGNALING_EXPONENT, Avx2.mm256_and_si256(SIGNALING_EXPONENT, a));
+ v256 eitherNaN = Avx.mm256_cmp_pd(a, b, (int)Avx.CMP.UNORD_Q);
+
+ summand = mm256_ternarylogic_si256(eitherNaN, xInfinite, summand, TernaryOperation.OxO2);
+ a = mm256_ternarylogic_si256(eitherNaN, aPart0, aPart1, TernaryOperation.OxFE);
+ }
+ }
+
+ summand = mm256_ternarylogic_si256(isGreater, summand, areDifferent, TernaryOperation.Ox78);
+ summand = Avx2.mm256_sub_epi64(summand, isGreater);
+
+ if (promisePositive)
+ {
+ return Avx2.mm256_sub_epi64(a, summand);
+ }
+ else
+ {
+ return Avx2.mm256_add_epi64(a, summand);
+ }
+ }
+ else throw new IllegalInstructionException();
+ }
+ }
+ }
+
+
+ unsafe public static partial class maxmath
+ {
+ /// <summary>  Returns the next closest <see cref="UInt128"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static UInt128 nexttoward(UInt128 from, UInt128 to)
+ {
+ return from - (UInt128)compareto(from, to);
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="Int128"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Int128 nexttoward(Int128 from, Int128 to)
+ {
+ return from - compareto(from, to);
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="byte"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte nexttoward(byte from, byte to)
+ {
+ return (byte)(from - compareto(from, to));
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte2"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte2 nexttoward(byte2 from, byte2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu8(from, to, 2);
+ }
+ else
+ {
+ return new byte2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte3"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte3 nexttoward(byte3 from, byte3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu8(from, to, 3);
+ }
+ else
+ {
+ return new byte3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte4"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte4 nexttoward(byte4 from, byte4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu8(from, to, 4);
+ }
+ else
+ {
+ return new byte4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte8"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte8 nexttoward(byte8 from, byte8 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu8(from, to, 8);
+ }
+ else
+ {
+ return new byte8(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte16"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte16 nexttoward(byte16 from, byte16 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu8(from, to, 16);
+ }
+ else
+ {
+ return new byte16(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7),
+ nexttoward(from.x8, to.x8),
+ nexttoward(from.x9, to.x9),
+ nexttoward(from.x10, to.x10),
+ nexttoward(from.x11, to.x11),
+ nexttoward(from.x12, to.x12),
+ nexttoward(from.x13, to.x13),
+ nexttoward(from.x14, to.x14),
+ nexttoward(from.x15, to.x15));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.byte32"/>, where each component is the next closest <see cref="byte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte32 nexttoward(byte32 from, byte32 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epu8(from, to);
+ }
+ else
+ {
+ return new byte32(nexttoward(from.v16_0, to.v16_0),
+ nexttoward(from.v16_16, to.v16_16));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="sbyte"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte nexttoward(sbyte from, sbyte to)
+ {
+ return (sbyte)(from - compareto(from, to));
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte2"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte2 nexttoward(sbyte2 from, sbyte2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi8(from, to, 2);
+ }
+ else
+ {
+ return new sbyte2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte3"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte3 nexttoward(sbyte3 from, sbyte3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi8(from, to, 3);
+ }
+ else
+ {
+ return new sbyte3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte4"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte4 nexttoward(sbyte4 from, sbyte4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi8(from, to, 4);
+ }
+ else
+ {
+ return new sbyte4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte8"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte8 nexttoward(sbyte8 from, sbyte8 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi8(from, to, 8);
+ }
+ else
+ {
+ return new sbyte8(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte16"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte16 nexttoward(sbyte16 from, sbyte16 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi8(from, to, 16);
+ }
+ else
+ {
+ return new sbyte16(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7),
+ nexttoward(from.x8, to.x8),
+ nexttoward(from.x9, to.x9),
+ nexttoward(from.x10, to.x10),
+ nexttoward(from.x11, to.x11),
+ nexttoward(from.x12, to.x12),
+ nexttoward(from.x13, to.x13),
+ nexttoward(from.x14, to.x14),
+ nexttoward(from.x15, to.x15));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.sbyte32"/>, where each component is the next closest <see cref="sbyte"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static sbyte32 nexttoward(sbyte32 from, sbyte32 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epi8(from, to);
+ }
+ else
+ {
+ return new sbyte32(nexttoward(from.v16_0, to.v16_0),
+ nexttoward(from.v16_16, to.v16_16));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="ushort"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort nexttoward(ushort from, ushort to)
+ {
+ return (ushort)(from - compareto(from, to));
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.ushort2"/>, where each component is the next closest <see cref="ushort"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort2 nexttoward(ushort2 from, ushort2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu16(from, to, 2);
+ }
+ else
+ {
+ return new ushort2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.ushort3"/>, where each component is the next closest <see cref="ushort"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort3 nexttoward(ushort3 from, ushort3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu16(from, to, 3);
+ }
+ else
+ {
+ return new ushort3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.ushort4"/>, where each component is the next closest <see cref="ushort"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort4 nexttoward(ushort4 from, ushort4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu16(from, to, 4);
+ }
+ else
+ {
+ return new ushort4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.ushort8"/>, where each component is the next closest <see cref="ushort"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort8 nexttoward(ushort8 from, ushort8 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu16(from, to, 8);
+ }
+ else
+ {
+ return new ushort8(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.ushort16"/>, where each component is the next closest <see cref="ushort"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort16 nexttoward(ushort16 from, ushort16 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epu16(from, to);
+ }
+ else
+ {
+ return new ushort16(nexttoward(from.v8_0, to.v8_0),
+ nexttoward(from.v8_8, to.v8_8));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="short"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short nexttoward(short from, short to)
+ {
+ return (short)(from - compareto(from, to));
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.short2"/>, where each component is the next closest <see cref="short"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short2 nexttoward(short2 from, short2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi16(from, to, 2);
+ }
+ else
+ {
+ return new short2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.short3"/>, where each component is the next closest <see cref="short"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short3 nexttoward(short3 from, short3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi16(from, to, 3);
+ }
+ else
+ {
+ return new short3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.short4"/>, where each component is the next closest <see cref="short"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short4 nexttoward(short4 from, short4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi16(from, to, 4);
+ }
+ else
+ {
+ return new short4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.short8"/>, where each component is the next closest <see cref="short"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short8 nexttoward(short8 from, short8 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi16(from, to, 8);
+ }
+ else
+ {
+ return new short8(nexttoward(from.x0, to.x0),
+ nexttoward(from.x1, to.x1),
+ nexttoward(from.x2, to.x2),
+ nexttoward(from.x3, to.x3),
+ nexttoward(from.x4, to.x4),
+ nexttoward(from.x5, to.x5),
+ nexttoward(from.x6, to.x6),
+ nexttoward(from.x7, to.x7));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.short16"/>, where each component is the next closest <see cref="short"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static short16 nexttoward(short16 from, short16 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epi16(from, to);
+ }
+ else
+ {
+ return new short16(nexttoward(from.v8_0, to.v8_0),
+ nexttoward(from.v8_8, to.v8_8));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="uint"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint nexttoward(uint from, uint to)
+ {
+ return from - (uint)compareto(from, to);
+ }
+
+ /// <summary>  Returns a <see cref="uint2"/>, where each component is the next closest <see cref="uint"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint2 nexttoward(uint2 from, uint2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt2(Xse.toward_epu32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 2));
+ }
+ else
+ {
+ return new uint2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="uint3"/>, where each component is the next closest <see cref="uint"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint3 nexttoward(uint3 from, uint3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt3(Xse.toward_epu32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 3));
+ }
+ else
+ {
+ return new uint3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="uint4"/>, where each component is the next closest <see cref="uint"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint4 nexttoward(uint4 from, uint4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToUInt4(Xse.toward_epu32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 4));
+ }
+ else
+ {
+ return new uint4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.uint8"/>, where each component is the next closest <see cref="uint"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint8 nexttoward(uint8 from, uint8 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epu32(from, to);
+ }
+ else
+ {
+ return new uint8(nexttoward(from.v4_0, to.v4_0),
+ nexttoward(from.v4_4, to.v4_4));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="int"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int nexttoward(int from, int to)
+ {
+ return from - compareto(from, to);
+ }
+
+ /// <summary>  Returns a <see cref="int2"/>, where each component is the next closest <see cref="int"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int2 nexttoward(int2 from, int2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt2(Xse.toward_epi32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 2));
+ }
+ else
+ {
+ return new int2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="int3"/>, where each component is the next closest <see cref="int"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int3 nexttoward(int3 from, int3 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt3(Xse.toward_epi32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 3));
+ }
+ else
+ {
+ return new int3(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="int4"/>, where each component is the next closest <see cref="int"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int4 nexttoward(int4 from, int4 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToInt4(Xse.toward_epi32(RegisterConversion.ToV128(from), RegisterConversion.ToV128(to), 4));
+ }
+ else
+ {
+ return new int4(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y),
+ nexttoward(from.z, to.z),
+ nexttoward(from.w, to.w));
+ }
+ }
+
+ /// <summary>  Returns a <see cref="MaxMath.int8"/>, where each component is the next closest <see cref="int"/> to the corresponding '<paramref name="from"/>' component in the direction of the corresponding '<paramref name="to"/>' component.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int8 nexttoward(int8 from, int8 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epi32(from, to);
+ }
+ else
+ {
+ return new int8(nexttoward(from.v4_0, to.v4_0),
+ nexttoward(from.v4_4, to.v4_4));
+ }
+ }
+
+
+ /// <summary>  Returns the next closest <see cref="ulong"/> to '<paramref name="from"/>' in the direction of '<paramref name="to"/>'.  </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong nexttoward(ulong from, ulong to)
+ {
+ return from - (ulong)compareto(from, to);
+ }
+
+ /// Returns a ulong2, where each component is the next closest ulong to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong2 nexttoward(ulong2 from, ulong2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epu64(from, to);
+ }
+ else
+ {
+ return new ulong2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// Returns a ulong3, where each component is the next closest ulong to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong3 nexttoward(ulong3 from, ulong3 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epu64(from, to, 3);
+ }
+ else
+ {
+ return new ulong3(nexttoward(from.xy, to.xy),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// Returns a ulong4, where each component is the next closest ulong to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong4 nexttoward(ulong4 from, ulong4 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epu64(from, to, 4);
+ }
+ else
+ {
+ return new ulong4(nexttoward(from.xy, to.xy),
+ nexttoward(from.zw, to.zw));
+ }
+ }
+
+
+ /// Returns the next closest long to 'from' in the direction of 'to'.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long nexttoward(long from, long to)
+ {
+ return from - compareto(from, to);
+ }
+
+ /// Returns a long2, where each component is the next closest long to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long2 nexttoward(long2 from, long2 to)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_epi64(from, to);
+ }
+ else
+ {
+ return new long2(nexttoward(from.x, to.x),
+ nexttoward(from.y, to.y));
+ }
+ }
+
+ /// Returns a long3, where each component is the next closest long to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long3 nexttoward(long3 from, long3 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epi64(from, to, 3);
+ }
+ else
+ {
+ return new long3(nexttoward(from.xy, to.xy),
+ nexttoward(from.z, to.z));
+ }
+ }
+
+ /// Returns a long4, where each component is the next closest long to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long4 nexttoward(long4 from, long4 to)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_epi64(from, to, 4);
+ }
+ else
+ {
+ return new long4(nexttoward(from.xy, to.xy),
+ nexttoward(from.zw, to.zw));
+ }
+ }
+
+
+ /// Returns the next closest quarter to 'from' in the direction of 'to'.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either quarter.PositiveInfinity, quarter.NegativeInfinity or quarter.NaN, as well as any 'to' that is quarter.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter nexttoward(quarter from, quarter to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return asquarter(Xse.toward_pq(Sse2.cvtsi32_si128(from.value),
+ Sse2.cvtsi32_si128(to.value),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Byte0);
+ }
+ else
+ {
+ int isGreater = -tobyte(from > to);
+ int __x = assbyte(from);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int zeroMask = -tobyte(__x != unchecked((int)0xFFFF_FF80)) ^ (-tobyte(__x == 0) & isGreater);
+ summand = 1 | ((__x >> 31) & zeroMask);
+ __x = (__x & (isGreater | zeroMask)) | andnot(0x82 & isGreater, zeroMask);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int xNotInf = -tobyte((__x & quarter.SIGNALING_EXPONENT) != quarter.SIGNALING_EXPONENT);
+ int eitherNaN = -tobyte(((__x & 0x7F) > quarter.SIGNALING_EXPONENT) | ((assbyte(to) & 0x7F) > quarter.SIGNALING_EXPONENT));
+ summand = andnot(summand & xNotInf, eitherNaN);
+ __x |= eitherNaN;
+ }
+
+ summand = (summand ^ isGreater) - isGreater;
+ summand &= -tobyte(from != to);
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return asquarter((byte)(__x - summand));
+ }
+ else
+ {
+ return asquarter((byte)(__x + summand));
+ }
+ }
+ }
+
+ /// Returns a quarter2, where each component is the next closest quarter to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either quarter.PositiveInfinity, quarter.NegativeInfinity or quarter.NaN, as well as any 'to' that is quarter.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter2 nexttoward(quarter2 from, quarter2 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_pq(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2);
+ }
+ else
+ {
+ return new quarter2(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations));
+ }
+ }
+
+ /// Returns a quarter3, where each component is the next closest quarter to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either quarter.PositiveInfinity, quarter.NegativeInfinity or quarter.NaN, as well as any 'to' that is quarter.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter3 nexttoward(quarter3 from, quarter3 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_pq(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3);
+ }
+ else
+ {
+ return new quarter3(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations));
+ }
+ }
+
+ /// Returns a quarter4, where each component is the next closest quarter to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either quarter.PositiveInfinity, quarter.NegativeInfinity or quarter.NaN, as well as any 'to' that is quarter.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter4 nexttoward(quarter4 from, quarter4 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_pq(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4);
+ }
+ else
+ {
+ return new quarter4(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations),
+ nexttoward(from.w, to.w, optimizations));
+ }
+ }
+
+ /// Returns a quarter8, where each component is the next closest quarter to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either quarter.PositiveInfinity, quarter.NegativeInfinity or quarter.NaN, as well as any 'to' that is quarter.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static quarter8 nexttoward(quarter8 from, quarter8 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_pq(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0));
+ }
+ else
+ {
+ return new quarter8(nexttoward(from.x0, to.x0, optimizations),
+ nexttoward(from.x1, to.x1, optimizations),
+ nexttoward(from.x2, to.x2, optimizations),
+ nexttoward(from.x3, to.x3, optimizations),
+ nexttoward(from.x4, to.x4, optimizations),
+ nexttoward(from.x5, to.x5, optimizations),
+ nexttoward(from.x6, to.x6, optimizations),
+ nexttoward(from.x7, to.x7, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest half to 'from' in the direction of 'to'.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN, as well as any 'to' that is half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half nexttoward(half from, half to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return ashalf(Xse.toward_ph(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).UShort0);
+ }
+ else
+ {
+ int isGreater = -tobyte(from.IsGreaterThan(to));
+ int __x = asshort(from);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int zeroMask = -tobyte(__x != unchecked((int)0xFFFF_8000)) ^ (-tobyte(__x == 0) & isGreater);
+ summand = 1 | ((__x >> 31) & zeroMask);
+ __x = (__x & (isGreater | zeroMask)) | andnot(unchecked((int)0xFFFF_8002) & isGreater, zeroMask);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int xNotInf = -tobyte((__x & F16_SIGNALING_EXPONENT) != F16_SIGNALING_EXPONENT);
+ int eitherNaN = -tobyte(((__x & 0x7FFF) > F16_SIGNALING_EXPONENT) | ((asushort(to) & 0x7FFF) > F16_SIGNALING_EXPONENT));
+ summand = andnot(summand & xNotInf, eitherNaN);
+ __x |= eitherNaN;
+ }
+
+ summand = (summand ^ isGreater) - isGreater;
+ summand &= -tobyte(from.IsNotEqualTo(to));
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return ashalf((ushort)(__x - summand));
+ }
+ else
+ {
+ return ashalf((ushort)(__x + summand));
+ }
+ }
+ }
+
+ /// Returns a half2, where each component is the next closest half to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN, as well as any 'to' that is half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half2 nexttoward(half2 from, half2 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf2(Xse.toward_ph(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new half2(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations));
+ }
+ }
+
+ /// Returns a half3, where each component is the next closest half to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN, as well as any 'to' that is half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half3 nexttoward(half3 from, half3 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf3(Xse.toward_ph(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new half3(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations));
+ }
+ }
+
+ /// Returns a half4, where each component is the next closest half to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN, as well as any 'to' that is half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half4 nexttoward(half4 from, half4 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToHalf4(Xse.toward_ph(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new half4(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations),
+ nexttoward(from.w, to.w, optimizations));
+ }
+ }
+
+ /// Returns a half8, where each component is the next closest half to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either half.PositiveInfinity, half.NegativeInfinity or half.NaN, as well as any 'to' that is half.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static half8 nexttoward(half8 from, half8 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_ph(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0));
+ }
+ else
+ {
+ return new half8(nexttoward(from.x0, to.x0, optimizations),
+ nexttoward(from.x1, to.x1, optimizations),
+ nexttoward(from.x2, to.x2, optimizations),
+ nexttoward(from.x3, to.x3, optimizations),
+ nexttoward(from.x4, to.x4, optimizations),
+ nexttoward(from.x5, to.x5, optimizations),
+ nexttoward(from.x6, to.x6, optimizations),
+ nexttoward(from.x7, to.x7, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest float to 'from' in the direction of 'to'.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either float.PositiveInfinity, float.NegativeInfinity or float.NaN, as well as any 'to' that is float.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float nexttoward(float from, float to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_ps(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 1).Float0;
+ }
+ else
+ {
+ int isGreater = -tobyte(from > to);
+ int __x = math.asint(from);
+ int summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 31);
+ }
+ }
+ else
+ {
+ int zeroMask = -tobyte(__x != 1 << 31) ^ (-tobyte(__x == 0) & isGreater);
+ summand = 1 | ((__x >> 31) & zeroMask);
+ __x = (__x & (isGreater | zeroMask)) | (int)andnot(0x8000_0002u & isGreater, (uint)zeroMask);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ int xNotInf = -tobyte((__x & F32_SIGNALING_EXPONENT) != F32_SIGNALING_EXPONENT);
+ int eitherNaN = -tobyte(((__x & 0x7FFF_FFFF) > F32_SIGNALING_EXPONENT) | ((math.asint(to) & 0x7FFF_FFFF) > F32_SIGNALING_EXPONENT));
+ summand = andnot(summand & xNotInf, eitherNaN);
+ __x |= eitherNaN;
+ }
+
+ summand = (summand ^ isGreater) - isGreater;
+ summand &= -tobyte(from != to);
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asfloat(__x - summand);
+ }
+ else
+ {
+ return math.asfloat(__x + summand);
+ }
+ }
+ }
+
+ /// Returns a float2, where each component is the next closest float to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either float.PositiveInfinity, float.NegativeInfinity or float.NaN, as well as any 'to' that is float.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float2 nexttoward(float2 from, float2 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat2(Xse.toward_ps(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 2));
+ }
+ else
+ {
+ return new float2(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations));
+ }
+ }
+
+ /// Returns a float3, where each component is the next closest float to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either float.PositiveInfinity, float.NegativeInfinity or float.NaN, as well as any 'to' that is float.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float3 nexttoward(float3 from, float3 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat3(Xse.toward_ps(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new float3(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations));
+ }
+ }
+
+ /// Returns a float4, where each component is the next closest float to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either float.PositiveInfinity, float.NegativeInfinity or float.NaN, as well as any 'to' that is float.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float4 nexttoward(float4 from, float4 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToFloat4(Xse.toward_ps(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new float4(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations),
+ nexttoward(from.z, to.z, optimizations),
+ nexttoward(from.w, to.w, optimizations));
+ }
+ }
+
+ /// Returns a float8, where each component is the next closest float to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either float.PositiveInfinity, float.NegativeInfinity or float.NaN, as well as any 'to' that is float.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float8 nexttoward(float8 from, float8 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return Xse.mm256_toward_ps(from,
+ to,
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0));
+ }
+ else
+ {
+ return new float8(nexttoward(from.v4_0, to.v4_0, optimizations),
+ nexttoward(from.v4_4, to.v4_4, optimizations));
+ }
+ }
+
+
+ /// Returns the next closest double to 'from' in the direction of 'to'.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either double.PositiveInfinity, double.NegativeInfinity or double.NaN, as well as any 'to' that is double.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double nexttoward(double from, double to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return Xse.toward_pd(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)).Double0;
+ }
+ else
+ {
+ long isGreater = -(long)tobyte(from > to);
+ long __x = math.aslong(from);
+ long summand;
+
+ if (optimizations.Promises(Promise.NonZero))
+ {
+ if (optimizations.Promises(Promise.Positive) | optimizations.Promises(Promise.Negative))
+ {
+ summand = 1;
+ }
+ else
+ {
+ summand = 1 | (__x >> 63);
+ }
+ }
+ else
+ {
+ long zeroMask = -(long)tobyte(__x != 1L << 63) ^ (-(long)tobyte(__x == 0) & isGreater);
+ summand = 1 | ((__x >> 63) & zeroMask);
+ __x = (__x & (isGreater | zeroMask)) | (long)andnot(0x8000_0000_0000_0002ul & (ulong)isGreater, (ulong)zeroMask);
+ }
+
+ if (!optimizations.Promises(Promise.Unsafe0))
+ {
+ long xNotInf = -(long)tobyte((__x & F64_SIGNALING_EXPONENT) != F64_SIGNALING_EXPONENT);
+ long eitherNaN = -(long)tobyte(((__x & 0x7FFF_FFFF_FFFF_FFFF) > F64_SIGNALING_EXPONENT) | ((math.aslong(to) & 0x7FFF_FFFF_FFFF_FFFF) > F64_SIGNALING_EXPONENT));
+ summand = andnot(summand & xNotInf, eitherNaN);
+ __x |= eitherNaN;
+ }
+
+ summand = (summand ^ isGreater) - isGreater;
+ summand &= -(long)tobyte(from != to);
+
+ if (optimizations.Promises(Promise.Negative))
+ {
+ return math.asdouble(__x - summand);
+ }
+ else
+ {
+ return math.asdouble(__x + summand);
+ }
+ }
+ }
+
+ /// Returns a double2, where each component is the next closest double to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either double.PositiveInfinity, double.NegativeInfinity or double.NaN, as well as any 'to' that is double.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double2 nexttoward(double2 from, double2 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Sse2.IsSse2Supported)
+ {
+ return RegisterConversion.ToDouble2(Xse.toward_pd(RegisterConversion.ToV128(from),
+ RegisterConversion.ToV128(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0)));
+ }
+ else
+ {
+ return new double2(nexttoward(from.x, to.x, optimizations),
+ nexttoward(from.y, to.y, optimizations));
+ }
+ }
+
+ /// Returns a double3, where each component is the next closest double to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either double.PositiveInfinity, double.NegativeInfinity or double.NaN, as well as any 'to' that is double.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double3 nexttoward(double3 from, double3 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble3(Xse.mm256_toward_pd(RegisterConversion.ToV256(from),
+ RegisterConversion.ToV256(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 3));
+ }
+ else
+ {
+ return new double3(nexttoward(from.xy, to.xy, optimizations),
+ nexttoward(from.z, to.z, optimizations));
+ }
+ }
+
+ /// Returns a double4, where each component is the next closest double to the corresponding 'from' component in the direction of the corresponding 'to' component.
+ /// <remarks>
+ /// A "Promise" with its Promise.NonZero flag set returns incorrect results for any 'from' that is negative 0.
+ /// A "Promise" with its Promise.Positive flag set returns incorrect results for any 'from' that is negative or 0.
+ /// A "Promise" with its Promise.Negative flag set returns incorrect results for any 'from' that is positive or 0.
+ /// A "Promise" with its Promise.Unsafe0 flag set returns incorrect results for any 'from' that is either double.PositiveInfinity, double.NegativeInfinity or double.NaN, as well as any 'to' that is double.NaN.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double4 nexttoward(double4 from, double4 to, Promise optimizations = Promise.Nothing)
+ {
+ if (Avx2.IsAvx2Supported)
+ {
+ return RegisterConversion.ToDouble4(Xse.mm256_toward_pd(RegisterConversion.ToV256(from),
+ RegisterConversion.ToV256(to),
+ promiseNonZero: optimizations.Promises(Promise.NonZero),
+ promiseNegative: optimizations.Promises(Promise.Negative),
+ promisePositive: optimizations.Promises(Promise.Positive),
+ promiseNotNanInf: optimizations.Promises(Promise.Unsafe0),
+ elements: 4));
+ }
+ else
+ {
+ return new double4(nexttoward(from.xy, to.xy, optimizations),
+ nexttoward(from.zw, to.zw, optimizations));
+ }
+ }
+ }
+}
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs.meta b/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs.meta
new file mode 100644
index 0000000..ad40c7f
--- /dev/null
+++ b/Runtime/Math Lib/Functions/Arithmetic/Next Toward.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: f896ced21b6058846a156f0e0f88957d
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Add.cs b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Add.cs
index f7f8d7d..febc3f6 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Add.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Add.cs
@@ -16,6 +16,14 @@ public static v128 adds_epu32(v128 a, v128 b, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
+ if (constexpr.ALL_EQ_EPU32(b, 1, elements))
+ {
+ v128 ALL_ONES = setall_si128();
+ v128 isMaxValue = Sse2.cmpeq_epi32(a, ALL_ONES);
+
+ return Sse2.sub_epi32(a, Sse2.andnot_si128(isMaxValue, ALL_ONES));
+ }
+
v128 sum = Sse2.add_epi32(a, b);
v128 overflowMask = cmpgt_epu32(b, sum, elements);
@@ -29,6 +37,14 @@ public static v256 mm256_adds_epu32(v256 a, v256 b)
{
if (Avx2.IsAvx2Supported)
{
+ if (constexpr.ALL_EQ_EPU32(b, 1))
+ {
+ v256 ALL_ONES = mm256_setall_si256();
+ v256 isMaxValue = Avx2.mm256_cmpeq_epi32(a, ALL_ONES);
+
+ return Avx2.mm256_sub_epi32(a, Avx2.mm256_andnot_si256(isMaxValue, ALL_ONES));
+ }
+
v256 sum = Avx2.mm256_add_epi32(a, b);
v256 overflowMask = mm256_cmpgt_epu32(b, sum);
@@ -42,6 +58,14 @@ public static v128 adds_epu64(v128 a, v128 b)
{
if (Sse2.IsSse2Supported)
{
+ if (constexpr.ALL_EQ_EPU64(b, 1))
+ {
+ v128 ALL_ONES = setall_si128();
+ v128 isMaxValue = cmpeq_epi64(a, ALL_ONES);
+
+ return Sse2.sub_epi64(a, Sse2.andnot_si128(isMaxValue, ALL_ONES));
+ }
+
v128 sum = Sse2.add_epi64(a, b);
v128 overflowMask = cmpgt_epu64(b, sum);
@@ -51,10 +75,18 @@ public static v128 adds_epu64(v128 a, v128 b)
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_adds_epu64(v256 a, v256 b)
+ public static v256 mm256_adds_epu64(v256 a, v256 b, byte elements = 4)
{
if (Avx2.IsAvx2Supported)
{
+ if (constexpr.ALL_EQ_EPU64(b, 1, elements))
+ {
+ v256 ALL_ONES = mm256_setall_si256();
+ v256 isMaxValue = Avx2.mm256_cmpeq_epi64(a, ALL_ONES);
+
+ return Avx2.mm256_sub_epi64(a, Avx2.mm256_andnot_si256(isMaxValue, ALL_ONES));
+ }
+
v256 sum = Avx2.mm256_add_epi64(a, b);
v256 overflowMask = mm256_cmpgt_epu64(a, sum);
@@ -64,19 +96,35 @@ public static v256 mm256_adds_epu64(v256 a, v256 b)
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 adds_epi32(v128 a, v128 b)
+ public static v128 adds_epi32(v128 a, v128 b, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
v128 MAX_VALUE = new v128(int.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI32(b, -1, elements))
+ {
+ return Sse2.add_epi32(a, Sse2.cmpgt_epi32(a, Sse2.set1_epi32(int.MinValue)));
+ }
+ if (constexpr.ALL_EQ_EPI32(b, 1, elements))
+ {
+ return Sse2.sub_epi32(a, Sse2.cmpgt_epi32(MAX_VALUE, a));
+ }
+
- v128 x_negative_mask = Sse2.srai_epi32(a, 31);
- v128 ret = Sse2.sub_epi32(MAX_VALUE, x_negative_mask);
- v128 cmp = Sse2.sub_epi32(ret, a);
-
- x_negative_mask = Sse2.cmpeq_epi32(x_negative_mask, Sse2.cmpgt_epi32(b, cmp));
-
- return blendv_si128(ret, Sse2.add_epi32(a, b), x_negative_mask);
+ v128 result = Sse2.add_epi32(a, b);
+ v128 overflow = Sse2.add_epi32(MAX_VALUE, Sse2.srli_epi32(a, 31));
+ v128 selectResult = ternarylogic_si128(overflow, b, result, TernaryOperation.OxBD);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ return Sse4_1.blendv_ps(overflow, result, selectResult);
+ }
+ else
+ {
+ return blendv_si128(overflow, result, Sse2.srai_epi32(selectResult, 31));
+ }
}
else throw new IllegalInstructionException();
}
@@ -87,14 +135,23 @@ public static v256 mm256_adds_epi32(v256 a, v256 b)
if (Avx2.IsAvx2Supported)
{
v256 MAX_VALUE = new v256(int.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI32(b, -1))
+ {
+ return Avx2.mm256_add_epi32(a, Avx2.mm256_cmpgt_epi32(a, Avx.mm256_set1_epi32(int.MinValue)));
+ }
+ if (constexpr.ALL_EQ_EPI32(b, 1))
+ {
+ return Avx2.mm256_sub_epi32(a, Avx2.mm256_cmpgt_epi32(MAX_VALUE, a));
+ }
+
- v256 x_negative_mask = Avx2.mm256_srai_epi32(a, 31);
- v256 ret = Avx2.mm256_sub_epi32(MAX_VALUE, x_negative_mask);
- v256 cmp = Avx2.mm256_sub_epi32(ret, a);
-
- x_negative_mask = Avx2.mm256_cmpeq_epi32(x_negative_mask, Avx2.mm256_cmpgt_epi32(b, cmp));
-
- return mm256_blendv_si256(ret, Avx2.mm256_add_epi32(a, b), x_negative_mask);
+ v256 result = Avx2.mm256_add_epi32(a, b);
+ v256 overflow = Avx2.mm256_add_epi32(MAX_VALUE, Avx2.mm256_srli_epi32(a, 31));
+ v256 selectResult = mm256_ternarylogic_si256(overflow, b, result, TernaryOperation.OxBD);
+
+ return Avx.mm256_blendv_ps(overflow, result, selectResult);
}
else throw new IllegalInstructionException();
}
@@ -105,14 +162,30 @@ public static v128 adds_epi64(v128 a, v128 b)
if (Sse2.IsSse2Supported)
{
v128 MAX_VALUE = new v128(long.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI64(b, -1))
+ {
+ return Sse2.add_epi64(a, cmpgt_epi64(a, Sse2.set1_epi64x(long.MinValue)));
+ }
+ if (constexpr.ALL_EQ_EPI64(b, 1))
+ {
+ return Sse2.sub_epi64(a, cmpgt_epi64(MAX_VALUE, a));
+ }
+
- v128 x_negative_mask = srai_epi64(a, 63);
- v128 ret = Sse2.sub_epi64(MAX_VALUE, x_negative_mask);
- v128 cmp = Sse2.sub_epi64(ret, a);
-
- x_negative_mask = Sse2.cmpeq_epi32(x_negative_mask, cmpgt_epi64(b, cmp));
-
- return blendv_si128(ret, Sse2.add_epi64(a, b), x_negative_mask);
+ v128 result = Sse2.add_epi64(a, b);
+ v128 overflow = Sse2.add_epi64(MAX_VALUE, Sse2.srli_epi64(a, 63));
+ v128 selectResult = ternarylogic_si128(overflow, b, result, TernaryOperation.OxBD);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ return Sse4_1.blendv_pd(overflow, result, selectResult);
+ }
+ else
+ {
+ return blendv_si128(overflow, result, srai_epi64(selectResult, 63));
+ }
}
else throw new IllegalInstructionException();
}
@@ -124,13 +197,22 @@ public static v256 mm256_adds_epi64(v256 a, v256 b, byte elements = 4)
{
v256 MAX_VALUE = new v256(long.MaxValue);
- v256 x_negative_mask = mm256_srai_epi64(a, 63);
- v256 ret = Avx2.mm256_sub_epi64(MAX_VALUE, x_negative_mask);
- v256 cmp = Avx2.mm256_sub_epi64(ret, a);
+
+ if (constexpr.ALL_EQ_EPI64(b, -1, elements))
+ {
+ return Avx2.mm256_add_epi64(a, Avx2.mm256_cmpgt_epi64(a, Avx.mm256_set1_epi64x(long.MinValue)));
+ }
+ if (constexpr.ALL_EQ_EPI64(b, 1, elements))
+ {
+ return Avx2.mm256_sub_epi64(a, Avx2.mm256_cmpgt_epi64(MAX_VALUE, a));
+ }
- x_negative_mask = Avx2.mm256_cmpeq_epi64(x_negative_mask, Xse.mm256_cmpgt_epi64(b, cmp, elements));
-
- return mm256_blendv_si256(ret, Avx2.mm256_add_epi64(a, b), x_negative_mask);
+
+ v256 result = Avx2.mm256_add_epi64(a, b);
+ v256 overflow = Avx2.mm256_add_epi64(MAX_VALUE, Avx2.mm256_srli_epi64(a, 63));
+ v256 selectResult = mm256_ternarylogic_si256(overflow, b, result, TernaryOperation.OxBD);
+
+ return Avx.mm256_blendv_pd(overflow, result, selectResult);
}
else throw new IllegalInstructionException();
}
@@ -148,6 +230,14 @@ public static UInt128 addsaturated(UInt128 x, UInt128 y)
{
return x + y;
}
+ if (Xse.constexpr.IS_TRUE(x == 1))
+ {
+ return nextgreater(y);
+ }
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextgreater(x);
+ }
UInt128 res = x + y;
bool overflow = res < x;
@@ -159,15 +249,15 @@ public static UInt128 addsaturated(UInt128 x, UInt128 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Int128 addsaturated(Int128 x, Int128 y)
{
- Int128 ret = Int128.MaxValue + (x.hi64 >> 63);
- Int128 cmp = ret - x;
-
- if ((x < 0) == (y > cmp))
- {
- ret = x + y;
- }
-
- return ret;
+ Int128 result = x + y;
+ Int128 overflow = (x.hi64 >> 63) + Int128.MaxValue;
+
+ if ((long)((overflow ^ y) | ~(y ^ result)).hi64 >= 0)
+ {
+ result = overflow;
+ }
+
+ return result;
}
@@ -179,6 +269,14 @@ public static byte addsaturated(byte x, byte y)
{
return (byte)(x + y);
}
+ if (Xse.constexpr.IS_TRUE(x == 1))
+ {
+ return nextgreater(y);
+ }
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextgreater(x);
+ }
byte temp = (byte)(x + y);
@@ -307,6 +405,14 @@ public static ushort addsaturated(ushort x, ushort y)
{
return (ushort)(x + y);
}
+ if (Xse.constexpr.IS_TRUE(x == 1))
+ {
+ return nextgreater(y);
+ }
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextgreater(x);
+ }
ushort temp = (ushort)(x + y);
@@ -406,6 +512,14 @@ public static uint addsaturated(uint x, uint y)
{
return x + y;
}
+ if (Xse.constexpr.IS_TRUE(x == 1))
+ {
+ return nextgreater(y);
+ }
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextgreater(x);
+ }
uint temp = x + y;
@@ -418,7 +532,7 @@ public static uint2 addsaturated(uint2 x, uint2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
+ return RegisterConversion.ToUInt2(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -433,7 +547,7 @@ public static uint3 addsaturated(uint3 x, uint3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
+ return RegisterConversion.ToUInt3(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -449,7 +563,7 @@ public static uint4 addsaturated(uint4 x, uint4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
+ return RegisterConversion.ToUInt4(Xse.adds_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
@@ -484,6 +598,14 @@ public static ulong addsaturated(ulong x, ulong y)
{
return x + y;
}
+ if (Xse.constexpr.IS_TRUE(x == 1))
+ {
+ return nextgreater(y);
+ }
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextgreater(x);
+ }
ulong temp = x + y;
@@ -511,7 +633,7 @@ public static ulong3 addsaturated(ulong3 x, ulong3 y)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_adds_epu64(x, y);
+ return Xse.mm256_adds_epu64(x, y, 3);
}
else
{
@@ -526,7 +648,7 @@ public static ulong4 addsaturated(ulong4 x, ulong4 y)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_adds_epu64(x, y);
+ return Xse.mm256_adds_epu64(x, y, 4);
}
else
{
@@ -786,7 +908,7 @@ public static int2 addsaturated(int2 x, int2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt2(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -801,7 +923,7 @@ public static int3 addsaturated(int3 x, int3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt3(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -817,7 +939,7 @@ public static int4 addsaturated(int4 x, int4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt4(Xse.adds_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
@@ -848,15 +970,15 @@ public static int8 addsaturated(int8 x, int8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long addsaturated(long x, long y)
{
- long ret = long.MaxValue - (x >> 63);
- long cmp = ret - x;
-
- if ((x < 0) == (y > cmp))
- {
- ret = x + y;
- }
-
- return ret;
+ long result = x + y;
+ long overflow = (long)((ulong)x >> 63) + long.MaxValue;
+
+ if ((long)((overflow ^ y) | ~(y ^ result)) >= 0)
+ {
+ result = overflow;
+ }
+
+ return result;
}
/// Adds each component of and and returns the results, which are clamped to if overflow occurs or if underflow occurs.
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Cast.cs b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Cast.cs
index a311d71..d91fcd9 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Cast.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Cast.cs
@@ -3059,7 +3059,7 @@ public static uint2 touintsaturated(long2 x)
{
v128 clamped = clamp(x, uint.MinValue, uint.MaxValue);
- return RegisterConversion.ToType(Xse.cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt2(Xse.cvtepi64_epi32(clamped));
}
else
{
@@ -3075,7 +3075,7 @@ public static uint3 touintsaturated(long3 x)
{
v256 clamped = clamp(x, uint.MinValue, uint.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt3(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3091,7 +3091,7 @@ public static uint4 touintsaturated(long4 x)
{
v256 clamped = clamp(x, uint.MinValue, uint.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt4(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3127,7 +3127,7 @@ public static uint2 touintsaturated(ulong2 x)
{
v128 clamped = min(x, uint.MaxValue);
- return RegisterConversion.ToType(Xse.cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt2(Xse.cvtepi64_epi32(clamped));
}
else
{
@@ -3143,7 +3143,7 @@ public static uint3 touintsaturated(ulong3 x)
{
v256 clamped = min(x, uint.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt3(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3159,7 +3159,7 @@ public static uint4 touintsaturated(ulong4 x)
{
v256 clamped = min(x, uint.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToUInt4(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3319,7 +3319,7 @@ public static uint8 touintsaturated(float8 x)
v128 lo = Avx.mm256_castps256_ps128(result);
v128 hi = Avx.mm256_extractf128_ps(result, 1);
- return new uint8(RegisterConversion.ToType(lo), RegisterConversion.ToType(hi));
+ return new uint8(RegisterConversion.ToUInt4(lo), RegisterConversion.ToUInt4(hi));
}
else
{
@@ -3458,7 +3458,7 @@ public static int2 tointsaturated(long2 x)
{
v128 clamped = clamp(x, int.MinValue, int.MaxValue);
- return RegisterConversion.ToType(Xse.cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt2(Xse.cvtepi64_epi32(clamped));
}
else
{
@@ -3474,7 +3474,7 @@ public static int3 tointsaturated(long3 x)
{
v256 clamped = clamp(x, int.MinValue, int.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt3(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3490,7 +3490,7 @@ public static int4 tointsaturated(long4 x)
{
v256 clamped = clamp(x, int.MinValue, int.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt4(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3526,7 +3526,7 @@ public static int2 tointsaturated(ulong2 x)
{
v128 clamped = min(x, int.MaxValue);
- return RegisterConversion.ToType(Xse.cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt2(Xse.cvtepi64_epi32(clamped));
}
else
{
@@ -3542,7 +3542,7 @@ public static int3 tointsaturated(ulong3 x)
{
v256 clamped = min(x, int.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt3(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3558,7 +3558,7 @@ public static int4 tointsaturated(ulong4 x)
{
v256 clamped = min(x, int.MaxValue);
- return RegisterConversion.ToType(Xse.mm256_cvtepi64_epi32(clamped));
+ return RegisterConversion.ToInt4(Xse.mm256_cvtepi64_epi32(clamped));
}
else
{
@@ -3718,7 +3718,7 @@ public static int8 tointsaturated(float8 x)
v128 lo = Avx.mm256_castps256_ps128(result);
v128 hi = Avx.mm256_extractf128_ps(result, 1);
- return new int8(RegisterConversion.ToType(lo), RegisterConversion.ToType(hi));
+ return new int8(RegisterConversion.ToInt4(lo), RegisterConversion.ToInt4(hi));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Divide.cs b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Divide.cs
index 5534db7..7a30c70 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Divide.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Divide.cs
@@ -303,7 +303,7 @@ public static int2 divsaturated(int2 x, int2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
+ return RegisterConversion.ToInt2(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -318,7 +318,7 @@ public static int3 divsaturated(int3 x, int3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
+ return RegisterConversion.ToInt3(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -334,7 +334,7 @@ public static int4 divsaturated(int4 x, int4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
+ return RegisterConversion.ToInt4(Xse.divs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Multiply.cs b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Multiply.cs
index 2e2ebcf..6a58114 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Multiply.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Multiply.cs
@@ -1476,7 +1476,7 @@ public static uint2 mulsaturated(uint2 x, uint2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToUInt2(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
}
else
{
@@ -1491,7 +1491,7 @@ public static uint3 mulsaturated(uint3 x, uint3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToUInt3(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
}
else
{
@@ -1507,7 +1507,7 @@ public static uint4 mulsaturated(uint4 x, uint4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToUInt4(Xse.muls_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
}
else
{
@@ -1850,7 +1850,7 @@ public static int2 mulsaturated(int2 x, int2 y)
{
if (Sse4_1.IsSse41Supported)
{
- return RegisterConversion.ToType(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
+ return RegisterConversion.ToInt2(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -1865,7 +1865,7 @@ public static int3 mulsaturated(int3 x, int3 y)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
+ return RegisterConversion.ToInt3(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -1880,7 +1880,7 @@ public static int4 mulsaturated(int4 x, int4 y)
{
if (Avx2.IsAvx2Supported)
{
- return RegisterConversion.ToType(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
+ return RegisterConversion.ToInt4(Xse.muls_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Subtract.cs b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Subtract.cs
index ab2b33f..4f991cc 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Subtract.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Saturation/Saturation Subtract.cs
@@ -16,6 +16,12 @@ public static v128 subs_epu32(v128 a, v128 b, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
+ if (constexpr.ALL_EQ_EPU32(b, 1, elements))
+ {
+ return Sse2.add_epi32(a, not_si128(Sse2.cmpeq_epi32(a, Sse2.setzero_si128())));
+ }
+
+
return Sse2.andnot_si128(cmpge_epu32(b, a, elements), Sse2.sub_epi32(a, b));
}
else throw new IllegalInstructionException();
@@ -26,6 +32,12 @@ public static v256 mm256_subs_epu32(v256 a, v256 b)
{
if (Avx2.IsAvx2Supported)
{
+ if (constexpr.ALL_EQ_EPU32(b, 1))
+ {
+ return Avx2.mm256_add_epi32(a, mm256_not_si256(Avx2.mm256_cmpeq_epi32(a, Avx.mm256_setzero_si256())));
+ }
+
+
return Avx2.mm256_andnot_si256(mm256_cmpge_epu32(b, a), Avx2.mm256_sub_epi32(a, b));
}
else throw new IllegalInstructionException();
@@ -36,35 +48,63 @@ public static v128 subs_epu64(v128 a, v128 b)
{
if (Sse2.IsSse2Supported)
{
+ if (constexpr.ALL_EQ_EPU64(b, 1))
+ {
+ return Sse2.add_epi64(a, not_si128(cmpeq_epi64(a, Sse2.setzero_si128())));
+ }
+
+
return Sse2.andnot_si128(cmpgt_epu64(b, a), Sse2.sub_epi64(a, b));
}
else throw new IllegalInstructionException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v256 mm256_subs_epu64(v256 a, v256 b)
+ public static v256 mm256_subs_epu64(v256 a, v256 b, byte elements = 4)
{
if (Avx2.IsAvx2Supported)
{
+ if (constexpr.ALL_EQ_EPU64(b, 1, elements))
+ {
+ return Avx2.mm256_add_epi64(a, mm256_not_si256(Avx2.mm256_cmpeq_epi64(a, Avx.mm256_setzero_si256())));
+ }
+
+
return Avx2.mm256_andnot_si256(mm256_cmpgt_epu64(b, a), Avx2.mm256_sub_epi64(a, b));
}
else throw new IllegalInstructionException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static v128 subs_epi32(v128 a, v128 b)
+ public static v128 subs_epi32(v128 a, v128 b, byte elements = 4)
{
if (Sse2.IsSse2Supported)
{
- v128 MAX_VALUE = new v128(int.MaxValue);
-
- v128 x_negative_mask = Sse2.srai_epi32(a, 31);
- v128 ret = Sse2.sub_epi32(MAX_VALUE, x_negative_mask);
- v128 cmp = Sse2.add_epi32(ret, a);
-
- x_negative_mask = Sse2.cmpeq_epi32(Sse2.cmpgt_epi32(cmp, b), x_negative_mask);
+ v128 MAX_VALUE = Sse2.set1_epi32(int.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI32(b, -1, elements))
+ {
+ return Sse2.sub_epi32(a, Sse2.cmpgt_epi32(MAX_VALUE, a));
+ }
+ if (constexpr.ALL_EQ_EPI32(b, 1, elements))
+ {
+ return Sse2.add_epi32(a, Sse2.cmpgt_epi32(a, Sse2.set1_epi32(int.MinValue)));
+ }
+
- return Xse.blendv_si128(ret, Sse2.sub_epi32(a, b), x_negative_mask);
+ v128 result = Sse2.sub_epi32(a, b);
+ v128 overflow = Sse2.add_epi32(MAX_VALUE, Sse2.srli_epi32(a, 31));
+ v128 selectOverflow = ternarylogic_si128(overflow, b, result, TernaryOperation.Ox18);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ return Sse4_1.blendv_ps(result, overflow, selectOverflow);
+ }
+ else
+ {
+ return blendv_si128(result, overflow, Sse2.srai_epi32(selectOverflow, 31));
+ }
}
else throw new IllegalInstructionException();
}
@@ -74,15 +114,24 @@ public static v256 mm256_subs_epi32(v256 a, v256 b)
{
if (Avx2.IsAvx2Supported)
{
- v256 MAX_VALUE = new v256(int.MaxValue);
-
- v256 x_negative_mask = Avx2.mm256_srai_epi32(a, 31);
- v256 ret = Avx2.mm256_sub_epi32(MAX_VALUE, x_negative_mask);
- v256 cmp = Avx2.mm256_add_epi32(ret, a);
-
- x_negative_mask = Avx2.mm256_cmpeq_epi32(Avx2.mm256_cmpgt_epi32(cmp, b), x_negative_mask);
+ v256 MAX_VALUE = Avx.mm256_set1_epi32(int.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI32(b, -1))
+ {
+ return Avx2.mm256_sub_epi32(a, Avx2.mm256_cmpgt_epi32(MAX_VALUE, a));
+ }
+ if (constexpr.ALL_EQ_EPI32(b, 1))
+ {
+ return Avx2.mm256_add_epi32(a, Avx2.mm256_cmpgt_epi32(a, Avx.mm256_set1_epi32(int.MinValue)));
+ }
+
- return Xse.mm256_blendv_si256(ret, Avx2.mm256_sub_epi32(a, b), x_negative_mask);
+ v256 result = Avx2.mm256_sub_epi32(a, b);
+ v256 overflow = Avx2.mm256_add_epi32(MAX_VALUE, Avx2.mm256_srli_epi32(a, 31));
+ v256 selectOverflow = mm256_ternarylogic_si256(overflow, b, result, TernaryOperation.Ox18);
+
+ return Avx.mm256_blendv_ps(result, overflow, selectOverflow);
}
else throw new IllegalInstructionException();
}
@@ -92,15 +141,31 @@ public static v128 subs_epi64(v128 a, v128 b)
{
if (Sse2.IsSse2Supported)
{
- v128 MAX_VALUE = new v128(long.MaxValue);
+ v128 MAX_VALUE = Sse2.set1_epi64x(long.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI64(b, -1))
+ {
+ return Sse2.sub_epi64(a, cmpgt_epi64(MAX_VALUE, a));
+ }
+ if (constexpr.ALL_EQ_EPI64(b, 1))
+ {
+ return Sse2.add_epi64(a, cmpgt_epi64(a, Sse2.set1_epi64x(long.MinValue)));
+ }
+
- v128 x_negative_mask = Xse.cmpgt_epi64(Sse2.setzero_si128(), a);
- v128 ret = Sse2.sub_epi64(MAX_VALUE, x_negative_mask);
- v128 cmp = Sse2.add_epi64(ret, a);
-
- x_negative_mask = Sse2.cmpeq_epi32(Xse.cmpgt_epi64(cmp, b), x_negative_mask);
-
- return Xse.blendv_si128(ret, Sse2.sub_epi64(a, b), x_negative_mask);
+ v128 result = Sse2.sub_epi64(a, b);
+ v128 overflow = Sse2.add_epi64(MAX_VALUE, Sse2.srli_epi64(a, 63));
+ v128 selectOverflow = ternarylogic_si128(overflow, b, result, TernaryOperation.Ox18);
+
+ if (Sse4_1.IsSse41Supported)
+ {
+ return Sse4_1.blendv_pd(result, overflow, selectOverflow);
+ }
+ else
+ {
+ return blendv_si128(result, overflow, srai_epi64(selectOverflow, 63));
+ }
}
else throw new IllegalInstructionException();
}
@@ -110,15 +175,24 @@ public static v256 mm256_subs_epi64(v256 a, v256 b, byte elements = 4)
{
if (Avx2.IsAvx2Supported)
{
- v256 MAX_VALUE = new v256(long.MaxValue);
+ v256 MAX_VALUE = Avx.mm256_set1_epi64x(long.MaxValue);
+
+
+ if (constexpr.ALL_EQ_EPI64(b, -1, elements))
+ {
+ return Avx2.mm256_sub_epi64(a, Avx2.mm256_cmpgt_epi64(MAX_VALUE, a));
+ }
+ if (constexpr.ALL_EQ_EPI64(b, 1, elements))
+ {
+ return Avx2.mm256_add_epi64(a, Avx2.mm256_cmpgt_epi64(a, Avx.mm256_set1_epi64x(long.MinValue)));
+ }
- v256 x_negative_mask = Xse.mm256_cmpgt_epi64(Avx.mm256_setzero_si256(), a, elements);
- v256 ret = Avx2.mm256_sub_epi8(MAX_VALUE, x_negative_mask);
- v256 cmp = Avx2.mm256_add_epi64(ret, a);
-
- x_negative_mask = Avx2.mm256_cmpeq_epi64(Xse.mm256_cmpgt_epi64(cmp, b, elements), x_negative_mask);
-
- return Xse.mm256_blendv_si256(ret, Avx2.mm256_sub_epi64(a, b), x_negative_mask);
+
+ v256 result = Avx2.mm256_sub_epi64(a, b);
+ v256 overflow = Avx2.mm256_add_epi64(MAX_VALUE, Avx2.mm256_srli_epi64(a, 63));
+ v256 selectOverflow = mm256_ternarylogic_si256(overflow, b, result, TernaryOperation.Ox18);
+
+ return Avx.mm256_blendv_pd(result, overflow, selectOverflow);
}
else throw new IllegalInstructionException();
}
@@ -132,15 +206,20 @@ unsafe public static partial class maxmath
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Int128 subsaturated(Int128 x, Int128 y)
{
- Int128 ret = Int128.MaxValue + tobyte((long)x.hi64 < 0);
- Int128 cmp = ret + x;
-
- if ((x < 0) == (y < cmp))
+ if (Xse.constexpr.IS_TRUE(y == 1))
{
- ret = x - y;
+ return nextsmaller(x);
}
-
- return ret;
+
+ Int128 res = x - y;
+ Int128 overflow = (x.hi64 >> 63) + Int128.MaxValue;
+
+ if ((long)((overflow ^ y) & (overflow ^ res)).hi64 < 0)
+ {
+ res = overflow;
+ }
+
+ return res;
}
/// Subtracts from and returns the result, which is clamped to if underflow occurs.
@@ -155,6 +234,11 @@ public static UInt128 subsaturated(UInt128 x, UInt128 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte subsaturated(byte x, byte y)
{
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextsmaller(x);
+ }
+
return (byte)(y >= x ? 0 : x - y);
}
@@ -276,6 +360,11 @@ public static byte32 subsaturated(byte32 x, byte32 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort subsaturated(ushort x, ushort y)
{
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextsmaller(x);
+ }
+
return (ushort)(y >= x ? 0 : x - y);
}
@@ -368,6 +457,11 @@ public static ushort16 subsaturated(ushort16 x, ushort16 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint subsaturated(uint x, uint y)
{
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextsmaller(x);
+ }
+
return y >= x ? 0 : x - y;
}
@@ -377,7 +471,7 @@ public static uint2 subsaturated(uint2 x, uint2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
+ return RegisterConversion.ToUInt2(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -392,7 +486,7 @@ public static uint3 subsaturated(uint3 x, uint3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
+ return RegisterConversion.ToUInt3(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -408,7 +502,7 @@ public static uint4 subsaturated(uint4 x, uint4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
+ return RegisterConversion.ToUInt4(Xse.subs_epu32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
@@ -439,6 +533,11 @@ public static uint8 subsaturated(uint8 x, uint8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong subsaturated(ulong x, ulong y)
{
+ if (Xse.constexpr.IS_TRUE(y == 1))
+ {
+ return nextsmaller(x);
+ }
+
return y >= x ? 0 : x - y;
}
@@ -463,7 +562,7 @@ public static ulong3 subsaturated(ulong3 x, ulong3 y)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_subs_epu64(x, y);
+ return Xse.mm256_subs_epu64(x, y, 3);
}
else
{
@@ -478,7 +577,7 @@ public static ulong4 subsaturated(ulong4 x, ulong4 y)
{
if (Avx2.IsAvx2Supported)
{
- return Xse.mm256_subs_epu64(x, y);
+ return Xse.mm256_subs_epu64(x, y, 4);
}
else
{
@@ -738,7 +837,7 @@ public static int2 subsaturated(int2 x, int2 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt2(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 2));
}
else
{
@@ -753,7 +852,7 @@ public static int3 subsaturated(int3 x, int3 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt3(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 3));
}
else
{
@@ -769,7 +868,7 @@ public static int4 subsaturated(int4 x, int4 y)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y)));
+ return RegisterConversion.ToInt4(Xse.subs_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), 4));
}
else
{
@@ -800,15 +899,15 @@ public static int8 subsaturated(int8 x, int8 y)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long subsaturated(long x, long y)
{
- long ret = long.MaxValue + tobyte(x < 0);
- long cmp = ret + x;
-
- if ((x < 0) == (y < cmp))
- {
- ret = x - y;
- }
-
- return ret;
+ long result = x - y;
+ long overflow = (long)((ulong)x >> 63) + long.MaxValue;
+
+ if (((overflow ^ y) & (overflow ^ result)) < 0)
+ {
+ result = overflow;
+ }
+
+ return result;
}
/// Subtracts each component of from each component of and returns the results, which are clamped to if overflow occurs or if underflow occurs.
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Sign/Copy Sign.cs b/Runtime/Math Lib/Functions/Arithmetic/Sign/Copy Sign.cs
index 0448e36..5b38b7d 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Sign/Copy Sign.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Sign/Copy Sign.cs
@@ -20,11 +20,20 @@ public static v128 movsign_epi8(v128 a, v128 s, bool promise = false, byte eleme
{
s = Sse2.sub_epi8(s, Sse2.cmpeq_epi8(s, Sse2.setzero_si128()));
}
+ else if (constexpr.ALL_EQ_EPI8(a, 1, elements) || constexpr.ALL_EQ_EPI8(a, -1, elements))
+ {
+ return Sse2.or_si128(srai_epi8(s, 7), Sse2.set1_epi8(1));
+ }
return Ssse3.sign_epi8(abs_epi8(a, elements), s);
}
else if (Sse2.IsSse2Supported)
{
+ if (constexpr.ALL_EQ_EPI8(a, 1, elements) || constexpr.ALL_EQ_EPI8(a, -1, elements))
+ {
+ return Sse2.or_si128(srai_epi8(s, 7), Sse2.set1_epi8(1));
+ }
+
v128 res = Sse2.xor_si128(a, s);
res = srai_epi8(res, 7);
@@ -42,6 +51,10 @@ public static v256 mm256_movsign_epi8(v256 a, v256 s, bool promise = false)
{
s = Avx2.mm256_sub_epi8(s, Avx2.mm256_cmpeq_epi8(s, Avx.mm256_setzero_si256()));
}
+ else if (constexpr.ALL_EQ_EPI8(a, 1) || constexpr.ALL_EQ_EPI8(a, -1))
+ {
+ return Avx2.mm256_or_si256(mm256_srai_epi8(s, 7), Avx.mm256_set1_epi8(1));
+ }
return Avx2.mm256_sign_epi8(mm256_abs_epi8(a), s);
}
@@ -52,28 +65,30 @@ public static v256 mm256_movsign_epi8(v256 a, v256 s, bool promise = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 movsign_epi16(v128 a, v128 s, bool promise = false, byte elements = 8)
{
- if (Sse2.IsSse2Supported)
+ if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_EQ_EPI16(a, 1, elements) || constexpr.ALL_EQ_EPI16(a, -1, elements))
+ if (!(promise || constexpr.ALL_NEQ_EPI16(s, 0, elements)))
{
- return Sse2.or_si128(Sse2.set1_epi16(1), Sse2.srai_epi16(s, 15));
+ s = Sse2.sub_epi16(s, Sse2.cmpeq_epi16(s, Sse2.setzero_si128()));
}
- else if (Ssse3.IsSsse3Supported)
+ else if (constexpr.ALL_EQ_EPI16(a, 1, elements) || constexpr.ALL_EQ_EPI16(a, -1, elements))
{
- if (!(promise || constexpr.ALL_NEQ_EPI16(s, 0, elements)))
- {
- s = Sse2.sub_epi16(s, Sse2.cmpeq_epi16(s, Sse2.setzero_si128()));
- }
-
- return Ssse3.sign_epi16(abs_epi16(a, elements), s);
+ return Sse2.or_si128(srai_epi16(s, 15), Sse2.set1_epi16(1));
}
- else
- {
- v128 res = Sse2.xor_si128(a, s);
- res = Sse2.srai_epi16(res, 15);
- return Sse2.sub_epi16(Sse2.xor_si128(a, res), res);
+ return Ssse3.sign_epi16(abs_epi16(a, elements), s);
+ }
+ else if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI16(a, 1, elements) || constexpr.ALL_EQ_EPI16(a, -1, elements))
+ {
+ return Sse2.or_si128(srai_epi16(s, 15), Sse2.set1_epi16(1));
}
+
+ v128 res = Sse2.xor_si128(a, s);
+ res = Sse2.srai_epi16(res, 15);
+
+ return Sse2.sub_epi16(Sse2.xor_si128(a, res), res);
}
else throw new IllegalInstructionException();
}
@@ -83,19 +98,16 @@ public static v256 mm256_movsign_epi16(v256 a, v256 s, bool promise = false)
{
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_EQ_EPI16(a, 1) || constexpr.ALL_EQ_EPI16(a, -1))
+ if (!(promise || constexpr.ALL_NEQ_EPI16(s, 0)))
{
- return Avx2.mm256_or_si256(Avx.mm256_set1_epi16(1), Avx2.mm256_srai_epi16(s, 15));
+ s = Avx2.mm256_sub_epi16(s, Avx2.mm256_cmpeq_epi16(s, Avx.mm256_setzero_si256()));
}
- else
+ else if (constexpr.ALL_EQ_EPI16(a, 1) || constexpr.ALL_EQ_EPI16(a, -1))
{
- if (!(promise || constexpr.ALL_NEQ_EPI16(s, 0)))
- {
- s = Avx2.mm256_sub_epi16(s, Avx2.mm256_cmpeq_epi16(s, Avx.mm256_setzero_si256()));
- }
-
- return Avx2.mm256_sign_epi16(mm256_abs_epi16(a), s);
+ return Avx2.mm256_or_si256(mm256_srai_epi16(s, 15), Avx.mm256_set1_epi16(1));
}
+
+ return Avx2.mm256_sign_epi16(mm256_abs_epi16(a), s);
}
else throw new IllegalInstructionException();
}
@@ -104,28 +116,30 @@ public static v256 mm256_movsign_epi16(v256 a, v256 s, bool promise = false)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static v128 movsign_epi32(v128 a, v128 s, bool promise = false, byte elements = 4)
{
- if (Sse2.IsSse2Supported)
+ if (Ssse3.IsSsse3Supported)
{
- if (constexpr.ALL_EQ_EPI32(a, 1, elements) || constexpr.ALL_EQ_EPI32(a, -1, elements))
+ if (!(promise || constexpr.ALL_NEQ_EPI32(s, 0, elements)))
{
- return Sse2.or_si128(Sse2.set1_epi32(1), Sse2.srai_epi32(s, 31));
+ s = Sse2.sub_epi32(s, Sse2.cmpeq_epi32(s, Sse2.setzero_si128()));
}
- else if (Ssse3.IsSsse3Supported)
+ else if (constexpr.ALL_EQ_EPI32(a, 1, elements) || constexpr.ALL_EQ_EPI32(a, -1, elements))
{
- if (!(promise || constexpr.ALL_NEQ_EPI32(s, 0, elements)))
- {
- s = Sse2.sub_epi32(s, Sse2.cmpeq_epi32(s, Sse2.setzero_si128()));
- }
-
- return Ssse3.sign_epi32(abs_epi32(a, elements), s);
+ return Sse2.or_si128(srai_epi32(s, 31), Sse2.set1_epi32(1));
}
- else
- {
- v128 res = Sse2.xor_si128(a, s);
- res = Sse2.srai_epi32(res, 31);
- return Sse2.sub_epi32(Sse2.xor_si128(a, res), res);
+ return Ssse3.sign_epi32(abs_epi32(a, elements), s);
+ }
+ else if (Sse2.IsSse2Supported)
+ {
+ if (constexpr.ALL_EQ_EPI32(a, 1, elements) || constexpr.ALL_EQ_EPI32(a, -1, elements))
+ {
+ return Sse2.or_si128(srai_epi32(s, 31), Sse2.set1_epi32(1));
}
+
+ v128 res = Sse2.xor_si128(a, s);
+ res = Sse2.srai_epi32(res, 31);
+
+ return Sse2.sub_epi32(Sse2.xor_si128(a, res), res);
}
else throw new IllegalInstructionException();
}
@@ -135,19 +149,16 @@ public static v256 mm256_movsign_epi32(v256 a, v256 s, bool promise = false)
{
if (Avx2.IsAvx2Supported)
{
- if (constexpr.ALL_EQ_EPI32(a, 1) || constexpr.ALL_EQ_EPI32(a, -1))
+ if (!(promise || constexpr.ALL_NEQ_EPI32(s, 0)))
{
- return Avx2.mm256_or_si256(Avx.mm256_set1_epi32(1), Avx2.mm256_srai_epi32(s, 31));
+ s = Avx2.mm256_sub_epi32(s, Avx2.mm256_cmpeq_epi32(s, Avx.mm256_setzero_si256()));
}
- else
+ else if (constexpr.ALL_EQ_EPI32(a, 1) || constexpr.ALL_EQ_EPI32(a, -1))
{
- if (!(promise || constexpr.ALL_NEQ_EPI32(s, 0)))
- {
- s = Avx2.mm256_sub_epi32(s, Avx2.mm256_cmpeq_epi32(s, Avx.mm256_setzero_si256()));
- }
-
- return Avx2.mm256_sign_epi32(mm256_abs_epi32(a), s);
+ return Avx2.mm256_or_si256(mm256_srai_epi32(s, 31), Avx.mm256_set1_epi32(1));
}
+
+ return Avx2.mm256_sign_epi32(mm256_abs_epi32(a), s);
}
else throw new IllegalInstructionException();
}
@@ -542,7 +553,7 @@ public static int2 copysign(int2 x, int2 y, Promise nonZero = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 2));
+ return RegisterConversion.ToInt2(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 2));
}
else
{
@@ -559,7 +570,7 @@ public static int3 copysign(int3 x, int3 y, Promise nonZero = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 3));
+ return RegisterConversion.ToInt3(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 3));
}
else
{
@@ -576,7 +587,7 @@ public static int4 copysign(int4 x, int4 y, Promise nonZero = Promise.Nothing)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 4));
+ return RegisterConversion.ToInt4(Xse.movsign_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero), 4));
}
else
{
@@ -668,7 +679,7 @@ public static quarter copysign(quarter x, quarter y, Promise nonZero = Promise.N
{
if (Sse2.IsSse2Supported)
{
- return new quarter(Xse.movsign_pq(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)).Byte0);
+ return new quarter(Xse.movsign_pq(RegisterConversion.ToV128(x.value), RegisterConversion.ToV128(y.value), nonZero.Promises(Promise.NonZero)).Byte0);
}
else
{
@@ -702,7 +713,7 @@ public static quarter2 copysign(quarter2 x, quarter2 y, Promise nonZero = Promis
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_pq(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return Xse.movsign_pq(x, y, nonZero.Promises(Promise.NonZero));
}
else
{
@@ -717,7 +728,7 @@ public static quarter3 copysign(quarter3 x, quarter3 y, Promise nonZero = Promis
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_pq(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return Xse.movsign_pq(x, y, nonZero.Promises(Promise.NonZero));
}
else
{
@@ -732,7 +743,7 @@ public static quarter4 copysign(quarter4 x, quarter4 y, Promise nonZero = Promis
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_pq(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return Xse.movsign_pq(x, y, nonZero.Promises(Promise.NonZero));
}
else
{
@@ -747,7 +758,7 @@ public static quarter8 copysign(quarter8 x, quarter8 y, Promise nonZero = Promis
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_pq(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return Xse.movsign_pq(x, y, nonZero.Promises(Promise.NonZero));
}
else
{
@@ -797,7 +808,7 @@ public static half2 copysign(half2 x, half2 y, Promise nonZero = Promise.Nothing
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToHalf2(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -812,7 +823,7 @@ public static half3 copysign(half3 x, half3 y, Promise nonZero = Promise.Nothing
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToHalf3(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -827,7 +838,7 @@ public static half4 copysign(half4 x, half4 y, Promise nonZero = Promise.Nothing
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToHalf4(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -842,7 +853,7 @@ public static half8 copysign(half8 x, half8 y, Promise nonZero = Promise.Nothing
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ph(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return Xse.movsign_ph(x, y, nonZero.Promises(Promise.NonZero));
}
else
{
@@ -892,7 +903,7 @@ public static float2 copysign(float2 x, float2 y, Promise nonZero = Promise.Noth
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToFloat2(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -925,7 +936,7 @@ public static float3 copysign(float3 x, float3 y, Promise nonZero = Promise.Noth
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToFloat3(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -958,7 +969,7 @@ public static float4 copysign(float4 x, float4 y, Promise nonZero = Promise.Noth
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToFloat4(Xse.movsign_ps(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -1041,7 +1052,7 @@ public static double2 copysign(double2 x, double2 y, Promise nonZero = Promise.N
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.movsign_pd(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToDouble2(Xse.movsign_pd(RegisterConversion.ToV128(x), RegisterConversion.ToV128(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -1074,7 +1085,7 @@ public static double3 copysign(double3 x, double3 y, Promise nonZero = Promise.N
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_movsign_pd(RegisterConversion.ToV256(x), RegisterConversion.ToV256(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToDouble3(Xse.mm256_movsign_pd(RegisterConversion.ToV256(x), RegisterConversion.ToV256(y), nonZero.Promises(Promise.NonZero)));
}
else
{
@@ -1090,7 +1101,7 @@ public static double4 copysign(double4 x, double4 y, Promise nonZero = Promise.N
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Xse.mm256_movsign_pd(RegisterConversion.ToV256(x), RegisterConversion.ToV256(y), nonZero.Promises(Promise.NonZero)));
+ return RegisterConversion.ToDouble4(Xse.mm256_movsign_pd(RegisterConversion.ToV256(x), RegisterConversion.ToV256(y), nonZero.Promises(Promise.NonZero)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Sign/Negative-Absolute.cs b/Runtime/Math Lib/Functions/Arithmetic/Sign/Negative-Absolute.cs
index d310208..c4bef9b 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Sign/Negative-Absolute.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Sign/Negative-Absolute.cs
@@ -407,7 +407,7 @@ public static int2 nabs(int2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.nabs_epi32(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToInt2(Xse.nabs_epi32(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -421,7 +421,7 @@ public static int3 nabs(int3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.nabs_epi32(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToInt3(Xse.nabs_epi32(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -435,7 +435,7 @@ public static int4 nabs(int4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.nabs_epi32(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToInt4(Xse.nabs_epi32(RegisterConversion.ToV128(x), 4));
}
else
{
@@ -670,7 +670,7 @@ public static float2 nabs(float2 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.nabs_ps(RegisterConversion.ToV128(x), 2));
+ return RegisterConversion.ToFloat2(Xse.nabs_ps(RegisterConversion.ToV128(x), 2));
}
else
{
@@ -684,7 +684,7 @@ public static float3 nabs(float3 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.nabs_ps(RegisterConversion.ToV128(x), 3));
+ return RegisterConversion.ToFloat3(Xse.nabs_ps(RegisterConversion.ToV128(x), 3));
}
else
{
@@ -698,7 +698,7 @@ public static float4 nabs(float4 x)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.nabs_ps(RegisterConversion.ToV128(x), 4));
+ return RegisterConversion.ToFloat4(Xse.nabs_ps(RegisterConversion.ToV128(x), 4));
}
else
{
@@ -741,7 +741,7 @@ public static double2 nabs(double2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.nabs_pd(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToDouble2(Xse.nabs_pd(RegisterConversion.ToV128(x)));
}
else
{
@@ -755,7 +755,7 @@ public static double3 nabs(double3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType( Xse.mm256_nabs_pd(RegisterConversion.ToV256(x), 3));
+ return RegisterConversion.ToDouble3( Xse.mm256_nabs_pd(RegisterConversion.ToV256(x), 3));
}
else
{
@@ -769,7 +769,7 @@ public static double4 nabs(double4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.mm256_nabs_pd(RegisterConversion.ToV256(x), 4));
+ return RegisterConversion.ToDouble4(Xse.mm256_nabs_pd(RegisterConversion.ToV256(x), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Sign/Sign.cs b/Runtime/Math Lib/Functions/Arithmetic/Sign/Sign.cs
index 5d3bd78..ca6cecf 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Sign/Sign.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Sign/Sign.cs
@@ -249,7 +249,7 @@ public static int2 sign(int2 x)
{
if (Ssse3.IsSsse3Supported)
{
- return RegisterConversion.ToType(Ssse3.sign_epi32(new v128(1, 1, 0, 0), RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToInt2(Ssse3.sign_epi32(new v128(1, 1, 0, 0), RegisterConversion.ToV128(x)));
}
else
{
@@ -263,7 +263,7 @@ public static int3 sign(int3 x)
{
if (Ssse3.IsSsse3Supported)
{
- return RegisterConversion.ToType(Ssse3.sign_epi32(new v128(1, 1, 1, 0), RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToInt3(Ssse3.sign_epi32(new v128(1, 1, 1, 0), RegisterConversion.ToV128(x)));
}
else
{
@@ -277,7 +277,7 @@ public static int4 sign(int4 x)
{
if (Ssse3.IsSsse3Supported)
{
- return RegisterConversion.ToType(Ssse3.sign_epi32(new v128(1), RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToInt4(Ssse3.sign_epi32(new v128(1), RegisterConversion.ToV128(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Subtract-Add.cs b/Runtime/Math Lib/Functions/Arithmetic/Subtract-Add.cs
index 1bd2d4d..3a28287 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Subtract-Add.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Subtract-Add.cs
@@ -243,7 +243,7 @@ public static float2 subadd(float2 a, float2 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat2(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -257,7 +257,7 @@ public static float3 subadd(float3 a, float3 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat3(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -271,7 +271,7 @@ public static float4 subadd(float4 a, float4 b)
{
if (Sse.IsSseSupported)
{
- return RegisterConversion.ToType(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
+ return RegisterConversion.ToFloat4(Xse.addsub_ps(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -300,7 +300,7 @@ public static double2 subadd(double2 a, double2 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.addsub_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));;
+ return RegisterConversion.ToDouble2(Xse.addsub_pd(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b)));
}
else
{
@@ -314,7 +314,7 @@ public static double3 subadd(double3 a, double3 b)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Avx.mm256_addsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
+ return RegisterConversion.ToDouble3(Avx.mm256_addsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
}
else
{
@@ -328,7 +328,7 @@ public static double4 subadd(double4 a, double4 b)
{
if (Avx.IsAvxSupported)
{
- return RegisterConversion.ToType(Avx.mm256_addsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
+ return RegisterConversion.ToDouble4(Avx.mm256_addsub_pd(RegisterConversion.ToV256(a), RegisterConversion.ToV256(b)));
}
else
{
@@ -578,7 +578,7 @@ public static uint2 subadd(uint2 a, uint2 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 2));
+ return RegisterConversion.ToUInt2(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 2));
}
else
{
@@ -592,7 +592,7 @@ public static uint3 subadd(uint3 a, uint3 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 3));
+ return RegisterConversion.ToUInt3(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 3));
}
else
{
@@ -606,7 +606,7 @@ public static uint4 subadd(uint4 a, uint4 b)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 4));
+ return RegisterConversion.ToUInt4(Xse.addsub_epi32(RegisterConversion.ToV128(a), RegisterConversion.ToV128(b), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Column Average.cs b/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Column Average.cs
index 66d1fb5..f1dac4c 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Column Average.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Column Average.cs
@@ -1,6 +1,5 @@
using System.Runtime.CompilerServices;
using Unity.Mathematics;
-using Unity.Burst.CompilerServices;
using Unity.Burst.Intrinsics;
using MaxMath.Intrinsics;
@@ -87,7 +86,7 @@ public static v128 vavg_epi8(v128 a, bool promiseNoOverflow = false, byte elemen
}
v128 offset = Sse2.cvtsi32_si128(elements - 1);
- v128 csum = Xse.vsum_epi8(a, promiseNoOverflow, elements);
+ v128 csum = vsum_epi8(a, promiseNoOverflow, elements);
v128 result;
if (promiseNoOverflow)
@@ -98,11 +97,12 @@ public static v128 vavg_epi8(v128 a, bool promiseNoOverflow = false, byte elemen
}
else
{
- offset = movsign_epi8(offset, csum, false, 1);
+ v128 signMaskSum = srai_epi8(csum, 7);
+ offset = Sse2.sub_epi8(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
}
result = Sse2.add_epi8(csum, offset);
- result = Xse.constexpr.div_epi8(result, (sbyte)elements, elements);
+ result = constexpr.div_epi8(result, (sbyte)elements, elements);
}
else
{
@@ -112,11 +112,12 @@ public static v128 vavg_epi8(v128 a, bool promiseNoOverflow = false, byte elemen
}
else
{
- offset = movsign_epi16(offset, csum, false, 1);
+ v128 signMaskSum = Sse2.srai_epi16(csum, 15);
+ offset = Sse2.sub_epi16(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
}
result = Sse2.add_epi16(csum, offset);
- result = Xse.constexpr.div_epi16(result, elements, elements);
+ result = constexpr.div_epi16(result, elements, elements);
}
return result;
@@ -139,7 +140,7 @@ public static v128 vavg_epi16(v128 a, bool promiseNoOverflow = false, byte eleme
}
v128 offset = Sse2.cvtsi32_si128(elements - 1);
- v128 csum = Xse.vsum_epi16(a, promiseNoOverflow, elements);
+ v128 csum = vsum_epi16(a, promiseNoOverflow, elements);
v128 result;
if (promiseNoOverflow)
@@ -150,11 +151,12 @@ public static v128 vavg_epi16(v128 a, bool promiseNoOverflow = false, byte eleme
}
else
{
- offset = movsign_epi16(offset, csum, false, 1);
+ v128 signMaskSum = Sse2.srai_epi16(csum, 15);
+ offset = Sse2.sub_epi16(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
}
result = Sse2.add_epi16(csum, offset);
- result = Xse.constexpr.div_epi16(result, elements, elements);
+ result = constexpr.div_epi16(result, elements, elements);
}
else
{
@@ -164,11 +166,12 @@ public static v128 vavg_epi16(v128 a, bool promiseNoOverflow = false, byte eleme
}
else
{
- offset = movsign_epi32(offset, csum, false, 1);
+ v128 signMaskSum = Sse2.srai_epi32(csum, 31);
+ offset = Sse2.sub_epi32(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
}
result = Sse2.add_epi32(csum, offset);
- result = Xse.constexpr.div_epi32(result, elements);
+ result = constexpr.div_epi32(result, elements);
}
return result;
@@ -191,7 +194,7 @@ public static v128 vavg_epi32(v128 a, bool promiseNoOverflow = false, byte eleme
}
v128 offset = Sse2.cvtsi32_si128(elements - 1);
- v128 csum = Xse.vsum_epi32(a, promiseNoOverflow, elements);
+ v128 csum = vsum_epi32(a, promiseNoOverflow, elements);
v128 result;
if (promiseNoOverflow)
@@ -202,18 +205,20 @@ public static v128 vavg_epi32(v128 a, bool promiseNoOverflow = false, byte eleme
}
else
{
- offset = movsign_epi32(offset, csum, false, 1);
+ v128 signMaskSum = Sse2.srai_epi32(csum, 31);
+ offset = Sse2.sub_epi32(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
}
result = Sse2.add_epi32(csum, offset);
- result = Xse.constexpr.div_epi32(result, elements);
+ result = constexpr.div_epi32(result, elements);
}
else
{
- offset = movsign_epi64(offset, csum);
+ v128 signMaskSum = srai_epi64(csum, 63);
+ offset = Sse2.sub_epi64(Sse2.xor_si128(offset, signMaskSum), signMaskSum);
result = Sse2.add_epi64(csum, offset);
- result = Xse.constexpr.div_epi64(result, elements);
+ result = constexpr.div_epi64(result, elements);
}
return result;
@@ -329,7 +334,7 @@ public static byte cavg(byte32 c)
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte2 c, Promise noOverflow = Promise.Nothing)
{
@@ -344,7 +349,7 @@ public static sbyte cavg(sbyte2 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte3 c, Promise noOverflow = Promise.Nothing)
{
@@ -361,7 +366,7 @@ public static sbyte cavg(sbyte3 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte4 c, Promise noOverflow = Promise.Nothing)
{
@@ -378,7 +383,7 @@ public static sbyte cavg(sbyte4 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte8 c, Promise noOverflow = Promise.Nothing)
{
@@ -395,7 +400,7 @@ public static sbyte cavg(sbyte8 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 15 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 15 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 15 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 15 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte16 c, Promise noOverflow = Promise.Nothing)
{
@@ -412,7 +417,7 @@ public static sbyte cavg(sbyte16 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 31 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 31 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 31 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 31 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte cavg(sbyte32 c, Promise noOverflow = Promise.Nothing)
{
@@ -589,7 +594,7 @@ public static ushort cavg(ushort16 c, Promise noOverflow = Promise.Nothing)
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short cavg(short2 c, Promise noOverflow = Promise.Nothing)
{
@@ -604,7 +609,7 @@ public static short cavg(short2 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short cavg(short3 c, Promise noOverflow = Promise.Nothing)
{
@@ -621,7 +626,7 @@ public static short cavg(short3 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short cavg(short4 c, Promise noOverflow = Promise.Nothing)
{
@@ -638,7 +643,7 @@ public static short cavg(short4 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short cavg(short8 c, Promise noOverflow = Promise.Nothing)
{
@@ -655,7 +660,7 @@ public static short cavg(short8 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 15 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 15 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 15 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 15 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short cavg(short16 c, Promise noOverflow = Promise.Nothing)
{
@@ -753,7 +758,7 @@ public static uint cavg(uint3 c, Promise noOverflow = Promise.Nothing)
}
else
{
- return (uint)((2 + (ulong)c.z + csum((ulong2)c.xy)) / 3);
+ return (uint)(((2 + (ulong)c.z) + csum((ulong2)c.xy)) / 3);
}
}
@@ -828,7 +833,7 @@ public static uint cavg(uint8 c, Promise noOverflow = Promise.Nothing)
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int cavg(int2 c, Promise noOverflow = Promise.Nothing)
{
@@ -843,7 +848,7 @@ public static int cavg(int2 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int cavg(int3 c, Promise noOverflow = Promise.Nothing)
{
@@ -860,7 +865,7 @@ public static int cavg(int3 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int cavg(int4 c, Promise noOverflow = Promise.Nothing)
{
@@ -877,7 +882,7 @@ public static int cavg(int4 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in an .
- /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 7 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 7 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int cavg(int8 c, Promise noOverflow = Promise.Nothing)
{
@@ -1012,7 +1017,7 @@ public static ulong cavg(ulong4 c, Promise noOverflow = Promise.Nothing)
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 1 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 1 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long cavg(long2 c, Promise noOverflow = Promise.Nothing)
{
@@ -1029,7 +1034,7 @@ public static long cavg(long2 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 2 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 2 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long cavg(long3 c, Promise noOverflow = Promise.Nothing)
{
@@ -1050,7 +1055,7 @@ public static long cavg(long3 c, Promise noOverflow = Promise.Nothing)
}
/// Returns the ceiling of the horizontal average value of components in a .
- /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
+ /// A withs its flag set returns undefined results for any column sum of or 3 ('+' if the column sum is positive, '-' otherwise) that overflows. It is only recommended to use this overload if each possible summation order of elements in or 3 is guaranteed not to overflow.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long cavg(long4 c, Promise noOverflow = Promise.Nothing)
{
diff --git a/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Dot Product.cs b/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Dot Product.cs
index 5485bdc..99025ca 100644
--- a/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Dot Product.cs
+++ b/Runtime/Math Lib/Functions/Arithmetic/Vector Reduction/Dot Product.cs
@@ -9,7 +9,7 @@
namespace MaxMath
{
namespace Intrinsics
- {
+ {
unsafe public static partial class Xse
{
// maddubs(byte16 a, sbyte16 b) is almost useless.
@@ -324,11 +324,11 @@ public static v128 dp_epi16(v128 a, v128 b, bool promiseNoOverflow = false, byte
{
if (Constant.IsConstantExpression(a))
{
- a = Sse2.insert_epi16(a, 0, 3);
+ a = Xse.insert_epi16(a, 0, 3);
}
else
{
- b = Sse2.insert_epi16(b, 0, 3);
+ b = Xse.insert_epi16(b, 0, 3);
}
}
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Bitfield.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Bitfield.cs
index d49b892..68a71f2 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Bitfield.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Bitfield.cs
@@ -135,7 +135,7 @@ public static byte bits_extract(byte x, int index, int length)
return (byte)bits_extract((uint)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte2 bits_extract(byte2 x, byte2 index, byte2 length)
{
@@ -149,7 +149,7 @@ public static byte2 bits_extract(byte2 x, byte2 index, byte2 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte3 bits_extract(byte3 x, byte3 index, byte3 length)
{
@@ -163,7 +163,7 @@ public static byte3 bits_extract(byte3 x, byte3 index, byte3 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte4 bits_extract(byte4 x, byte4 index, byte4 length)
{
@@ -177,7 +177,7 @@ public static byte4 bits_extract(byte4 x, byte4 index, byte4 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte8 bits_extract(byte8 x, byte8 index, byte8 length)
{
@@ -191,7 +191,7 @@ public static byte8 bits_extract(byte8 x, byte8 index, byte8 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte16 bits_extract(byte16 x, byte16 index, byte16 length)
{
@@ -205,7 +205,7 @@ public static byte16 bits_extract(byte16 x, byte16 index, byte16 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte32 bits_extract(byte32 x, byte32 index, byte32 length)
{
@@ -227,42 +227,42 @@ public static sbyte bits_extract(sbyte x, int index, int length)
return (sbyte)bits_extract((byte)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte2 bits_extract(sbyte2 x, sbyte2 index, sbyte2 length)
{
return (sbyte2)bits_extract((byte2)x, (byte2)index, (byte2)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte3 bits_extract(sbyte3 x, sbyte3 index, sbyte3 length)
{
return (sbyte3)bits_extract((byte3)x, (byte3)index, (byte3)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte4 bits_extract(sbyte4 x, sbyte4 index, sbyte4 length)
{
return (sbyte4)bits_extract((byte4)x, (byte4)index, (byte4)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte8 bits_extract(sbyte8 x, sbyte8 index, sbyte8 length)
{
return (sbyte8)bits_extract((byte8)x, (byte8)index, (byte8)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte16 bits_extract(sbyte16 x, sbyte16 index, sbyte16 length)
{
return (sbyte16)bits_extract((byte16)x, (byte16)index, (byte16)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static sbyte32 bits_extract(sbyte32 x, sbyte32 index, sbyte32 length)
{
@@ -277,7 +277,7 @@ public static ushort bits_extract(ushort x, int index, int length)
return (ushort)bits_extract((uint)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort2 bits_extract(ushort2 x, ushort2 index, ushort2 length)
{
@@ -291,7 +291,7 @@ public static ushort2 bits_extract(ushort2 x, ushort2 index, ushort2 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort3 bits_extract(ushort3 x, ushort3 index, ushort3 length)
{
@@ -305,7 +305,7 @@ public static ushort3 bits_extract(ushort3 x, ushort3 index, ushort3 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort4 bits_extract(ushort4 x, ushort4 index, ushort4 length)
{
@@ -319,7 +319,7 @@ public static ushort4 bits_extract(ushort4 x, ushort4 index, ushort4 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort8 bits_extract(ushort8 x, ushort8 index, ushort8 length)
{
@@ -333,7 +333,7 @@ public static ushort8 bits_extract(ushort8 x, ushort8 index, ushort8 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort16 bits_extract(ushort16 x, ushort16 index, ushort16 length)
{
@@ -355,35 +355,35 @@ public static short bits_extract(short x, int index, int length)
return (short)bits_extract((ushort)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short2 bits_extract(short2 x, short2 index, short2 length)
{
return (short2)bits_extract((ushort2)x, (ushort2)index, (ushort2)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short3 bits_extract(short3 x, short3 index, short3 length)
{
return (short3)bits_extract((ushort3)x, (ushort3)index, (ushort3)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short4 bits_extract(short4 x, short4 index, short4 length)
{
return (short4)bits_extract((ushort4)x, (ushort4)index, (ushort4)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short8 bits_extract(short8 x, short8 index, short8 length)
{
return (short8)bits_extract((ushort8)x, (ushort8)index, (ushort8)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short16 bits_extract(short16 x, short16 index, short16 length)
{
@@ -405,13 +405,13 @@ public static uint bits_extract(uint x, int index, int length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint2 bits_extract(uint2 x, uint2 index, uint2 length)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 2));
+ return RegisterConversion.ToUInt2(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 2));
}
else
{
@@ -419,13 +419,13 @@ public static uint2 bits_extract(uint2 x, uint2 index, uint2 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint3 bits_extract(uint3 x, uint3 index, uint3 length)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 3));
+ return RegisterConversion.ToUInt3(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 3));
}
else
{
@@ -433,13 +433,13 @@ public static uint3 bits_extract(uint3 x, uint3 index, uint3 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint4 bits_extract(uint4 x, uint4 index, uint4 length)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 4));
+ return RegisterConversion.ToUInt4(Xse.bextr_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(index), RegisterConversion.ToV128(length), 4));
}
else
{
@@ -447,7 +447,7 @@ public static uint4 bits_extract(uint4 x, uint4 index, uint4 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint8 bits_extract(uint8 x, uint8 index, uint8 length)
{
@@ -469,28 +469,28 @@ public static int bits_extract(int x, int index, int length)
return (int)bits_extract((uint)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int2 bits_extract(int2 x, int2 index, int2 length)
{
return (int2)bits_extract((uint2)x, (uint2)index, (uint2)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int3 bits_extract(int3 x, int3 index, int3 length)
{
return (int3)bits_extract((uint3)x, (uint3)index, (uint3)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int4 bits_extract(int4 x, int4 index, int4 length)
{
return (int4)bits_extract((uint4)x, (uint4)index, (uint4)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of an and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int8 bits_extract(int8 x, int8 index, int8 length)
{
@@ -512,7 +512,7 @@ public static ulong bits_extract(ulong x, int index, int length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong2 bits_extract(ulong2 x, ulong2 index, ulong2 length)
{
@@ -526,7 +526,7 @@ public static ulong2 bits_extract(ulong2 x, ulong2 index, ulong2 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong3 bits_extract(ulong3 x, ulong3 index, ulong3 length)
{
@@ -540,7 +540,7 @@ public static ulong3 bits_extract(ulong3 x, ulong3 index, ulong3 length)
}
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong4 bits_extract(ulong4 x, ulong4 index, ulong4 length)
{
@@ -562,21 +562,21 @@ public static long bits_extract(long x, int index, int length)
return (long)bits_extract((ulong)x, index, length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long2 bits_extract(long2 x, long2 index, long2 length)
{
return (long2)bits_extract((ulong2)x, (ulong2)index, (ulong2)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long3 bits_extract(long3 x, long3 index, long3 length)
{
return (long3)bits_extract((ulong3)x, (ulong3)index, (ulong3)length);
}
- /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit int that component to 0.
+ /// Shifts a bitfield in each component of the corresponding length and starting at the corresponding bit to the least significant bit of a and sets each remaining bit in that component to 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long4 bits_extract(long4 x, long4 index, long4 length)
{
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Lowest Set Bit.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Lowest Set Bit.cs
index f3bb0de..3e2ad90 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Lowest Set Bit.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Extract Lowest Set Bit.cs
@@ -411,7 +411,7 @@ public static uint2 bits_extractlowest(uint2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt2(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -425,7 +425,7 @@ public static uint3 bits_extractlowest(uint3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt3(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -439,7 +439,7 @@ public static uint4 bits_extractlowest(uint4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt4(Xse.blsi_epi32(RegisterConversion.ToV128(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Mask Up To Lowest Set Bit.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Mask Up To Lowest Set Bit.cs
index 4aa75e7..192f562 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Mask Up To Lowest Set Bit.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Mask Up To Lowest Set Bit.cs
@@ -395,7 +395,7 @@ public static uint2 bits_masktolowest(uint2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt2(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -409,7 +409,7 @@ public static uint3 bits_masktolowest(uint3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt3(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -423,7 +423,7 @@ public static uint4 bits_masktolowest(uint4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt4(Xse.blsmsk_epi32(RegisterConversion.ToV128(x)));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Deposit.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Deposit.cs
index f471396..cd3dccd 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Deposit.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Deposit.cs
@@ -523,7 +523,7 @@ public static uint2 bits_depositparallel(uint2 x, uint2 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 2));
+ return RegisterConversion.ToUInt2(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 2));
}
else
{
@@ -537,7 +537,7 @@ public static uint3 bits_depositparallel(uint3 x, uint3 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 3));
+ return RegisterConversion.ToUInt3(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 3));
}
else
{
@@ -551,7 +551,7 @@ public static uint4 bits_depositparallel(uint4 x, uint4 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 4));
+ return RegisterConversion.ToUInt4(Xse.pdep_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Extract.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Extract.cs
index 2ed40f1..f2e8e2d 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Extract.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Parallel Bit Extract.cs
@@ -536,7 +536,7 @@ public static uint2 bits_extractparallel(uint2 x, uint2 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 2));
+ return RegisterConversion.ToUInt2(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 2));
}
else
{
@@ -550,7 +550,7 @@ public static uint3 bits_extractparallel(uint3 x, uint3 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 3));
+ return RegisterConversion.ToUInt3(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 3));
}
else
{
@@ -564,7 +564,7 @@ public static uint4 bits_extractparallel(uint4 x, uint4 mask)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 4));
+ return RegisterConversion.ToUInt4(Xse.pext_epi32(RegisterConversion.ToV128(x), RegisterConversion.ToV128(mask), 4));
}
else
{
diff --git a/Runtime/Math Lib/Functions/Bitwise/BMI/Reset Lowest Set Bit.cs b/Runtime/Math Lib/Functions/Bitwise/BMI/Reset Lowest Set Bit.cs
index bf5d2d5..697d3c9 100644
--- a/Runtime/Math Lib/Functions/Bitwise/BMI/Reset Lowest Set Bit.cs
+++ b/Runtime/Math Lib/Functions/Bitwise/BMI/Reset Lowest Set Bit.cs
@@ -395,7 +395,7 @@ public static uint2 bits_resetlowest(uint2 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsr_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt2(Xse.blsr_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -409,7 +409,7 @@ public static uint3 bits_resetlowest(uint3 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType(Xse.blsr_epi32(RegisterConversion.ToV128(x)));
+ return RegisterConversion.ToUInt3(Xse.blsr_epi32(RegisterConversion.ToV128(x)));
}
else
{
@@ -423,7 +423,7 @@ public static uint4 bits_resetlowest(uint4 x)
{
if (Sse2.IsSse2Supported)
{
- return RegisterConversion.ToType