v2.1.2

### Known Issues - half8 "equals" and "not equals" operators don't conform to the IEEE 754 standard - Unity has not yet reacted to my bug report regarding their "half" implementation ### Fixes - fixed undefined behavior of "vshr" functions for vector types smaller than 128 bits - fixed SSE2 implementations of "vrol" and "vror" functions for the (u)short16 type ### Additions - implemented Bmi1 and Bmi2 intrinsics as functions with a "bits_" prefix (except for "andn", which has already been implemented as "andnot") - added high performance and/or SIMD "isdivisible" functions for all integer vector types and scalar value types - added high performance and/or SIMD "intpow" - integer exponentiation - functions for (u)int, (u)long and all integer vector types - added high performance and/or SIMD "floorpow2" functions for all integer vector types - added "nabs" - negative absolute value - functions for all non-boolean vector- and single value types - added "indexof(vector v, value x)" functions for all non-boolean vector types ### Improvements - aggressively optimized away global variables (shuffle masks) and thus memory access and usage where appropriate - improved performance of 256 bit vector subvector getters - added Sse2 fallback code for all (u)long2/3/4 operators - improved performance of multiplication, division and modulo operations for all (s)byte- and (u)short vector- and matrix types when dividing by a single non-compile time constant value - added overloads for (s)byte- and (u)short vectors' "divrem" functions with a scalar value as the divisor parameter, improving performance when it is a compile time constant - improved performance of "intsqrt" functions for most types ### Changes - bump com.unity.burst to version 1.5 ### Fixed Oversights - added bitmask8 and bitmask16 functions for (s)byte and (u)short vector types, respectively
MrUnbelievable92 · Mar 24, 2021 · bec6dde · bec6dde
1 parent e3eff09
commit bec6dde
Show file tree

Hide file tree

Showing 143 changed files with 18,670 additions and 8,206 deletions.
diff --git a/README.md b/README.md
@@ -148,7 +148,7 @@ Note:
 
 - Division and modulo operations of (s)byte and (u)short vectors _by_ _other_ _vectors_ are implemented as either a long division algorithm ((s)byte32, (s)byte16 and (s)byte8 if not compiling for Avx2) or reciprocal multiplication after converting the vectors to float vectors (up to (s)byte8, all (u)short vectors) - it is very fast and, of course, 100% accurate!
 
-- This library uses Wojciech Mula's SIMD population count algorithm. Population count functions for (s)byte and (u)short types are very fast, and this library contains a function to sum up the number of 1-bits in a given block of memory based on it, too
+- This library uses Wojciech Mula's SIMD population count algorithm. You can count the number of set bits of a contiguous block of memory very efficiently using either the (s)byte32 (Avx2) or (s)byte16 (Ssse3) type
 
 ### Notes
 

diff --git a/Runtime/AssemblyInfo.cs b/Runtime/AssemblyInfo.cs
@@ -15,6 +15,7 @@
 [assembly: AssemblyTrademark("")]
 [assembly: AssemblyCulture("")]
 [assembly: InternalsVisibleTo("MaxMath.Tests")]
+[assembly: InternalsVisibleTo("NativeArrayExtensions")]
 
 // Setting ComVisible to false makes the types in this assembly not visible 
 // to COM components.  If you need to access a type in this assembly from 
@@ -31,8 +32,8 @@
 //      Build Number
 //      Revision
 //
-[assembly: AssemblyVersion("2.1.1")]
-[assembly: AssemblyFileVersion("2.1.1")]
+[assembly: AssemblyVersion("2.1.2")]
+[assembly: AssemblyFileVersion("2.1.2")]
 [assembly: AssemblyInformationalVersion("2.1 Release")]
 
 [assembly: SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "Unity.Mathematics API consistency")]
diff --git a/Runtime/Functions/Arithmetic/Absolute.cs b/Runtime/Functions/Arithmetic/Absolute.cs
@@ -237,9 +237,9 @@ public static int8 abs(int8 x)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static long2 abs(long2 x)
         {
-            if (Sse4_2.IsSse42Supported)
+            if (Sse2.IsSse2Supported)
             {
-                long2 mask = Sse4_2.cmpgt_epi64(default(v128), x);
+                long2 mask = Operator.greater_mask_long(default(v128), x);
 
                 return (x + mask) ^ mask;
             }

diff --git a/Runtime/Functions/Arithmetic/Average.cs b/Runtime/Functions/Arithmetic/Average.cs
@@ -19,7 +19,14 @@ public static byte avg(byte x, byte y)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static byte avg(byte2 c)
         {
-            return (byte)((1u + csum(c)) / 2u);
+            if (Sse2.IsSse2Supported)
+            {
+                return Sse2.avg_epu8(c, Sse2.bsrli_si128(c, 1 * sizeof(byte))).Byte0;
+            }
+            else
+            {
+                return (byte)((1u + csum(c)) / 2u);
+            }
         }
 
         /// <summary>       Returns the componentwise average value of two byte2 vectors with rounding from |x| + 0.5 to |x| + 1.      </summary>
@@ -349,7 +356,14 @@ public static ushort2 avg(ushort2 x, ushort2 y)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static ushort avg(ushort2 c)
         {
-            return (ushort)((1u + csum(c)) / 2u);
+            if (Sse2.IsSse2Supported)
+            {
+                return Sse2.avg_epu16(c, Sse2.bsrli_si128(c, 1 * sizeof(ushort))).UShort0;
+            }
+            else
+            {
+                return (ushort)((1u + csum(c)) / 2u);
+            }
         }
 
         /// <summary>       Returns the componentwise average value of two ushort3 vectors with rounding from |x| + 0.5 to |x| + 1.      </summary>
@@ -855,9 +869,9 @@ public static long2 avg(long2 x, long2 y)
             long2 result = x + y;
 
             // if the intermediate sum is positive add 1
-            if (Sse4_2.IsSse42Supported)
+            if (Sse2.IsSse2Supported)
             {
-                result -= Sse4_2.cmpgt_epi64(result, default(v128));
+                result -= Operator.greater_mask_long(result, default(v128));
             }
             else
             {