From 9d43d71551f18db2635c83c81fee86561a604e7c Mon Sep 17 00:00:00 2001 From: Davis Polito Date: Tue, 26 Sep 2023 11:33:20 -0400 Subject: [PATCH] Add Makefile for stm32 library --- .gitignore | 1 + leaf/Makefile | 200 +++++++++ leaf/leaf_polyvalues.h | 976 ----------------------------------------- 3 files changed, 201 insertions(+), 976 deletions(-) create mode 100644 leaf/Makefile delete mode 100644 leaf/leaf_polyvalues.h diff --git a/.gitignore b/.gitignore index 8617b1c..9910455 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ *.swp */Builds */JuceLibraryCode +*/builds diff --git a/leaf/Makefile b/leaf/Makefile new file mode 100644 index 0000000..340ea08 --- /dev/null +++ b/leaf/Makefile @@ -0,0 +1,200 @@ +###################################### +# stm32 makefile copied from DaisySP Makefile +##################################### + +TARGET = libleaf + +MODULE_DIR=src + + +###################################### +# building variables +###################################### +#DEBUG = 0 +OPT = -O3 + +####################################### +# paths +####################################### + +# Build path +BUILD_DIR = build + +###################################### +# source +###################################### + +# manually adding necessary HAL files +# generated by dump_filepath.py +C_SOURCES = +C_SOURCES += \ +Src/leaf-math.c \ +Src/leaf-mempool.c \ +Src/leaf-tables.c \ +Src/leaf-distortion.c \ +Src/leaf-dynamics.c \ +Src/leaf-analysis.c \ +Src/leaf-delay.c \ +Src/leaf-oscillators.c \ +Src/leaf-effects.c \ +Src/leaf-electrical.c \ +Src/leaf-envelopes.c \ +Src/leaf-filters.c \ +Src/leaf-instruments.c \ +Src/leaf-reverb.c \ +Src/leaf-midi.c \ +Src/leaf-physical.c \ +Src/leaf-sampling.c \ +Externals/d_fft_mayer.c + + + + + + +#STARTUP_PATH = Drivers/CMSIS/Device/ST/STM32H7xx/Source/Templates/gcc +# ASM sources +#ASM_SOURCES = \ +#$(STARTUP_PATH)/startup_stm32h750xx.s + +####################################### +# binaries +####################################### +PREFIX = arm-none-eabi- +# The gcc compiler bin path can be either defined in make command via GCC_PATH variable (> make GCC_PATH=xxx) +# either it can be added to the PATH environment variable. +ifdef GCC_PATH +CC = $(GCC_PATH)/$(PREFIX)gcc +CXX = $(GCC_PATH)/$(PREFIX)g++ +AS = $(GCC_PATH)/$(PREFIX)gcc -x assembler-with-cpp +CP = $(GCC_PATH)/$(PREFIX)objcopy +SZ = $(GCC_PATH)/$(PREFIX)size +AR = $(GCC_PATH)/$(PREFIX)ar +GDB = $(GCC_PATH)/$(PREFIX)gdb +else +CC = $(PREFIX)gcc +CXX = $(PREFIX)g++ +AS = $(PREFIX)gcc -x assembler-with-cpp +CP = $(PREFIX)objcopy +SZ = $(PREFIX)size +AR = $(PREFIX)ar +GDB = $(PREFIX)gdb +endif +HEX = $(CP) -O ihex +BIN = $(CP) -O binary -S + +####################################### +# CFLAGS +####################################### +# cpu +CPU = -mcpu=cortex-m7 + +# fpu +FPU = -mfpu=fpv5-d16 + +# float-abi +FLOAT-ABI = -mfloat-abi=hard + +# mcu +MCU = -mthumb $(FLOAT-ABI) $(FPU) $(CPU) + +# macros for gcc +# AS defines +AS_DEFS = + +# C defines +C_DEFS = \ +-DCORE_CM7 \ +-DSTM32H750xx \ +-DSTM32H750IB \ +-Dflash_layout \ +-DHSE_VALUE=16000000 \ +-DUSE_HAL_DRIVER \ +-DUSE_FULL_LL_DRIVER \ +-DDATA_IN_D2_SRAM +# ^ added for easy startup access + + +C_INCLUDES = \ +-I$(MODULE_DIR) \ +-IInc \ +-IExternals \ +-I. \ + +# suppressions for warnings introduced by HAL/FatFS +WARNINGS += -Wall -Wno-attributes -Wno-strict-aliasing -Wno-maybe-uninitialized -Wno-missing-attributes -Wno-stringop-overflow #-Werror +CPP_WARNINGS += -Wno-register + +# compile gcc flags +ASFLAGS = $(MCU) $(AS_INCLUDES) $(AS_DEFS) -ggdb $(WARNINGS) $(OPT) -fdata-sections -ffunction-sections + +CFLAGS = $(MCU) $(C_INCLUDES) $(C_DEFS) -ggdb $(WARNINGS) $(OPT) -fasm -fdata-sections -ffunction-sections + +ifeq ($(DEBUG), 1) +CFLAGS += -g -ggdb +OPT = -O0 +C_DEFS += -DDEBUG=1 +else +C_DEFS += -DNDEBUG=1 -DRELEASE=1 +endif + +CFLAGS += \ +-finline-functions + +# C++ Flags +CPPFLAGS = $(CFLAGS) $(CPP_WARNINGS) +CPPFLAGS += \ +-fno-exceptions \ +-fno-rtti + +C_STANDARD = -std=gnu11 +CPP_STANDARD += -std=gnu++14 + +# default action: build all +all: $(BUILD_DIR)/$(TARGET).a + +####################################### +# build the application +####################################### + +# list of C program objects +OBJECTS = $(addprefix $(BUILD_DIR)/,$(C_SOURCES:.c=.o)) +vpath %.c $(sort $(dir $(C_SOURCES))) +# list of CPP program objects +OBJECTS += $(addprefix $(BUILD_DIR)/,$(CPP_SOURCES:.cpp=.o)) +vpath %.cpp $(sort $(dir $(CPP_SOURCES))) +# list of ASM program objects +OBJECTS += $(addprefix $(BUILD_DIR)/,$(ASM_SOURCES:.s=.o)) +vpath %.s $(sort $(dir $(ASM_SOURCES))) + +# Prunes duplicates, and orders lexically (for archive build) +SORTED_OBJECTS = $(sort $(OBJECTS)) + +$(BUILD_DIR)/%.o: %.c Makefile | $(BUILD_DIR) + mkdir -p $(@D) + $(CC) $(CFLAGS) $(C_STANDARD) -c $< -o $@ -MD -MP -MF $(BUILD_DIR)/$(notdir $(<:.c=.dep)) + +$(BUILD_DIR)/%.o: %.cpp Makefile | $(BUILD_DIR) + mkdir -p $(@D) + $(CXX) $(CPPFLAGS) $(CPP_STANDARD) -c $< -o $@ -MD -MP -MF $(BUILD_DIR)/$(notdir $(<:.cpp=.dep)) + +$(BUILD_DIR)/%.o: %.s Makefile | $(BUILD_DIR) + mkdir -p $(@D) + $(AS) -c $(ASFLAGS) $< -o $@ -MD -MP -MF $(BUILD_DIR)/$(notdir $(<:.s =.dep)) + +$(BUILD_DIR)/$(TARGET).a: $(SORTED_OBJECTS) Makefile + $(AR) -r $@ $(SORTED_OBJECTS) + +$(BUILD_DIR): + mkdir $@ + +####################################### +# clean up +####################################### +clean: + -rm -fR $(BUILD_DIR) +####################################### + +# dependencies +####################################### +-include $(wildcard $(BUILD_DIR)/*.dep) diff --git a/leaf/leaf_polyvalues.h b/leaf/leaf_polyvalues.h deleted file mode 100644 index 965caae..0000000 --- a/leaf/leaf_polyvalues.h +++ /dev/null @@ -1,976 +0,0 @@ - -/* Copyright 2013-2019 Matt Tytel - * - * vital is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * vital is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with vital. If not, see . - */ - -//stole this for Vital because it's cool - -#pragma once - -#include -#include -#include - -#if VITAL_AVX2 - #define VITAL_AVX2 1 - static_assert(false, "AVX2 is not supported yet."); -#elif __SSE2__ - #define VITAL_SSE2 1 -#elif defined(__ARM_NEON__) || defined(__ARM_NEON) - #define VITAL_NEON 1 -#else - static_assert(false, "No SIMD Intrinsics found which are necessary for compilation"); -#endif - -#if VITAL_SSE2 - #include -#elif VITAL_NEON - #include -#endif - -#if !defined(force_inline) -#if defined (_MSC_VER) -#define force_inline __forceinline - #define vector_call __vectorcall -#else - #define force_inline inline __attribute__((always_inline)) - #define vector_call -#endif -#endif - - - struct poly_int { -#if VITAL_AVX2 - static constexpr size_t kSize = 8; - typedef __m256i simd_type; -#elif VITAL_SSE2 - static constexpr size_t kSize = 4; - typedef __m128i simd_type; -#elif VITAL_NEON - static constexpr size_t kSize = 4; - typedef uint32x4_t simd_type; -#endif - - union scalar_simd_union { - int32_t scalar[kSize]; - simd_type simd; - }; - - union simd_scalar_union { - simd_type simd; - int32_t scalar[kSize]; - }; - - static constexpr uint32_t kFullMask = (unsigned int)-1; - static constexpr uint32_t kSignMask = 0x80000000; - static constexpr uint32_t kNotSignMask = kFullMask ^ kSignMask; - - static force_inline simd_type vector_call init(uint32_t scalar) { -#if VITAL_AVX2 - return _mm256_set1_epi32((int32_t)scalar); -#elif VITAL_SSE2 - return _mm_set1_epi32((int32_t)scalar); -#elif VITAL_NEON - return vdupq_n_u32(scalar); -#endif - } - - static force_inline simd_type vector_call load(const uint32_t* memory) { -#if VITAL_AVX2 - return _mm256_loadu_si256((const __m256i*)scalar); -#elif VITAL_SSE2 - return _mm_loadu_si128((const __m128i*)memory); -#elif VITAL_NEON - return vld1q_u32(memory); -#endif - } - - static force_inline simd_type vector_call add(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_add_epi32(one, two); -#elif VITAL_SSE2 - return _mm_add_epi32(one, two); -#elif VITAL_NEON - return vaddq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call sub(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_sub_epi32(one, two); -#elif VITAL_SSE2 - return _mm_sub_epi32(one, two); -#elif VITAL_NEON - return vsubq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call neg(simd_type value) { -#if VITAL_AVX2 - return _mm256_sub_epi32(_mm256_set1_epi32(0), value); -#elif VITAL_SSE2 - return _mm_sub_epi32(_mm_set1_epi32(0), value); -#elif VITAL_NEON - return vmulq_n_u32(value, -1); -#endif - } - - static force_inline simd_type vector_call mul(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_mul_epi32(one, two); -#elif VITAL_SSE2 - simd_type mul0_2 = _mm_mul_epu32(one, two); - simd_type mul1_3 = _mm_mul_epu32(_mm_shuffle_epi32(one, _MM_SHUFFLE(2, 3, 0, 1)), - _mm_shuffle_epi32(two, _MM_SHUFFLE(2, 3, 0, 1))); - return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul0_2, _MM_SHUFFLE (0, 0, 2, 0)), - _mm_shuffle_epi32(mul1_3, _MM_SHUFFLE (0, 0, 2, 0))); -#elif VITAL_NEON - return vmulq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call bitAnd(simd_type value, simd_type mask) { -#if VITAL_AVX2 - return _mm256_and_si256(value, mask); -#elif VITAL_SSE2 - return _mm_and_si128(value, mask); -#elif VITAL_NEON - return vandq_u32(value, mask); -#endif - } - - static force_inline simd_type vector_call bitOr(simd_type value, simd_type mask) { -#if VITAL_AVX2 - return _mm256_or_si256(value, mask); -#elif VITAL_SSE2 - return _mm_or_si128(value, mask); -#elif VITAL_NEON - return vorrq_u32(value, mask); -#endif - } - - static force_inline simd_type vector_call bitXor(simd_type value, simd_type mask) { -#if VITAL_AVX2 - return _mm256_xor_si256(value, mask); -#elif VITAL_SSE2 - return _mm_xor_si128(value, mask); -#elif VITAL_NEON - return veorq_u32(value, mask); -#endif - } - - static force_inline simd_type vector_call bitNot(simd_type value) { - return bitXor(value, init(-1)); - } - - static force_inline simd_type vector_call max(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_max_epi32(one, two); -#elif VITAL_SSE2 - simd_type greater_than_mask = greaterThan(one, two); - return _mm_or_si128(_mm_and_si128(greater_than_mask, one), _mm_andnot_si128(greater_than_mask, two)); -#elif VITAL_NEON - return vmaxq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call min(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_min_epi32(one, two); -#elif VITAL_SSE2 - simd_type less_than_mask = _mm_cmpgt_epi32(two, one); - return _mm_or_si128(_mm_and_si128(less_than_mask, one), _mm_andnot_si128(less_than_mask, two)); -#elif VITAL_NEON - return vminq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call equal(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_cmpeq_epi32(one, two); -#elif VITAL_SSE2 - return _mm_cmpeq_epi32(one, two); -#elif VITAL_NEON - return vceqq_u32(one, two); -#endif - } - - static force_inline simd_type vector_call greaterThan(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_cmpgt_epi32(_mm256_xor_si256(one, init(kSignMask)), _mm256_xor_si256(two, init(kSignMask))); -#elif VITAL_SSE2 - return _mm_cmpgt_epi32(_mm_xor_si128(one, init(kSignMask)), _mm_xor_si128(two, init(kSignMask))); -#elif VITAL_NEON - return vcgtq_u32(one, two); -#endif - } - - static force_inline uint32_t vector_call sum(simd_type value) { -#if VITAL_AVX2 - simd_type flip = _mm256_permute4x64_epi64(value, _MM_SHUFFLE(1, 0, 3, 2)) - simd_type sum = _mm256_hadd_epi32(value, flip); - sum = _mm256_hadd_epi32(sum, sum); - return _mm256_cvtsi256_si32(sum); -#elif VITAL_SSE2 - simd_scalar_union union_value { value }; - uint32_t total = 0; - for (int i = 0; i < kSize; ++i) - total += union_value.scalar[i]; - return total; -#elif VITAL_NEON - uint32x2_t sum = vpadd_u32(vget_low_u32(value), vget_high_u32(value)); - sum = vpadd_u32(sum, sum); - return vget_lane_u32(sum, 0); -#endif - } - - static force_inline uint32_t vector_call anyMask(simd_type value) { -#if VITAL_AVX2 - return _mm256_movemask_epi8(value); -#elif VITAL_SSE2 - return _mm_movemask_epi8(value); -#elif VITAL_NEON - uint32x2_t max = vpmax_u32(vget_low_u32(value), vget_high_u32(value)); - max = vpmax_u32(max, max); - return vget_lane_u32(max, 0); -#endif - } - - static force_inline poly_int vector_call max(poly_int one, poly_int two) { - return max(one.value, two.value); - } - - static force_inline poly_int vector_call min(poly_int one, poly_int two) { - return min(one.value, two.value); - } - - static force_inline poly_int vector_call equal(poly_int one, poly_int two) { - return equal(one.value, two.value); - } - - static force_inline poly_int vector_call greaterThan(poly_int one, poly_int two) { - return greaterThan(one.value, two.value); - } - - static force_inline poly_int vector_call lessThan(poly_int one, poly_int two) { - return greaterThan(two.value, one.value); - } - - static force_inline uint32_t vector_call sum(poly_int value) { - return sum(value.value); - } - - simd_type value; - - force_inline poly_int() noexcept { value = init(0); } - force_inline poly_int(simd_type initial_value) noexcept : value(initial_value) { } - force_inline poly_int(uint32_t initial_value) noexcept { - value = init(initial_value); - } - - force_inline poly_int(uint32_t first, uint32_t second, uint32_t third, uint32_t fourth) noexcept { - scalar_simd_union union_value { (int32_t)first, (int32_t)second, (int32_t)third, (int32_t)fourth }; - value = union_value.simd; - } - - force_inline poly_int(uint32_t first, uint32_t second) noexcept : poly_int(first, second, first, second) { } - - force_inline ~poly_int() noexcept { } - - force_inline uint32_t vector_call access(size_t index) const noexcept { -#if VITAL_AVX2 - simd_union union_value { value }; - return union_value.scalar[index]; -#elif VITAL_SSE2 - simd_scalar_union union_value { value }; - return union_value.scalar[index]; -#elif VITAL_NEON - return value[index]; -#endif - } - - force_inline void vector_call set(size_t index, uint32_t new_value) noexcept { -#if VITAL_AVX2 - simd_union union_value { value }; - union_value.scalar[index] = new_value; - value = union_value.simd; -#elif VITAL_SSE2 - simd_scalar_union union_value { value }; - union_value.scalar[index] = new_value; - value = union_value.simd; -#elif VITAL_NEON - value[index] = new_value; -#endif - } - - force_inline uint32_t vector_call operator[](size_t index) const noexcept { - return access(index); - } - - force_inline poly_int& vector_call operator+=(poly_int other) noexcept { - value = add(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator-=(poly_int other) noexcept { - value = sub(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator*=(poly_int other) noexcept { - value = mul(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator&=(poly_int other) noexcept { - value = bitAnd(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator|=(poly_int other) noexcept { - value = bitOr(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator^=(poly_int other) noexcept { - value = bitXor(value, other.value); - return *this; - } - - force_inline poly_int& vector_call operator+=(simd_type other) noexcept { - value = add(value, other); - return *this; - } - - force_inline poly_int& vector_call operator-=(simd_type other) noexcept { - value = sub(value, other); - return *this; - } - - force_inline poly_int& vector_call operator*=(simd_type other) noexcept { - value = mul(value, other); - return *this; - } - - force_inline poly_int& vector_call operator&=(simd_type other) noexcept { - value = bitAnd(value, other); - return *this; - } - - force_inline poly_int& vector_call operator|=(simd_type other) noexcept { - value = bitOr(value, other); - return *this; - } - - force_inline poly_int& vector_call operator^=(simd_type other) noexcept { - value = bitXor(value, other); - return *this; - } - - force_inline poly_int& vector_call operator+=(uint32_t scalar) noexcept { - value = add(value, init(scalar)); - return *this; - } - - force_inline poly_int& vector_call operator-=(uint32_t scalar) noexcept { - value = sub(value, init(scalar)); - return *this; - } - - force_inline poly_int& vector_call operator*=(uint32_t scalar) noexcept { - value = mul(value, init(scalar)); - return *this; - } - - force_inline poly_int vector_call operator+(poly_int other) const noexcept { - return add(value, other.value); - } - - force_inline poly_int vector_call operator-(poly_int other) const noexcept { - return sub(value, other.value); - } - - force_inline poly_int vector_call operator*(poly_int other) const noexcept { - return mul(value, other.value); - } - - force_inline poly_int vector_call operator&(poly_int other) const noexcept { - return bitAnd(value, other.value); - } - - force_inline poly_int vector_call operator|(poly_int other) const noexcept { - return bitOr(value, other.value); - } - - force_inline poly_int vector_call operator^(poly_int other) const noexcept { - return bitXor(value, other.value); - } - - force_inline poly_int vector_call operator-() const noexcept { - return neg(value); - } - - force_inline poly_int vector_call operator~() const noexcept { - return bitNot(value); - } - - force_inline uint32_t vector_call sum() const noexcept { - return sum(value); - } - - force_inline uint32_t vector_call anyMask() const noexcept { - return anyMask(value); - } - }; - - typedef poly_int poly_mask; - - struct poly_float { -#if VITAL_AVX2 - static constexpr size_t kSize = 8; - typedef __m256 simd_type; - typedef __m256i mask_simd_type; -#elif VITAL_SSE2 - static constexpr size_t kSize = 4; - typedef __m128 simd_type; - typedef __m128i mask_simd_type; -#elif VITAL_NEON - static constexpr size_t kSize = 4; - typedef float32x4_t simd_type; - typedef uint32x4_t mask_simd_type; -#endif - - union simd_scalar_union { - simd_type simd; - float scalar[kSize]; - }; - - union scalar_simd_union { - float scalar[kSize]; - simd_type simd; - }; - - static force_inline mask_simd_type vector_call toMask(simd_type value) { -#if VITAL_AVX2 - return _mm256_castps_si256(value); -#elif VITAL_SSE2 - return _mm_castps_si128(value); -#elif VITAL_NEON - return vreinterpretq_u32_f32(value); -#endif - } - - static force_inline simd_type vector_call toSimd(mask_simd_type mask) { -#if VITAL_AVX2 - return _mm256_castsi256_ps(mask); -#elif VITAL_SSE2 - return _mm_castsi128_ps(mask); -#elif VITAL_NEON - return vreinterpretq_f32_u32(mask); -#endif - } - - static force_inline simd_type vector_call init(float scalar) { -#if VITAL_AVX2 - return _mm256_broadcast_ss(&scalar); -#elif VITAL_SSE2 - return _mm_set1_ps(scalar); -#elif VITAL_NEON - return vdupq_n_f32(scalar); -#endif - } - - static force_inline simd_type vector_call load(const float* memory) { -#if VITAL_AVX2 - return _mm256_loadu_ps(&scalar); -#elif VITAL_SSE2 - return _mm_loadu_ps(memory); -#elif VITAL_NEON - return vld1q_f32(memory); -#endif - } - - static force_inline simd_type vector_call add(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_add_ps(one, two); -#elif VITAL_SSE2 - return _mm_add_ps(one, two); -#elif VITAL_NEON - return vaddq_f32(one, two); -#endif - } - - static force_inline simd_type vector_call sub(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_sub_ps(one, two); -#elif VITAL_SSE2 - return _mm_sub_ps(one, two); -#elif VITAL_NEON - return vsubq_f32(one, two); -#endif - } - - static force_inline simd_type vector_call neg(simd_type value) { -#if VITAL_AVX2 - return _mm256_xor_ps(value, _mm256_set1_ps(-0.f)); -#elif VITAL_SSE2 - return _mm_xor_ps(value, _mm_set1_ps(-0.f)); -#elif VITAL_NEON - return vmulq_n_f32(value, -1.0f); -#endif - } - - static force_inline simd_type vector_call mul(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_mul_ps(one, two); -#elif VITAL_SSE2 - return _mm_mul_ps(one, two); -#elif VITAL_NEON - return vmulq_f32(one, two); -#endif - } - - static force_inline simd_type vector_call mulScalar(simd_type value, float scalar) { -#if VITAL_AVX2 - return _mm256_mul_ps(value, _mm_set1_ps(scalar)); -#elif VITAL_SSE2 - return _mm_mul_ps(value, _mm_set1_ps(scalar)); -#elif VITAL_NEON - return vmulq_n_f32(value, scalar); -#endif - } - - static force_inline simd_type vector_call mulAdd(simd_type one, simd_type two, simd_type three) { -#if VITAL_AVX2 - return _mm256_fmadd_ps(two, three, one); -#elif VITAL_SSE2 - return _mm_add_ps(one, _mm_mul_ps(two, three)); -#elif VITAL_NEON -#if defined(NEON_VFP_V3) - return vaddq_f32(one, vmulq_f32(two, three)); -#else - return vmlaq_f32(one, two, three); -#endif -#endif - } - - static force_inline simd_type vector_call mulSub(simd_type one, simd_type two, simd_type three) { -#if VITAL_AVX2 - return _mm256_fsub_ps(two, three, one); -#elif VITAL_SSE2 - return _mm_sub_ps(one, _mm_mul_ps(two, three)); -#elif VITAL_NEON -#if defined(NEON_VFP_V3) - return vsubq_f32(one, vmulq_f32(two, three)); -#else - return vmlsq_f32(one, two, three); -#endif -#endif - } - - static force_inline simd_type vector_call div(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_div_ps(one, two); -#elif VITAL_SSE2 - return _mm_div_ps(one, two); -#elif VITAL_NEON -#if defined(NEON_ARM32) - simd_type reciprocal = vrecpeq_f32(two); - reciprocal = vmulq_f32(vrecpsq_f32(two, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(two, reciprocal), reciprocal); - return vmulq_f32(one, reciprocal); -#else - return vdivq_f32(one, two); -#endif -#endif - } - - static force_inline simd_type vector_call bitAnd(simd_type value, mask_simd_type mask) { -#if VITAL_AVX2 - return _mm256_and_ps(value, toSimd(mask)); -#elif VITAL_SSE2 - return _mm_and_ps(value, toSimd(mask)); -#elif VITAL_NEON - return toSimd(vandq_u32(toMask(value), mask)); -#endif - } - - static force_inline simd_type vector_call bitOr(simd_type value, mask_simd_type mask) { -#if VITAL_AVX2 - return _mm256_or_ps(value, toSimd(mask)); -#elif VITAL_SSE2 - return _mm_or_ps(value, toSimd(mask)); -#elif VITAL_NEON - return toSimd(vorrq_u32(toMask(value), mask)); -#endif - } - - static force_inline simd_type vector_call bitXor(simd_type value, mask_simd_type mask) { -#if VITAL_AVX2 - return _mm256_xor_ps(value, toSimd(mask)); -#elif VITAL_SSE2 - return _mm_xor_ps(value, toSimd(mask)); -#elif VITAL_NEON - return toSimd(veorq_u32(toMask(value), mask)); -#endif - } - - static force_inline simd_type vector_call bitNot(simd_type value) { - return bitXor(value, poly_mask::init(-1)); - } - - static force_inline simd_type vector_call max(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_max_ps(one, two); -#elif VITAL_SSE2 - return _mm_max_ps(one, two); -#elif VITAL_NEON - return vmaxq_f32(one, two); -#endif - } - - static force_inline simd_type vector_call min(simd_type one, simd_type two) { -#if VITAL_AVX2 - return _mm256_min_ps(one, two); -#elif VITAL_SSE2 - return _mm_min_ps(one, two); -#elif VITAL_NEON - return vminq_f32(one, two); -#endif - } - - static force_inline simd_type vector_call abs(simd_type value) { - return bitAnd(value, poly_mask::init(poly_mask::kNotSignMask)); - } - - static force_inline mask_simd_type vector_call sign_mask(simd_type value) { - return toMask(bitAnd(value, poly_mask::init(poly_mask::kSignMask))); - } - - static force_inline mask_simd_type vector_call equal(simd_type one, simd_type two) { -#if VITAL_AVX2 - return toMask(_mm256_cmpeq_ps(one, two, _CMP_EQ_OQ)); -#elif VITAL_SSE2 - return toMask(_mm_cmpeq_ps(one, two)); -#elif VITAL_NEON - return vceqq_f32(one, two); -#endif - } - - static force_inline mask_simd_type vector_call greaterThan(simd_type one, simd_type two) { -#if VITAL_AVX2 - return toMask(_mm256_cmp_ps(one, two, _CMP_GT_OQ)); -#elif VITAL_SSE2 - return toMask(_mm_cmpgt_ps(one, two)); -#elif VITAL_NEON - return vcgtq_f32(one, two); -#endif - } - - static force_inline mask_simd_type vector_call greaterThanOrEqual(simd_type one, simd_type two) { -#if VITAL_AVX2 - return toMask(_mm256_cmp_ps(one, two, _CMP_GE_OQ)); -#elif VITAL_SSE2 - return toMask(_mm_cmpge_ps(one, two)); -#elif VITAL_NEON - return vcgeq_f32(one, two); -#endif - } - - static force_inline mask_simd_type vector_call notEqual(simd_type one, simd_type two) { -#if VITAL_AVX2 - return toMask(_mm256_cmp_ps(one, two, _CMP_NEQ_OQ)); -#elif VITAL_SSE2 - return toMask(_mm_cmpneq_ps(one, two)); -#elif VITAL_NEON - poly_mask greater = greaterThan(one, two); - poly_mask less = lessThan(one, two); - return poly_mask::bitOr(greater.value, less.value); -#endif - } - - static force_inline float vector_call sum(simd_type value) { -#if VITAL_AVX2 - simd_type flip = _mm256_permute2f128_ps(value, value, 1) - simd_type sum = _mm256_hadd_ps(value, flip); - sum = _mm256_hadd_ps(sum, sum); - return _mm256_cvtss_f32(sum); -#elif VITAL_SSE2 - simd_type flip = _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 0, 3, 2)); - simd_type sum = _mm_add_ps(value, flip); - simd_type swap = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(2, 3, 0, 1)); - return _mm_cvtss_f32(_mm_add_ps(sum, swap)); -#elif VITAL_NEON - float32x2_t sum = vpadd_f32(vget_low_f32(value), vget_high_f32(value)); - sum = vpadd_f32(sum, sum); - return vget_lane_f32(sum, 0); -#endif - } - - static force_inline void vector_call transpose(simd_type& row0, simd_type& row1, - simd_type& row2, simd_type& row3) { -#if VITAL_AVX2 - static_assert(false, "AVX2 transpose not supported yet"); -#elif VITAL_SSE2 - __m128 low0 = _mm_unpacklo_ps(row0, row1); - __m128 low1 = _mm_unpacklo_ps(row2, row3); - __m128 high0 = _mm_unpackhi_ps(row0, row1); - __m128 high1 = _mm_unpackhi_ps(row2, row3); - row0 = _mm_movelh_ps(low0, low1); - row1 = _mm_movehl_ps(low1, low0); - row2 = _mm_movelh_ps(high0, high1); - row3 = _mm_movehl_ps(high1, high0); -#elif VITAL_NEON - float32x4x2_t swap_low = vtrnq_f32(row0, row1); - float32x4x2_t swap_high = vtrnq_f32(row2, row3); - row0 = vextq_f32(vextq_f32(swap_low.val[0], swap_low.val[0], 2), swap_high.val[0], 2); - row1 = vextq_f32(vextq_f32(swap_low.val[1], swap_low.val[1], 2), swap_high.val[1], 2); - row2 = vextq_f32(swap_low.val[0], vextq_f32(swap_high.val[0], swap_high.val[0], 2), 2); - row3 = vextq_f32(swap_low.val[1], vextq_f32(swap_high.val[1], swap_high.val[1], 2), 2); -#else -#endif - } - - static force_inline poly_float vector_call mulAdd(poly_float one, poly_float two, poly_float three) { - return mulAdd(one.value, two.value, three.value); - } - - static force_inline poly_float vector_call mulSub(poly_float one, poly_float two, poly_float three) { - return mulSub(one.value, two.value, three.value); - } - - static force_inline poly_float vector_call max(poly_float one, poly_float two) { - return max(one.value, two.value); - } - - static force_inline poly_float vector_call min(poly_float one, poly_float two) { - return min(one.value, two.value); - } - - static force_inline poly_float vector_call abs(poly_float value) { - return abs(value.value); - } - - static force_inline poly_mask vector_call sign_mask(poly_float value) { - return sign_mask(value.value); - } - - static force_inline poly_mask vector_call equal(poly_float one, poly_float two) { - return equal(one.value, two.value); - } - - static force_inline poly_mask vector_call notEqual(poly_float one, poly_float two) { - return notEqual(one.value, two.value); - } - - static force_inline poly_mask vector_call greaterThan(poly_float one, poly_float two) { - return greaterThan(one.value, two.value); - } - - static force_inline poly_mask vector_call greaterThanOrEqual(poly_float one, poly_float two) { - return greaterThanOrEqual(one.value, two.value); - } - - static force_inline poly_mask vector_call lessThan(poly_float one, poly_float two) { - return greaterThan(two.value, one.value); - } - - static force_inline poly_mask vector_call lessThanOrEqual(poly_float one, poly_float two) { - return greaterThanOrEqual(two.value, one.value); - } - - simd_type value; - - force_inline poly_float() noexcept { value = init(0.0f); } - force_inline poly_float(simd_type initial_value) noexcept : value(initial_value) { } - force_inline poly_float(float initial_value) noexcept { value = init(initial_value); } - - force_inline poly_float(float initial_value1, float initial_value2) noexcept { - scalar_simd_union union_value { initial_value1, initial_value2, initial_value1, initial_value2 }; - value = union_value.simd; - } - - force_inline poly_float(float first, float second, float third, float fourth) noexcept { - scalar_simd_union union_value { first, second, third, fourth }; - value = union_value.simd; - } - - force_inline ~poly_float() noexcept { } - - force_inline float vector_call access(size_t index) const noexcept { -#if VITAL_AVX2 - simd_union union_value { value }; - return union_value.scalar[index]; -#elif VITAL_SSE2 - simd_scalar_union union_value { value }; - return union_value.scalar[index]; -#elif VITAL_NEON - return value[index]; -#endif - } - - force_inline void vector_call set(size_t index, float new_value) noexcept { -#if VITAL_AVX2 - simd_union union_value { value }; - union_value.scalar[index] = new_value; - value = union_value.simd; -#elif VITAL_SSE2 - simd_scalar_union union_value { value }; - union_value.scalar[index] = new_value; - value = union_value.simd; -#elif VITAL_NEON - value[index] = new_value; -#endif - } - - force_inline float vector_call operator[](size_t index) const noexcept { - return access(index); - } - - force_inline poly_float& vector_call operator+=(poly_float other) noexcept { - value = add(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator-=(poly_float other) noexcept { - value = sub(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator*=(poly_float other) noexcept { - value = mul(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator/=(poly_float other) noexcept { - value = div(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator&=(poly_mask other) noexcept { - value = bitAnd(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator|=(poly_mask other) noexcept { - value = bitOr(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator^=(poly_mask other) noexcept { - value = bitXor(value, other.value); - return *this; - } - - force_inline poly_float& vector_call operator+=(simd_type other) noexcept { - value = add(value, other); - return *this; - } - - force_inline poly_float& vector_call operator-=(simd_type other) noexcept { - value = sub(value, other); - return *this; - } - - force_inline poly_float& vector_call operator*=(simd_type other) noexcept { - value = mul(value, other); - return *this; - } - - force_inline poly_float& vector_call operator/=(simd_type other) noexcept { - value = div(value, other); - return *this; - } - - force_inline poly_float& vector_call operator&=(mask_simd_type other) noexcept { - value = bitAnd(value, other); - return *this; - } - - force_inline poly_float& vector_call operator|=(mask_simd_type other) noexcept { - value = bitOr(value, other); - return *this; - } - - force_inline poly_float& vector_call operator^=(mask_simd_type other) noexcept { - value = bitXor(value, other); - return *this; - } - - force_inline poly_float& vector_call operator+=(float scalar) noexcept { - value = add(value, init(scalar)); - return *this; - } - - force_inline poly_float& vector_call operator-=(float scalar) noexcept { - value = sub(value, init(scalar)); - return *this; - } - - force_inline poly_float& vector_call operator*=(float scalar) noexcept { - value = mulScalar(value, scalar); - return *this; - } - - force_inline poly_float& vector_call operator/=(float scalar) noexcept { - value = div(value, init(scalar)); - return *this; - } - - force_inline poly_float vector_call operator+(poly_float other) const noexcept { - return add(value, other.value); - } - - force_inline poly_float vector_call operator-(poly_float other) const noexcept { - return sub(value, other.value); - } - - force_inline poly_float vector_call operator*(poly_float other) const noexcept { - return mul(value, other.value); - } - - force_inline poly_float vector_call operator/(poly_float other) const noexcept { - return div(value, other.value); - } - - force_inline poly_float vector_call operator*(float scalar) const noexcept { - return mulScalar(value, scalar); - } - - force_inline poly_float vector_call operator&(poly_mask other) const noexcept { - return bitAnd(value, other.value); - } - - force_inline poly_float vector_call operator|(poly_mask other) const noexcept { - return bitOr(value, other.value); - } - - force_inline poly_float vector_call operator^(poly_mask other) const noexcept { - return bitXor(value, other.value); - } - - force_inline poly_float vector_call operator-() const noexcept { - return neg(value); - } - - force_inline poly_float vector_call operator~() const noexcept { - return bitNot(value); - } - - force_inline float vector_call sum() const noexcept { - return sum(value); - } - }; -