1
0
Fork 0
Univerxel/include/FastNoiseSIMD/FastNoiseSIMD_internal.cpp

2420 lines
80 KiB
C++

// FastNoiseSIMD_internal.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
#include "FastNoiseSIMD.h"
#include <assert.h>
#include <string.h>
// Everything below is compiled only when a SIMD level was chosen by the
// includer or the scalar fallback was explicitly requested.
#if defined(SIMD_LEVEL) || defined(FN_COMPILE_NO_SIMD_FALLBACK)
// No level supplied: configure this translation unit as the scalar fallback.
#ifndef SIMD_LEVEL
#define SIMD_LEVEL FN_NO_SIMD_FALLBACK
#define SIMD_LEVEL_H FN_NO_SIMD_FALLBACK
#include "FastNoiseSIMD_internal.h"
#include <math.h>
#define FN_ALIGNED_SETS
#endif
// Per SIMD level var/function naming
// L_VAR2 pastes "L<level>_<name>"; L_VAR adds one level of indirection so that
// SIMD_LEVEL is macro-expanded before the paste happens.
#define L_VAR2(x, l) L##l##_##x
#define L_VAR(x, l) L_VAR2(x, l)
// VAR(x): level-specific name for x.
#define VAR(x) L_VAR(x, SIMD_LEVEL)
// FUNC(x): level-specific name for function x (VAR of FUNC_x).
#define FUNC(x) VAR(FUNC_##x)
// SIMDf_NUM(n) / SIMDi_NUM(n): level-specific names of the float / int constants.
#define SIMDf_NUM(n) VAR(SIMDf_NUM_##n)
#define SIMDi_NUM(n) VAR(SIMDi_NUM_##n)
#define SIMD_LEVEL_CLASS FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(SIMD_LEVEL)
// __vectorcall is applied only on Windows for real SIMD levels; otherwise
// VECTORCALL expands to nothing.
#if defined(_WIN32) && SIMD_LEVEL > FN_NO_SIMD_FALLBACK
#define VECTORCALL __vectorcall
#else
#define VECTORCALL
#endif
// Typedefs
// For each SIMD level this section fixes: VECTOR_SIZE (lanes per vector),
// MEMORY_ALIGNMENT (bytes), the SIMDf / SIMDi vector types, and the macros that
// broadcast a scalar (SET) or produce an all-zero vector (SET_ZERO).
// NEON: 4 lanes, 16-byte alignment.
#if SIMD_LEVEL == FN_NEON
#define VECTOR_SIZE 4
#define MEMORY_ALIGNMENT 16
typedef float32x4_t SIMDf;
typedef int32x4_t SIMDi;
#define SIMDf_SET(a) vdupq_n_f32(a)
#define SIMDf_SET_ZERO() vdupq_n_f32(0)
#define SIMDi_SET(a) vdupq_n_s32(a)
#define SIMDi_SET_ZERO() vdupq_n_s32(0)
// AVX-512: 16 lanes, 64-byte alignment.
#elif SIMD_LEVEL == FN_AVX512
#define VECTOR_SIZE 16
#define MEMORY_ALIGNMENT 64
typedef __m512 SIMDf;
typedef __m512i SIMDi;
#define SIMDf_SET(a) _mm512_set1_ps(a)
#define SIMDf_SET_ZERO() _mm512_setzero_ps()
#define SIMDi_SET(a) _mm512_set1_epi32(a)
#define SIMDi_SET_ZERO() _mm512_setzero_si512()
// AVX2: 8 lanes, 32-byte alignment.
#elif SIMD_LEVEL == FN_AVX2
#define VECTOR_SIZE 8
#define MEMORY_ALIGNMENT 32
typedef __m256 SIMDf;
typedef __m256i SIMDi;
#define SIMDf_SET(a) _mm256_set1_ps(a)
#define SIMDf_SET_ZERO() _mm256_setzero_ps()
#define SIMDi_SET(a) _mm256_set1_epi32(a)
#define SIMDi_SET_ZERO() _mm256_setzero_si256()
// SSE2 and any remaining higher level: 4 lanes, 16-byte alignment.
#elif SIMD_LEVEL >= FN_SSE2
#define VECTOR_SIZE 4
#define MEMORY_ALIGNMENT 16
typedef __m128 SIMDf;
typedef __m128i SIMDi;
#define SIMDf_SET(a) _mm_set1_ps(a)
#define SIMDf_SET_ZERO() _mm_setzero_ps()
#define SIMDi_SET(a) _mm_set1_epi32(a)
#define SIMDi_SET_ZERO() _mm_setzero_si128()
// Scalar fallback: a "vector" is a single float / int.
#else // Fallback to float/int
#define VECTOR_SIZE 1
#define MEMORY_ALIGNMENT 4
typedef float SIMDf;
typedef int SIMDi;
#define SIMDf_SET(a) (a)
#define SIMDf_SET_ZERO() (0)
#define SIMDi_SET(a) (a)
#define SIMDi_SET_ZERO() (0)
#endif
// Memory Allocation
// SIMD_ALLOCATE_SET(floatP, floatCount) makes floatP point at storage for
// floatCount floats. Aligned allocation (MEMORY_ALIGNMENT bytes) is used only
// for real SIMD levels when FN_ALIGNED_SETS is defined; otherwise new[] is used.
#if SIMD_LEVEL > FN_NO_SIMD_FALLBACK && defined(FN_ALIGNED_SETS)
#ifdef _WIN32
#define SIMD_ALLOCATE_SET(floatP, floatCount) floatP = (float*)_aligned_malloc((floatCount)* sizeof(float), MEMORY_ALIGNMENT)
#else
#include <cstdlib>
// NOTE(review): the return code of posix_memalign is discarded here, so a
// failed allocation is not reported by this macro and floatP is not assigned.
#define SIMD_ALLOCATE_SET(floatP, floatCount) posix_memalign((void**)&floatP, MEMORY_ALIGNMENT, (floatCount)* sizeof(float))
#endif
#else
// NOTE(review): the three variants allocate with different APIs; the matching
// release code is not visible in this part of the file.
#define SIMD_ALLOCATE_SET(floatP, floatCount) floatP = new float[floatCount]
#endif
// Overlays one float vector (m) with an array of its VECTOR_SIZE float lanes (a),
// so individual lanes can be written before the value is used as a vector.
union uSIMDf
{
SIMDf m;
float a[VECTOR_SIZE];
};
// Overlays one integer vector (m) with an array of its VECTOR_SIZE int lanes (a).
union uSIMDi
{
SIMDi m;
int a[VECTOR_SIZE];
};
// Comparison results: AVX-512 uses a 16-bit lane mask register type, every
// other level represents a mask as an integer vector.
#if SIMD_LEVEL == FN_AVX512
typedef __mmask16 MASK;
#else
typedef SIMDi MASK;
#endif
// Declared ahead of the other constants because macros/functions defined in
// the per-level section below (e.g. SIMDi_NOT, FLOOR) refer to them.
static SIMDi SIMDi_NUM(0xffffffff);
static SIMDf SIMDf_NUM(1);
// SIMD functions
// Each SIMD level maps the same SIMDf_* / SIMDi_* operation names onto its
// own intrinsics. NEON section: load/store, conversions and basic arithmetic.
#if SIMD_LEVEL == FN_NEON
#define SIMDf_STORE(p,a) vst1q_f32(p, a)
#define SIMDf_LOAD(p) vld1q_f32(p)
// No dedicated "undefined" value on this level: a zero vector is used.
#define SIMDf_UNDEFINED() SIMDf_SET(0)
#define SIMDi_UNDEFINED() SIMDi_SET(0)
// CONVERT_* change the numeric representation; CAST_* reinterpret the bits.
#define SIMDf_CONVERT_TO_FLOAT(a) vcvtq_f32_s32(a)
#define SIMDf_CAST_TO_FLOAT(a) vreinterpretq_f32_s32(a)
#define SIMDi_CONVERT_TO_INT(a) vcvtq_s32_f32(a)
#define SIMDi_CAST_TO_INT(a) vreinterpretq_s32_f32(a)
#define SIMDf_ADD(a,b) vaddq_f32(a,b)
#define SIMDf_SUB(a,b) vsubq_f32(a,b)
#define SIMDf_MUL(a,b) vmulq_f32(a,b)
#define SIMDf_DIV(a,b) FUNC(DIV)(a,b)
// Lane-wise a / b for NEON, computed as a * (1 / b).
//
// The reciprocal starts from the hardware estimate (vrecpeq_f32) and is then
// refined with two Newton-Raphson iterations. vrecpsq_f32(b, r) supplies the
// correction factor (2 - b * r), so each iteration is r = r * (2 - b * r).
// A single iteration leaves noticeably fewer correct mantissa bits than the
// true division used by the other SIMD levels, hence two are applied.
static SIMDf VECTORCALL FUNC(DIV)(SIMDf a, SIMDf b)
{
    // Low-precision starting estimate of 1 / b.
    SIMDf reciprocal = vrecpeq_f32(b);

    // First and second Newton-Raphson refinement.
    reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);

    // a / b = a * (1 / b)
    return vmulq_f32(a, reciprocal);
}
// NEON section, continued: min/max, reciprocal square root estimate,
// comparisons and bitwise operations on float vectors.
#define SIMDf_MIN(a,b) vminq_f32(a,b)
#define SIMDf_MAX(a,b) vmaxq_f32(a,b)
#define SIMDf_INV_SQRT(a) vrsqrteq_f32(a)
// Comparisons yield an unsigned lane mask; it is reinterpreted as SIMDi
// because MASK is a SIMDi on this level.
#define SIMDf_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_f32(a,b))
#define SIMDf_GREATER_THAN(a,b) vreinterpretq_s32_u32(vcgtq_f32(a,b))
#define SIMDf_LESS_EQUAL(a,b) vreinterpretq_s32_u32(vcleq_f32(a,b))
#define SIMDf_GREATER_EQUAL(a,b) vreinterpretq_s32_u32(vcgeq_f32(a,b))
// Float bitwise ops are done on the integer reinterpretation of the operands.
// AND_NOT computes (~a) & b.
#define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b)))
#define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(a)),vreinterpretq_s32_f32(b)))
#define SIMDf_XOR(a,b) SIMDf_CAST_TO_FLOAT(veorq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_f32(b)))
// Without __aarch64__ a hand-written floor is used (see the #else branch for
// the intrinsic used otherwise).
#ifndef __aarch64__
// Lane-wise floor for NEON builds where __aarch64__ is not defined.
// The value is converted to int32 and back; wherever that result is greater
// than the input, 1.0f is subtracted from it.
static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a)
{
    // Integer round trip discards the fractional part.
    const SIMDf truncated = SIMDf_CONVERT_TO_FLOAT(SIMDi_CONVERT_TO_INT(a));

    // All-ones in lanes where the round trip overshot the input.
    const SIMDi overshoot = SIMDf_LESS_THAN(a, truncated);

    // Bit pattern of 1.0f in overshooting lanes, zero bits elsewhere.
    const SIMDi correctionBits = vandq_s32(overshoot, SIMDi_CAST_TO_INT(SIMDf_NUM(1)));

    return vsubq_f32(truncated, SIMDf_CAST_TO_FLOAT(correctionBits));
}
#define SIMDf_FLOOR(a) FUNC(FLOOR)(a)
#else
// With __aarch64__ defined the floor is a single intrinsic.
#define SIMDf_FLOOR(a) vrndmq_f32(a)
#endif
#define SIMDf_ABS(a) vabsq_f32(a)
// Selects b where mask bits are set, a elsewhere (note the swapped operand
// order expected by vbslq_f32).
#define SIMDf_BLENDV(a,b,mask) vbslq_f32(vreinterpretq_u32_s32(mask),b,a)
// NEON section, continued: 32-bit integer arithmetic, bitwise ops, shifts
// and comparisons.
#define SIMDi_ADD(a,b) vaddq_s32(a,b)
#define SIMDi_SUB(a,b) vsubq_s32(a,b)
#define SIMDi_MUL(a,b) vmulq_s32(a,b)
#define SIMDi_AND(a,b) vandq_s32(a,b)
// AND_NOT computes (~a) & b.
#define SIMDi_AND_NOT(a,b) vandq_s32(vmvnq_s32(a),b)
#define SIMDi_OR(a,b) vorrq_s32(a,b)
#define SIMDi_XOR(a,b) veorq_s32(a,b)
#define SIMDi_NOT(a) vmvnq_s32(a)
// SHIFT_R / SHIFT_L use the *_n_* forms (b is passed straight to the
// intrinsic's count argument); VSHIFT_L takes a vector of shift counts.
#define SIMDi_SHIFT_R(a, b) vshrq_n_s32(a, b)
#define SIMDi_SHIFT_L(a, b) vshlq_n_s32(a, b)
#define SIMDi_VSHIFT_L(a, b) vshlq_s32(a, b)
// Integer comparisons, reinterpreted to SIMDi like the float ones.
#define SIMDi_EQUAL(a,b) vreinterpretq_s32_u32(vceqq_s32(a,b))
#define SIMDi_GREATER_THAN(a,b) vreinterpretq_s32_u32(vcgtq_s32(a,b))
#define SIMDi_LESS_THAN(a,b) vreinterpretq_s32_u32(vcltq_s32(a,b))
// AVX-512 section: the same operation names mapped onto _mm512_* intrinsics.
#elif SIMD_LEVEL == FN_AVX512
// Aligned load/store only when FN_ALIGNED_SETS is defined; unaligned otherwise.
#ifdef FN_ALIGNED_SETS
#define SIMDf_STORE(p,a) _mm512_store_ps(p,a)
#define SIMDf_LOAD(p) _mm512_load_ps(p)
#else
#define SIMDf_STORE(p,a) _mm512_storeu_ps(p,a)
#define SIMDf_LOAD(p) _mm512_loadu_ps(p)
#endif
#define SIMDf_UNDEFINED() _mm512_undefined_ps()
#define SIMDi_UNDEFINED() _mm512_undefined_epi32()
#define SIMDf_ADD(a,b) _mm512_add_ps(a,b)
#define SIMDf_SUB(a,b) _mm512_sub_ps(a,b)
#define SIMDf_MUL(a,b) _mm512_mul_ps(a,b)
#define SIMDf_DIV(a,b) _mm512_div_ps(a,b)
#define SIMDf_MIN(a,b) _mm512_min_ps(a,b)
#define SIMDf_MAX(a,b) _mm512_max_ps(a,b)
#define SIMDf_INV_SQRT(a) _mm512_rsqrt14_ps(a)
// Comparisons return a lane mask (MASK is __mmask16 on this level), not a vector.
#define SIMDf_LESS_THAN(a,b) _mm512_cmp_ps_mask(a,b,_CMP_LT_OQ)
#define SIMDf_GREATER_THAN(a,b) _mm512_cmp_ps_mask(a,b,_CMP_GT_OQ)
#define SIMDf_LESS_EQUAL(a,b) _mm512_cmp_ps_mask(a,b,_CMP_LE_OQ)
#define SIMDf_GREATER_EQUAL(a,b) _mm512_cmp_ps_mask(a,b,_CMP_GE_OQ)
#define SIMDf_AND(a,b) _mm512_and_ps(a,b)
#define SIMDf_AND_NOT(a,b) _mm512_andnot_ps(a,b)
#define SIMDf_XOR(a,b) _mm512_xor_ps(a,b)
#define SIMDf_FLOOR(a) _mm512_floor_ps(a)
#define SIMDf_ABS(a) _mm512_abs_ps(a)
// Selects b where the mask bit is set, a elsewhere.
#define SIMDf_BLENDV(a,b,mask) _mm512_mask_blend_ps(mask,a,b)
// GATHER reads floats from base p at the 32-bit indices in a (scale 4 bytes);
// PERMUTE picks lanes of a using the indices in b.
#define SIMDf_GATHER(p,a) _mm512_i32gather_ps(a,p,4)
#define SIMDf_PERMUTE(a,b) _mm512_permutexvar_ps(b,a)
#define SIMDi_ADD(a,b) _mm512_add_epi32(a,b)
#define SIMDi_SUB(a,b) _mm512_sub_epi32(a,b)
#define SIMDi_MUL(a,b) _mm512_mullo_epi32(a,b)
#define SIMDi_AND(a,b) _mm512_and_si512(a,b)
#define SIMDi_AND_NOT(a,b) _mm512_andnot_si512(a,b)
#define SIMDi_OR(a,b) _mm512_or_si512(a,b)
#define SIMDi_XOR(a,b) _mm512_xor_si512(a,b)
// NOT is implemented as XOR with the all-ones constant.
#define SIMDi_NOT(a) SIMDi_XOR(a,SIMDi_NUM(0xffffffff))
#define SIMDi_SHIFT_R(a, b) _mm512_srai_epi32(a, b)
#define SIMDi_SHIFT_L(a, b) _mm512_slli_epi32(a, b)
#define SIMDi_VSHIFT_R(a,b) _mm512_srl_epi32(a, b)
#define SIMDi_VSHIFT_L(a,b) _mm512_sll_epi32(a, b)
#define SIMDi_EQUAL(a,b) _mm512_cmpeq_epi32_mask(a,b)
#define SIMDi_GREATER_THAN(a,b) _mm512_cmpgt_epi32_mask(a,b)
// LESS_THAN reuses greater-than with the operands swapped.
#define SIMDi_LESS_THAN(a,b) _mm512_cmpgt_epi32_mask(b,a)
// CONVERT_* change the numeric representation; CAST_* reinterpret the bits.
#define SIMDf_CONVERT_TO_FLOAT(a) _mm512_cvtepi32_ps(a)
#define SIMDf_CAST_TO_FLOAT(a) _mm512_castsi512_ps(a)
#define SIMDi_CONVERT_TO_INT(a) _mm512_cvtps_epi32(a)
#define SIMDi_CAST_TO_INT(a) _mm512_castps_si512(a)
// AVX2 section: the same operation names mapped onto _mm256_* intrinsics.
#elif SIMD_LEVEL == FN_AVX2
// Aligned load/store only when FN_ALIGNED_SETS is defined; unaligned otherwise.
#ifdef FN_ALIGNED_SETS
#define SIMDf_STORE(p,a) _mm256_store_ps(p,a)
#define SIMDf_LOAD(p) _mm256_load_ps(p)
#else
#define SIMDf_STORE(p,a) _mm256_storeu_ps(p,a)
#define SIMDf_LOAD(p) _mm256_loadu_ps(p)
#endif
#define SIMDf_UNDEFINED() _mm256_undefined_ps()
#define SIMDi_UNDEFINED() _mm256_undefined_si256()
// CONVERT_* change the numeric representation; CAST_* reinterpret the bits.
#define SIMDf_CONVERT_TO_FLOAT(a) _mm256_cvtepi32_ps(a)
#define SIMDf_CAST_TO_FLOAT(a) _mm256_castsi256_ps(a)
#define SIMDi_CONVERT_TO_INT(a) _mm256_cvtps_epi32(a)
#define SIMDi_CAST_TO_INT(a) _mm256_castps_si256(a)
#define SIMDf_ADD(a,b) _mm256_add_ps(a,b)
#define SIMDf_SUB(a,b) _mm256_sub_ps(a,b)
#define SIMDf_MUL(a,b) _mm256_mul_ps(a,b)
#define SIMDf_DIV(a,b) _mm256_div_ps(a,b)
#define SIMDf_MIN(a,b) _mm256_min_ps(a,b)
#define SIMDf_MAX(a,b) _mm256_max_ps(a,b)
#define SIMDf_INV_SQRT(a) _mm256_rsqrt_ps(a)
// Float comparisons are cast to SIMDi because MASK is a SIMDi on this level.
#define SIMDf_LESS_THAN(a,b) SIMDi_CAST_TO_INT(_mm256_cmp_ps(a,b,_CMP_LT_OQ))
#define SIMDf_GREATER_THAN(a,b) SIMDi_CAST_TO_INT(_mm256_cmp_ps(a,b,_CMP_GT_OQ))
#define SIMDf_LESS_EQUAL(a,b) SIMDi_CAST_TO_INT(_mm256_cmp_ps(a,b,_CMP_LE_OQ))
#define SIMDf_GREATER_EQUAL(a,b) SIMDi_CAST_TO_INT( _mm256_cmp_ps(a,b,_CMP_GE_OQ))
#define SIMDf_AND(a,b) _mm256_and_ps(a,b)
#define SIMDf_AND_NOT(a,b) _mm256_andnot_ps(a,b)
#define SIMDf_XOR(a,b) _mm256_xor_ps(a,b)
#define SIMDf_FLOOR(a) _mm256_floor_ps(a)
// ABS clears the sign bit by AND-ing with the 0x7fffffff constant.
#define SIMDf_ABS(a) SIMDf_AND(a,SIMDf_CAST_TO_FLOAT(SIMDi_NUM(0x7fffffff)))
#define SIMDf_BLENDV(a,b,mask) _mm256_blendv_ps(a,b,SIMDf_CAST_TO_FLOAT(mask))
#define SIMDf_PERMUTE(a,b) _mm256_permutevar8x32_ps(a,b)
#define SIMDi_ADD(a,b) _mm256_add_epi32(a,b)
#define SIMDi_SUB(a,b) _mm256_sub_epi32(a,b)
#define SIMDi_MUL(a,b) _mm256_mullo_epi32(a,b)
#define SIMDi_AND(a,b) _mm256_and_si256(a,b)
#define SIMDi_AND_NOT(a,b) _mm256_andnot_si256(a,b)
#define SIMDi_OR(a,b) _mm256_or_si256(a,b)
#define SIMDi_XOR(a,b) _mm256_xor_si256(a,b)
// NOT is implemented as XOR with the all-ones constant.
#define SIMDi_NOT(a) SIMDi_XOR(a,SIMDi_NUM(0xffffffff))
#define SIMDi_SHIFT_R(a, b) _mm256_srai_epi32(a, b)
#define SIMDi_SHIFT_L(a, b) _mm256_slli_epi32(a, b)
#define SIMDi_EQUAL(a,b) _mm256_cmpeq_epi32(a,b)
#define SIMDi_GREATER_THAN(a,b) _mm256_cmpgt_epi32(a,b)
// LESS_THAN reuses greater-than with the operands swapped.
#define SIMDi_LESS_THAN(a,b) _mm256_cmpgt_epi32(b,a)
// SSE2 / SSE4.1 section: the same operation names mapped onto _mm_* intrinsics.
#elif SIMD_LEVEL >= FN_SSE2
// Aligned load/store only when FN_ALIGNED_SETS is defined; unaligned otherwise.
#ifdef FN_ALIGNED_SETS
#define SIMDf_STORE(p,a) _mm_store_ps(p,a)
#define SIMDf_LOAD(p) _mm_load_ps(p)
#else
#define SIMDf_STORE(p,a) _mm_storeu_ps(p,a)
#define SIMDf_LOAD(p) _mm_loadu_ps(p)
#endif
// No dedicated "undefined" value on this level: a zero vector is used.
#define SIMDf_UNDEFINED() SIMDf_SET_ZERO()
#define SIMDi_UNDEFINED() SIMDi_SET_ZERO()
// CONVERT_* change the numeric representation; CAST_* reinterpret the bits.
#define SIMDf_CONVERT_TO_FLOAT(a) _mm_cvtepi32_ps(a)
#define SIMDf_CAST_TO_FLOAT(a) _mm_castsi128_ps(a)
#define SIMDi_CONVERT_TO_INT(a) _mm_cvtps_epi32(a)
#define SIMDi_CAST_TO_INT(a) _mm_castps_si128(a)
#define SIMDf_ADD(a,b) _mm_add_ps(a,b)
#define SIMDf_SUB(a,b) _mm_sub_ps(a,b)
#define SIMDf_MUL(a,b) _mm_mul_ps(a,b)
#define SIMDf_DIV(a,b) _mm_div_ps(a,b)
#define SIMDf_MIN(a,b) _mm_min_ps(a,b)
#define SIMDf_MAX(a,b) _mm_max_ps(a,b)
#define SIMDf_INV_SQRT(a) _mm_rsqrt_ps(a)
// Float comparisons are cast to SIMDi because MASK is a SIMDi on this level.
#define SIMDf_LESS_THAN(a,b) SIMDi_CAST_TO_INT(_mm_cmplt_ps(a,b))
#define SIMDf_GREATER_THAN(a,b) SIMDi_CAST_TO_INT(_mm_cmpgt_ps(a,b))
#define SIMDf_LESS_EQUAL(a,b) SIMDi_CAST_TO_INT(_mm_cmple_ps(a,b))
#define SIMDf_GREATER_EQUAL(a,b) SIMDi_CAST_TO_INT(_mm_cmpge_ps(a,b))
#define SIMDf_AND(a,b) _mm_and_ps(a,b)
#define SIMDf_AND_NOT(a,b) _mm_andnot_ps(a,b)
#define SIMDf_XOR(a,b) _mm_xor_ps(a,b)
// ABS clears the sign bit by AND-ing with the 0x7fffffff constant.
#define SIMDf_ABS(a) SIMDf_AND(a,SIMDf_CAST_TO_FLOAT(SIMDi_NUM(0x7fffffff)))
// SSE4.1 has direct intrinsics for 32-bit multiply, floor and blend; the
// #else branch provides hand-written replacements for the other SSE levels.
#if SIMD_LEVEL == FN_SSE41
#define SIMDi_MUL(a,b) _mm_mullo_epi32(a,b)
#define SIMDf_FLOOR(a) _mm_floor_ps(a)
#define SIMDf_BLENDV(a,b,mask) _mm_blendv_ps(a,b,SIMDf_CAST_TO_FLOAT(mask))
#else
// Lane-wise 32-bit integer multiply for SSE levels other than SSE4.1.
// Lanes 0/2 and lanes 1/3 are multiplied in two separate _mm_mul_epu32 calls
// and the results are shuffled back into lane order 0, 1, 2, 3.
static SIMDi VECTORCALL FUNC(MUL)(SIMDi a, SIMDi b)
{
    // Products of lanes 0 and 2.
    const __m128 evenProducts = _mm_castsi128_ps(_mm_mul_epu32(a, b));

    // Shift both inputs down by one lane (4 bytes) to multiply lanes 1 and 3.
    const __m128i aOdd = _mm_srli_si128(a, 4);
    const __m128i bOdd = _mm_srli_si128(b, 4);
    const __m128 oddProducts = _mm_castsi128_ps(_mm_mul_epu32(aOdd, bOdd));

    // Collect elements 0 and 2 of each product vector: [p0, p2, p1, p3].
    const __m128 gathered = _mm_shuffle_ps(evenProducts, oddProducts, _MM_SHUFFLE(2, 0, 2, 0));

    // Swap the middle two elements to restore [p0, p1, p2, p3].
    return _mm_shuffle_epi32(_mm_castps_si128(gathered), _MM_SHUFFLE(3, 1, 2, 0));
}
#define SIMDi_MUL(a,b) FUNC(MUL)(a,b)
// Lane-wise floor for SSE levels other than SSE4.1.
// The value is converted to int32 (cvttps) and back; wherever that result is
// greater than the input, 1.0f is subtracted from it.
static SIMDf VECTORCALL FUNC(FLOOR)(SIMDf a)
{
    const __m128 truncated = _mm_cvtepi32_ps(_mm_cvttps_epi32(a));

    // 1.0f in lanes where the round trip overshot the input, 0.0f elsewhere.
    const __m128 overshoot = _mm_cmplt_ps(a, truncated);
    const __m128 correction = _mm_and_ps(overshoot, SIMDf_NUM(1));

    return _mm_sub_ps(truncated, correction);
}
#define SIMDf_FLOOR(a) FUNC(FLOOR)(a)
// Blend without SSE4.1: (~mask & a) | (mask & b), i.e. b where mask bits are
// set and a elsewhere.
#define SIMDf_BLENDV(a,b,mask) _mm_or_ps(_mm_andnot_ps(SIMDf_CAST_TO_FLOAT(mask), a), _mm_and_ps(SIMDf_CAST_TO_FLOAT(mask), b))
#endif
// SSE section, continued: 32-bit integer arithmetic, bitwise ops, shifts and
// comparisons (shared by all SSE levels).
#define SIMDi_ADD(a,b) _mm_add_epi32(a,b)
#define SIMDi_SUB(a,b) _mm_sub_epi32(a,b)
#define SIMDi_AND(a,b) _mm_and_si128(a,b)
#define SIMDi_AND_NOT(a,b) _mm_andnot_si128(a,b)
#define SIMDi_OR(a,b) _mm_or_si128(a,b)
#define SIMDi_XOR(a,b) _mm_xor_si128(a,b)
// NOT is implemented as XOR with the all-ones constant.
#define SIMDi_NOT(a) SIMDi_XOR(a,SIMDi_NUM(0xffffffff))
#define SIMDi_SHIFT_R(a,b) _mm_srai_epi32(a, b)
#define SIMDi_SHIFT_L(a,b) _mm_slli_epi32(a, b)
#define SIMDi_EQUAL(a,b) _mm_cmpeq_epi32(a,b)
#define SIMDi_GREATER_THAN(a,b) _mm_cmpgt_epi32(a,b)
// LESS_THAN reuses greater-than with the operands swapped.
#define SIMDi_LESS_THAN(a,b) _mm_cmpgt_epi32(b,a)
// Scalar fallback section: plain float / int expressions.
#else // Fallback
// Bit-level reinterpretation helpers for the scalar fallback.
// The bytes are copied with memcpy rather than read through a pointer of the
// other type, which would violate the strict-aliasing rule. Both helpers
// rely on int and float having the same size, as the original code did.

// Returns the bit pattern of f as an int.
static int FUNC(CAST_TO_INT)(float f)
{
    int bits;
    memcpy(&bits, &f, sizeof bits);
    return bits;
}

// Returns the float whose bit pattern is i.
static float FUNC(CAST_TO_FLOAT)(int i)
{
    float value;
    memcpy(&value, &i, sizeof value);
    return value;
}
#define SIMDi_CAST_TO_INT(a) FUNC(CAST_TO_INT)(a)
#define SIMDf_CAST_TO_FLOAT(a) FUNC(CAST_TO_FLOAT)(a)
// Scalar fallback: load/store and basic float arithmetic.
// The pointer and value arguments are fully parenthesised so that expression
// arguments such as SIMDf_LOAD(buf + i) dereference the intended address.
#define SIMDf_STORE(p,a) (*(p) = (a))
#define SIMDf_LOAD(p) (*(p))
// No dedicated "undefined" value on this level: zero is used.
#define SIMDf_UNDEFINED() (0)
#define SIMDi_UNDEFINED() (0)
#define SIMDf_ADD(a,b) ((a) + (b))
#define SIMDf_SUB(a,b) ((a) - (b))
#define SIMDf_MUL(a,b) ((a) * (b))
#define SIMDf_DIV(a,b) ((a) / (b))
#define SIMDf_MIN(a,b) fminf(a,b)
#define SIMDf_MAX(a,b) fmaxf(a,b)
// Approximate 1 / sqrt(x) for the scalar fallback.
// An initial guess is derived from the float's bit pattern using the constant
// 0x5f3759df and then improved with one Newton-Raphson step,
// x * (1.5 - 0.5 * x0 * x * x).
// The bit pattern is moved between float and int with memcpy rather than
// through pointer casts, which would violate the strict-aliasing rule.
static float FUNC(INV_SQRT)(float x)
{
    const float xhalf = 0.5f * x;

    int bits;
    memcpy(&bits, &x, sizeof bits);
    bits = 0x5f3759df - (bits >> 1);
    memcpy(&x, &bits, sizeof x);

    // One Newton-Raphson refinement of the initial guess.
    return x * (1.5f - xhalf * x * x);
}
#define SIMDf_INV_SQRT(a) FUNC(INV_SQRT)(a)
// Scalar fallback, continued. Comparisons produce 0xFFFFFFFF for true and 0
// for false, mirroring the all-ones lane masks of the vector levels.
#define SIMDf_LESS_THAN(a,b) (((a) < (b)) ? 0xFFFFFFFF : 0)
#define SIMDf_GREATER_THAN(a,b) (((a) > (b)) ? 0xFFFFFFFF : 0)
#define SIMDf_LESS_EQUAL(a,b) (((a) <= (b)) ? 0xFFFFFFFF : 0)
#define SIMDf_GREATER_EQUAL(a,b) (((a) >= (b)) ? 0xFFFFFFFF : 0)
// Float bitwise ops operate on the integer bit patterns of the operands.
// AND_NOT computes (~a) & b.
#define SIMDf_AND(a,b) SIMDf_CAST_TO_FLOAT(SIMDi_CAST_TO_INT(a) & SIMDi_CAST_TO_INT(b))
#define SIMDf_AND_NOT(a,b) SIMDf_CAST_TO_FLOAT(~SIMDi_CAST_TO_INT(a) & SIMDi_CAST_TO_INT(b))
#define SIMDf_XOR(a,b) SIMDf_CAST_TO_FLOAT(SIMDi_CAST_TO_INT(a) ^ SIMDi_CAST_TO_INT(b))
#define SIMDf_FLOOR(a) floorf(a)
#define SIMDf_ABS(a) fabsf(a)
// Selects b when mask is non-zero, a otherwise.
#define SIMDf_BLENDV(a,b,mask) (mask ? (b) : (a))
// Reads the float at index a from the array starting at p.
#define SIMDf_GATHER(p,a) (*(reinterpret_cast<const float*>(p)+(a)))
#define SIMDi_ADD(a,b) ((a) + (b))
#define SIMDi_SUB(a,b) ((a) - (b))
#define SIMDi_MUL(a,b) ((a) * (b))
#define SIMDi_AND(a,b) ((a) & (b))
#define SIMDi_AND_NOT(a,b) (~(a) & (b))
#define SIMDi_OR(a,b) ((a) | (b))
#define SIMDi_XOR(a,b) ((a) ^ (b))
#define SIMDi_NOT(a) (~(a))
#define SIMDi_SHIFT_R(a, b) ((a) >> (b))
#define SIMDi_SHIFT_L(a, b) ((a) << (b))
#define SIMDi_EQUAL(a,b) (((a) == (b)) ? 0xFFFFFFFF : 0)
#define SIMDi_GREATER_THAN(a,b) (((a) > (b)) ? 0xFFFFFFFF : 0)
#define SIMDi_LESS_THAN(a,b) (((a) < (b)) ? 0xFFFFFFFF : 0)
// Float-to-int conversion goes through roundf before the cast.
#define SIMDi_CONVERT_TO_INT(a) static_cast<int>(roundf(a))
#define SIMDf_CONVERT_TO_FLOAT(a) static_cast<float>(a)
#endif
// Disabled helper macros (kept commented out).
//#define SIMDf_SIGN_FLIP(a) SIMDf_XOR(a,SIMDf_NUM(neg0)))
//#define SIMDi_GREATER_EQUAL(a,b) SIMDi_NOT(SIMDi_LESS_THAN(a,b))
//#define SIMDi_LESS_EQUAL(a,b) SIMDi_NOT(SIMDi_GREATER_THAN(a,b))
//#define SIMDi_BLENDV(a,b, mask) SIMDi_CAST_TO_INT(SIMDf_BLENDV(SIMDf_CAST_TO_FLOAT(a),SIMDf_CAST_TO_FLOAT(b),SIMDf_CAST_TO_FLOAT(mask)))
// Mask helpers.
//   MASK_OR / MASK_AND / MASK_AND_NOT / MASK_NOT combine comparison masks
//     (AND_NOT is (~a) & b).
//   SIMDf_MASK(m,a)        : a where m is set, zero elsewhere.
//   SIMD*_MASK_ADD(m,a,b)  : a + b where m is set, a elsewhere.
//   SIMD*_MASK_SUB(m,a,b)  : a - b where m is set, a elsewhere.
// AVX-512 operates on mask registers and uses masked intrinsics.
#if SIMD_LEVEL == FN_AVX512
#define MASK_OR(a,b) ((a)|(b))
#define MASK_AND(a,b) ((a)&(b))
#define MASK_AND_NOT(a,b) (~(a)&(b))
#define MASK_NOT(a) (~(a))
#define SIMDf_MASK(m,a) _mm512_maskz_mov_ps(m,a)
#define SIMDf_MASK_ADD(m,a,b) _mm512_mask_add_ps(a,m,a,b)
#define SIMDf_MASK_SUB(m,a,b) _mm512_mask_sub_ps(a,m,a,b)
#define SIMDi_MASK_ADD(m,a,b) _mm512_mask_add_epi32(a,m,a,b)
#define SIMDi_MASK_SUB(m,a,b) _mm512_mask_sub_epi32(a,m,a,b)
#else
// Other levels: the mask is an integer vector, so b is AND-ed with the mask
// (zeroing unselected lanes) before the add / subtract.
#define MASK_OR(a,b) SIMDi_OR(a,b)
#define MASK_AND(a,b) SIMDi_AND(a,b)
#define MASK_AND_NOT(a,b) SIMDi_AND_NOT(a,b)
#define MASK_NOT(a) SIMDi_NOT(a)
#define SIMDf_MASK(m,a) SIMDf_AND(SIMDf_CAST_TO_FLOAT(m),a)
#define SIMDf_MASK_ADD(m,a,b) SIMDf_ADD(a,SIMDf_AND(SIMDf_CAST_TO_FLOAT(m),b))
#define SIMDf_MASK_SUB(m,a,b) SIMDf_SUB(a,SIMDf_AND(SIMDf_CAST_TO_FLOAT(m),b))
#define SIMDi_MASK_ADD(m,a,b) SIMDi_ADD(a,SIMDi_AND(m,b))
#define SIMDi_MASK_SUB(m,a,b) SIMDi_SUB(a,SIMDi_AND(m,b))
#endif
// Empty per-level conditional: no branch currently contains any code.
#if SIMD_LEVEL == FN_AVX512
#elif SIMD_LEVEL == FN_NEON
#elif SIMD_LEVEL == FN_NO_SIMD_FALLBACK
#else
#endif
// SIMD_ZERO_ALL() expands to nothing on every level; on AVX2 the
// _mm256_zeroall() call is commented out.
#if SIMD_LEVEL == FN_AVX2
#define SIMD_ZERO_ALL() //_mm256_zeroall()
#else
#define SIMD_ZERO_ALL()
#endif
// FMA
// Contract of the three macros, on every SIMD level:
//   SIMDf_MUL_ADD(a,b,c)  = a * b + c
//   SIMDf_MUL_SUB(a,b,c)  = a * b - c
//   SIMDf_NMUL_ADD(a,b,c) = c - a * b
// With FN_USE_FMA the levels that have suitable intrinsics use them; every
// other configuration falls back to separate multiply and add/subtract.
#ifdef FN_USE_FMA
#if SIMD_LEVEL == FN_NEON
// vmlaq_f32(x,y,z) computes x + y * z and vmlsq_f32(x,y,z) computes x - y * z,
// so the addend c must be passed as the first operand.
#define SIMDf_MUL_ADD(a,b,c) vmlaq_f32(c,a,b)
#define SIMDf_MUL_SUB(a,b,c) SIMDf_SUB(SIMDf_MUL(a,b),c) // Neon multiply sub swaps sides of minus compared to FMA making it unusable
#define SIMDf_NMUL_ADD(a,b,c) vmlsq_f32(c,a,b)
#elif SIMD_LEVEL == FN_AVX512
#define SIMDf_MUL_ADD(a,b,c) _mm512_fmadd_ps(a,b,c)
#define SIMDf_MUL_SUB(a,b,c) _mm512_fmsub_ps(a,b,c)
#define SIMDf_NMUL_ADD(a,b,c) _mm512_fnmadd_ps(a,b,c)
#elif SIMD_LEVEL == FN_AVX2
#define SIMDf_MUL_ADD(a,b,c) _mm256_fmadd_ps(a,b,c)
#define SIMDf_MUL_SUB(a,b,c) _mm256_fmsub_ps(a,b,c)
#define SIMDf_NMUL_ADD(a,b,c) _mm256_fnmadd_ps(a,b,c)
#endif
#endif
// Generic definitions, used when nothing above defined SIMDf_MUL_ADD.
#ifndef SIMDf_MUL_ADD
#define SIMDf_MUL_ADD(a,b,c) SIMDf_ADD(SIMDf_MUL(a,b),c)
#define SIMDf_MUL_SUB(a,b,c) SIMDf_SUB(SIMDf_MUL(a,b),c)
#define SIMDf_NMUL_ADD(a,b,c) SIMDf_SUB(c, SIMDf_MUL(a,b))
#endif
// Set to true by InitSIMDValues once the constants below have been filled in.
static bool VAR(SIMD_Values_Set) = false;
// Per-level constant vectors. They are only declared here; their values are
// assigned at run time by InitSIMDValues.
// Float constants. "incremental" holds the lane indices 0..VECTOR_SIZE-1;
// names such as 0_5 stand for 0.5.
static SIMDf SIMDf_NUM(incremental);
static SIMDf SIMDf_NUM(0);
static SIMDf SIMDf_NUM(2);
static SIMDf SIMDf_NUM(6);
static SIMDf SIMDf_NUM(10);
static SIMDf SIMDf_NUM(15);
static SIMDf SIMDf_NUM(32);
static SIMDf SIMDf_NUM(999999);
static SIMDf SIMDf_NUM(0_5);
static SIMDf SIMDf_NUM(0_6);
static SIMDf SIMDf_NUM(15_5);
static SIMDf SIMDf_NUM(511_5);
//static SIMDf SIMDf_NUM(cellJitter);
static SIMDf SIMDf_NUM(F3);
static SIMDf SIMDf_NUM(G3);
static SIMDf SIMDf_NUM(G33);
static SIMDf SIMDf_NUM(hash2Float);
static SIMDf SIMDf_NUM(vectorSize);
static SIMDf SIMDf_NUM(cubicBounding);
// AVX-512 keeps gradient component tables for its GradCoord; the other
// levels instead need the integer constants 8, 12 and 13.
#if SIMD_LEVEL == FN_AVX512
static SIMDf SIMDf_NUM(X_GRAD);
static SIMDf SIMDf_NUM(Y_GRAD);
static SIMDf SIMDf_NUM(Z_GRAD);
#else
static SIMDi SIMDi_NUM(8);
static SIMDi SIMDi_NUM(12);
static SIMDi SIMDi_NUM(13);
#endif
// Integer constants.
static SIMDi SIMDi_NUM(incremental);
static SIMDi SIMDi_NUM(1);
static SIMDi SIMDi_NUM(2);
static SIMDi SIMDi_NUM(255);
static SIMDi SIMDi_NUM(60493);
static SIMDi SIMDi_NUM(0x7fffffff);
//static SIMDi SIMDi_NUM(xGradBits);
//static SIMDi SIMDi_NUM(yGradBits);
//static SIMDi SIMDi_NUM(zGradBits);
// Per-axis multipliers applied to integer lattice coordinates before hashing.
static SIMDi SIMDi_NUM(xPrime);
static SIMDi SIMDi_NUM(yPrime);
static SIMDi SIMDi_NUM(zPrime);
static SIMDi SIMDi_NUM(bit5Mask);
static SIMDi SIMDi_NUM(bit10Mask);
static SIMDi SIMDi_NUM(vectorSize);
// Fills in the per-level constant vectors used by the noise kernels.
// Only the first call does any work; later calls return immediately because
// the SIMD_Values_Set flag is raised at the end of the first one.
void FUNC(InitSIMDValues)()
{
    if (VAR(SIMD_Values_Set))
    {
        return;
    }

    // Lane ramps 0, 1, ..., VECTOR_SIZE - 1, written lane by lane through the
    // unions and then read back as whole vectors.
    uSIMDf laneRampF;
    uSIMDi laneRampI;
    for (int lane = 0; lane < VECTOR_SIZE; ++lane)
    {
        laneRampF.a[lane] = float(lane);
        laneRampI.a[lane] = lane;
    }
    SIMDf_NUM(incremental) = laneRampF.m;
    SIMDi_NUM(incremental) = laneRampI.m;

    // Broadcast float constants.
    SIMDf_NUM(0) = SIMDf_SET_ZERO();
    SIMDf_NUM(1) = SIMDf_SET(1.0f);
    SIMDf_NUM(2) = SIMDf_SET(2.0f);
    SIMDf_NUM(6) = SIMDf_SET(6.0f);
    SIMDf_NUM(10) = SIMDf_SET(10.0f);
    SIMDf_NUM(15) = SIMDf_SET(15.0f);
    SIMDf_NUM(32) = SIMDf_SET(32.0f);
    SIMDf_NUM(999999) = SIMDf_SET(999999.0f);
    SIMDf_NUM(0_5) = SIMDf_SET(0.5f);
    SIMDf_NUM(0_6) = SIMDf_SET(0.6f);
    SIMDf_NUM(15_5) = SIMDf_SET(15.5f);
    SIMDf_NUM(511_5) = SIMDf_SET(511.5f);

    // Simplex skew / unskew factors.
    SIMDf_NUM(F3) = SIMDf_SET(1.f / 3.f);
    SIMDf_NUM(G3) = SIMDf_SET(1.f / 6.f);
    SIMDf_NUM(G33) = SIMDf_SET((3.f / 6.f) - 1.f);

    // Scale factor 1 / 2^31 applied to integer hashes converted to float.
    SIMDf_NUM(hash2Float) = SIMDf_SET(1.f / 2147483648.f);
    SIMDf_NUM(vectorSize) = SIMDf_SET(VECTOR_SIZE);
    SIMDf_NUM(cubicBounding) = SIMDf_SET(1.f / (1.5f*1.5f*1.5f));

#if SIMD_LEVEL == FN_AVX512
    // Gradient component tables, one entry per lane, used by the AVX-512 GradCoord.
    SIMDf_NUM(X_GRAD) = _mm512_set_ps(0, -1, 0, 1, 0, 0, 0, 0, -1, 1, -1, 1, -1, 1, -1, 1);
    SIMDf_NUM(Y_GRAD) = _mm512_set_ps(-1, 1, -1, 1, -1, 1, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1);
    SIMDf_NUM(Z_GRAD) = _mm512_set_ps(-1, 0, 1, 0, -1, -1, 1, 1, -1, -1, 1, 1, 0, 0, 0, 0);
#else
    // Integer constants used by the non-AVX-512 GradCoord.
    SIMDi_NUM(8) = SIMDi_SET(8);
    SIMDi_NUM(12) = SIMDi_SET(12);
    SIMDi_NUM(13) = SIMDi_SET(13);
#endif

    // Broadcast integer constants.
    SIMDi_NUM(1) = SIMDi_SET(1);
    SIMDi_NUM(2) = SIMDi_SET(2);
    SIMDi_NUM(255) = SIMDi_SET(255);
    SIMDi_NUM(60493) = SIMDi_SET(60493);
    SIMDi_NUM(0x7fffffff) = SIMDi_SET(0x7fffffff);

    // Per-axis multipliers for lattice coordinates.
    SIMDi_NUM(xPrime) = SIMDi_SET(1619);
    SIMDi_NUM(yPrime) = SIMDi_SET(31337);
    SIMDi_NUM(zPrime) = SIMDi_SET(6971);

    SIMDi_NUM(bit5Mask) = SIMDi_SET(31);
    SIMDi_NUM(bit10Mask) = SIMDi_SET(1023);
    SIMDi_NUM(vectorSize) = SIMDi_SET(VECTOR_SIZE);
    SIMDi_NUM(0xffffffff) = SIMDi_SET(-1);

    VAR(SIMD_Values_Set) = true;
}
// Linear interpolation between a and b: (b - a) * t + a, per lane.
static SIMDf VECTORCALL FUNC(Lerp)(SIMDf a, SIMDf b, SIMDf t)
{
    const SIMDf span = SIMDf_SUB(b, a);
    return SIMDf_MUL_ADD(span, t, a);
}
// Quintic fade curve, per lane: t^3 * (t * (t * 6 - 15) + 10).
static SIMDf VECTORCALL FUNC(InterpQuintic)(SIMDf t)
{
    // inner = t * 6 - 15
    const SIMDf inner = SIMDf_MUL_SUB(t, SIMDf_NUM(6), SIMDf_NUM(15));
    // poly = inner * t + 10
    SIMDf poly = SIMDf_MUL_ADD(inner, t, SIMDf_NUM(10));
    // Multiply by t three times, one factor at a time.
    poly = SIMDf_MUL(poly, t);
    poly = SIMDf_MUL(poly, t);
    return SIMDf_MUL(poly, t);
}
// Cubic interpolation through four samples a, b, c, d at parameter t.
// With p = (d - c) - (a - b), the result per lane is
//   t*(t*(t*p)) + t*(t*((a - b) - p)) + t*(c - a) + b.
static SIMDf VECTORCALL FUNC(CubicLerp)(SIMDf a, SIMDf b, SIMDf c, SIMDf d, SIMDf t)
{
    const SIMDf aMinusB = SIMDf_SUB(a, b);
    const SIMDf p = SIMDf_SUB(SIMDf_SUB(d, c), aMinusB);

    // t * (c - a) + b
    const SIMDf linearPart = SIMDf_MUL_ADD(t, SIMDf_SUB(c, a), b);
    // t * (t * ((a - b) - p)) + linearPart
    const SIMDf quadraticPart = SIMDf_MUL_ADD(t, SIMDf_MUL(t, SIMDf_SUB(aMinusB, p)), linearPart);
    // t * (t * (t * p)) + quadraticPart
    return SIMDf_MUL_ADD(t, SIMDf_MUL(t, SIMDf_MUL(t, p)), quadraticPart);
}
//static SIMDf VECTORCALL FUNC(InterpHermite)(SIMDf t)
//{
// SIMDf r;
// r = SIMDf_MUL(t, SIMDf_NUM(2));
// r = SIMDf_SUB(SIMDf_ADD(SIMDf_NUM(1), SIMDf_NUM(2)), r);
// r = SIMDf_MUL(r, t);
// r = SIMDf_MUL(r, t);
//
// return r;
//}
// Integer hash of (seed, x, y, z), per lane.
// The inputs are XOR-ed together, the result h is replaced by
// ((h * h) * 60493) * h, and finally h is XOR-ed with itself shifted right by 13.
static SIMDi VECTORCALL FUNC(Hash)(SIMDi seed, SIMDi x, SIMDi y, SIMDi z)
{
    // Fold the three coordinates into the seed.
    SIMDi mixed = SIMDi_XOR(x, seed);
    mixed = SIMDi_XOR(y, mixed);
    mixed = SIMDi_XOR(z, mixed);

    const SIMDi squared = SIMDi_MUL(mixed, mixed);
    mixed = SIMDi_MUL(SIMDi_MUL(squared, SIMDi_NUM(60493)), mixed);

    return SIMDi_XOR(SIMDi_SHIFT_R(mixed, 13), mixed);
}
// Variant of Hash without the final shift-and-XOR step: the inputs are
// XOR-ed together and the result h is replaced by ((h * h) * 60493) * h.
static SIMDi VECTORCALL FUNC(HashHB)(SIMDi seed, SIMDi x, SIMDi y, SIMDi z)
{
    // Fold the three coordinates into the seed.
    SIMDi mixed = SIMDi_XOR(x, seed);
    mixed = SIMDi_XOR(y, mixed);
    mixed = SIMDi_XOR(z, mixed);

    const SIMDi squared = SIMDi_MUL(mixed, mixed);
    return SIMDi_MUL(SIMDi_MUL(squared, SIMDi_NUM(60493)), mixed);
}
// Pseudo-random float per lattice point: the HashHB hash of (seed, x, y, z)
// is converted to float and scaled by the hash2Float constant.
static SIMDf VECTORCALL FUNC(ValCoord)(SIMDi seed, SIMDi x, SIMDi y, SIMDi z)
{
    // HashHB performs exactly the XOR-and-multiply mixing needed here
    // (no final shift step).
    const SIMDi hash = FUNC(HashHB)(seed, x, y, z);
    return SIMDf_MUL(SIMDf_NUM(hash2Float), SIMDf_CONVERT_TO_FLOAT(hash));
}
#if SIMD_LEVEL == FN_AVX512
// AVX-512 gradient contribution: the hash of the lattice point selects one
// entry from each of the X/Y/Z gradient tables via a permute, and the result
// is the dot product of that gradient with the offset (x, y, z).
static SIMDf VECTORCALL FUNC(GradCoord)(SIMDi seed, SIMDi xi, SIMDi yi, SIMDi zi, SIMDf x, SIMDf y, SIMDf z)
{
    const SIMDi hash = FUNC(Hash)(seed, xi, yi, zi);

    const SIMDf gradX = SIMDf_PERMUTE(SIMDf_NUM(X_GRAD), hash);
    const SIMDf gradY = SIMDf_PERMUTE(SIMDf_NUM(Y_GRAD), hash);
    const SIMDf gradZ = SIMDf_PERMUTE(SIMDf_NUM(Z_GRAD), hash);

    // x * gradX + (y * gradY + z * gradZ)
    const SIMDf zTerm = SIMDf_MUL(z, gradZ);
    const SIMDf yzTerm = SIMDf_MUL_ADD(y, gradY, zTerm);
    return SIMDf_MUL_ADD(x, gradX, yzTerm);
}
#else
// Gradient contribution for every level except AVX-512.
// Bits of the lattice hash pick two of the offset components (u and v) and
// their signs; the result is (+/-u) + (+/-v).
static SIMDf VECTORCALL FUNC(GradCoord)(SIMDi seed, SIMDi xi, SIMDi yi, SIMDi zi, SIMDf x, SIMDf y, SIMDf z)
{
    const SIMDi hash = FUNC(Hash)(seed, xi, yi, zi);
    // Selector: the hash AND-ed with 13.
    const SIMDi selector = SIMDi_AND(hash, SIMDi_NUM(13));

    // u = x when selector < 8, otherwise y.
    const MASK below8 = SIMDi_LESS_THAN(selector, SIMDi_NUM(8));
    const SIMDf u = SIMDf_BLENDV(y, x, below8);

    // v = y when selector < 2; otherwise x when selector == 12; otherwise z.
    const MASK below2 = SIMDi_LESS_THAN(selector, SIMDi_NUM(2));
    const MASK equals12 = SIMDi_EQUAL(SIMDi_NUM(12), selector);
    const SIMDf zOrX = SIMDf_BLENDV(z, x, equals12);
    const SIMDf v = SIMDf_BLENDV(zOrX, y, below2);

    // Move hash bit 0 (for u) and hash bit 1 (for v) into the float sign-bit
    // position; XOR-ing then negates the component when that bit is set.
    const SIMDf uSign = SIMDf_CAST_TO_FLOAT(SIMDi_SHIFT_L(hash, 31));
    const SIMDf vSign = SIMDf_CAST_TO_FLOAT(SIMDi_SHIFT_L(SIMDi_AND(hash, SIMDi_NUM(2)), 30));

    return SIMDf_ADD(SIMDf_XOR(u, uSign), SIMDf_XOR(v, vSign));
}
#endif
// White noise sample: each coordinate's raw float bit pattern is XOR-ed with
// itself shifted right by 16, multiplied by that axis' prime constant, and
// the three results are passed to ValCoord.
static SIMDf VECTORCALL FUNC(WhiteNoiseSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z)
{
    // Reinterpret (not convert) the float coordinates as integers.
    const SIMDi xBits = SIMDi_CAST_TO_INT(x);
    const SIMDi yBits = SIMDi_CAST_TO_INT(y);
    const SIMDi zBits = SIMDi_CAST_TO_INT(z);

    const SIMDi xKey = SIMDi_MUL(SIMDi_XOR(xBits, SIMDi_SHIFT_R(xBits, 16)), SIMDi_NUM(xPrime));
    const SIMDi yKey = SIMDi_MUL(SIMDi_XOR(yBits, SIMDi_SHIFT_R(yBits, 16)), SIMDi_NUM(yPrime));
    const SIMDi zKey = SIMDi_MUL(SIMDi_XOR(zBits, SIMDi_SHIFT_R(zBits, 16)), SIMDi_NUM(zPrime));

    return FUNC(ValCoord)(seed, xKey, yKey, zKey);
}
// Value noise sample: ValCoord values at the eight corners of the lattice
// cell containing (x, y, z) are blended with quintic-faded trilinear
// interpolation (x first, then y, then z).
static SIMDf VECTORCALL FUNC(ValueSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z)
{
    const SIMDf xFloor = SIMDf_FLOOR(x);
    const SIMDf yFloor = SIMDf_FLOOR(y);
    const SIMDf zFloor = SIMDf_FLOOR(z);

    // Lower cell corner, pre-multiplied by the per-axis primes.
    const SIMDi x0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(xFloor), SIMDi_NUM(xPrime));
    const SIMDi y0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(yFloor), SIMDi_NUM(yPrime));
    const SIMDi z0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(zFloor), SIMDi_NUM(zPrime));
    // Upper cell corner: one prime step further along each axis.
    const SIMDi x1 = SIMDi_ADD(x0, SIMDi_NUM(xPrime));
    const SIMDi y1 = SIMDi_ADD(y0, SIMDi_NUM(yPrime));
    const SIMDi z1 = SIMDi_ADD(z0, SIMDi_NUM(zPrime));

    // Faded fractional position inside the cell.
    const SIMDf xs = FUNC(InterpQuintic)(SIMDf_SUB(x, xFloor));
    const SIMDf ys = FUNC(InterpQuintic)(SIMDf_SUB(y, yFloor));
    const SIMDf zs = FUNC(InterpQuintic)(SIMDf_SUB(z, zFloor));

    // Interpolate along x on the four cell edges.
    const SIMDf edge00 = FUNC(Lerp)(FUNC(ValCoord)(seed, x0, y0, z0), FUNC(ValCoord)(seed, x1, y0, z0), xs);
    const SIMDf edge10 = FUNC(Lerp)(FUNC(ValCoord)(seed, x0, y1, z0), FUNC(ValCoord)(seed, x1, y1, z0), xs);
    const SIMDf edge01 = FUNC(Lerp)(FUNC(ValCoord)(seed, x0, y0, z1), FUNC(ValCoord)(seed, x1, y0, z1), xs);
    const SIMDf edge11 = FUNC(Lerp)(FUNC(ValCoord)(seed, x0, y1, z1), FUNC(ValCoord)(seed, x1, y1, z1), xs);

    // Then along y on the two faces, then along z.
    const SIMDf faceZ0 = FUNC(Lerp)(edge00, edge10, ys);
    const SIMDf faceZ1 = FUNC(Lerp)(edge01, edge11, ys);
    return FUNC(Lerp)(faceZ0, faceZ1, zs);
}
// Perlin (gradient) noise sample: GradCoord contributions at the eight
// corners of the lattice cell containing (x, y, z) are blended with
// quintic-faded trilinear interpolation (x first, then y, then z).
static SIMDf VECTORCALL FUNC(PerlinSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z)
{
    const SIMDf xFloor = SIMDf_FLOOR(x);
    const SIMDf yFloor = SIMDf_FLOOR(y);
    const SIMDf zFloor = SIMDf_FLOOR(z);

    // Lower cell corner, pre-multiplied by the per-axis primes.
    const SIMDi x0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(xFloor), SIMDi_NUM(xPrime));
    const SIMDi y0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(yFloor), SIMDi_NUM(yPrime));
    const SIMDi z0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(zFloor), SIMDi_NUM(zPrime));
    // Upper cell corner: one prime step further along each axis.
    const SIMDi x1 = SIMDi_ADD(x0, SIMDi_NUM(xPrime));
    const SIMDi y1 = SIMDi_ADD(y0, SIMDi_NUM(yPrime));
    const SIMDi z1 = SIMDi_ADD(z0, SIMDi_NUM(zPrime));

    // Offsets from the lower corner (f0) and from the upper corner (f1 = f0 - 1).
    const SIMDf xf0 = SIMDf_SUB(x, xFloor);
    const SIMDf yf0 = SIMDf_SUB(y, yFloor);
    const SIMDf zf0 = SIMDf_SUB(z, zFloor);
    const SIMDf xf1 = SIMDf_SUB(xf0, SIMDf_NUM(1));
    const SIMDf yf1 = SIMDf_SUB(yf0, SIMDf_NUM(1));
    const SIMDf zf1 = SIMDf_SUB(zf0, SIMDf_NUM(1));

    // Faded interpolation weights.
    const SIMDf xs = FUNC(InterpQuintic)(xf0);
    const SIMDf ys = FUNC(InterpQuintic)(yf0);
    const SIMDf zs = FUNC(InterpQuintic)(zf0);

    // Interpolate along x on the four cell edges.
    const SIMDf edge00 = FUNC(Lerp)(FUNC(GradCoord)(seed, x0, y0, z0, xf0, yf0, zf0), FUNC(GradCoord)(seed, x1, y0, z0, xf1, yf0, zf0), xs);
    const SIMDf edge10 = FUNC(Lerp)(FUNC(GradCoord)(seed, x0, y1, z0, xf0, yf1, zf0), FUNC(GradCoord)(seed, x1, y1, z0, xf1, yf1, zf0), xs);
    const SIMDf edge01 = FUNC(Lerp)(FUNC(GradCoord)(seed, x0, y0, z1, xf0, yf0, zf1), FUNC(GradCoord)(seed, x1, y0, z1, xf1, yf0, zf1), xs);
    const SIMDf edge11 = FUNC(Lerp)(FUNC(GradCoord)(seed, x0, y1, z1, xf0, yf1, zf1), FUNC(GradCoord)(seed, x1, y1, z1, xf1, yf1, zf1), xs);

    // Then along y on the two faces, then along z.
    const SIMDf faceZ0 = FUNC(Lerp)(edge00, edge10, ys);
    const SIMDf faceZ1 = FUNC(Lerp)(edge01, edge11, ys);
    return FUNC(Lerp)(faceZ0, faceZ1, zs);
}
// 3D simplex noise sample for one vector of points.
// The input is skewed onto the integer lattice, the offsets to four simplex
// corners are derived with comparison masks instead of branches, and each
// corner contributes t^4 * GradCoord where t = 0.6 - |offset|^2 and t >= 0.
// The sum of the contributions is scaled by 32.
static SIMDf VECTORCALL FUNC(SimplexSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z)
{
// Skew: f = (x + y + z) * F3, then floor to find the containing cell.
SIMDf f = SIMDf_MUL(SIMDf_NUM(F3), SIMDf_ADD(SIMDf_ADD(x, y), z));
SIMDf x0 = SIMDf_FLOOR(SIMDf_ADD(x, f));
SIMDf y0 = SIMDf_FLOOR(SIMDf_ADD(y, f));
SIMDf z0 = SIMDf_FLOOR(SIMDf_ADD(z, f));
// Integer cell coordinates, pre-multiplied by the per-axis primes for hashing.
SIMDi i = SIMDi_MUL(SIMDi_CONVERT_TO_INT(x0), SIMDi_NUM(xPrime));
SIMDi j = SIMDi_MUL(SIMDi_CONVERT_TO_INT(y0), SIMDi_NUM(yPrime));
SIMDi k = SIMDi_MUL(SIMDi_CONVERT_TO_INT(z0), SIMDi_NUM(zPrime));
// Unskew: g = (x0 + y0 + z0) * G3; x0/y0/z0 become the offsets from the
// first corner, input - (cell - g).
SIMDf g = SIMDf_MUL(SIMDf_NUM(G3), SIMDf_ADD(SIMDf_ADD(x0, y0), z0));
x0 = SIMDf_SUB(x, SIMDf_SUB(x0, g));
y0 = SIMDf_SUB(y, SIMDf_SUB(y0, g));
z0 = SIMDf_SUB(z, SIMDf_SUB(z0, g));
// Pairwise ordering of the offset components.
MASK x0_ge_y0 = SIMDf_GREATER_EQUAL(x0, y0);
MASK y0_ge_z0 = SIMDf_GREATER_EQUAL(y0, z0);
MASK x0_ge_z0 = SIMDf_GREATER_EQUAL(x0, z0);
// Second corner: a step is taken along the axis selected by i1/j1/k1
// (i1 is set where x0 >= y0 and x0 >= z0, and so on).
MASK i1 = MASK_AND(x0_ge_y0, x0_ge_z0);
MASK j1 = MASK_AND_NOT(x0_ge_y0, y0_ge_z0);
MASK k1 = MASK_AND_NOT(x0_ge_z0, MASK_NOT(y0_ge_z0));
// Third corner: steps are taken along the axes selected by i2/j2/k2.
MASK i2 = MASK_OR(x0_ge_y0, x0_ge_z0);
MASK j2 = MASK_OR(MASK_NOT(x0_ge_y0), y0_ge_z0);
MASK k2 = MASK_NOT(MASK_AND(x0_ge_z0, y0_ge_z0));
// Offsets from the second corner: subtract 1 on the selected axis, add G3.
SIMDf x1 = SIMDf_ADD(SIMDf_MASK_SUB(i1, x0, SIMDf_NUM(1)), SIMDf_NUM(G3));
SIMDf y1 = SIMDf_ADD(SIMDf_MASK_SUB(j1, y0, SIMDf_NUM(1)), SIMDf_NUM(G3));
SIMDf z1 = SIMDf_ADD(SIMDf_MASK_SUB(k1, z0, SIMDf_NUM(1)), SIMDf_NUM(G3));
// Offsets from the third corner: subtract 1 on the selected axes, add F3
// (F3 = 1/3 equals 2 * G3 with G3 = 1/6).
SIMDf x2 = SIMDf_ADD(SIMDf_MASK_SUB(i2, x0, SIMDf_NUM(1)), SIMDf_NUM(F3));
SIMDf y2 = SIMDf_ADD(SIMDf_MASK_SUB(j2, y0, SIMDf_NUM(1)), SIMDf_NUM(F3));
SIMDf z2 = SIMDf_ADD(SIMDf_MASK_SUB(k2, z0, SIMDf_NUM(1)), SIMDf_NUM(F3));
// Offsets from the fourth corner: add G33 (= 3 * G3 - 1) on every axis.
SIMDf x3 = SIMDf_ADD(x0, SIMDf_NUM(G33));
SIMDf y3 = SIMDf_ADD(y0, SIMDf_NUM(G33));
SIMDf z3 = SIMDf_ADD(z0, SIMDf_NUM(G33));
// Falloff per corner: t = 0.6 - x^2 - y^2 - z^2.
SIMDf t0 = SIMDf_NMUL_ADD(z0, z0, SIMDf_NMUL_ADD(y0, y0, SIMDf_NMUL_ADD(x0, x0, SIMDf_NUM(0_6))));
SIMDf t1 = SIMDf_NMUL_ADD(z1, z1, SIMDf_NMUL_ADD(y1, y1, SIMDf_NMUL_ADD(x1, x1, SIMDf_NUM(0_6))));
SIMDf t2 = SIMDf_NMUL_ADD(z2, z2, SIMDf_NMUL_ADD(y2, y2, SIMDf_NMUL_ADD(x2, x2, SIMDf_NUM(0_6))));
SIMDf t3 = SIMDf_NMUL_ADD(z3, z3, SIMDf_NMUL_ADD(y3, y3, SIMDf_NMUL_ADD(x3, x3, SIMDf_NUM(0_6))));
// A corner contributes only where its falloff is non-negative.
MASK n0 = SIMDf_GREATER_EQUAL(t0, SIMDf_NUM(0));
MASK n1 = SIMDf_GREATER_EQUAL(t1, SIMDf_NUM(0));
MASK n2 = SIMDf_GREATER_EQUAL(t2, SIMDf_NUM(0));
MASK n3 = SIMDf_GREATER_EQUAL(t3, SIMDf_NUM(0));
// t^2 here; squared again below to give t^4.
t0 = SIMDf_MUL(t0, t0);
t1 = SIMDf_MUL(t1, t1);
t2 = SIMDf_MUL(t2, t2);
t3 = SIMDf_MUL(t3, t3);
// Corner contributions t^4 * gradient. v0..v2 are masked when they are
// summed below; v3 is masked here because it seeds that sum.
SIMDf v0 = SIMDf_MUL(SIMDf_MUL(t0, t0), FUNC(GradCoord)(seed, i, j, k, x0, y0, z0));
SIMDf v1 = SIMDf_MUL(SIMDf_MUL(t1, t1), FUNC(GradCoord)(seed, SIMDi_MASK_ADD(i1, i, SIMDi_NUM(xPrime)), SIMDi_MASK_ADD(j1, j, SIMDi_NUM(yPrime)), SIMDi_MASK_ADD(k1, k, SIMDi_NUM(zPrime)), x1, y1, z1));
SIMDf v2 = SIMDf_MUL(SIMDf_MUL(t2, t2), FUNC(GradCoord)(seed, SIMDi_MASK_ADD(i2, i, SIMDi_NUM(xPrime)), SIMDi_MASK_ADD(j2, j, SIMDi_NUM(yPrime)), SIMDi_MASK_ADD(k2, k, SIMDi_NUM(zPrime)), x2, y2, z2));
SIMDf v3 = SIMDf_MASK(n3, SIMDf_MUL(SIMDf_MUL(t3, t3), FUNC(GradCoord)(seed, SIMDi_ADD(i, SIMDi_NUM(xPrime)), SIMDi_ADD(j, SIMDi_NUM(yPrime)), SIMDi_ADD(k, SIMDi_NUM(zPrime)), x3, y3, z3)));
// 32 * (v3 + [n2] v2 + [n1] v1 + [n0] v0)
return SIMDf_MUL(SIMDf_NUM(32), SIMDf_MASK_ADD(n0, SIMDf_MASK_ADD(n1, SIMDf_MASK_ADD(n2, v3, v2), v1), v0));
}
// Cubic noise for one vector of sample points.
// Samples FUNC(ValCoord) on the 4x4x4 lattice neighbourhood around the floored
// position (one cell below to two cells above on every axis), reduces it with
// FUNC(CubicLerp) along x, then y, then z, and scales the result by cubicBounding.
static SIMDf VECTORCALL FUNC(CubicSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z)
{
    SIMDf xFloor = SIMDf_FLOOR(x);
    SIMDf yFloor = SIMDf_FLOOR(y);
    SIMDf zFloor = SIMDf_FLOOR(z);

    // Lattice coordinates, already multiplied by the per-axis primes.
    // Index 1 is the cell returned by the floor; 0 is one below, 2 and 3 are above.
    SIMDi xc[4];
    SIMDi yc[4];
    SIMDi zc[4];
    xc[1] = SIMDi_MUL(SIMDi_CONVERT_TO_INT(xFloor), SIMDi_NUM(xPrime));
    yc[1] = SIMDi_MUL(SIMDi_CONVERT_TO_INT(yFloor), SIMDi_NUM(yPrime));
    zc[1] = SIMDi_MUL(SIMDi_CONVERT_TO_INT(zFloor), SIMDi_NUM(zPrime));
    xc[0] = SIMDi_SUB(xc[1], SIMDi_NUM(xPrime));
    yc[0] = SIMDi_SUB(yc[1], SIMDi_NUM(yPrime));
    zc[0] = SIMDi_SUB(zc[1], SIMDi_NUM(zPrime));
    for (int n = 2; n < 4; n++)
    {
        xc[n] = SIMDi_ADD(xc[n - 1], SIMDi_NUM(xPrime));
        yc[n] = SIMDi_ADD(yc[n - 1], SIMDi_NUM(yPrime));
        zc[n] = SIMDi_ADD(zc[n - 1], SIMDi_NUM(zPrime));
    }

    // Position of the sample inside its cell.
    SIMDf xs = SIMDf_SUB(x, xFloor);
    SIMDf ys = SIMDf_SUB(y, yFloor);
    SIMDf zs = SIMDf_SUB(z, zFloor);

    SIMDf planes[4];
    for (int zi = 0; zi < 4; zi++)
    {
        SIMDf rows[4];
        for (int yi = 0; yi < 4; yi++)
        {
            rows[yi] = FUNC(CubicLerp)(
                FUNC(ValCoord)(seed, xc[0], yc[yi], zc[zi]),
                FUNC(ValCoord)(seed, xc[1], yc[yi], zc[zi]),
                FUNC(ValCoord)(seed, xc[2], yc[yi], zc[zi]),
                FUNC(ValCoord)(seed, xc[3], yc[yi], zc[zi]),
                xs);
        }
        planes[zi] = FUNC(CubicLerp)(rows[0], rows[1], rows[2], rows[3], ys);
    }

    SIMDf blended = FUNC(CubicLerp)(planes[0], planes[1], planes[2], planes[3], zs);
    return SIMDf_MUL(blended, SIMDf_NUM(cubicBounding));
}
// GRADIENT_COORD(_x,_y,_z): declares hash<_x_y_z> and the float vectors
// x<_x_y_z>, y<_x_y_z>, z<_x_y_z> for one lattice corner.
// The corner is hashed with FUNC(HashHB) from `seed` and the caller-scope integer
// coordinates x<_x>, y<_y>, z<_z>. Each component is the hash shifted right by
// 0, 10 or 20 bits, masked with bit10Mask, converted to float, minus 511.5
// (presumably centring a 10-bit field around zero - confirm bit10Mask's value).
#define GRADIENT_COORD(_x,_y,_z)\
SIMDi hash##_x##_y##_z = FUNC(HashHB)(seed, x##_x, y##_y, z##_z); \
SIMDf x##_x##_y##_z = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(hash##_x##_y##_z, SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5)); \
SIMDf y##_x##_y##_z = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash##_x##_y##_z, 10), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5)); \
SIMDf z##_x##_y##_z = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash##_x##_y##_z, 20), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));
// Disabled (not part of the macro): rescaling each corner vector to length cellJitter.
//SIMDf invMag##_x##_y##_z = SIMDf_MUL(SIMDf_NUM(cellJitter), SIMDf_INV_SQRT(SIMDf_MUL_ADD(x##_x##_y##_z, x##_x##_y##_z, SIMDf_MUL_ADD(y##_x##_y##_z, y##_x##_y##_z, SIMDf_MUL(z##_x##_y##_z, z##_x##_y##_z)))));
//x##_x##_y##_z = SIMDf_MUL(x##_x##_y##_z, invMag##_x##_y##_z);
//y##_x##_y##_z = SIMDf_MUL(y##_x##_y##_z, invMag##_x##_y##_z);
//z##_x##_y##_z = SIMDf_MUL(z##_x##_y##_z, invMag##_x##_y##_z);
// Displaces the sample position (x, y, z) in place by one octave of gradient perturbation.
//   seed             - hash seed for the corner offsets
//   perturbAmp       - factor applied to the interpolated offset before it is added
//   perturbFrequency - factor applied to the position before the lattice lookup
// The eight corners of the enclosing lattice cell each get a hashed offset vector
// (GRADIENT_COORD); the offsets are blended with FUNC(Lerp) using
// FUNC(InterpQuintic) weights and added to x, y and z.
static void VECTORCALL FUNC(GradientPerturbSingle)(SIMDi seed, SIMDf perturbAmp, SIMDf perturbFrequency, SIMDf& x, SIMDf& y, SIMDf& z)
{
    SIMDf xScaled = SIMDf_MUL(x, perturbFrequency);
    SIMDf yScaled = SIMDf_MUL(y, perturbFrequency);
    SIMDf zScaled = SIMDf_MUL(z, perturbFrequency);

    SIMDf xFloor = SIMDf_FLOOR(xScaled);
    SIMDf yFloor = SIMDf_FLOOR(yScaled);
    SIMDf zFloor = SIMDf_FLOOR(zScaled);

    // GRADIENT_COORD reads these by name (x0/x1, y0/y1, z0/z1), so the names are fixed.
    SIMDi x0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(xFloor), SIMDi_NUM(xPrime));
    SIMDi x1 = SIMDi_ADD(x0, SIMDi_NUM(xPrime));
    SIMDi y0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(yFloor), SIMDi_NUM(yPrime));
    SIMDi y1 = SIMDi_ADD(y0, SIMDi_NUM(yPrime));
    SIMDi z0 = SIMDi_MUL(SIMDi_CONVERT_TO_INT(zFloor), SIMDi_NUM(zPrime));
    SIMDi z1 = SIMDi_ADD(z0, SIMDi_NUM(zPrime));

    // Interpolation weights from the fractional position inside the cell.
    SIMDf tx = FUNC(InterpQuintic)(SIMDf_SUB(xScaled, xFloor));
    SIMDf ty = FUNC(InterpQuintic)(SIMDf_SUB(yScaled, yFloor));
    SIMDf tz = FUNC(InterpQuintic)(SIMDf_SUB(zScaled, zFloor));

    // Declares x###, y###, z### offset vectors for every corner of the cell.
    GRADIENT_COORD(0, 0, 0);
    GRADIENT_COORD(0, 0, 1);
    GRADIENT_COORD(0, 1, 0);
    GRADIENT_COORD(0, 1, 1);
    GRADIENT_COORD(1, 0, 0);
    GRADIENT_COORD(1, 0, 1);
    GRADIENT_COORD(1, 1, 0);
    GRADIENT_COORD(1, 1, 1);

    // Blend along x and y on the z0 face and on the z1 face.
    SIMDf xNear = FUNC(Lerp)(FUNC(Lerp)(x000, x100, tx), FUNC(Lerp)(x010, x110, tx), ty);
    SIMDf yNear = FUNC(Lerp)(FUNC(Lerp)(y000, y100, tx), FUNC(Lerp)(y010, y110, tx), ty);
    SIMDf zNear = FUNC(Lerp)(FUNC(Lerp)(z000, z100, tx), FUNC(Lerp)(z010, z110, tx), ty);
    SIMDf xFar = FUNC(Lerp)(FUNC(Lerp)(x001, x101, tx), FUNC(Lerp)(x011, x111, tx), ty);
    SIMDf yFar = FUNC(Lerp)(FUNC(Lerp)(y001, y101, tx), FUNC(Lerp)(y011, y111, tx), ty);
    SIMDf zFar = FUNC(Lerp)(FUNC(Lerp)(z001, z101, tx), FUNC(Lerp)(z011, z111, tx), ty);

    // Blend the two faces along z, then displace the caller's coordinates.
    SIMDf xOffset = FUNC(Lerp)(xNear, xFar, tz);
    SIMDf yOffset = FUNC(Lerp)(yNear, yFar, tz);
    SIMDf zOffset = FUNC(Lerp)(zNear, zFar, tz);

    x = SIMDf_MUL_ADD(xOffset, perturbAmp, x);
    y = SIMDf_MUL_ADD(yOffset, perturbAmp, y);
    z = SIMDf_MUL_ADD(zOffset, perturbAmp, z);
}
// Constructs the noise generator for this SIMD level.
// Stores the seed, derives both fractal bounding factors from the current
// octave/gain members, initialises the per-level SIMD constants and records
// this level as the active one.
SIMD_LEVEL_CLASS::FASTNOISE_SIMD_CLASS(SIMD_LEVEL)(int seed)
{
    this->m_seed = seed;

    // Bounding factors for the main fractal and for the perturb fractal.
    this->m_fractalBounding = CalculateFractalBounding(this->m_octaves, this->m_gain);
    this->m_perturbFractalBounding = CalculateFractalBounding(this->m_perturbOctaves, this->m_perturbGain);

    FUNC(InitSIMDValues)();

    s_currentSIMDLevel = SIMD_LEVEL;
}
// Returns the element count to allocate for a set of `size` floats.
// With FN_ALIGNED_SETS defined the count is rounded up to the next multiple of
// VECTOR_SIZE; otherwise `size` is returned unchanged.
int SIMD_LEVEL_CLASS::AlignedSize(int size)
{
#ifdef FN_ALIGNED_SETS
    // Elements past the last full vector; the mask assumes VECTOR_SIZE is a power of two.
    const int overhang = size & (VECTOR_SIZE - 1);
    if (overhang != 0)
    {
        size += VECTOR_SIZE - overhang;
    }
#endif
    return size;
}
// Allocates a float buffer able to hold `size` noise values.
// The element count is first passed through AlignedSize(); the allocation
// itself is done by SIMD_ALLOCATE_SET. Contents are not initialised here.
float* SIMD_LEVEL_CLASS::GetEmptySet(int size)
{
    int elementCount = AlignedSize(size);

    float* buffer;
    SIMD_ALLOCATE_SET(buffer, elementCount);

    return buffer;
}
// AXIS_RESET(_zSize, _start): wraps the per-lane grid coordinates after z has moved.
// Runs one pass for every multiple of _zSize in [_zSize * _start, VECTOR_SIZE).
// Each pass takes the lanes whose z is greater than zEndV, adds 1 to their y and
// subtracts zSizeV from their z; it then takes the lanes whose y is greater than
// yEndV, adds 1 to their x and subtracts ySizeV from their y.
// Expects x, y, z, zEndV, zSizeV, yEndV and ySizeV in the enclosing scope.
// NOTE(review): the loop step is _zSize, so a zero or negative _zSize never
// reaches VECTOR_SIZE - callers must pass a positive size.
#define AXIS_RESET(_zSize, _start) for (int _i = (_zSize) * (_start); _i < VECTOR_SIZE; _i+=(_zSize)){\
MASK _zReset = SIMDi_GREATER_THAN(z, zEndV);\
y = SIMDi_MASK_ADD(_zReset, y, SIMDi_NUM(1));\
z = SIMDi_MASK_SUB(_zReset, z, zSizeV);\
\
MASK _yReset = SIMDi_GREATER_THAN(y, yEndV);\
x = SIMDi_MASK_ADD(_yReset, x, SIMDi_NUM(1));\
y = SIMDi_MASK_SUB(_yReset, y, ySizeV);}
// STORE_LAST_RESULT(_dest, _source): writes the final vector of a grid fill.
// With FN_ALIGNED_SETS the whole vector is stored with SIMDf_STORE, so the
// destination must have room for VECTOR_SIZE floats. Otherwise only the
// remaining (maxIndex - index) elements are copied, 4 bytes each; maxIndex and
// index are read from the enclosing scope.
#ifdef FN_ALIGNED_SETS
#define STORE_LAST_RESULT(_dest, _source) SIMDf_STORE(_dest, _source)
#else
#include <cstring>
#define STORE_LAST_RESULT(_dest, _source) std::memcpy(_dest, &_source, (maxIndex - index) * 4)
#endif
// INIT_PERTURB_VALUES(): declares the perturb parameter vectors used by
// PERTURB_SWITCH() and fills in the ones the current m_perturbType needs.
// Vectors that the selected perturb type does not use are left unset.
//   perturbNormaliseLengthV - m_perturbNormaliseLength * m_frequency (the *_Normalise types)
//   perturbAmpV             - m_perturbAmp; for the fractal types it is additionally
//                             scaled by m_perturbFractalBounding, the bounding factor
//                             derived from the perturb octaves and perturb gain that
//                             the perturb octave loop actually iterates with
//   perturbFreqV            - m_perturbFrequency
//   perturbLacunarityV / perturbGainV - fractal types only
// The *_Normalise cases intentionally fall through to their plain counterparts.
#define INIT_PERTURB_VALUES() \
SIMDf perturbAmpV, perturbFreqV, perturbLacunarityV, perturbGainV, perturbNormaliseLengthV;\
switch (m_perturbType)\
{\
case None:\
break;\
case Gradient_Normalise:\
perturbNormaliseLengthV = SIMDf_SET(m_perturbNormaliseLength*m_frequency);\
/* fallthrough */\
case Gradient:\
perturbAmpV = SIMDf_SET(m_perturbAmp);\
perturbFreqV = SIMDf_SET(m_perturbFrequency);\
break;\
case GradientFractal_Normalise:\
perturbNormaliseLengthV = SIMDf_SET(m_perturbNormaliseLength*m_frequency);\
/* fallthrough */\
case GradientFractal:\
perturbAmpV = SIMDf_SET(m_perturbAmp*m_perturbFractalBounding);\
perturbFreqV = SIMDf_SET(m_perturbFrequency);\
perturbLacunarityV = SIMDf_SET(m_perturbLacunarity);\
perturbGainV = SIMDf_SET(m_perturbGain);\
break;\
case Normalise:\
perturbNormaliseLengthV = SIMDf_SET(m_perturbNormaliseLength*m_frequency);\
break;\
}
// PERTURB_SWITCH(): applies the configured position perturbation to xF, yF, zF in place.
// Reads seedV, m_perturbType, m_perturbOctaves and the vectors declared by
// INIT_PERTURB_VALUES() from the enclosing scope.
//   None                      - leaves the position untouched
//   Gradient                  - one FUNC(GradientPerturbSingle) call with seed seedV - 1
//   GradientFractal           - m_perturbOctaves calls; every further octave multiplies the
//                               frequency by perturbLacunarityV, the amplitude by
//                               perturbGainV and decrements the seed
//   Gradient_Normalise        - as Gradient, then deliberately falls through to Normalise
//   Normalise                 - rescales (xF, yF, zF) by perturbNormaliseLengthV times the
//                               inverse square root of its squared length
//   GradientFractal_Normalise - as GradientFractal followed by the same rescale
#define PERTURB_SWITCH()\
switch (m_perturbType)\
{\
case None:\
break;\
case Gradient:\
FUNC(GradientPerturbSingle)(SIMDi_SUB(seedV, SIMDi_NUM(1)), perturbAmpV, perturbFreqV, xF, yF, zF); \
break; \
case GradientFractal:\
{\
SIMDi seedF = SIMDi_SUB(seedV, SIMDi_NUM(1));\
SIMDf freqF = perturbFreqV;\
SIMDf ampF = perturbAmpV;\
\
FUNC(GradientPerturbSingle)(seedF, ampF, freqF, xF, yF, zF);\
\
int octaveIndex = 0;\
\
while (++octaveIndex < m_perturbOctaves)\
{\
freqF = SIMDf_MUL(freqF, perturbLacunarityV);\
seedF = SIMDi_SUB(seedF, SIMDi_NUM(1));\
ampF = SIMDf_MUL(ampF, perturbGainV);\
\
FUNC(GradientPerturbSingle)(seedF, ampF, freqF, xF, yF, zF);\
}}\
break;\
case Gradient_Normalise:\
FUNC(GradientPerturbSingle)(SIMDi_SUB(seedV, SIMDi_NUM(1)), perturbAmpV, perturbFreqV, xF, yF, zF); \
case Normalise:\
{\
SIMDf invMag = SIMDf_MUL(perturbNormaliseLengthV, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xF, xF, SIMDf_MUL_ADD(yF, yF, SIMDf_MUL(zF, zF)))));\
xF = SIMDf_MUL(xF, invMag);\
yF = SIMDf_MUL(yF, invMag);\
zF = SIMDf_MUL(zF, invMag);\
}break;\
case GradientFractal_Normalise:\
{\
SIMDi seedF = SIMDi_SUB(seedV, SIMDi_NUM(1));\
SIMDf freqF = perturbFreqV;\
SIMDf ampF = perturbAmpV;\
\
FUNC(GradientPerturbSingle)(seedF, ampF, freqF, xF, yF, zF);\
\
int octaveIndex = 0;\
\
while (++octaveIndex < m_perturbOctaves)\
{\
freqF = SIMDf_MUL(freqF, perturbLacunarityV);\
seedF = SIMDi_SUB(seedF, SIMDi_NUM(1));\
ampF = SIMDf_MUL(ampF, perturbGainV);\
\
FUNC(GradientPerturbSingle)(seedF, ampF, freqF, xF, yF, zF);\
}\
SIMDf invMag = SIMDf_MUL(perturbNormaliseLengthV, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xF, xF, SIMDf_MUL_ADD(yF, yF, SIMDf_MUL(zF, zF)))));\
xF = SIMDf_MUL(xF, invMag);\
yF = SIMDf_MUL(yF, invMag);\
zF = SIMDf_MUL(zF, invMag);\
}break;\
}
// SET_BUILDER(f): fills noiseSet for an xSize * ySize * zSize grid starting at
// (xStart, yStart, zStart), with z varying fastest in the output.
// `f` is a statement fragment that must assign `result` from seedV, xF, yF, zF;
// PERTURB_SWITCH() runs on the position before every evaluation of `f`.
// Reads noiseSet, the start/size arguments and xFreqV, yFreqV, zFreqV from the
// enclosing scope.
//
// Two paths:
//  - zSize is a multiple of VECTOR_SIZE: every vector lies inside a single z row, so
//    x and y are uniform across lanes and z advances by vectorSize per step.
//  - otherwise: lanes run across row boundaries; x, y, z are kept per lane and
//    wrapped with AXIS_RESET after each step. The last vector is written with
//    STORE_LAST_RESULT.
//
// NOTE(review): both paths store at least one vector before any size is tested
// (the first path stores before checking iz < zSize, the second always writes
// the final vector), so callers must pass positive sizes and a buffer sized for it.
#define SET_BUILDER(f)\
if ((zSize & (VECTOR_SIZE - 1)) == 0)\
{\
SIMDi yBase = SIMDi_SET(yStart);\
SIMDi zBase = SIMDi_ADD(SIMDi_NUM(incremental), SIMDi_SET(zStart));\
\
SIMDi x = SIMDi_SET(xStart);\
\
int index = 0;\
\
for (int ix = 0; ix < xSize; ix++)\
{\
SIMDf xf = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(x), xFreqV);\
SIMDi y = yBase;\
\
for (int iy = 0; iy < ySize; iy++)\
{\
SIMDf yf = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(y), yFreqV);\
SIMDi z = zBase;\
SIMDf xF = xf;\
SIMDf yF = yf;\
SIMDf zF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(z), zFreqV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
SIMDf_STORE(&noiseSet[index], result);\
\
int iz = VECTOR_SIZE;\
while (iz < zSize)\
{\
z = SIMDi_ADD(z, SIMDi_NUM(vectorSize));\
index += VECTOR_SIZE;\
iz += VECTOR_SIZE;\
xF = xf;\
yF = yf;\
zF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(z), zFreqV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
SIMDf_STORE(&noiseSet[index], result);\
}\
index += VECTOR_SIZE;\
y = SIMDi_ADD(y, SIMDi_NUM(1));\
}\
x = SIMDi_ADD(x, SIMDi_NUM(1));\
}\
}\
else\
{\
SIMDi ySizeV = SIMDi_SET(ySize); \
SIMDi zSizeV = SIMDi_SET(zSize); \
\
SIMDi yEndV = SIMDi_SET(yStart + ySize - 1); \
SIMDi zEndV = SIMDi_SET(zStart + zSize - 1); \
\
SIMDi x = SIMDi_SET(xStart); \
SIMDi y = SIMDi_SET(yStart); \
SIMDi z = SIMDi_ADD(SIMDi_SET(zStart), SIMDi_NUM(incremental)); \
AXIS_RESET(zSize, 1)\
\
int index = 0; \
int maxIndex = xSize * ySize * zSize; \
\
for (; index < maxIndex - VECTOR_SIZE; index += VECTOR_SIZE)\
{\
SIMDf xF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(x), xFreqV);\
SIMDf yF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(y), yFreqV);\
SIMDf zF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(z), zFreqV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
SIMDf_STORE(&noiseSet[index], result);\
\
z = SIMDi_ADD(z, SIMDi_NUM(vectorSize));\
\
AXIS_RESET(zSize, 0)\
}\
\
SIMDf xF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(x), xFreqV);\
SIMDf yF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(y), yFreqV);\
SIMDf zF = SIMDf_MUL(SIMDf_CONVERT_TO_FLOAT(z), zFreqV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
STORE_LAST_RESULT(&noiseSet[index], result);\
}
// FBM_SINGLE(f): fractal sum of FUNC(f##Single) over m_octaves octaves.
// The first octave is taken at amplitude 1 with seed seedV. Every further octave
// multiplies xF, yF, zF by lacunarityV, increments the seed, multiplies the
// amplitude by gainV and adds noise * amplitude to `result`. The sum is finally
// multiplied by fractalBoundingV.
// Expands to several statements (no trailing semicolon); declares seedF, ampF and
// octaveIndex in the expanding scope and overwrites xF, yF, zF and `result`.
#define FBM_SINGLE(f)\
SIMDi seedF = seedV;\
\
result = FUNC(f##Single)(seedF, xF, yF, zF);\
\
SIMDf ampF = SIMDf_NUM(1);\
int octaveIndex = 0;\
\
while (++octaveIndex < m_octaves)\
{\
xF = SIMDf_MUL(xF, lacunarityV);\
yF = SIMDf_MUL(yF, lacunarityV);\
zF = SIMDf_MUL(zF, lacunarityV);\
seedF = SIMDi_ADD(seedF, SIMDi_NUM(1));\
\
ampF = SIMDf_MUL(ampF, gainV);\
result = SIMDf_MUL_ADD(FUNC(f##Single)(seedF, xF, yF, zF), ampF, result);\
}\
result = SIMDf_MUL(result, fractalBoundingV)
// BILLOW_SINGLE(f): same octave loop as FBM_SINGLE, but every octave value is
// passed through SIMDf_MUL_SUB(|noise|, 2, 1) before it is accumulated
// (presumably |noise| * 2 - 1 - confirm SIMDf_MUL_SUB's operand order).
// The sum is finally multiplied by fractalBoundingV.
// Expands to several statements (no trailing semicolon); declares seedF, ampF and
// octaveIndex in the expanding scope and overwrites xF, yF, zF and `result`.
#define BILLOW_SINGLE(f)\
SIMDi seedF = seedV;\
\
result = SIMDf_MUL_SUB(SIMDf_ABS(FUNC(f##Single)(seedF, xF, yF, zF)), SIMDf_NUM(2), SIMDf_NUM(1));\
\
SIMDf ampF = SIMDf_NUM(1);\
int octaveIndex = 0;\
\
while (++octaveIndex < m_octaves)\
{\
xF = SIMDf_MUL(xF, lacunarityV);\
yF = SIMDf_MUL(yF, lacunarityV);\
zF = SIMDf_MUL(zF, lacunarityV);\
seedF = SIMDi_ADD(seedF, SIMDi_NUM(1));\
\
ampF = SIMDf_MUL(ampF, gainV);\
result = SIMDf_MUL_ADD(SIMDf_MUL_SUB(SIMDf_ABS(FUNC(f##Single)(seedF, xF, yF, zF)), SIMDf_NUM(2), SIMDf_NUM(1)), ampF, result);\
}\
result = SIMDf_MUL(result, fractalBoundingV)
// RIGIDMULTI_SINGLE(f): rigid multifractal built from FUNC(f##Single).
// The first octave contributes 1 - |noise|. Every further octave multiplies
// xF, yF, zF by lacunarityV, increments the seed, multiplies the amplitude by
// gainV and folds (1 - |noise|) * amplitude into `result` with SIMDf_NMUL_ADD
// (presumably result - value * amplitude - confirm the macro's operand order).
// Unlike FBM_SINGLE and BILLOW_SINGLE, the result is not multiplied by
// fractalBoundingV.
// Declares seedF, ampF and octaveIndex in the expanding scope and overwrites
// xF, yF, zF and `result`.
#define RIGIDMULTI_SINGLE(f)\
SIMDi seedF = seedV;\
\
result = SIMDf_SUB(SIMDf_NUM(1), SIMDf_ABS(FUNC(f##Single)(seedF, xF, yF, zF)));\
\
SIMDf ampF = SIMDf_NUM(1);\
int octaveIndex = 0;\
\
while (++octaveIndex < m_octaves)\
{\
xF = SIMDf_MUL(xF, lacunarityV);\
yF = SIMDf_MUL(yF, lacunarityV);\
zF = SIMDf_MUL(zF, lacunarityV);\
seedF = SIMDi_ADD(seedF, SIMDi_NUM(1));\
\
ampF = SIMDf_MUL(ampF, gainV);\
result = SIMDf_NMUL_ADD(SIMDf_SUB(SIMDf_NUM(1), SIMDf_ABS(FUNC(f##Single)(seedF, xF, yF, zF))), ampF, result);\
}
// FILL_SET(func): defines SIMD_LEVEL_CLASS::Fill<func>Set, the grid fill for a
// single-octave noise type.
//   noiseSet        - destination buffer, must not be null
//   x/y/zStart      - integer grid origin
//   x/y/zSize       - grid dimensions; z varies fastest in the output
//   scaleModifier   - multiplied with m_frequency and the per-axis scales
// A grid with a non-positive dimension holds no samples, so the function returns
// without touching noiseSet. SET_BUILDER would otherwise store at least one
// vector, and its AXIS_RESET loop cannot terminate with a non-positive zSize.
#define FILL_SET(func) \
void SIMD_LEVEL_CLASS::Fill##func##Set(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)\
{\
assert(noiseSet);\
if (xSize <= 0 || ySize <= 0 || zSize <= 0)\
return;\
\
SIMD_ZERO_ALL();\
SIMDi seedV = SIMDi_SET(m_seed); \
INIT_PERTURB_VALUES();\
\
scaleModifier *= m_frequency;\
\
SIMDf xFreqV = SIMDf_SET(scaleModifier * m_xScale);\
SIMDf yFreqV = SIMDf_SET(scaleModifier * m_yScale);\
SIMDf zFreqV = SIMDf_SET(scaleModifier * m_zScale);\
\
SET_BUILDER(result = FUNC(func##Single)(seedV, xF, yF, zF))\
\
SIMD_ZERO_ALL();\
}
// FILL_FRACTAL_SET(func): defines SIMD_LEVEL_CLASS::Fill<func>FractalSet, the grid
// fill for the fractal variant of a noise type.
//   noiseSet        - destination buffer, must not be null
//   x/y/zStart      - integer grid origin
//   x/y/zSize       - grid dimensions; z varies fastest in the output
//   scaleModifier   - multiplied with m_frequency and the per-axis scales
// m_fractalType selects FBM_SINGLE, BILLOW_SINGLE or RIGIDMULTI_SINGLE as the
// per-vector evaluation; any other value leaves noiseSet untouched.
// A grid with a non-positive dimension holds no samples, so the function returns
// without touching noiseSet. SET_BUILDER would otherwise store at least one
// vector, and its AXIS_RESET loop cannot terminate with a non-positive zSize.
#define FILL_FRACTAL_SET(func) \
void SIMD_LEVEL_CLASS::Fill##func##FractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)\
{\
assert(noiseSet);\
if (xSize <= 0 || ySize <= 0 || zSize <= 0)\
return;\
\
SIMD_ZERO_ALL();\
\
SIMDi seedV = SIMDi_SET(m_seed);\
SIMDf lacunarityV = SIMDf_SET(m_lacunarity);\
SIMDf gainV = SIMDf_SET(m_gain);\
SIMDf fractalBoundingV = SIMDf_SET(m_fractalBounding);\
INIT_PERTURB_VALUES();\
\
scaleModifier *= m_frequency;\
\
SIMDf xFreqV = SIMDf_SET(scaleModifier * m_xScale);\
SIMDf yFreqV = SIMDf_SET(scaleModifier * m_yScale);\
SIMDf zFreqV = SIMDf_SET(scaleModifier * m_zScale);\
\
switch(m_fractalType)\
{\
case FBM:\
SET_BUILDER(FBM_SINGLE(func))\
break;\
case Billow:\
SET_BUILDER(BILLOW_SINGLE(func))\
break;\
case RigidMulti:\
SET_BUILDER(RIGIDMULTI_SINGLE(func))\
break;\
}\
SIMD_ZERO_ALL();\
}
// Grid-fill member functions (plain and fractal) for each interpolated noise type.
FILL_SET(Value)
FILL_FRACTAL_SET(Value)
FILL_SET(Perlin)
FILL_FRACTAL_SET(Perlin)
FILL_SET(Simplex)
FILL_FRACTAL_SET(Simplex)
// Not generated from the macro: FillWhiteNoiseSet is written out by hand later in this file.
//FILL_SET(WhiteNoise)
FILL_SET(Cubic)
FILL_FRACTAL_SET(Cubic)
// SIZE_MASK / SAFE_LAST(f): tail handling for the vector-set fills.
// With FN_ALIGNED_SETS both are empty: the main loop runs over the whole set.
// Otherwise SIZE_MASK rounds the loop bound down to a multiple of VECTOR_SIZE and
// SAFE_LAST evaluates one more vector for the remaining elements, copying only
// those elements into noiseSet.
// The tail runs PERTURB_SWITCH() exactly like the main loop in
// VECTOR_SET_BUILDER, so the last elements get the same perturbation as the rest.
// Reads vectorSet, noiseSet, index, loopMax and the frequency/offset vectors from
// the enclosing scope.
// NOTE(review): the tail still loads a full vector starting at loopMax, so the
// x/y/z arrays need VECTOR_SIZE readable elements from there - confirm how the
// vector set is allocated.
#ifdef FN_ALIGNED_SETS
#define SIZE_MASK
#define SAFE_LAST(f)
#else
#define SIZE_MASK & ~(VECTOR_SIZE - 1)
#define SAFE_LAST(f)\
if (loopMax != vectorSet->size)\
{\
std::size_t remaining = (vectorSet->size - loopMax) * sizeof(float);\
\
SIMDf xF = SIMDf_LOAD(&vectorSet->xSet[loopMax]);\
SIMDf yF = SIMDf_LOAD(&vectorSet->ySet[loopMax]);\
SIMDf zF = SIMDf_LOAD(&vectorSet->zSet[loopMax]);\
\
xF = SIMDf_MUL_ADD(xF, xFreqV, xOffsetV);\
yF = SIMDf_MUL_ADD(yF, yFreqV, yOffsetV);\
zF = SIMDf_MUL_ADD(zF, zFreqV, zOffsetV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
std::memcpy(&noiseSet[index], &result, remaining);\
}
#endif
// VECTOR_SET_BUILDER(f): fills noiseSet from the positions stored in vectorSet.
// For each group of VECTOR_SIZE positions below loopMax it loads x/y/z, applies
// position * freq + offset, runs PERTURB_SWITCH(), evaluates `f` (which must
// assign `result`) and stores the vector at the same index. SAFE_LAST(f) then
// handles any remaining elements.
// Reads vectorSet, noiseSet, index, loopMax and the frequency/offset vectors from
// the enclosing scope.
#define VECTOR_SET_BUILDER(f)\
while (index < loopMax)\
{\
SIMDf xF = SIMDf_MUL_ADD(SIMDf_LOAD(&vectorSet->xSet[index]), xFreqV, xOffsetV);\
SIMDf yF = SIMDf_MUL_ADD(SIMDf_LOAD(&vectorSet->ySet[index]), yFreqV, yOffsetV);\
SIMDf zF = SIMDf_MUL_ADD(SIMDf_LOAD(&vectorSet->zSet[index]), zFreqV, zOffsetV);\
\
PERTURB_SWITCH()\
SIMDf result;\
f;\
SIMDf_STORE(&noiseSet[index], result);\
index += VECTOR_SIZE;\
}\
SAFE_LAST(f)
// FILL_VECTOR_SET(func): defines the SIMD_LEVEL_CLASS::Fill<func>Set overload that
// samples single-octave noise at the positions held in a FastNoiseVectorSet.
//   noiseSet          - destination buffer, must not be null
//   vectorSet         - positions to sample, must not be null, size >= 0
//   x/y/zOffset       - added to every position; they are multiplied by the same
//                       per-axis frequency as the positions themselves
// The per-axis frequency is m_frequency times m_xScale / m_yScale / m_zScale.
#define FILL_VECTOR_SET(func)\
void SIMD_LEVEL_CLASS::Fill##func##Set(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset, float yOffset, float zOffset)\
{\
assert(noiseSet);\
assert(vectorSet);\
assert(vectorSet->size >= 0);\
SIMD_ZERO_ALL();\
\
SIMDi seedV = SIMDi_SET(m_seed);\
SIMDf xFreqV = SIMDf_SET(m_frequency * m_xScale);\
SIMDf yFreqV = SIMDf_SET(m_frequency * m_yScale);\
SIMDf zFreqV = SIMDf_SET(m_frequency * m_zScale);\
SIMDf xOffsetV = SIMDf_MUL(SIMDf_SET(xOffset), xFreqV);\
SIMDf yOffsetV = SIMDf_MUL(SIMDf_SET(yOffset), yFreqV);\
SIMDf zOffsetV = SIMDf_MUL(SIMDf_SET(zOffset), zFreqV);\
INIT_PERTURB_VALUES();\
\
int index = 0;\
int loopMax = vectorSet->size SIZE_MASK;\
\
VECTOR_SET_BUILDER(result = FUNC(func##Single)(seedV, xF, yF, zF))\
SIMD_ZERO_ALL();\
}
// FILL_FRACTAL_VECTOR_SET(func): defines the SIMD_LEVEL_CLASS::Fill<func>FractalSet
// overload that samples fractal noise at the positions held in a FastNoiseVectorSet.
//   noiseSet          - destination buffer, must not be null
//   vectorSet         - positions to sample, must not be null, size >= 0
//   x/y/zOffset       - added to every position; they are multiplied by the same
//                       per-axis frequency as the positions themselves
// m_fractalType selects FBM_SINGLE, BILLOW_SINGLE or RIGIDMULTI_SINGLE; the switch
// has no default, so any other value leaves noiseSet untouched.
#define FILL_FRACTAL_VECTOR_SET(func)\
void SIMD_LEVEL_CLASS::Fill##func##FractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset, float yOffset, float zOffset)\
{\
assert(noiseSet);\
assert(vectorSet);\
assert(vectorSet->size >= 0);\
SIMD_ZERO_ALL();\
\
SIMDi seedV = SIMDi_SET(m_seed);\
SIMDf lacunarityV = SIMDf_SET(m_lacunarity);\
SIMDf gainV = SIMDf_SET(m_gain);\
SIMDf fractalBoundingV = SIMDf_SET(m_fractalBounding);\
SIMDf xFreqV = SIMDf_SET(m_frequency * m_xScale);\
SIMDf yFreqV = SIMDf_SET(m_frequency * m_yScale);\
SIMDf zFreqV = SIMDf_SET(m_frequency * m_zScale);\
SIMDf xOffsetV = SIMDf_MUL(SIMDf_SET(xOffset), xFreqV);\
SIMDf yOffsetV = SIMDf_MUL(SIMDf_SET(yOffset), yFreqV);\
SIMDf zOffsetV = SIMDf_MUL(SIMDf_SET(zOffset), zFreqV);\
INIT_PERTURB_VALUES();\
\
int index = 0;\
int loopMax = vectorSet->size SIZE_MASK;\
\
switch(m_fractalType)\
{\
case FBM:\
VECTOR_SET_BUILDER(FBM_SINGLE(func))\
break;\
case Billow:\
VECTOR_SET_BUILDER(BILLOW_SINGLE(func))\
break;\
case RigidMulti:\
VECTOR_SET_BUILDER(RIGIDMULTI_SINGLE(func))\
break;\
}\
SIMD_ZERO_ALL();\
}
// Vector-set fill member functions for each noise type. White noise only gets
// the plain (non-fractal) variant here.
FILL_VECTOR_SET(Value)
FILL_FRACTAL_VECTOR_SET(Value)
FILL_VECTOR_SET(Perlin)
FILL_FRACTAL_VECTOR_SET(Perlin)
FILL_VECTOR_SET(Simplex)
FILL_FRACTAL_VECTOR_SET(Simplex)
FILL_VECTOR_SET(WhiteNoise)
FILL_VECTOR_SET(Cubic)
FILL_FRACTAL_VECTOR_SET(Cubic)
// Fills noiseSet with white noise for an xSize * ySize * zSize integer grid
// starting at (xStart, yStart, zStart); z varies fastest in the output.
//   noiseSet       - destination buffer, must not be null
//   scaleModifier  - not used by this function: the values come straight from
//                    FUNC(ValCoord) on the integer grid coordinates
// A grid with a non-positive dimension holds no samples, so the function returns
// without touching noiseSet. Both paths below would otherwise store at least one
// vector, and the AXIS_RESET loop cannot terminate with a non-positive zSize.
void SIMD_LEVEL_CLASS::FillWhiteNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)
{
    assert(noiseSet);
    if (xSize <= 0 || ySize <= 0 || zSize <= 0)
        return;

    SIMD_ZERO_ALL();
    SIMDi seedV = SIMDi_SET(m_seed);

    if ((zSize & (VECTOR_SIZE - 1)) == 0)
    {
        // Every vector lies inside one z row: keep the coordinates pre-multiplied
        // by their primes and step them by the prime (or by vectorSize * zPrime).
        SIMDi x = SIMDi_MUL(SIMDi_SET(xStart), SIMDi_NUM(xPrime));
        SIMDi yBase = SIMDi_MUL(SIMDi_SET(yStart), SIMDi_NUM(yPrime));
        SIMDi zBase = SIMDi_MUL(SIMDi_ADD(SIMDi_NUM(incremental), SIMDi_SET(zStart)), SIMDi_NUM(zPrime));
        SIMDi zStep = SIMDi_MUL(SIMDi_NUM(vectorSize), SIMDi_NUM(zPrime));

        int index = 0;
        for (int ix = 0; ix < xSize; ix++)
        {
            SIMDi y = yBase;
            for (int iy = 0; iy < ySize; iy++)
            {
                SIMDi z = zBase;
                SIMDf_STORE(&noiseSet[index], FUNC(ValCoord)(seedV, x, y, z));

                int iz = VECTOR_SIZE;
                while (iz < zSize)
                {
                    z = SIMDi_ADD(z, zStep);
                    index += VECTOR_SIZE;
                    iz += VECTOR_SIZE;
                    SIMDf_STORE(&noiseSet[index], FUNC(ValCoord)(seedV, x, y, z));
                }
                index += VECTOR_SIZE;
                y = SIMDi_ADD(y, SIMDi_NUM(yPrime));
            }
            x = SIMDi_ADD(x, SIMDi_NUM(xPrime));
        }
    }
    else
    {
        // Vectors run across row boundaries: track x, y, z per lane and wrap them
        // with AXIS_RESET, which reads the four vectors below by name.
        SIMDi ySizeV = SIMDi_SET(ySize);
        SIMDi zSizeV = SIMDi_SET(zSize);
        SIMDi yEndV = SIMDi_SET(yStart + ySize - 1);
        SIMDi zEndV = SIMDi_SET(zStart + zSize - 1);

        SIMDi x = SIMDi_SET(xStart);
        SIMDi y = SIMDi_SET(yStart);
        SIMDi z = SIMDi_ADD(SIMDi_SET(zStart), SIMDi_NUM(incremental));
        AXIS_RESET(zSize, 1);

        int index = 0;
        int maxIndex = xSize * ySize * zSize;
        for (; index < maxIndex - VECTOR_SIZE; index += VECTOR_SIZE)
        {
            SIMDf_STORE(&noiseSet[index], FUNC(ValCoord)(seedV, SIMDi_MUL(x, SIMDi_NUM(xPrime)), SIMDi_MUL(y, SIMDi_NUM(yPrime)), SIMDi_MUL(z, SIMDi_NUM(zPrime))));
            z = SIMDi_ADD(z, SIMDi_NUM(vectorSize));
            AXIS_RESET(zSize, 0);
        }

        // Final (possibly partial) vector; STORE_LAST_RESULT reads maxIndex and index.
        SIMDf result = FUNC(ValCoord)(seedV, SIMDi_MUL(x, SIMDi_NUM(xPrime)), SIMDi_MUL(y, SIMDi_NUM(yPrime)), SIMDi_MUL(z, SIMDi_NUM(zPrime)));
        STORE_LAST_RESULT(&noiseSet[index], result);
    }
    SIMD_ZERO_ALL();
}
// Distance metrics, selected by token pasting as <name>_DISTANCE(dx, dy, dz).
//   Euclidean - squared length dx*dx + dy*dy + dz*dz (no square root is taken)
//   Manhattan - |dx| + |dy| + |dz|
//   Natural   - Euclidean + Manhattan (this definition is replaced further down,
//               before the distance-returning functions are generated)
#define Euclidean_DISTANCE(_x, _y, _z) SIMDf_MUL_ADD(_x, _x, SIMDf_MUL_ADD(_y, _y, SIMDf_MUL(_z, _z)))
#define Manhattan_DISTANCE(_x, _y, _z) SIMDf_ADD(SIMDf_ADD(SIMDf_ABS(_x), SIMDf_ABS(_y)), SIMDf_ABS(_z))
#define Natural_DISTANCE(_x, _y, _z) SIMDf_ADD(Euclidean_DISTANCE(_x,_y,_z), Manhattan_DISTANCE(_x,_y,_z))
// Ways of combining two tracked distances, selected as <name>_RETURN(d, d2).
// Note the operand order: Sub yields d2 - d, Div yields d / d2.
#define Distance2_RETURN(_distance, _distance2) (_distance2)
#define Distance2Add_RETURN(_distance, _distance2) SIMDf_ADD(_distance, _distance2)
#define Distance2Sub_RETURN(_distance, _distance2) SIMDf_SUB(_distance2, _distance)
#define Distance2Mul_RETURN(_distance, _distance2) SIMDf_MUL(_distance, _distance2)
#define Distance2Div_RETURN(_distance, _distance2) SIMDf_DIV(_distance, _distance2)
// CELLULAR_VALUE_SINGLE(distanceFunc): defines FUNC(CellularValue<distanceFunc>Single).
// For one vector of sample points it visits the 3x3x3 block of cells starting one
// below SIMDi_CONVERT_TO_INT of the position. Each cell gets a feature point: a
// hashed direction rescaled to length cellJitter, added to the cell's offset from
// the sample. The function returns, per lane, the hash-derived value
// (hash * hash2Float) of the cell whose feature point is closest under
// <distanceFunc>_DISTANCE.
// cellValue starts undefined; the first visited cell replaces it as long as its
// distance is below the 999999 start value.
// NOTE(review): which integer the position maps to depends on the rounding mode of
// SIMDi_CONVERT_TO_INT - confirm in the SIMD macro definitions.
#define CELLULAR_VALUE_SINGLE(distanceFunc)\
static SIMDf VECTORCALL FUNC(CellularValue##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter)\
{\
SIMDf distance = SIMDf_NUM(999999);\
SIMDf cellValue = SIMDf_UNDEFINED();\
\
SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\
SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\
SIMDi zcBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(z), SIMDi_NUM(1));\
\
SIMDf xcf = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(xc), x);\
SIMDf ycfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(ycBase), y);\
SIMDf zcfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(zcBase), z);\
\
xc = SIMDi_MUL(xc, SIMDi_NUM(xPrime));\
ycBase = SIMDi_MUL(ycBase, SIMDi_NUM(yPrime));\
zcBase = SIMDi_MUL(zcBase, SIMDi_NUM(zPrime));\
\
for (int xi = 0; xi < 3; xi++)\
{\
SIMDf ycf = ycfBase;\
SIMDi yc = ycBase;\
for (int yi = 0; yi < 3; yi++)\
{\
SIMDf zcf = zcfBase;\
SIMDi zc = zcBase;\
for (int zi = 0; zi < 3; zi++)\
{\
SIMDi hash = FUNC(HashHB)(seed, xc, yc, zc);\
SIMDf xd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(hash, SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf yd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,10), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf zd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,20), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
\
SIMDf invMag = SIMDf_MUL(cellJitter, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xd, xd, SIMDf_MUL_ADD(yd, yd, SIMDf_MUL(zd, zd)))));\
\
xd = SIMDf_MUL_ADD(xd, invMag, xcf);\
yd = SIMDf_MUL_ADD(yd, invMag, ycf);\
zd = SIMDf_MUL_ADD(zd, invMag, zcf);\
\
SIMDf newCellValue = SIMDf_MUL(SIMDf_NUM(hash2Float), SIMDf_CONVERT_TO_FLOAT(hash));\
SIMDf newDistance = distanceFunc##_DISTANCE(xd, yd, zd);\
\
MASK closer = SIMDf_LESS_THAN(newDistance, distance);\
\
distance = SIMDf_MIN(newDistance, distance);\
cellValue = SIMDf_BLENDV(cellValue, newCellValue, closer);\
\
zcf = SIMDf_ADD(zcf, SIMDf_NUM(1));\
zc = SIMDi_ADD(zc, SIMDi_NUM(zPrime));\
}\
ycf = SIMDf_ADD(ycf, SIMDf_NUM(1));\
yc = SIMDi_ADD(yc, SIMDi_NUM(yPrime));\
}\
xcf = SIMDf_ADD(xcf, SIMDf_NUM(1));\
xc = SIMDi_ADD(xc, SIMDi_NUM(xPrime));\
}\
\
return cellValue;\
}
// Parameters for the secondary noise that the cellular "lookup" functions sample
// at the position of the closest feature point.
struct NoiseLookupSettings
{
// Noise function to evaluate at the feature point.
FastNoiseSIMD::NoiseType type;
// Factor applied to the feature point position before the lookup.
SIMDf frequency;
// Fractal settings; read only when `type` is one of the *Fractal noise types.
FastNoiseSIMD::FractalType fractalType;
int fractalOctaves;
SIMDf fractalLacunarity;
SIMDf fractalGain;
SIMDf fractalBounding;
};
// CELLULAR_LOOKUP_FRACTAL_VALUE(noiseType): evaluates the fractal variant of
// <noiseType> into `result`, taking its parameters from noiseLookupSettings.
// The *_SINGLE fractal macros read lacunarityV, gainV, fractalBoundingV and
// m_octaves by name, so local variables with exactly those names are declared
// here (m_octaves is a plain local int, not a class member, because the macro is
// expanded inside a static function).
// Expects seedV, xF, yF, zF, result and noiseLookupSettings in the enclosing scope.
// The last line carries no line continuation, so the macro ends with its own
// closing braces and cannot absorb whatever follows it in the file.
#define CELLULAR_LOOKUP_FRACTAL_VALUE(noiseType){\
SIMDf lacunarityV = noiseLookupSettings.fractalLacunarity;\
SIMDf gainV = noiseLookupSettings.fractalGain;\
SIMDf fractalBoundingV = noiseLookupSettings.fractalBounding;\
int m_octaves = noiseLookupSettings.fractalOctaves;\
switch(noiseLookupSettings.fractalType)\
{\
case FastNoiseSIMD::FBM:\
{FBM_SINGLE(noiseType);}\
break;\
case FastNoiseSIMD::Billow:\
{BILLOW_SINGLE(noiseType);}\
break;\
case FastNoiseSIMD::RigidMulti:\
{RIGIDMULTI_SINGLE(noiseType);}\
break;\
}}
// CELLULAR_LOOKUP_SINGLE(distanceFunc): defines FUNC(CellularLookup<distanceFunc>Single).
// Finds, per lane, the feature point closest to the sample among the 3x3x3 block
// of cells starting one below SIMDi_CONVERT_TO_INT of the position (distance
// measured with <distanceFunc>_DISTANCE). It then evaluates the noise selected by
// noiseLookupSettings.type at that feature point's position multiplied by
// noiseLookupSettings.frequency, and returns it.
//   seedV                - seed; used both for the cell hash and the lookup noise
//   x, y, z              - sample position
//   cellJitter           - length the hashed feature offset is rescaled to
//   noiseLookupSettings  - lookup noise type, frequency and fractal parameters
// Lookup types without a case below (anything other than the Value, Perlin,
// Simplex and Cubic families) yield zero instead of an uninitialised vector.
// NOTE(review): which integer the position maps to depends on the rounding mode of
// SIMDi_CONVERT_TO_INT - confirm in the SIMD macro definitions.
#define CELLULAR_LOOKUP_SINGLE(distanceFunc)\
static SIMDf VECTORCALL FUNC(CellularLookup##distanceFunc##Single)(SIMDi seedV, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, const NoiseLookupSettings& noiseLookupSettings)\
{\
SIMDf distance = SIMDf_NUM(999999);\
SIMDf xCell = SIMDf_UNDEFINED();\
SIMDf yCell = SIMDf_UNDEFINED();\
SIMDf zCell = SIMDf_UNDEFINED();\
\
SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\
SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\
SIMDi zcBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(z), SIMDi_NUM(1));\
\
SIMDf xcf = SIMDf_CONVERT_TO_FLOAT(xc);\
SIMDf ycfBase = SIMDf_CONVERT_TO_FLOAT(ycBase);\
SIMDf zcfBase = SIMDf_CONVERT_TO_FLOAT(zcBase);\
\
xc = SIMDi_MUL(xc, SIMDi_NUM(xPrime));\
ycBase = SIMDi_MUL(ycBase, SIMDi_NUM(yPrime));\
zcBase = SIMDi_MUL(zcBase, SIMDi_NUM(zPrime));\
\
for (int xi = 0; xi < 3; xi++)\
{\
SIMDf ycf = ycfBase;\
SIMDi yc = ycBase;\
SIMDf xLocal = SIMDf_SUB(xcf, x);\
for (int yi = 0; yi < 3; yi++)\
{\
SIMDf zcf = zcfBase;\
SIMDi zc = zcBase;\
SIMDf yLocal = SIMDf_SUB(ycf, y);\
for (int zi = 0; zi < 3; zi++)\
{\
SIMDf zLocal = SIMDf_SUB(zcf, z);\
\
SIMDi hash = FUNC(HashHB)(seedV, xc, yc, zc);\
SIMDf xd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(hash, SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf yd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,10), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf zd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,20), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
\
SIMDf invMag = SIMDf_MUL(cellJitter, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xd, xd, SIMDf_MUL_ADD(yd, yd, SIMDf_MUL(zd, zd)))));\
\
SIMDf xCellNew = SIMDf_MUL(xd, invMag);\
SIMDf yCellNew = SIMDf_MUL(yd, invMag);\
SIMDf zCellNew = SIMDf_MUL(zd, invMag);\
\
xd = SIMDf_ADD(xCellNew, xLocal);\
yd = SIMDf_ADD(yCellNew, yLocal);\
zd = SIMDf_ADD(zCellNew, zLocal);\
\
xCellNew = SIMDf_ADD(xCellNew, xcf); \
yCellNew = SIMDf_ADD(yCellNew, ycf); \
zCellNew = SIMDf_ADD(zCellNew, zcf); \
\
SIMDf newDistance = distanceFunc##_DISTANCE(xd, yd, zd);\
\
MASK closer = SIMDf_LESS_THAN(newDistance, distance);\
\
distance = SIMDf_MIN(newDistance, distance);\
xCell = SIMDf_BLENDV(xCell, xCellNew, closer);\
yCell = SIMDf_BLENDV(yCell, yCellNew, closer);\
zCell = SIMDf_BLENDV(zCell, zCellNew, closer);\
\
zcf = SIMDf_ADD(zcf, SIMDf_NUM(1));\
zc = SIMDi_ADD(zc, SIMDi_NUM(zPrime));\
}\
ycf = SIMDf_ADD(ycf, SIMDf_NUM(1));\
yc = SIMDi_ADD(yc, SIMDi_NUM(yPrime));\
}\
xcf = SIMDf_ADD(xcf, SIMDf_NUM(1));\
xc = SIMDi_ADD(xc, SIMDi_NUM(xPrime));\
}\
\
SIMDf xF = SIMDf_MUL(xCell, noiseLookupSettings.frequency);\
SIMDf yF = SIMDf_MUL(yCell, noiseLookupSettings.frequency);\
SIMDf zF = SIMDf_MUL(zCell, noiseLookupSettings.frequency);\
SIMDf result;\
\
switch(noiseLookupSettings.type)\
{\
default:\
result = SIMDf_NUM(0);\
break;\
case FastNoiseSIMD::Value:\
result = FUNC(ValueSingle)(seedV, xF, yF, zF); \
break;\
case FastNoiseSIMD::ValueFractal:\
CELLULAR_LOOKUP_FRACTAL_VALUE(Value);\
break; \
case FastNoiseSIMD::Perlin:\
result = FUNC(PerlinSingle)(seedV, xF, yF, zF); \
break;\
case FastNoiseSIMD::PerlinFractal:\
CELLULAR_LOOKUP_FRACTAL_VALUE(Perlin);\
break; \
case FastNoiseSIMD::Simplex:\
result = FUNC(SimplexSingle)(seedV, xF, yF, zF); \
break;\
case FastNoiseSIMD::SimplexFractal:\
CELLULAR_LOOKUP_FRACTAL_VALUE(Simplex);\
break; \
case FastNoiseSIMD::Cubic:\
result = FUNC(CubicSingle)(seedV, xF, yF, zF); \
break;\
case FastNoiseSIMD::CubicFractal:\
CELLULAR_LOOKUP_FRACTAL_VALUE(Cubic);\
break; \
}\
\
return result;\
}
// CELLULAR_DISTANCE_SINGLE(distanceFunc): defines FUNC(CellularDistance<distanceFunc>Single).
// Visits the 3x3x3 block of cells starting one below SIMDi_CONVERT_TO_INT of the
// position, places one feature point per cell (hashed direction rescaled to
// length cellJitter, relative to the sample) and returns, per lane, the smallest
// <distanceFunc>_DISTANCE to any of them. The running minimum starts at 999999.
#define CELLULAR_DISTANCE_SINGLE(distanceFunc)\
static SIMDf VECTORCALL FUNC(CellularDistance##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter)\
{\
SIMDf distance = SIMDf_NUM(999999);\
\
SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\
SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\
SIMDi zcBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(z), SIMDi_NUM(1));\
\
SIMDf xcf = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(xc), x);\
SIMDf ycfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(ycBase), y);\
SIMDf zcfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(zcBase), z);\
\
xc = SIMDi_MUL(xc, SIMDi_NUM(xPrime));\
ycBase = SIMDi_MUL(ycBase, SIMDi_NUM(yPrime));\
zcBase = SIMDi_MUL(zcBase, SIMDi_NUM(zPrime));\
\
for (int xi = 0; xi < 3; xi++)\
{\
SIMDf ycf = ycfBase;\
SIMDi yc = ycBase;\
for (int yi = 0; yi < 3; yi++)\
{\
SIMDf zcf = zcfBase;\
SIMDi zc = zcBase;\
for (int zi = 0; zi < 3; zi++)\
{\
SIMDi hash = FUNC(HashHB)(seed, xc, yc, zc);\
SIMDf xd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(hash, SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf yd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,10), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf zd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,20), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
\
SIMDf invMag = SIMDf_MUL(cellJitter, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xd, xd, SIMDf_MUL_ADD(yd, yd, SIMDf_MUL(zd, zd)))));\
\
xd = SIMDf_MUL_ADD(xd, invMag, xcf);\
yd = SIMDf_MUL_ADD(yd, invMag, ycf);\
zd = SIMDf_MUL_ADD(zd, invMag, zcf);\
\
SIMDf newDistance = distanceFunc##_DISTANCE(xd, yd, zd);\
\
distance = SIMDf_MIN(distance, newDistance);\
\
zcf = SIMDf_ADD(zcf, SIMDf_NUM(1));\
zc = SIMDi_ADD(zc, SIMDi_NUM(zPrime));\
}\
ycf = SIMDf_ADD(ycf, SIMDf_NUM(1));\
yc = SIMDi_ADD(yc, SIMDi_NUM(yPrime));\
}\
xcf = SIMDf_ADD(xcf, SIMDf_NUM(1));\
xc = SIMDi_ADD(xc, SIMDi_NUM(xPrime));\
}\
\
return distance;\
}
// CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc): defines
// FUNC(Cellular<returnFunc><distanceFunc>Single).
// Same 3x3x3 feature point search as the single-distance version, but it keeps a
// sorted list of the smallest distances in distance[0..index1]: each new distance
// is inserted by clamping every slot between its own minimum and the slot below.
// The result is <returnFunc>_RETURN(distance[index0], distance[index1]).
//   index0, index1 - which of the tracked distances to combine
// NOTE(review): the array has FN_CELLULAR_INDEX_MAX + 1 slots and four explicit
// initialisers, and the indices are not range-checked here - the function relies
// on callers passing 0 <= index0, index1 <= FN_CELLULAR_INDEX_MAX.
#define CELLULAR_DISTANCE2_SINGLE(distanceFunc, returnFunc)\
static SIMDf VECTORCALL FUNC(Cellular##returnFunc##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\
{\
SIMDf distance[FN_CELLULAR_INDEX_MAX+1] = {SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999),SIMDf_NUM(999999)};\
\
SIMDi xc = SIMDi_SUB(SIMDi_CONVERT_TO_INT(x), SIMDi_NUM(1));\
SIMDi ycBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(y), SIMDi_NUM(1));\
SIMDi zcBase = SIMDi_SUB(SIMDi_CONVERT_TO_INT(z), SIMDi_NUM(1));\
\
SIMDf xcf = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(xc), x);\
SIMDf ycfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(ycBase), y);\
SIMDf zcfBase = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(zcBase), z);\
\
xc = SIMDi_MUL(xc, SIMDi_NUM(xPrime));\
ycBase = SIMDi_MUL(ycBase, SIMDi_NUM(yPrime));\
zcBase = SIMDi_MUL(zcBase, SIMDi_NUM(zPrime));\
\
for (int xi = 0; xi < 3; xi++)\
{\
SIMDf ycf = ycfBase;\
SIMDi yc = ycBase;\
for (int yi = 0; yi < 3; yi++)\
{\
SIMDf zcf = zcfBase;\
SIMDi zc = zcBase;\
for (int zi = 0; zi < 3; zi++)\
{\
SIMDi hash = FUNC(HashHB)(seed, xc, yc, zc);\
SIMDf xd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(hash, SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf yd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,10), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
SIMDf zd = SIMDf_SUB(SIMDf_CONVERT_TO_FLOAT(SIMDi_AND(SIMDi_SHIFT_R(hash,20), SIMDi_NUM(bit10Mask))), SIMDf_NUM(511_5));\
\
SIMDf invMag = SIMDf_MUL(cellJitter, SIMDf_INV_SQRT(SIMDf_MUL_ADD(xd, xd, SIMDf_MUL_ADD(yd, yd, SIMDf_MUL(zd, zd)))));\
\
xd = SIMDf_MUL_ADD(xd, invMag, xcf);\
yd = SIMDf_MUL_ADD(yd, invMag, ycf);\
zd = SIMDf_MUL_ADD(zd, invMag, zcf);\
\
SIMDf newDistance = distanceFunc##_DISTANCE(xd, yd, zd);\
\
for(int i = index1; i > 0; i--)\
distance[i] = SIMDf_MAX(SIMDf_MIN(distance[i], newDistance), distance[i-1]);\
distance[0] = SIMDf_MIN(distance[0], newDistance);\
\
zcf = SIMDf_ADD(zcf, SIMDf_NUM(1));\
zc = SIMDi_ADD(zc, SIMDi_NUM(zPrime));\
}\
ycf = SIMDf_ADD(ycf, SIMDf_NUM(1));\
yc = SIMDi_ADD(yc, SIMDi_NUM(yPrime));\
}\
xcf = SIMDf_ADD(xcf, SIMDf_NUM(1));\
xc = SIMDi_ADD(xc, SIMDi_NUM(xPrime));\
}\
\
return returnFunc##_RETURN(distance[index0], distance[index1]);\
}
// CELLULAR_DISTANCE2CAVE_SINGLE(distanceFunc): defines
// FUNC(CellularDistance2Cave<distanceFunc>Single).
// Evaluates FUNC(CellularDistance2Div<distanceFunc>Single) twice - once at the
// given position and seed, once at the position shifted by 0.5 on every axis with
// seed + 1 - and returns the per-lane minimum of the two results.
// The Distance2Div functions must already be defined where this macro is expanded.
#define CELLULAR_DISTANCE2CAVE_SINGLE(distanceFunc)\
static SIMDf VECTORCALL FUNC(CellularDistance2Cave##distanceFunc##Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z, SIMDf cellJitter, int index0, int index1)\
{\
SIMDf c0 = FUNC(CellularDistance2Div##distanceFunc##Single)(seed, x, y, z, cellJitter, index0, index1);\
\
x = SIMDf_ADD(x, SIMDf_NUM(0_5));\
y = SIMDf_ADD(y, SIMDf_NUM(0_5));\
z = SIMDf_ADD(z, SIMDf_NUM(0_5));\
seed = SIMDi_ADD(seed, SIMDi_NUM(1));\
\
SIMDf c1 = FUNC(CellularDistance2Div##distanceFunc##Single)(seed, x, y, z, cellJitter, index0, index1);\
\
return SIMDf_MIN(c0,c1);\
}
// Expand the cell-value and noise-lookup templates once per distance function.
// The dispatch code further down calls FUNC(CellularValue<D>Single) and
// FUNC(CellularLookup<D>Single), which are presumably what these expansions define
// (the template macros themselves are defined earlier in the file).
CELLULAR_VALUE_SINGLE(Euclidean)
CELLULAR_VALUE_SINGLE(Manhattan)
CELLULAR_VALUE_SINGLE(Natural)
CELLULAR_LOOKUP_SINGLE(Euclidean)
CELLULAR_LOOKUP_SINGLE(Manhattan)
CELLULAR_LOOKUP_SINGLE(Natural)
// From this point on Natural_DISTANCE is the product of the Euclidean and Manhattan
// distances. The expansions above were made with whatever Natural_DISTANCE was in
// effect before this #undef; every expansion below uses the product form.
#undef Natural_DISTANCE
#define Natural_DISTANCE(_x, _y, _z) SIMDf_MUL(Euclidean_DISTANCE(_x,_y,_z), Manhattan_DISTANCE(_x,_y,_z))
// Plain distance template, expanded once per distance function
// (called below as FUNC(CellularDistance<D>Single)).
CELLULAR_DISTANCE_SINGLE(Euclidean)
CELLULAR_DISTANCE_SINGLE(Manhattan)
CELLULAR_DISTANCE_SINGLE(Natural)
// Expands CELLULAR_DISTANCE2_SINGLE for all three distance functions (Euclidean,
// Manhattan, Natural) with the same returnFunc. returnFunc selects the
// <returnFunc>_RETURN combiner that the generated function applies to
// distance[index0] and distance[index1].
#define CELLULAR_DISTANCE2_MULTI(returnFunc)\
CELLULAR_DISTANCE2_SINGLE(Euclidean, returnFunc)\
CELLULAR_DISTANCE2_SINGLE(Manhattan, returnFunc)\
CELLULAR_DISTANCE2_SINGLE(Natural, returnFunc)
// One family of three functions per two-distance return type.
CELLULAR_DISTANCE2_MULTI(Distance2)
CELLULAR_DISTANCE2_MULTI(Distance2Add)
CELLULAR_DISTANCE2_MULTI(Distance2Sub)
CELLULAR_DISTANCE2_MULTI(Distance2Div)
CELLULAR_DISTANCE2_MULTI(Distance2Mul)
// The cave variants call FUNC(CellularDistance2Div<D>Single), so they have to be
// expanded after the Distance2Div family above.
CELLULAR_DISTANCE2CAVE_SINGLE(Euclidean)
CELLULAR_DISTANCE2CAVE_SINGLE(Manhattan)
CELLULAR_DISTANCE2CAVE_SINGLE(Natural)
// Statement macro: switches on m_cellularDistanceFunction and, for the matching case,
// wraps a call to FUNC(Cellular<returnFunc><Distance>Single)(seedV, xF, yF, zF, cellJitterV)
// in SET_BUILDER. There is no default case, so any other value does nothing.
// Must be expanded where seedV and cellJitterV are in scope. xF, yF, zF and result
// are presumably provided by SET_BUILDER (defined outside this part of the file) --
// confirm there.
#define CELLULAR_MULTI(returnFunc)\
switch(m_cellularDistanceFunction)\
{\
case Euclidean:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##EuclideanSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
case Manhattan:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##ManhattanSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
case Natural:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##NaturalSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
}
// Same dispatch as CELLULAR_MULTI, for the return types that combine two distances:
// the generated call additionally passes m_cellularDistanceIndex0 and
// m_cellularDistanceIndex1 to FUNC(Cellular<returnFunc><Distance>Single).
// Must be expanded where seedV and cellJitterV are in scope. xF, yF, zF and result
// are presumably provided by SET_BUILDER (defined outside this part of the file) --
// confirm there.
#define CELLULAR_INDEX_MULTI(returnFunc)\
switch(m_cellularDistanceFunction)\
{\
case Euclidean:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##EuclideanSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
case Manhattan:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##ManhattanSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
case Natural:\
SET_BUILDER(result = FUNC(Cellular##returnFunc##NaturalSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
}
// Generates cellular noise into noiseSet for the region described by
// xStart/yStart/zStart and xSize/ySize/zSize.
//
// noiseSet       destination buffer; must be non-null
// scaleModifier  extra factor applied on top of m_frequency before the per-axis
//                scales (m_xScale, m_yScale, m_zScale) are applied
//
// The kernel is chosen by m_cellularReturnType and, within that, by
// m_cellularDistanceFunction. The actual traversal of the region is done by
// SET_BUILDER (defined elsewhere in the file), which presumably consumes the start/size
// arguments and the x/y/zFreqV vectors prepared here -- the local names below are
// therefore part of the contract with that macro and must not be renamed.
void SIMD_LEVEL_CLASS::FillCellularSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)
{
    assert(noiseSet);
    SIMD_ZERO_ALL();

    SIMDi seedV = SIMDi_SET(m_seed);
    INIT_PERTURB_VALUES();

    // Effective per-axis frequency = scaleModifier * m_frequency * axis scale.
    scaleModifier *= m_frequency;
    SIMDf xFreqV = SIMDf_SET(scaleModifier * m_xScale);
    SIMDf yFreqV = SIMDf_SET(scaleModifier * m_yScale);
    SIMDf zFreqV = SIMDf_SET(scaleModifier * m_zScale);
    SIMDf cellJitterV = SIMDf_SET(m_cellularJitter);

    // Only populated for the NoiseLookup return type.
    NoiseLookupSettings nls;

    switch (m_cellularReturnType)
    {
    case CellValue:
        CELLULAR_MULTI(Value);
        break;
    case Distance:
        CELLULAR_MULTI(Distance);
        break;
    case Distance2:
        CELLULAR_INDEX_MULTI(Distance2);
        break;
    case Distance2Add:
        CELLULAR_INDEX_MULTI(Distance2Add);
        break;
    case Distance2Sub:
        CELLULAR_INDEX_MULTI(Distance2Sub);
        break;
    case Distance2Mul:
        CELLULAR_INDEX_MULTI(Distance2Mul);
        break;
    case Distance2Div:
        CELLULAR_INDEX_MULTI(Distance2Div);
        break;
    case Distance2Cave:
        CELLULAR_INDEX_MULTI(Distance2Cave);
        break;
    case NoiseLookup:
        // The lookup kernels take their noise configuration as a settings struct.
        nls.type = m_cellularNoiseLookupType;
        nls.frequency = SIMDf_SET(m_cellularNoiseLookupFrequency);
        nls.fractalType = m_fractalType;
        nls.fractalOctaves = m_octaves;
        nls.fractalLacunarity = SIMDf_SET(m_lacunarity);
        nls.fractalGain = SIMDf_SET(m_gain);
        nls.fractalBounding = SIMDf_SET(m_fractalBounding);
        switch (m_cellularDistanceFunction)
        {
        case Euclidean:
            SET_BUILDER(result = FUNC(CellularLookupEuclideanSingle)(seedV, xF, yF, zF, cellJitterV, nls))
            break;
        case Manhattan:
            SET_BUILDER(result = FUNC(CellularLookupManhattanSingle)(seedV, xF, yF, zF, cellJitterV, nls))
            break;
        case Natural:
            SET_BUILDER(result = FUNC(CellularLookupNaturalSingle)(seedV, xF, yF, zF, cellJitterV, nls))
            break;
        }
        break;
    }
    SIMD_ZERO_ALL();
}
// Vector-set counterpart of the grid dispatch: switches on m_cellularDistanceFunction
// and wraps a call to FUNC(Cellular<returnFunc><Distance>Single)(seedV, xF, yF, zF, cellJitterV)
// in VECTOR_SET_BUILDER. There is no default case, so any other value does nothing.
// Must be expanded where seedV and cellJitterV are in scope. xF, yF, zF and result
// are presumably provided by VECTOR_SET_BUILDER (defined outside this part of the
// file) -- confirm there.
#define CELLULAR_MULTI_VECTOR(returnFunc)\
switch(m_cellularDistanceFunction)\
{\
case Euclidean:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##EuclideanSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
case Manhattan:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##ManhattanSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
case Natural:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##NaturalSingle)(seedV, xF, yF, zF, cellJitterV))\
break;\
}
// Vector-set dispatch for the return types that combine two distances: switches on
// m_cellularDistanceFunction and wraps, in VECTOR_SET_BUILDER, a call to
// FUNC(Cellular<returnFunc><Distance>Single) that additionally receives
// m_cellularDistanceIndex0 and m_cellularDistanceIndex1.
// Must be expanded where seedV and cellJitterV are in scope. xF, yF, zF and result
// are presumably provided by VECTOR_SET_BUILDER (defined outside this part of the
// file) -- confirm there.
#define CELLULAR_INDEX_MULTI_VECTOR(returnFunc)\
switch(m_cellularDistanceFunction)\
{\
case Euclidean:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##EuclideanSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
case Manhattan:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##ManhattanSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
case Natural:\
VECTOR_SET_BUILDER(result = FUNC(Cellular##returnFunc##NaturalSingle)(seedV, xF, yF, zF, cellJitterV, m_cellularDistanceIndex0, m_cellularDistanceIndex1))\
break;\
}
// Generates cellular noise into noiseSet for the positions held in vectorSet.
//
// noiseSet           destination buffer; must be non-null
// vectorSet          input positions; must be non-null with size >= 0
// x/y/zOffset        offsets, multiplied by the per-axis frequency before use
//
// The kernel is chosen by m_cellularReturnType and, within that, by
// m_cellularDistanceFunction. The loop over the vector set lives in VECTOR_SET_BUILDER
// (defined elsewhere in the file); several locals below (index, loopMax, the offset and
// frequency vectors) are not used directly in this function and are presumably consumed
// by that macro -- do not rename them without checking it.
void SIMD_LEVEL_CLASS::FillCellularSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset, float yOffset, float zOffset)
{
assert(noiseSet);
assert(vectorSet);
assert(vectorSet->size >= 0);
SIMD_ZERO_ALL();
SIMDi seedV = SIMDi_SET(m_seed);
// Per-axis frequency = m_frequency * axis scale.
SIMDf xFreqV = SIMDf_SET(m_frequency * m_xScale);
SIMDf yFreqV = SIMDf_SET(m_frequency * m_yScale);
SIMDf zFreqV = SIMDf_SET(m_frequency * m_zScale);
// Offsets are pre-multiplied by the per-axis frequency.
SIMDf xOffsetV = SIMDf_MUL(SIMDf_SET(xOffset), xFreqV);
SIMDf yOffsetV = SIMDf_MUL(SIMDf_SET(yOffset), yFreqV);
SIMDf zOffsetV = SIMDf_MUL(SIMDf_SET(zOffset), zFreqV);
SIMDf cellJitterV = SIMDf_SET(m_cellularJitter);
INIT_PERTURB_VALUES();
int index = 0;
// SIZE_MASK is a macro that supplies the rest of this expression; presumably it
// rounds the size to a whole number of SIMD vectors -- confirm at its definition.
int loopMax = vectorSet->size SIZE_MASK;
// Only populated for the NoiseLookup return type.
NoiseLookupSettings nls;
switch (m_cellularReturnType)
{
case CellValue:
CELLULAR_MULTI_VECTOR(Value);
break;
case Distance:
CELLULAR_MULTI_VECTOR(Distance);
break;
case Distance2:
CELLULAR_INDEX_MULTI_VECTOR(Distance2);
break;
case Distance2Add:
CELLULAR_INDEX_MULTI_VECTOR(Distance2Add);
break;
case Distance2Sub:
CELLULAR_INDEX_MULTI_VECTOR(Distance2Sub);
break;
case Distance2Mul:
CELLULAR_INDEX_MULTI_VECTOR(Distance2Mul);
break;
case Distance2Div:
CELLULAR_INDEX_MULTI_VECTOR(Distance2Div);
break;
case Distance2Cave:
CELLULAR_INDEX_MULTI_VECTOR(Distance2Cave);
break;
case NoiseLookup:
// The lookup kernels take their noise configuration as a settings struct.
nls.type = m_cellularNoiseLookupType;
nls.frequency = SIMDf_SET(m_cellularNoiseLookupFrequency);
nls.fractalType = m_fractalType;
nls.fractalOctaves = m_octaves;
nls.fractalLacunarity = SIMDf_SET(m_lacunarity);
nls.fractalGain = SIMDf_SET(m_gain);
nls.fractalBounding = SIMDf_SET(m_fractalBounding);
switch (m_cellularDistanceFunction)
{
case Euclidean:
VECTOR_SET_BUILDER(result = FUNC(CellularLookupEuclideanSingle)(seedV, xF, yF, zF, cellJitterV, nls));
break;
case Manhattan:
VECTOR_SET_BUILDER(result = FUNC(CellularLookupManhattanSingle)(seedV, xF, yF, zF, cellJitterV, nls));
break;
case Natural:
VECTOR_SET_BUILDER(result = FUNC(CellularLookupNaturalSingle)(seedV, xF, yF, zF, cellJitterV, nls));
break;
}
break;
}
SIMD_ZERO_ALL();
}
// Linear offset of point (_x, _y, _z) in the coarse sample grid, z varying fastest.
// Expects `zSizeSample` and `yzSizeSample` to be in scope at the point of use.
#define SAMPLE_INDEX(_x,_y,_z) ((_z) + zSizeSample * (_y) + yzSizeSample * (_x))
// Linear offset of point (_x, _y, _z) in the output noise set, z varying fastest.
// Expects `zSize` and `yzSize` to be in scope at the point of use.
#define SET_INDEX(_x,_y,_z) ((_z) + zSize * (_y) + yzSize * (_x))
void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int sampleScale)
{
assert(noiseSet);
SIMD_ZERO_ALL();
if (sampleScale <= 0)
{
FillNoiseSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize);
return;
}
int sampleSize = 1 << sampleScale;
int sampleMask = sampleSize - 1;
float scaleModifier = float(sampleSize);
int xOffset = (sampleSize - (xStart & sampleMask)) & sampleMask;
int yOffset = (sampleSize - (yStart & sampleMask)) & sampleMask;
int zOffset = (sampleSize - (zStart & sampleMask)) & sampleMask;
int xSizeSample = xSize + xOffset;
int ySizeSample = ySize + yOffset;
int zSizeSample = zSize + zOffset;
if (xSizeSample & sampleMask)
xSizeSample = (xSizeSample & ~sampleMask) + sampleSize;
if (ySizeSample & sampleMask)
ySizeSample = (ySizeSample & ~sampleMask) + sampleSize;
if (zSizeSample & sampleMask)
zSizeSample = (zSizeSample & ~sampleMask) + sampleSize;
xSizeSample = (xSizeSample >> sampleScale) + 1;
ySizeSample = (ySizeSample >> sampleScale) + 1;
zSizeSample = (zSizeSample >> sampleScale) + 1;
float* noiseSetSample = GetEmptySet(xSizeSample * ySizeSample * zSizeSample);
FillNoiseSet(noiseSetSample, xStart >> sampleScale, yStart >> sampleScale, zStart >> sampleScale, xSizeSample, ySizeSample, zSizeSample, scaleModifier);
int yzSizeSample = ySizeSample * zSizeSample;
int yzSize = ySize * zSize;
SIMDi axisMask = SIMDi_SET(sampleMask);
SIMDf axisScale = SIMDf_SET(1.f / scaleModifier);
SIMDf axisOffset = SIMDf_MUL(axisScale, SIMDf_NUM(0_5));
SIMDi sampleSizeSIMD = SIMDi_SET(sampleSize);
SIMDi xSIMD = SIMDi_SET(-xOffset);
SIMDi yBase = SIMDi_SET(-yOffset);
SIMDi zBase = SIMDi_SET(-zOffset);
int localCountMax = (1 << (sampleScale * 3));
int vMax = VECTOR_SIZE;
#if SIMD_LEVEL == FN_NEON
SIMDi sampleScaleV = SIMDi_SET(-sampleScale);
SIMDi sampleScale2V = SIMDi_MUL(sampleScaleV, SIMDi_NUM(2));
#endif
for (int x = 0; x < xSizeSample - 1; x++)
{
SIMDi ySIMD = yBase;
for (int y = 0; y < ySizeSample - 1; y++)
{
SIMDi zSIMD = zBase;
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
for (int z = 0; z < zSizeSample - 1; z++)
{
SIMDf c000 = c001;
SIMDf c100 = c101;
SIMDf c010 = c011;
SIMDf c110 = c111;
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
SIMDi localCountSIMD = SIMDi_NUM(incremental);
int localCount = 0;
while (localCount < localCountMax)
{
uSIMDi xi, yi, zi;
#if SIMD_LEVEL == FN_NEON
xi.m = SIMDi_AND(SIMDi_VSHIFT_L(localCountSIMD, sampleScale2V), axisMask);
yi.m = SIMDi_AND(SIMDi_VSHIFT_L(localCountSIMD, sampleScaleV), axisMask);
#else
xi.m = SIMDi_AND(SIMDi_SHIFT_R(localCountSIMD, sampleScale * 2), axisMask);
yi.m = SIMDi_AND(SIMDi_SHIFT_R(localCountSIMD, sampleScale), axisMask);
#endif
zi.m = SIMDi_AND(localCountSIMD, axisMask);
SIMDf xf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(xi.m), axisScale, axisOffset);
SIMDf yf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(yi.m), axisScale, axisOffset);
SIMDf zf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(zi.m), axisScale, axisOffset);
xi.m = SIMDi_ADD(xi.m, xSIMD);
yi.m = SIMDi_ADD(yi.m, ySIMD);
zi.m = SIMDi_ADD(zi.m, zSIMD);
uSIMDf sampledResults;
sampledResults.m = FUNC(Lerp)(
FUNC(Lerp)(
FUNC(Lerp)(c000, c100, xf),
FUNC(Lerp)(c010, c110, xf), yf),
FUNC(Lerp)(
FUNC(Lerp)(c001, c101, xf),
FUNC(Lerp)(c011, c111, xf), yf), zf);
for (int i = 0; i < vMax; i++)
{
if (xi.a[i] >= 0 && xi.a[i] < xSize &&
yi.a[i] >= 0 && yi.a[i] < ySize &&
zi.a[i] >= 0 && zi.a[i] < zSize)
{
int index = SET_INDEX(xi.a[i], yi.a[i], zi.a[i]);
noiseSet[index] = sampledResults.a[i];
}
}
localCount += VECTOR_SIZE;
localCountSIMD = SIMDi_ADD(localCountSIMD, SIMDi_NUM(vectorSize));
}
zSIMD = SIMDi_ADD(zSIMD, sampleSizeSIMD);
}
ySIMD = SIMDi_ADD(ySIMD, sampleSizeSIMD);
}
xSIMD = SIMDi_ADD(xSIMD, sampleSizeSIMD);
}
FreeNoiseSet(noiseSetSample);
SIMD_ZERO_ALL();
}
void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset, float yOffset, float zOffset)
{
assert(noiseSet);
assert(vectorSet);
assert(vectorSet->size >= 0);
SIMD_ZERO_ALL();
int sampleScale = vectorSet->sampleScale;
if (sampleScale <= 0)
{
FillNoiseSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
return;
}
int sampleSize = 1 << sampleScale;
int sampleMask = sampleSize - 1;
float scaleModifier = float(sampleSize);
int xSize = vectorSet->sampleSizeX;
int ySize = vectorSet->sampleSizeY;
int zSize = vectorSet->sampleSizeZ;
int xSizeSample = xSize;
int ySizeSample = ySize;
int zSizeSample = zSize;
if (xSizeSample & sampleMask)
xSizeSample = (xSizeSample & ~sampleMask) + sampleSize;
if (ySizeSample & sampleMask)
ySizeSample = (ySizeSample & ~sampleMask) + sampleSize;
if (zSizeSample & sampleMask)
zSizeSample = (zSizeSample & ~sampleMask) + sampleSize;
xSizeSample = (xSizeSample >> sampleScale) + 1;
ySizeSample = (ySizeSample >> sampleScale) + 1;
zSizeSample = (zSizeSample >> sampleScale) + 1;
float* noiseSetSample = GetEmptySet(vectorSet->size);
FillNoiseSet(noiseSetSample, vectorSet, xOffset - 0.5f, yOffset - 0.5f, zOffset - 0.5f);
int yzSizeSample = ySizeSample * zSizeSample;
int yzSize = ySize * zSize;
SIMDi axisMask = SIMDi_SET(sampleMask);
SIMDf axisScale = SIMDf_SET(1.f / scaleModifier);
SIMDf axisOffset = SIMDf_MUL(axisScale, SIMDf_NUM(0_5));
SIMDi sampleSizeSIMD = SIMDi_SET(sampleSize);
SIMDi xSIMD = SIMDi_SET_ZERO();
int localCountMax = (1 << (sampleScale * 3));
int vMax = VECTOR_SIZE;
#if SIMD_LEVEL == FN_NEON
SIMDi sampleScaleV = SIMDi_SET(-sampleScale);
SIMDi sampleScale2V = SIMDi_MUL(sampleScaleV, SIMDi_NUM(2));
#endif
for (int x = 0; x < xSizeSample - 1; x++)
{
SIMDi ySIMD = SIMDi_SET_ZERO();
for (int y = 0; y < ySizeSample - 1; y++)
{
SIMDi zSIMD = SIMDi_SET_ZERO();
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
for (int z = 0; z < zSizeSample - 1; z++)
{
SIMDf c000 = c001;
SIMDf c100 = c101;
SIMDf c010 = c011;
SIMDf c110 = c111;
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
SIMDi localCountSIMD = SIMDi_NUM(incremental);
int localCount = 0;
while (localCount < localCountMax)
{
uSIMDi xi, yi, zi;
#if SIMD_LEVEL == FN_NEON
xi.m = SIMDi_AND(SIMDi_VSHIFT_L(localCountSIMD, sampleScale2V), axisMask);
yi.m = SIMDi_AND(SIMDi_VSHIFT_L(localCountSIMD, sampleScaleV), axisMask);
#else
xi.m = SIMDi_AND(SIMDi_SHIFT_R(localCountSIMD, sampleScale * 2), axisMask);
yi.m = SIMDi_AND(SIMDi_SHIFT_R(localCountSIMD, sampleScale), axisMask);
#endif
zi.m = SIMDi_AND(localCountSIMD, axisMask);
SIMDf xf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(xi.m), axisScale, axisOffset);
SIMDf yf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(yi.m), axisScale, axisOffset);
SIMDf zf = SIMDf_MUL_ADD(SIMDf_CONVERT_TO_FLOAT(zi.m), axisScale, axisOffset);
xi.m = SIMDi_ADD(xi.m, xSIMD);
yi.m = SIMDi_ADD(yi.m, ySIMD);
zi.m = SIMDi_ADD(zi.m, zSIMD);
uSIMDf sampledResults;
sampledResults.m = FUNC(Lerp)(
FUNC(Lerp)(
FUNC(Lerp)(c000, c100, xf),
FUNC(Lerp)(c010, c110, xf), yf),
FUNC(Lerp)(
FUNC(Lerp)(c001, c101, xf),
FUNC(Lerp)(c011, c111, xf), yf), zf);
for (int i = 0; i < vMax; i++)
{
if (xi.a[i] < xSize &&
yi.a[i] < ySize &&
zi.a[i] < zSize)
{
int index = SET_INDEX(xi.a[i], yi.a[i], zi.a[i]);
noiseSet[index] = sampledResults.a[i];
}
}
localCount += VECTOR_SIZE;
localCountSIMD = SIMDi_ADD(localCountSIMD, SIMDi_NUM(vectorSize));
}
zSIMD = SIMDi_ADD(zSIMD, sampleSizeSIMD);
}
ySIMD = SIMDi_ADD(ySIMD, sampleSizeSIMD);
}
xSIMD = SIMDi_ADD(xSIMD, sampleSizeSIMD);
}
FreeNoiseSet(noiseSetSample);
SIMD_ZERO_ALL();
}
#undef SIMD_LEVEL
#endif