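// NOTE: the following text is residue from the web source viewer this file was copied from,
// not part of the header: "Fork 0", "306 lines", "12 KiB", "Raw Normal View History".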

#pragma once
#include "FastSIMD/InlInclude.h"
#include <climits>
namespace FastNoise
// Per-axis integer constants (named "Primes") together with an index-addressable copy.
// NOTE(review): presumably these are multiplied into lattice coordinates before hashing
// (see the `primedPos` parameters of the hash helpers) - confirm against callers.
namespace Primes
{
    static constexpr int32_t X = 501125321;
    static constexpr int32_t Y = 1136930381;
    static constexpr int32_t Z = 1720413743;
    static constexpr int32_t W = 1066037191;

    // Same four values in axis order: Lookup[0] == X ... Lookup[3] == W.
    static constexpr int32_t Lookup[] = { X, Y, Z, W };
}
// Static helper routines (gradient dot products, hashing, interpolation, distance metrics)
// written against the vector types of a SIMD backend class FS.
// NOTE(review): the brace-only lines of this file appear to have been lost in extraction;
// the opening `{` of this struct (and its closing `};`) need restoring.
template<typename FS>
struct Utils
// Vector types taken from the backend: float lanes, 32-bit integer lanes, and lane masks.
using float32v = typename FS::float32v;
using int32v = typename FS::int32v;
using mask32v = typename FS::mask32v;
// sqrt(2) and sqrt(3); they appear as gradient components in the functions below.
static constexpr float ROOT2 = 1.4142135623730950488f;
static constexpr float ROOT3 = 1.7320508075688772935f;
// Dot product of the offset (fX, fY) with a 2D gradient chosen per lane from `hash`.
// Generic overload for backends below AVX2: the gradient is assembled from individual bits
// of `index` with selects and sign flips rather than a table lookup.
//   index & 1 : negate b
//   index & 2 : a is scaled by 2 and b is zeroed; otherwise a is scaled by ROOT3
//   index & 4 : swap which of fX / fY is used as a and as b
//   index & 8 : negate the final sum
// (The "Bit-N" labels below name bits by their value, not their position.)
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    // Low 22 bits of the hash, scaled by 4/3 in float and converted back to integer.
    // NOTE(review): the scaling looks intended to skip one of the two identical
    // "a * 2, b = 0" bit patterns; the exact effect depends on the rounding mode of
    // FS_Convertf32_i32 - confirm.
    int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

    // Bit-4 = Choose X Y ordering
    mask32v xy;

    if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
    {
        xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
    }
    else
    {
        // Move bit 2 into the sign bit of each lane.
        xy = index << 29;

        if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
        {
            // Smear the sign bit across the whole lane.
            // NOTE(review): presumably select below SSE4.1 needs an all-ones/all-zeros mask - confirm.
            xy >>= 31;
        }
    }

    float32v a = FS_Select_f32( xy, fY, fX );
    float32v b = FS_Select_f32( xy, fX, fY );

    // Bit-1 = b flip sign
    b ^= FS_Casti32_f32( index << 31 );

    // Bit-2 = Mul a by 2 or Root3
    mask32v aMul2;

    if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
    {
        aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
    }
    else
    {
        // Bit 1 moved to the sign bit, then smeared into a full-lane mask.
        aMul2 = (index << 30) >> 31;
    }

    a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
    // b zero value if a mul 2
    b = FS_NMask_f32( b, aMul2 );

    // Bit-8 = Flip sign of a + b
    return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
}
// AVX2 overload: the eight (gX, gY) gradients are held in two 8-lane tables and fetched
// per lane with a permute indexed by `index`; bit 3 of `index` then negates the result.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    // Low 22 bits of the hash, scaled by 4/3 in float and converted back to integer.
    int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

    // Gradient component tables, one entry per value of the low three index bits.
    float32v tableX( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 );
    float32v tableY( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 );

    float32v gX = _mm256_permutevar8x32_ps( tableX, index );
    float32v gY = _mm256_permutevar8x32_ps( tableY, index );

    // Bit-8 = Flip sign of a + b (bit 3 shifted up into the float sign bit)
    float32v signFlip = FS_Casti32_f32( (index >> 3) << 31 );
    float32v dot = FS_FMulAdd_f32( gX, fX, fY * gY );

    return dot ^ signFlip;
}
// AVX512 overload: 16-entry tables indexed by `index`. Entries 8-15 are the negation of
// entries 0-7, so no separate sign flip is applied after the lookup.
template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    // Low 22 bits of the hash, scaled by 4/3 in float and converted back to integer.
    int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

    float32v tableX( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 );
    float32v tableY( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 );

    float32v gX = _mm512_permutexvar_ps( index, tableX );
    float32v gY = _mm512_permutexvar_ps( index, tableY );

    // gX * fX + gY * fY
    return FS_FMulAdd_f32( gX, fX, fY * gY );
}
// Dot product of the offset (fX, fY) with one of eight 2D gradients picked by the low
// three bits of `hash`. Generic overload for backends below AVX2.
// Gradients by (hash & 7), matching the tables used by the AVX2/AVX512 overloads:
//   0..3: ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
//   4..7: ( 1, 1+R2 ) ( -1, 1+R2 ) ( 1, -1-R2 ) ( -1, -1-R2 )
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    // Bit 0 and bit 1 of the hash, each moved into the sign-bit position.
    int32v bit1 = (hash << 31);
    int32v bit2 = (hash >> 1) << 31;

    // Bit 2 decides whether fX or fY receives the (1 + ROOT2) weight.
    mask32v bit4;

    if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
    {
        bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
    }
    else
    {
        // Move bit 2 into the sign bit of each lane.
        bit4 = hash << 29;

        if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
        {
            // Smear the sign bit across the whole lane.
            // NOTE(review): presumably select below SSE4.1 needs an all-ones/all-zeros mask - confirm.
            bit4 >>= 31;
        }
    }

    // Conditionally negate each component by XOR-ing the float sign bit.
    fX ^= FS_Casti32_f32( bit1 );
    fY ^= FS_Casti32_f32( bit2 );

    float32v a = FS_Select_f32( bit4, fY, fX );
    float32v b = FS_Select_f32( bit4, fX, fY );

    // (1 + ROOT2) * a + b
    return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
}
// AVX2 overload of the 2D gradient dot product: the eight gradients live in two 8-lane
// tables and are fetched per lane with a permute indexed directly by `hash`.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    float32v tableX( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 );
    float32v tableY( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 );

    float32v gX = _mm256_permutevar8x32_ps( tableX, hash );
    float32v gY = _mm256_permutevar8x32_ps( tableY, hash );

    // gX * fX + gY * fY
    float32v yTerm = fY * gY;
    return FS_FMulAdd_f32( gX, fX, yTerm );
}
// AVX512 overload of the 2D gradient dot product: 16-entry tables (the eight gradients
// written out twice) indexed per lane by `hash`.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    float32v tableX( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 );
    float32v tableY( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 );

    float32v gX = _mm512_permutexvar_ps( hash, tableX );
    float32v gY = _mm512_permutexvar_ps( hash, tableY );

    // gX * fX + gY * fY
    float32v yTerm = fY * gY;
    return FS_FMulAdd_f32( gX, fX, yTerm );
}
// 3D gradient dot product for every backend except AVX512. The low four bits of `hash`
// (h) pick two of the three offsets and a sign for each; the result is +-u + +-v.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
{
    // Sign masks: bit 0 negates u, bit 1 negates v (each moved to the float sign bit).
    float32v uSign = FS_Casti32_f32( hash << 31 );
    float32v vSign = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );

    // h with bit 1 cleared and everything above bit 3 dropped (13 == 0b1101),
    // so each comparison below covers a pair of h values.
    int32v axisBits = hash & int32v( 13 );

    // u: x when h < 8, otherwise y
    float32v u = FS_Select_f32( axisBits < int32v( 8 ), fX, fY );

    // v: y when h < 4, x when h is 12 or 14, otherwise z
    float32v v = FS_Select_f32( axisBits < int32v( 2 ), fY, FS_Select_f32( axisBits == int32v( 12 ), fX, fZ ) );

    // Apply the signs and sum the two terms.
    float32v uTerm = u ^ uSign;
    float32v vTerm = v ^ vSign;
    return uTerm + vTerm;
}
// AVX512 overload of the 3D gradient dot product: each gradient component comes from a
// 16-entry table indexed per lane by `hash`.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
{
    float32v tableX( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 );
    float32v tableY( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v tableZ( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 );

    float32v gX = _mm512_permutexvar_ps( hash, tableX );
    float32v gY = _mm512_permutexvar_ps( hash, tableY );
    float32v gZ = _mm512_permutexvar_ps( hash, tableZ );

    // gX * fX + ( fY * gY + fZ * gZ ), accumulated innermost term first.
    float32v yzSum = FS_FMulAdd_f32( fY, gY, fZ * gZ );
    return FS_FMulAdd_f32( gX, fX, yzSum );
}
// 4D gradient dot product for every backend except AVX512.
// Bits 3-4 of `hash` choose which three of the four offsets take part (a, b, c);
// bits 0-2 give the sign of a, b and c respectively. Returns +-a + +-b + +-c.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
{
    // Bits 3-4 isolated: p is 0, 8, 16 or 24.
    int32v p = hash & int32v( 3 << 3 );

    // a: y when p == 0, otherwise x
    float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );

    // b: y when bit 4 is set (p is 16 or 24), otherwise z
    float32v b;
    if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
    {
        // Full-lane comparison mask. The threshold matches the other sign-bit mask
        // shortcuts in this file, which also switch behaviour at SSE4.1.
        b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );
    }
    else
    {
        // Bit 4 moved into the sign bit and used directly as the select mask.
        // NOTE(review): relies on select reading only the sign bit from SSE4.1 up - confirm.
        b = FS_Select_f32( hash << 27, fY, fZ );
    }

    // c: z when p == 24, otherwise w
    float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );

    // Bits 0, 1, 2 moved to the float sign-bit position; the AND discards lower bits
    // that were shifted up alongside them.
    float32v aSign = FS_Casti32_f32( hash << 31 );
    float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
    float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );

    return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
}
// AVX512 overload of the 4D gradient dot product: each gradient component comes from a
// 32-entry table, stored as two 16-lane halves and indexed per lane by `hash` through a
// two-source permute.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
{
    // Lower (entries 0-15) and upper (entries 16-31) halves of each component table.
    float32v xLo( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v xHi( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v yLo( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 );
    float32v yHi( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 );
    float32v zLo( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 );
    float32v zHi( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 );
    float32v wLo( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 );
    float32v wHi( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 );

    float32v gX = _mm512_permutex2var_ps( xLo, hash, xHi );
    float32v gY = _mm512_permutex2var_ps( yLo, hash, yHi );
    float32v gZ = _mm512_permutex2var_ps( zLo, hash, zHi );
    float32v gW = _mm512_permutex2var_ps( wLo, hash, wHi );

    // gX * fX + ( fY * gY + ( fZ * gZ + fW * gW ) ), accumulated innermost term first.
    float32v zwSum = FS_FMulAdd_f32( fZ, gZ, fW * gW );
    float32v yzwSum = FS_FMulAdd_f32( fY, gY, zwSum );
    return FS_FMulAdd_f32( gX, fX, yzwSum );
}
// Integer hash of a seed and any number (at least one) of pre-multiplied coordinates:
// XOR everything together, multiply by 0x27d4eb2d, then XOR the product with itself
// shifted right by 15.
template<typename SIMD = FS, typename... P>
FS_INLINE static int32v HashPrimes( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);
    mixed *= int32v( 0x27d4eb2d );

    // Fold the high bits down into the low bits.
    int32v highBits = mixed >> 15;
    return highBits ^ mixed;
}
// Same mixing as HashPrimes but without the final shift-and-XOR step: XOR the seed with
// every pre-multiplied coordinate (at least one), then multiply by 0x27d4eb2d.
// NOTE(review): "HB" presumably means callers should read the high bits - confirm.
template<typename SIMD = FS, typename... P>
FS_INLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);
    mixed *= int32v( 0x27d4eb2d );

    return mixed;
}
// Hashes a seed with the pre-multiplied coordinates (at least one) and converts the
// result to float, scaled by 1 / INT_MAX.
template<typename SIMD = FS, typename... P>
FS_INLINE static float32v GetValueCoord( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);

    // mixed = mixed * ( mixed * 0x27d4eb2d ): the hash is squared as well as scaled.
    int32v scaled = mixed * int32v( 0x27d4eb2d );
    mixed *= scaled;

    float32v asFloat = FS_Converti32_f32( mixed );
    return asFloat * float32v( 1.0f / static_cast<float>( INT_MAX ) );
}
// Linear interpolation from a to b by t, computed as one fused multiply-add of t,
// ( b - a ) and a. NOTE(review): assumes FS_FMulAdd_f32( x, y, z ) is x * y + z.
template<typename SIMD = FS>
FS_INLINE static float32v Lerp( float32v a, float32v b, float32v t )
{
    float32v span = b - a;
    return FS_FMulAdd_f32( t, span, a );
}
// Cubic interpolation curve: t * t multiplied by FS_FNMulAdd_f32( t, 2, 3 ).
// NOTE(review): assuming FS_FNMulAdd_f32( x, y, z ) is z - x * y, this is t^2 * ( 3 - 2t ).
template<typename SIMD = FS>
FS_INLINE static float32v InterpHermite( float32v t )
{
    float32v tSquared = t * t;
    float32v factor = FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
    return tSquared * factor;
}
// Quintic interpolation curve: t^3 multiplied by a quadratic evaluated in nested
// multiply-add form. NOTE(review): assuming FS_FMulAdd_f32( x, y, z ) is x * y + z,
// this is t^3 * ( t * ( 6t - 15 ) + 10 ).
template<typename SIMD = FS>
FS_INLINE static float32v InterpQuintic( float32v t )
{
    float32v tCubed = t * t * t;

    float32v inner = FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 ));
    float32v quadratic = FS_FMulAdd_f32( t, inner, float32v( 10 ) );

    return tCubed * quadratic;
}
// Distance between two points given their per-axis differences dX, d...
// (one value per additional axis), using the metric selected by distFunc:
//   Euclidean        : sqrt of the sum of squares, computed as invsqrt( s ) * s
//   EuclideanSquared : sum of squares
//   Manhattan        : sum of absolute values
//   Hybrid           : sum over axes of ( d * d + |d| )
// Any distFunc value without its own case is treated as Euclidean, so every path
// through the function returns a value.
template<typename SIMD = FS, typename... P>
FS_INLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
{
    switch( distFunc )
    {
        default:
        case DistanceFunction::Euclidean:
        {
            float32v distSqr = dX * dX;
            // Comma fold: accumulate d * d for each remaining axis, in order.
            ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);

            // s * ( 1 / sqrt( s ) ) == sqrt( s )
            return FS_InvSqrt_f32( distSqr ) * distSqr;
        }

        case DistanceFunction::EuclideanSquared:
        {
            float32v distSqr = dX * dX;
            ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);

            return distSqr;
        }

        case DistanceFunction::Manhattan:
        {
            float32v dist = FS_Abs_f32( dX );
            // A unary `+` fold is ill-formed for an empty pack, so it is only formed
            // when there is at least one further axis; results are unchanged otherwise.
            if constexpr( sizeof...( P ) > 0 )
            {
                dist += (FS_Abs_f32( d ) + ...);
            }

            return dist;
        }

        case DistanceFunction::Hybrid:
        {
            float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
            ((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);

            return both;
        }
    }
}
// Short names for use by code that includes this file: Utils instantiated for
// FS_SIMD_CLASS (defined outside this file), and an alias for the constants namespace.
using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
namespace FnPrimes = FastNoise::Primes;