1
0
Fork 0

New year reboot

Switch to FastNoise2
Hierarchy including rotate
And many more
master
May B. 2021-01-20 17:13:15 +01:00
parent e8080bffb6
commit 6288606505
400 changed files with 17217 additions and 14045 deletions

View File

@ -1,94 +1,100 @@
cmake_minimum_required(VERSION 3.11)
project (univerxel VERSION 0.0.1)
project (univerxel VERSION 0.0.2)
option(PROFILING "Build with profiling" 0)
option(FIXED_WINDOW "Lock window size: Force floating on i3" 0)
set(SIMD_LEVEL "avx2" CACHE STRING "SIMD processor acceleration (sse2, sse4.1, avx2, avx512f)")
option(USE_FMA "Use fma" 1)
option(LOG_DEBUG "Show debug logs" 0)
option(LOG_TRACE "Show trace logs" 0)
option(FIXED_WINDOW "Lock window size: Force floating on i3" 0)
option(RENDER_VK "Include vulkan renderer" 1)
option(NATIVE "Build with -march=native" 0)
option(IPO "Link time optimisation" 1)
option(LD_GOLD "Use gold linker" 1)
option(CCACHE "Use code cache" 1)
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
else(CCACHE_FOUND)
#TODO: set(CMAKE_UNITY_BUILD ON)
endif(CCACHE_FOUND)
add_subdirectory("deps/glfw")
add_subdirectory("deps/glm")
add_subdirectory("deps/picoquic")
add_subdirectory("deps/zstd")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
if(IPO)
include(CheckIPOSupported)
check_ipo_supported(RESULT IPO_OK OUTPUT IPO_ERROR)
if(IPO_OK)
message(STATUS "IPO / LTO enabled")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(STATUS "IPO / LTO not supported: <${IPO_ERROR}>")
add_subdirectory("deps/glfw")
add_subdirectory("deps/glm")
add_subdirectory("deps/picoquic")
add_subdirectory("deps/zstd")
add_subdirectory("deps/FastNoise2/src")
# Route compilations through ccache when the CCACHE option is on and the tool is installed.
if(CCACHE)
  find_program(CCACHE_FOUND ccache)
  if(CCACHE_FOUND)
    message(STATUS "CCACHE enabled")
    # Launch the executable that find_program() actually resolved instead of
    # relying on a bare "ccache" being on PATH again at build time.
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND}")
  else()
    #MAYBE: set(CMAKE_UNITY_BUILD ON)
  endif()
endif()
if(IPO)
include(CheckIPOSupported)
check_ipo_supported(RESULT IPO_OK OUTPUT IPO_ERROR)
if(IPO_OK)
message(STATUS "IPO / LTO enabled")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(STATUS "IPO / LTO not supported: <${IPO_ERROR}>")
endif()
endif()
# Use the gold linker when the LD_GOLD option is enabled and the compiler driver
# reports "GNU gold" for -fuse-ld=gold. The option is no longer force-set to 1
# here, so -DLD_GOLD=0 on the command line is honoured.
if(LD_GOLD)
  execute_process(
    COMMAND "${CMAKE_CXX_COMPILER}" -fuse-ld=gold -Wl,--version
    ERROR_QUIET
    OUTPUT_VARIABLE LD_VERSION)
  if("${LD_VERSION}" MATCHES "GNU gold")
    message(STATUS "Gold linker enabled")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -fuse-ld=gold")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=gold")
  endif()
endif()
if(MSVC)
add_definitions(/std:c++latest)
add_compile_definitions(WIN32_LEAN_AND_MEAN=)
else()
#FIXME: by target set(CMAKE_CXX_FLAGS "-Wall -Wextra")
endif()
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
add_compile_definitions(FIXED_WINDOW=${FIXED_WINDOW} LOG_DEBUG=${LOG_DEBUG} LOG_TRACE=${LOG_TRACE} HN_USE_FILESYSTEM=1)
if(PROFILING)
add_compile_definitions(TRACY_ENABLE=1)
endif(PROFILING)
add_compile_definitions(TRACY_ENABLE=1)
endif()
add_compile_definitions(FIXED_WINDOW=${FIXED_WINDOW} LOG_DEBUG=${LOG_DEBUG} LOG_TRACE=${LOG_TRACE})
# CPU-specific code generation flags, set at directory scope so they reach every
# target created after this point.
if(NATIVE)
  add_compile_options(-march=native)
endif()
# SIMD_LEVEL is a string cache entry: it has to be compared with STREQUAL.
# EQUAL is a numeric comparison and is never true for "avx2" / "avx512f",
# which left both FN_COMPILE_* definitions permanently unset.
if("${SIMD_LEVEL}" STREQUAL "avx2")
  add_compile_definitions(FN_COMPILE_AVX2=1)
elseif("${SIMD_LEVEL}" STREQUAL "avx512f")
  add_compile_definitions(FN_COMPILE_AVX512=1)
endif()
if(MSVC)
  # NOTE(review): MSVC always receives /arch:AVX2 regardless of SIMD_LEVEL - confirm this is intended.
  add_compile_options(/arch:AVX2)
else()
  # Expands to -msse2, -msse4.1, -mavx2 or -mavx512f for the documented SIMD_LEVEL values.
  add_compile_options(-m${SIMD_LEVEL})
endif()
if(USE_FMA)
  if(MSVC)
    add_compile_options(/GL /fp:fast)
  else()
    add_compile_options(-mfma)
  endif()
endif()
file(GLOB_RECURSE CORE_SOURCES "src/core/*.cpp" "deps/tracy/TracyClient.cpp")
configure_file(src/version.h.in generated/version.h @ONLY)
set(CORE_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/generated" "deps/toml++" "deps/robin_hood" "deps/libguarded" "deps/tracy")
set(CORE_HEADERS "src" "${CMAKE_CURRENT_BINARY_DIR}/generated" "deps/toml++" "deps/robin_hood" "deps/tracy")
set(CORE_LIBS glm::glm_static zstd::zstd_static) # picoquic
file(GLOB_RECURSE CLIENT_SOURCES "src/client/*.cpp" "deps/imgui/*.cpp" "deps/meshoptimizer/*.cpp" "deps/gl3w/gl3w.c" "deps/volk/volk.c")
set(CLIENT_HEADERS "deps/imgui" "deps/meshoptimizer" "deps/gl3w" "deps/volk")
file(GLOB_RECURSE CLIENT_SOURCES "src/client/*.cpp" "deps/imgui/*.cpp" "deps/meshoptimizer/*.cpp" "deps/gl3w/gl3w.c")
set(CLIENT_HEADERS "deps/imgui" "deps/meshoptimizer" "deps/gl3w")
set(CLIENT_LIBS glfw)
set(CLIENT_DEFS)
# Optional Vulkan renderer.
# Enabled: add the volk loader source, its include directory and the RENDER_VK define.
# Disabled: strip the Vulkan implementation files that the src/client glob picked up.
if(RENDER_VK)
  list(APPEND CLIENT_SOURCES "deps/volk/volk.c")
  list(APPEND CLIENT_HEADERS "deps/volk")
  list(APPEND CLIENT_DEFS RENDER_VK=1)
else()
  file(GLOB_RECURSE CLIENT_RENDER "src/client/render/impl/vk/*.cpp")
  # The list is CLIENT_SOURCES (plural); the misspelt CLIENT_SOURCE removed nothing.
  # The guard keeps an empty glob result from reaching REMOVE_ITEM without values.
  if(CLIENT_RENDER)
    list(REMOVE_ITEM CLIENT_SOURCES ${CLIENT_RENDER})
  endif()
endif()
file(GLOB_RECURSE SERVER_SOURCES "src/server/*.cpp" "deps/FastNoiseSIMD/*.cpp")
set(SERVER_HEADERS "deps/FastNoiseSIMD")
set(SERVER_LINKED)
file(GLOB_RECURSE SERVER_SOURCES "src/server/*.cpp")
set(SERVER_HEADERS)
set(SERVER_LIBS FastNoise)
if(WIN32)
set(CLIENT_HEADERS ${CLIENT_HEADERS} $ENV{VULKAN_SDK}\\include)
find_package(OpenSSL)
set(CORE_LIBS ${CORE_LIBS} ${CMAKE_BINARY_DIR}/libs/*.lib ${OPENSSL_LIBRARIES} ws2_32)
list(APPEND CORE_LIBS ${CMAKE_BINARY_DIR}/libs/*.lib ${OPENSSL_LIBRARIES} ws2_32)
else()
set(CORE_LIBS ${CORE_LIBS} picoquic-core pthread dl)
list(APPEND CORE_LIBS picoquic-core pthread dl)
endif()
if (WIN32)
@ -99,32 +105,56 @@ elseif (APPLE)
set(ICON univerxel.icns)
endif()
list(APPEND CORE_HEADERS "src/modules")
list(APPEND CORE_SOURCES "src/modules/core/Core.cpp")
# All in one exec
add_executable(univerxel "src/main.cpp" ${ICON} ${CORE_SOURCES} ${CLIENT_SOURCES} ${SERVER_SOURCES})
target_compile_features(univerxel PUBLIC cxx_std_17)
target_link_libraries(univerxel ${CORE_LIBS} ${CLIENT_LIBS} ${SERVER_LIBS})
target_link_libraries(univerxel PRIVATE ${CORE_LIBS} ${CLIENT_LIBS} ${SERVER_LIBS})
target_include_directories(univerxel PRIVATE ${CORE_HEADERS} ${CLIENT_HEADERS} ${SERVER_HEADERS})
target_compile_definitions(univerxel PRIVATE ${CLIENT_DEFS})
target_compile_features(univerxel PUBLIC cxx_std_17)
target_compile_options(univerxel PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra>
$<$<CXX_COMPILER_ID:MSVC>:
/WX /W4>)
# Standalone server
add_executable(univerxel-server EXCLUDE_FROM_ALL "src/server.cpp" ${ICON} ${CORE_SOURCES} ${SERVER_SOURCES})
target_compile_features(univerxel-server PUBLIC cxx_std_17)
target_link_libraries(univerxel-server ${CORE_LIBS} ${SERVER_LIBS})
target_link_libraries(univerxel-server PRIVATE ${CORE_LIBS} ${SERVER_LIBS})
target_include_directories(univerxel-server PRIVATE ${CORE_HEADERS} ${SERVER_HEADERS})
target_compile_definitions(univerxel-server PRIVATE STANDALONE_SERVER=1)
target_compile_features(univerxel-server PUBLIC cxx_std_17)
target_compile_options(univerxel-server PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra>
$<$<CXX_COMPILER_ID:MSVC>:
/WX /W4>)
# Dumb client
add_executable(univerxel-client EXCLUDE_FROM_ALL "src/client.cpp" ${ICON} ${CORE_SOURCES} ${CLIENT_SOURCES})
target_compile_features(univerxel-client PUBLIC cxx_std_17)
target_link_libraries(univerxel-client ${CORE_LIBS} ${CLIENT_LIBS})
target_link_libraries(univerxel-client PRIVATE ${CORE_LIBS} ${CLIENT_LIBS})
target_include_directories(univerxel-client PRIVATE ${CORE_HEADERS} ${CLIENT_HEADERS})
target_compile_definitions(univerxel-client PRIVATE LIGHT_CLIENT=1 ${CLIENT_DEFS})
target_compile_features(univerxel-client PUBLIC cxx_std_17)
target_compile_options(univerxel-client PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra>
$<$<CXX_COMPILER_ID:MSVC>:
/WX /W4>)
# Resource client files + default zstd.dict
file(COPY resource/content DESTINATION ${CMAKE_BINARY_DIR})
if(RENDER_VK)
file(COPY resource/shaders/vk DESTINATION ${CMAKE_BINARY_DIR}/content/shaders)
endif()
# Serialize entity model
file(GLOB_RECURSE MODELS_SOURCES "src/client/render/api/Models.cpp")
add_executable(generate_models EXCLUDE_FROM_ALL "src/tools/generate_models.cpp" ${MODELS_SOURCES})
target_compile_features(generate_models PUBLIC cxx_std_17)
target_link_libraries(generate_models glm::glm_static)
target_link_libraries(generate_models PRIVATE glm::glm_static)
target_include_directories(generate_models PRIVATE "deps/robin_hood" "deps/meshoptimizer")
# Docs

View File

@ -38,7 +38,7 @@ PROJECT_NAME = Univerxel
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 0.0.1
PROJECT_NUMBER = 0.0.2
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@ -96,13 +96,13 @@ CMake options: `-DKEY=VAL`
Key | Usage | Default
--- | --- | ---
SIMD_LEVEL | SIMD processor acceleration (sse2, sse4.1, avx2, avx512f) | `avx2`
USE_FMA | Fast math | `1`
CMAKE_BUILD_TYPE | Level of optimization | `Release`
PROFILING | Tracy profiling | `0`
LOG_DEBUG | Debug logs | `0`
LOG_TRACE | Trace logs | `0`
IPO | Link time optimisation | `1`
NATIVE | Optimize for native CPU | `0`
RENDER_VK | Include Vulkan renderer | `1`
1. Compile
```sh

65
TODO.md
View File

@ -4,15 +4,16 @@
Released as `0.0.1`: `Pre alpha 1`
- [ ] From the ground up cleanup
- [x] From the ground up cleanup
## System
- [~] Dependencies updater
- [x] Tracy
- [ ] ImGui
- [x] FastNoise
- Compile time
- [ ] Warnings
- [x] Warnings
- [ ] Unity build
- [ ] Ninja
- [ ] Clang
@ -28,60 +29,80 @@ Released as `0.0.1`: `Pre alpha 1`
- [ ] Review documentation
- [ ] Code cleanup
- [ ] Proper wiki
- [ ] Yaml config
- [ ] Proper logger
- [ ] I18N
## Data
- [ ] Element hierarchy
- [ ] Collide
- [~] Element hierarchy
- [x] Raycast
- [ ] Soft part break
- [ ] Relative position
- [ ] Rotate
- [ ] Merge World and Entities
- [x] Relative position
- [x] Rotate
- [ ] Bounding hierarchy
- [x] Merge World and Entities
- [ ] Galaxy
- [ ] Orbit system
- [~] Orbit system
- [x] Circular
- [ ] Elliptic
- [ ] Gravity referential
- [ ] Collisions
- [ ] Point
- [ ] Step
- [ ] Ray
- [ ] Continuous
- Bilateral advancement
- GJK
- Edits
- [ ] More shapes
- [ ] Rotate
- [ ] Anchor
- [ ] Multi-block system
- [ ] Cross chunk structure
- [ ] Lighting
- [ ] Passive chunk save
- Generation
- SIMD
- [ ] Dynamic SIMD level libs
- [ ] https://github.com/Auburn/FastNoise2/releases
- [ ] Double precision
- [ ] Surface features
- [ ] Biomes
- https://imgur.com/kM8b5Zq
- https://imgur.com/a/bh2iy
- https://speciesdevblog.files.wordpress.com/2012/11/biomemap.png
- [ ] ECS
- [x] Data oriented
- [ ] Inventory
- [ ] Octree
- [ ] Surface aware LOD
- [ ] Level converter
- [ ] Area converter
- [ ] Minecraft import
- [ ] Base unit size change
- [ ] Local generation prediction
- [ ] Modding API
- [~] Modding API
- [x] Define boundaries
- [ ] Version handling
- [ ] Client-Server negotiation
- [ ] Save file index
- [ ] List possibilities
## Graphics
- [ ] Disable VK for now
- [~] Disable VK for now
- [ ] Splash screen
- [ ] Proper UI
- [ ] Pause menu
- [ ] Start menu
- [~] Start menu
- [ ] Using texture pack
- [ ] FontAwesome (https://github.com/juliettef/IconFontCppHeaders)
- [ ] Define standard unit size
- [x] Define standard unit sizes
- Terrain (area) 1:1m
- Objects (part) 1-8:1
- Models (instance) ~16:1 manual
- Visual debug
- [ ] Chunk / Region border
- [ ] Area box
- [x] Chunk / Region border
- [x] Area box
- [ ] Collision overview
- [ ] Effective occlusion culling
- [ ] Use average
- [ ] Cast from chunk center
- Curvature
- CubeSphere
@ -99,11 +120,13 @@ Released as `0.0.1`: `Pre alpha 1`
- https://assetstore.unity.com/packages/tools/terrain/microsplat-96478
- https://www.youtube.com/user/slipster216/videos
- [x] Biplanar
- [ ] Fix stochastic (needs regionPosition bound in f16 range)
- [ ] Distance resampling
- [ ] Tessellation
- [ ] Better LOD selection
- Dynamic to target fps
- [ ] Planet scale LOD (using chunk level average)
- [ ] Hard shadow
- [ ] Ambient occlusion
- [ ] World lights
- [ ] HDR
- https://www.youtube.com/watch?v=iikdcAA7cww
@ -116,6 +139,7 @@ Released as `0.0.1`: `Pre alpha 1`
- Bloom
- [ ] Procedural Skybox
- [ ] Deferred
- [ ] Ambient occlusion
- [ ] Cascaded shadow maps
- [ ] Avoid transparent back-face
- [ ] Translucency
@ -136,3 +160,4 @@ Released as `0.0.1`: `Pre alpha 1`
- [ ] Commands
- [~] Authentication
- [ ] Clean kick
- [ ] Check certificate

27
deps/FastNoise2/.fix-include-path.patch vendored Normal file
View File

@ -0,0 +1,27 @@
From 2b251207adef7ab9b9f243d4bcfa2cfc1e515114 Mon Sep 17 00:00:00 2001
From: Shu <me@wadza.fr>
Date: Tue, 5 Jan 2021 18:11:53 +0100
Subject: [PATCH] Fix: include path
---
src/CMakeLists.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c72a4be..f5d134e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,8 +55,8 @@ set(install_targets ${install_targets} FastNoise PARENT_SCOPE)
set(install_fastnoise_headers ${FastNoise_headers} PARENT_SCOPE)
set(install_fastsimd_headers ${FastSIMD_headers} PARENT_SCOPE)
-target_include_directories(FastNoise PUBLIC
- $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>
+target_include_directories(FastNoise SYSTEM PUBLIC
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
$<INSTALL_INTERFACE:include>
)
--
2.30.0

16
deps/FastNoise2/.update.sh vendored Executable file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Refresh the vendored FastNoise2 copy next to this script: clone upstream into a
# temporary ".up" directory, apply the local include-path patch, then copy
# src/, include/ and LICENSE over the vendored ones.

# Stop at the first failing command: without this a failed clone or cd would let
# the later cp / rm -rf run from the wrong directory.
set -euo pipefail

# Absolute path, so later steps do not depend on the current directory.
BASEDIR=$(cd "$(dirname "$0")" && pwd)
CLONE_DIR="$BASEDIR/.up"

# Always drop the temporary clone, including when a step fails half-way.
trap 'rm -rf "$CLONE_DIR"' EXIT
# A leftover clone from an interrupted run would make git clone fail.
rm -rf "$CLONE_DIR"

git clone https://github.com/Auburn/FastNoise2 "$CLONE_DIR"
cd "$CLONE_DIR"
echo "Apply master"
# Alternative kept for reference: pin the most recent tag instead of master.
#TAG=$(git describe --tags $(git rev-list --tags --max-count=1))
#echo "Apply tag $TAG"
#git checkout $TAG
git apply "$BASEDIR/.fix-include-path.patch"
cp -rf src include LICENSE "$BASEDIR"
cd "$BASEDIR"
echo "Done"

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2018 Jimmie Bergmann
Copyright (c) 2020 Jordan Peck
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -0,0 +1,33 @@
#pragma once
#include <memory>
#include "FastSIMD/FastSIMD.h"
#include "FastNoise_Config.h"
#include "Generators/BasicGenerators.h"
#include "Generators/Value.h"
#include "Generators/Perlin.h"
#include "Generators/Simplex.h"
#include "Generators/Cellular.h"
#include "Generators/Fractal.h"
#include "Generators/DomainWarp.h"
#include "Generators/DomainWarpFractal.h"
#include "Generators/Modifiers.h"
#include "Generators/Blends.h"
namespace FastNoise
{
// Creates a node of generator type T and returns it wrapped in a SmartNode<T>.
// T must derive from Generator (enforced at compile time); other classes are
// expected to go through FastSIMD::New() directly, as the assert message says.
// maxLevel is forwarded unchanged to FastSIMD::New<T>().
// NOTE(review): Level_Null presumably means "pick the best supported SIMD level" - confirm in FastSIMD.
template<typename T>
inline SmartNode<T> New( FastSIMD::eLevel maxLevel = FastSIMD::Level_Null )
{
static_assert( std::is_base_of_v<Generator, T>, "Use FastSIMD::New() to create non FastNoise classes" );
return SmartNode<T>( FastSIMD::New<T>( maxLevel ) );
}
// Rebuilds a node tree from its encoded string form by delegating to
// Metadata::DeserialiseSmartNode(), forwarding maxLevel unchanged.
// NOTE(review): the callee names its parameter serialisedBase64NodeData, so the
// string is presumably base64 text - confirm against the implementation.
inline SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::eLevel maxLevel = FastSIMD::Level_Null )
{
return Metadata::DeserialiseSmartNode( encodedNodeTreeString, maxLevel );
}
}

View File

@ -0,0 +1,331 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "FastNoise_Config.h"
#include "FastSIMD/FastSIMD.h"
namespace FastNoise
{
class Generator;
template<typename T>
struct PerDimensionVariable;
struct NodeData;
struct Metadata
{
Metadata( const char* className )
{
name = className;
id = AddMetadataClass( this );
}
static const std::vector<const Metadata*>& GetMetadataClasses()
{
return sMetadataClasses;
}
static const Metadata* GetMetadataClass( std::uint16_t nodeId )
{
if( nodeId < sMetadataClasses.size() )
{
return sMetadataClasses[nodeId];
}
return nullptr;
}
static std::string SerialiseNodeData( NodeData* nodeData, bool fixUp = false );
static SmartNode<> DeserialiseSmartNode( const char* serialisedBase64NodeData, FastSIMD::eLevel level = FastSIMD::Level_Null );
static NodeData* DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut );
struct MemberVariable
{
enum eType
{
EFloat,
EInt,
EEnum
};
union ValueUnion
{
float f;
std::int32_t i;
ValueUnion( float v = 0 )
{
f = v;
}
ValueUnion( std::int32_t v )
{
i = v;
}
operator float()
{
return f;
}
operator std::int32_t()
{
return i;
}
bool operator ==( const ValueUnion& rhs ) const
{
return i == rhs.i;
}
};
const char* name;
eType type;
int dimensionIdx = -1;
ValueUnion valueDefault, valueMin, valueMax;
std::vector<const char*> enumNames;
std::function<void( Generator*, ValueUnion )> setFunc;
};
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddVariable( const char* name, T defaultV, U&& func, T minV = 0, T maxV = 0 )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v ) { func( dynamic_cast<GetArg<U, 0>>(g), v ); };
memberVariables.push_back( member );
}
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddVariable( const char* name, T defaultV, void(U::* func)(T), T minV = 0, T maxV = 0 )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v ) { (dynamic_cast<U*>(g)->*func)(v); };
memberVariables.push_back( member );
}
template<typename T, typename U, typename = std::enable_if_t<std::is_enum_v<T>>, typename... NAMES>
void AddVariableEnum( const char* name, T defaultV, void(U::* func)(T), NAMES... names )
{
MemberVariable member;
member.name = name;
member.type = MemberVariable::EEnum;
member.valueDefault = (int32_t)defaultV;
member.enumNames = { names... };
member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v ) { (dynamic_cast<U*>(g)->*func)((T)v.i); };
memberVariables.push_back( member );
}
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddPerDimensionVariable( const char* name, T defaultV, U&& func, T minV = 0, T maxV = 0 )
{
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<T>::varArray ) / sizeof( *PerDimensionVariable<T>::varArray ); idx++ )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
member.dimensionIdx = idx;
member.setFunc = [func, idx]( Generator* g, MemberVariable::ValueUnion v ) { func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx] = v; };
memberVariables.push_back( member );
}
}
struct MemberNode
{
const char* name;
int dimensionIdx = -1;
std::function<bool( Generator*, SmartNodeArg<> )> setFunc;
};
template<typename T, typename U>
void AddGeneratorSource( const char* name, void(U::* func)(SmartNodeArg<T>) )
{
MemberNode member;
member.name = name;
member.setFunc = [func]( Generator* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
(dynamic_cast<U*>(g)->*func)( downCast );
}
return (bool)downCast;
};
memberNodes.push_back( member );
}
template<typename U>
void AddPerDimensionGeneratorSource( const char* name, U&& func )
{
using GeneratorSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
using T = typename GeneratorSourceT::Type;
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<GeneratorSourceT>::varArray ) / sizeof( *PerDimensionVariable<GeneratorSourceT>::varArray ); idx++ )
{
MemberNode member;
member.name = name;
member.dimensionIdx = idx;
member.setFunc = [func, idx]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
g->SetSourceMemberVariable( func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx], downCast );
}
return (bool)downCast;
};
memberNodes.push_back( member );
}
}
struct MemberHybrid
{
const char* name;
float valueDefault = 0.0f;
int dimensionIdx = -1;
std::function<void( Generator*, float )> setValueFunc;
std::function<bool( Generator*, SmartNodeArg<> )> setNodeFunc;
};
template<typename T, typename U>
void AddHybridSource( const char* name, float defaultValue, void(U::* funcNode)(SmartNodeArg<T>), void(U::* funcValue)(float) )
{
MemberHybrid member;
member.name = name;
member.valueDefault = defaultValue;
member.setNodeFunc = [funcNode]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
(dynamic_cast<U*>(g)->*funcNode)( downCast );
}
return (bool)downCast;
};
member.setValueFunc = [funcValue]( Generator* g, float v )
{
(dynamic_cast<U*>(g)->*funcValue)(v);
};
memberHybrids.push_back( member );
}
template<typename U>
void AddPerDimensionHybridSource( const char* name, float defaultV, U&& func )
{
using HybridSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
using T = typename HybridSourceT::Type;
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<HybridSourceT>::varArray ) / sizeof( *PerDimensionVariable<HybridSourceT>::varArray ); idx++ )
{
MemberHybrid member;
member.name = name;
member.valueDefault = defaultV;
member.dimensionIdx = idx;
member.setNodeFunc = [func, idx]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
g->SetSourceMemberVariable( func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx], downCast );
}
return (bool)downCast;
};
member.setValueFunc = [func, idx]( Generator* g, float v ) { func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx] = v; };
memberHybrids.push_back( member );
}
}
std::uint16_t id;
const char* name;
std::vector<const char*> groups;
std::vector<MemberVariable> memberVariables;
std::vector<MemberNode> memberNodes;
std::vector<MemberHybrid> memberHybrids;
virtual Generator* NodeFactory( FastSIMD::eLevel level = FastSIMD::Level_Null ) const = 0;
private:
template<typename F, typename Ret, typename... Args>
static std::tuple<Args...> GetArg_Helper( Ret( F::* )(Args...) const );
template<typename F, std::size_t I>
using GetArg = std::tuple_element_t<I, decltype(GetArg_Helper( &F::operator() ))>;
static std::uint16_t AddMetadataClass( const Metadata* newMetadata )
{
sMetadataClasses.emplace_back( newMetadata );
return (std::uint16_t)sMetadataClasses.size() - 1;
}
static std::vector<const Metadata*> sMetadataClasses;
};
// Plain description of one node in a node tree: the metadata of its class plus
// the values and child links recorded for it.
// NOTE(review): the entries presumably line up index-for-index with the
// metadata's memberVariables / memberNodes / memberHybrids - confirm in the
// constructor and serialiser, which are not visible here.
struct NodeData
{
NodeData( const Metadata* metadata );
// Metadata of the node class this data describes.
const Metadata* metadata;
// Values for the node's member variables.
std::vector<Metadata::MemberVariable::ValueUnion> variables;
// Child nodes, held as raw pointers.
std::vector<NodeData*> nodes;
// Hybrid inputs: a child node pointer paired with a float value.
std::vector<std::pair<NodeData*, float>> hybrids;
// Member-wise equality. Child links are compared as pointer values, so two
// trees with equal contents but distinct child objects compare unequal.
bool operator ==( const NodeData& rhs ) const
{
return metadata == rhs.metadata &&
variables == rhs.variables &&
nodes == rhs.nodes &&
hybrids == rhs.hybrids;
}
};
}
#define FASTNOISE_METADATA( ... ) public:\
FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );\
const FastNoise::Metadata* GetMetadata() const override;\
struct Metadata : __VA_ARGS__::Metadata{\
Generator* NodeFactory( FastSIMD::eLevel ) const override;
#define FASTNOISE_METADATA_ABSTRACT( ... ) public:\
struct Metadata : __VA_ARGS__::Metadata{

View File

@ -0,0 +1,130 @@
#pragma once
#ifndef FASTSIMD_BUILD_CLASS
#error Do not include this file
#endif
#ifndef FASTNOISE_CLASS
#define FASTNOISE_CLASS( CLASS ) FastNoise::CLASS
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Generator.h"
#else
#include "Generators/Generator.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/BasicGenerators.h"
#else
#include "Generators/BasicGenerators.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Value.h"
#else
#include "Generators/Value.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Perlin.h"
#else
#include "Generators/Perlin.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Simplex.h"
#else
#include "Generators/Simplex.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Cellular.h"
#else
#include "Generators/Cellular.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Fractal.h"
#else
#include "Generators/Fractal.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/DomainWarp.h"
#else
#include "Generators/DomainWarp.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/DomainWarpFractal.h"
#else
#include "Generators/DomainWarpFractal.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Modifiers.h"
#else
#include "Generators/Modifiers.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Blends.h"
#else
#include "Generators/Blends.inl"
#endif
// Nodes
// Order is important!
// Always add to bottom of list,
// inserting will break existing encoded node trees
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Constant ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( White ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Checkerboard ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SineWave ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PositionOutput ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DistanceToOrigin ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Value ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Perlin ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Simplex ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( OpenSimplex2 ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularValue ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularDistance ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularLookup ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalFBm ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalBillow ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalRidged ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalRidgedMulti ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpGradient ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalProgressive ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalIndependant ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainScale ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainOffset ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainRotate ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SeedOffset ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Remap ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( ConvertRGBA8 ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Add ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Subtract ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Multiply ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Divide ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Min ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Max ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MinSmooth ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MaxSmooth ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Fade ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Terrace ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowFloat ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowInt ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainAxisScale ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( AddDimension ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( RemoveDimension ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( GeneratorCache ) )

View File

@ -0,0 +1,20 @@
#pragma once
// std::shared_ptr is used below; include it here so this header does not depend
// on whichever file happened to include <memory> first.
#include <memory>
#include "FastSIMD/FastSIMD.h"
#define FASTNOISE_CALC_MIN_MAX 1
namespace FastNoise
{
    // Bit mask of every SIMD level the FastNoise node classes are built for.
    const FastSIMD::Level_BitFlags SUPPORTED_SIMD_LEVELS =
        FastSIMD::Level_Scalar |
        FastSIMD::Level_SSE2   |
        FastSIMD::Level_SSE41  |
        FastSIMD::Level_AVX2   |
        FastSIMD::Level_AVX512 ;

    // Shared-ownership handle to a node; defaults to the Generator base class.
    template<typename T = class Generator>
    using SmartNode = std::shared_ptr<T>;

    // Parameter form of SmartNode: a const reference to the shared pointer.
    template<typename T = class Generator>
    using SmartNodeArg = const SmartNode<T>&;
}

View File

@ -0,0 +1,109 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
class Constant : public virtual Generator
{
public:
void SetValue( float value ) { mValue = value; }
protected:
float mValue = 1.0f;
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Value", 1.0f, &Constant::SetValue );
}
};
};
class White : public virtual Generator
{
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
}
};
};
class Checkerboard : public virtual Generator
{
public:
void SetSize( float value ) { mSize = value; }
protected:
float mSize = 1.0f;
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Size", 1.0f, &Checkerboard::SetSize );
}
};
};
class SineWave : public virtual Generator
{
public:
void SetScale( float value ) { mScale = value; }
protected:
float mScale = 1.0f;
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Scale", 1.0f, &SineWave::SetScale );
}
};
};
class PositionOutput : public virtual Generator
{
public:
template<Dim D>
void Set( float multiplier, float offset = 0.0f ) { mMultiplier[(int)D] = multiplier; mOffset[(int)D] = offset; }
protected:
PerDimensionVariable<float> mMultiplier;
PerDimensionVariable<float> mOffset;
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddPerDimensionVariable( "Multiplier", 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); } );
this->AddPerDimensionVariable( "Offset", 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); } );
}
};
};
// Generator node configured by a single DistanceFunction setting.
// NOTE(review): judging by the name it outputs the distance from the origin to
// the sample position - the SIMD implementation lives in the matching .inl file.
class DistanceToOrigin : public virtual Generator
{
public:
// Selects the distance metric stored in mDistanceFunction.
void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
protected:
// NOTE(review): the member defaults to EuclideanSquared while the metadata
// below registers Euclidean as the default - confirm which one is intended.
DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
// FASTNOISE_METADATA opens a nested "struct Metadata : Generator::Metadata {",
// which is why two closing "};" follow the constructor.
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
// Registers the node under the "Basic Generators" group and exposes the
// distance function as an enum variable with four named choices.
groups.push_back( "Basic Generators" );
this->AddVariableEnum( "Distance Function", DistanceFunction::Euclidean, &DistanceToOrigin::SetDistanceFunction, "Euclidean", "Euclidean Squared", "Manhattan", "Hybrid" );
}
};
};
}

View File

@ -0,0 +1,96 @@
#include <cassert>
#include "FastSIMD/InlInclude.h"
#include "BasicGenerators.h"
#include "Utils.inl"
template<typename FS>
class FS_T<FastNoise::Constant, FS> : public virtual FastNoise::Constant, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
return float32v( mValue );
}
};
template<typename FS>
class FS_T<FastNoise::White, FS> : public virtual FastNoise::White, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
size_t idx = 0;
((pos = FS_Casti32_f32( (FS_Castf32_i32( pos ) ^ (FS_Castf32_i32( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
return FnUtils::GetValueCoord( seed, FS_Castf32_i32( pos )... );
}
};
template<typename FS>
class FS_T<FastNoise::Checkerboard, FS> : public virtual FastNoise::Checkerboard, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
float32v multiplier = FS_Reciprocal_f32( float32v( mSize ) );
int32v value = (FS_Convertf32_i32( pos * multiplier ) ^ ...);
return float32v( 1.0f ) ^ FS_Casti32_f32( value << 31 );
}
};
template<typename FS>
class FS_T<FastNoise::SineWave, FS> : public virtual FastNoise::SineWave, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
float32v multiplier = FS_Reciprocal_f32( float32v( mScale ) );
return (FS_Sin_f32( pos * multiplier ) * ...);
}
};
// SIMD implementation of FastNoise::PositionOutput.
// Each coordinate is offset by mOffset[axis], scaled by mMultiplier[axis], and the
// per-axis results are summed.
template<typename FS>
class FS_T<FastNoise::PositionOutput, FS> : public virtual FastNoise::PositionOutput, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // Separate cursors into the per-axis offset and multiplier arrays.
        size_t offsetCursor = 0;
        size_t scaleCursor = 0;

        // Comma fold: axes are processed left to right, so both cursors stay in step.
        (
            ( ( pos += float32v( mOffset[offsetCursor++] ) ) *= float32v( mMultiplier[scaleCursor++] ) ),
            ...
        );

        return ( pos + ... );
    }
};
// SIMD implementation of FastNoise::DistanceToOrigin.
// Forwards the position to FnUtils::CalcDistance using the configured distance function.
template<typename FS>
class FS_T<FastNoise::DistanceToOrigin, FS> : public virtual FastNoise::DistanceToOrigin, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v originDistance = FnUtils::CalcDistance( mDistanceFunction, pos... );
        return originDistance;
    }
};

View File

@ -0,0 +1,198 @@
#pragma once
#include "Generator.h"
#include <climits>
namespace FastNoise
{
// Abstract base for two-operand nodes whose left operand is set from a generator
// and whose right operand can be set from either a generator or a float.
class OperatorSourceLHS : public virtual Generator
{
public:
    void SetLHS( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mLHS, gen );
    }

    void SetRHS( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mRHS, gen );
    }

    void SetRHS( float value )
    {
        mRHS = value;
    }

protected:
    GeneratorSource mLHS;
    HybridSource mRHS;

    FASTNOISE_METADATA_ABSTRACT( Generator )

        // Registers the node under "Blends" together with its two operands.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
            this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS );
        }
    };
};
// Abstract base for two-operand nodes where both operands can be set from either
// a generator or a float.
class OperatorHybridLHS : public virtual Generator
{
public:
    void SetLHS( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mLHS, gen );
    }

    void SetLHS( float value )
    {
        mLHS = value;
    }

    void SetRHS( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mRHS, gen );
    }

    void SetRHS( float value )
    {
        mRHS = value;
    }

protected:
    HybridSource mLHS;
    HybridSource mRHS;

    FASTNOISE_METADATA_ABSTRACT( Generator )

        // Registers the node under "Blends" together with its two hybrid operands.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS );
            this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
        }
    };
};
// Addition node; operands and metadata are inherited from OperatorSourceLHS.
class Add : public virtual OperatorSourceLHS
{
    FASTNOISE_METADATA( OperatorSourceLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorSourceLHS::Metadata::Metadata;
    };
};
// Subtraction node; operands and metadata are inherited from OperatorHybridLHS.
class Subtract : public virtual OperatorHybridLHS
{
    FASTNOISE_METADATA( OperatorHybridLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorHybridLHS::Metadata::Metadata;
    };
};
// Multiplication node; operands and metadata are inherited from OperatorSourceLHS.
class Multiply : public virtual OperatorSourceLHS
{
    FASTNOISE_METADATA( OperatorSourceLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorSourceLHS::Metadata::Metadata;
    };
};
// Division node; operands and metadata are inherited from OperatorHybridLHS.
class Divide : public virtual OperatorHybridLHS
{
    FASTNOISE_METADATA( OperatorHybridLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorHybridLHS::Metadata::Metadata;
    };
};
// Minimum node; operands and metadata are inherited from OperatorSourceLHS.
class Min : public virtual OperatorSourceLHS
{
    FASTNOISE_METADATA( OperatorSourceLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorSourceLHS::Metadata::Metadata;
    };
};
// Maximum node; operands and metadata are inherited from OperatorSourceLHS.
class Max : public virtual OperatorSourceLHS
{
    FASTNOISE_METADATA( OperatorSourceLHS )

        // Reuse the base metadata constructor unchanged.
        using OperatorSourceLHS::Metadata::Metadata;
    };
};
// Power node with a floating-point exponent: both the base ("Value") and the
// exponent ("Pow") can be set from either a generator or a float.
class PowFloat : public virtual Generator
{
public:
    void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
    void SetValue( float value ) { mValue = value; }
    void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); }
    void SetPow( float value ) { mPow = value; }

protected:
    // Initialised to the same 2.0f that is registered with the metadata below, so
    // a freshly constructed node behaves as its metadata describes (the other
    // hybrid members in this file, e.g. MinSmooth::mSmoothness, follow the same rule).
    HybridSource mValue = 2.0f;
    HybridSource mPow = 2.0f;

    FASTNOISE_METADATA( Generator )

        // Registers the node under "Blends" with its two hybrid inputs.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
        }
    };
};
// Power node with a fixed integer exponent applied to a generator's output.
//
// Derives directly from Generator: the metadata below is built on
// Generator::Metadata and only registers "Value" and "Pow", so inheriting the
// LHS/RHS operands of OperatorHybridLHS added unused sources and setters.
class PowInt : public virtual Generator
{
public:
    void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
    void SetPow( int32_t value ) { mPow = value; }

protected:
    GeneratorSource mValue;

    // Initialised to the value registered with the metadata; previously this was
    // left indeterminate until SetPow() was called, yet it is read on every evaluation.
    int32_t mPow = 2;

    FASTNOISE_METADATA( Generator )

        // Registers the node under "Blends"; the trailing 2 and INT_MAX are passed
        // as extra arguments to AddVariable (presumably the allowed range — confirm
        // against Generator.h).
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddGeneratorSource( "Value", &PowInt::SetValue );
            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
        }
    };
};
// Smooth-minimum node: adds a hybrid "Smoothness" input on top of the LHS/RHS
// operands inherited from OperatorSourceLHS.
class MinSmooth : public virtual OperatorSourceLHS
{
public:
    void SetSmoothness( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSmoothness, gen );
    }

    void SetSmoothness( float value )
    {
        mSmoothness = value;
    }

protected:
    HybridSource mSmoothness = 0.1f;

    FASTNOISE_METADATA( OperatorSourceLHS )

        // Extends the base metadata with the smoothness input.
        Metadata( const char* className ) : OperatorSourceLHS::Metadata( className )
        {
            this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
        }
    };
};
// Smooth-maximum node: adds a hybrid "Smoothness" input on top of the LHS/RHS
// operands inherited from OperatorSourceLHS.
class MaxSmooth : public virtual OperatorSourceLHS
{
public:
    void SetSmoothness( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSmoothness, gen );
    }

    void SetSmoothness( float value )
    {
        mSmoothness = value;
    }

protected:
    HybridSource mSmoothness = 0.1f;

    FASTNOISE_METADATA( OperatorSourceLHS )

        // Extends the base metadata with the smoothness input.
        Metadata( const char* className ) : OperatorSourceLHS::Metadata( className )
        {
            this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
        }
    };
};
// Blend node with two generator inputs (A and B) and a hybrid "Fade" input.
class Fade : public virtual Generator
{
public:
    void SetA( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mA, gen );
    }

    void SetB( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mB, gen );
    }

    void SetFade( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mFade, gen );
    }

    void SetFade( float value )
    {
        mFade = value;
    }

protected:
    GeneratorSource mA;
    GeneratorSource mB;
    HybridSource mFade = 0.5f;

    FASTNOISE_METADATA( Generator )

        // Registers the node under "Blends" with its three inputs.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddGeneratorSource( "A", &Fade::SetA );
            this->AddGeneratorSource( "B", &Fade::SetB );
            this->AddHybridSource( "Fade", 0.5f, &Fade::SetFade, &Fade::SetFade );
        }
    };
};
}

View File

@ -0,0 +1,174 @@
#include "FastSIMD/InlInclude.h"
#include "Blends.h"
// SIMD implementation of FastNoise::Add: LHS + RHS.
template<typename FS>
class FS_T<FastNoise::Add, FS> : public virtual FastNoise::Add, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs + rhs;
    }
};
// SIMD implementation of FastNoise::Subtract: LHS - RHS.
template<typename FS>
class FS_T<FastNoise::Subtract, FS> : public virtual FastNoise::Subtract, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs - rhs;
    }
};
// SIMD implementation of FastNoise::Multiply: LHS * RHS.
template<typename FS>
class FS_T<FastNoise::Multiply, FS> : public virtual FastNoise::Multiply, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs * rhs;
    }
};
// SIMD implementation of FastNoise::Divide: LHS / RHS.
template<typename FS>
class FS_T<FastNoise::Divide, FS> : public virtual FastNoise::Divide, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v numerator = this->GetSourceValue( mLHS, seed, pos... );
        float32v denominator = this->GetSourceValue( mRHS, seed, pos... );

        return numerator / denominator;
    }
};
// SIMD implementation of FastNoise::PowFloat: FS_Pow_f32( Value, Pow ).
template<typename FS>
class FS_T<FastNoise::PowFloat, FS> : public virtual FastNoise::PowFloat, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v base = this->GetSourceValue( mValue, seed, pos... );
        float32v exponent = this->GetSourceValue( mPow, seed, pos... );

        return FS_Pow_f32( base, exponent );
    }
};
// SIMD implementation of FastNoise::PowInt: repeated multiplication of the source value.
template<typename FS>
class FS_T<FastNoise::PowInt, FS> : public virtual FastNoise::PowInt, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v base = this->GetSourceValue( mValue, seed, pos... );

        // Start from the square; for mPow <= 2 the loop body never runs and the
        // square is returned as-is.
        float32v result = base * base;

        int32_t exponent = 2;
        while( exponent < mPow )
        {
            result *= base;
            ++exponent;
        }

        return result;
    }
};
// SIMD implementation of FastNoise::Min: FS_Min_f32( LHS, RHS ).
template<typename FS>
class FS_T<FastNoise::Min, FS> : public virtual FastNoise::Min, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return FS_Min_f32( lhs, rhs );
    }
};
// SIMD implementation of FastNoise::Max: FS_Max_f32( LHS, RHS ).
template<typename FS>
class FS_T<FastNoise::Max, FS> : public virtual FastNoise::Max, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return FS_Max_f32( lhs, rhs );
    }
};
// SIMD implementation of FastNoise::MinSmooth.
// Computes min( a, b ) - h^3 * k / 6, where k is the smoothness magnitude and
// h = max( k - |a - b|, 0 ) / k.
template<typename FS>
class FS_T<FastNoise::MinSmooth, FS> : public virtual FastNoise::MinSmooth, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        // Clamp |smoothness| to a tiny positive value so the reciprocal below is
        // never taken of zero.
        float32v smoothAmount = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );

        float32v blend = FS_Max_f32( smoothAmount - FS_Abs_f32( lhs - rhs ), float32v( 0.0f ) );
        blend *= FS_Reciprocal_f32( smoothAmount );

        float32v cubic = blend * blend * blend * smoothAmount;
        float32v hardMin = FS_Min_f32( lhs, rhs );

        return FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), cubic, hardMin );
    }
};
// SIMD implementation of FastNoise::MaxSmooth.
// Same formula as the smooth minimum, applied to the negated inputs and negated
// again on the way out.
template<typename FS>
class FS_T<FastNoise::MaxSmooth, FS> : public virtual FastNoise::MaxSmooth, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v negLhs = -this->GetSourceValue( mLHS, seed, pos... );
        float32v negRhs = -this->GetSourceValue( mRHS, seed, pos... );

        // Clamp |smoothness| to a tiny positive value so the reciprocal below is
        // never taken of zero.
        float32v smoothAmount = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );

        float32v blend = FS_Max_f32( smoothAmount - FS_Abs_f32( negLhs - negRhs ), float32v( 0.0f ) );
        blend *= FS_Reciprocal_f32( smoothAmount );

        float32v cubic = blend * blend * blend * smoothAmount;
        float32v hardMin = FS_Min_f32( negLhs, negRhs );

        return -FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), cubic, hardMin );
    }
};
// SIMD implementation of FastNoise::Fade: A * ( 1 - |fade| ) + B * |fade|.
template<typename FS>
class FS_T<FastNoise::Fade, FS> : public virtual FastNoise::Fade, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // The absolute value of the fade input is used as the blend weight.
        float32v weightB = FS_Abs_f32( this->GetSourceValue( mFade, seed, pos... ) );

        float32v sampleA = this->GetSourceValue( mA, seed, pos... );
        float32v weightA = float32v( 1 ) - weightB;
        float32v scaledB = this->GetSourceValue( mB, seed, pos... ) * weightB;

        return FS_FMulAdd_f32( sampleA, weightA, scaledB );
    }
};

View File

@ -0,0 +1,104 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Abstract base for the cellular noise nodes: holds the jitter modifier, the
// distance function and the per-dimension jitter constants used by the SIMD code.
class Cellular : public virtual Generator
{
public:
    void SetJitterModifier( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mJitterModifier, gen );
    }

    void SetJitterModifier( float value )
    {
        mJitterModifier = value;
    }

    void SetDistanceFunction( DistanceFunction value )
    {
        mDistanceFunction = value;
    }

protected:
    HybridSource mJitterModifier = 1.0f;
    DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;

    // Base jitter scale for 2D, 3D and 4D evaluation respectively.
    const float kJitter2D = 0.437015f;
    const float kJitter3D = 0.396143f;
    const float kJitter4D = 0.366025f;

    FASTNOISE_METADATA_ABSTRACT( Generator )

        // Registers the node under "Coherent Noise" with its shared settings.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Coherent Noise" );
            this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
            this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, "Euclidean", "Euclidean Squared", "Manhattan", "Hybrid" );
        }
    };
};
// Cellular node that outputs a per-cell value; mValueIndex selects which of the
// tracked cells is returned by the SIMD implementation.
class CellularValue : public virtual Cellular
{
public:
    // NOTE(review): the index is stored without validation; the metadata below
    // passes 0 and kMaxDistanceCount - 1 alongside it (presumably as bounds).
    void SetValueIndex( int value )
    {
        mValueIndex = value;
    }

protected:
    static const int kMaxDistanceCount = 4;

    int mValueIndex = 0;

    FASTNOISE_METADATA( Cellular )

        // Extends the Cellular metadata with the value index setting.
        Metadata( const char* className ) : Cellular::Metadata( className )
        {
            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, kMaxDistanceCount - 1 );
        }
    };
};
// Cellular node that outputs a distance, or a combination of two distances
// selected by index and combined according to ReturnType.
class CellularDistance : public virtual Cellular
{
public:
    // How the distances at index 0 and index 1 are combined into the output.
    enum class ReturnType
    {
        Index0,
        Index0Add1,
        Index0Sub1,
        Index0Mul1,
        Index0Div1
    };

    void SetDistanceIndex0( int value )
    {
        mDistanceIndex0 = value;
    }

    void SetDistanceIndex1( int value )
    {
        mDistanceIndex1 = value;
    }

    void SetReturnType( ReturnType value )
    {
        mReturnType = value;
    }

protected:
    static const int kMaxDistanceCount = 4;

    ReturnType mReturnType = ReturnType::Index0;
    int mDistanceIndex0 = 0;
    int mDistanceIndex1 = 1;

    FASTNOISE_METADATA( Cellular )

        // Extends the Cellular metadata with the two indices and the return type.
        Metadata( const char* className ) : Cellular::Metadata( className )
        {
            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, kMaxDistanceCount - 1 );
            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, kMaxDistanceCount - 1 );
            this->AddVariableEnum( "Return Type", ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" );
        }
    };
};
// Cellular node that samples a separate "Lookup" generator, with the sampled
// position scaled by mLookupFreq in the SIMD implementation.
class CellularLookup : public virtual Cellular
{
public:
    void SetLookup( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mLookup, gen );
    }

    void SetLookupFrequency( float freq )
    {
        mLookupFreq = freq;
    }

protected:
    GeneratorSource mLookup;
    float mLookupFreq = 0.1f;

    FASTNOISE_METADATA( Cellular )

        // Extends the Cellular metadata with the lookup source and its frequency.
        Metadata( const char* className ) : Cellular::Metadata( className )
        {
            this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
            this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency );
        }
    };
};
}

View File

@ -0,0 +1,655 @@
#include "FastSIMD/InlInclude.h"
#include <cfloat>
#include <array>
#include "Cellular.h"
#include "Utils.inl"
// SIMD-side counterpart of the abstract FastNoise::Cellular base. It adds no
// members of its own; the concrete cellular implementations below derive from it.
template<typename FS>
class FS_T<FastNoise::Cellular, FS> : public virtual FastNoise::Cellular, public FS_T<FastNoise::Generator, FS>
{
};
// SIMD implementation of FastNoise::CellularValue for 2, 3 and 4 dimensions.
//
// Each Gen overload visits the 3^N block of cells starting one cell below the
// converted input coordinate. For every cell a hash is computed, a jittered
// feature point is derived from the hash bits, and the distance from the input
// point to that feature point is measured. The cells are kept ordered by
// ascending distance and the hash-derived value of the cell at position
// mValueIndex is returned.
template<typename FS>
class FS_T<FastNoise::CellularValue, FS> : public virtual FastNoise::CellularValue, public FS_T<FastNoise::Cellular, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D evaluation: the hash is split into two 16-bit fields for the jitter direction.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;

        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        // Start one cell below the converted coordinate.
        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );

        // Offset from the input point to the current cell origin.
        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;

        // Pre-multiply by the hashing primes so stepping a cell is a single add.
        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );

                // Normalise the jitter direction and scale it by the jitter amount.
                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
                xd = FS_FMulAdd_f32( xd, invMag, xcf );
                yd = FS_FMulAdd_f32( yd, invMag, ycf );

                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );

                this->InsertCell( value, distance, newCellValue, newDistance );

                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // 3D evaluation: the hash is split into three 10-bit fields for the jitter direction.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;

        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v yd = FS_Converti32_f32( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v zd = FS_Converti32_f32( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );

                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
                    zd = FS_FMulAdd_f32( zd, invMag, zcf );

                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );

                    this->InsertCell( value, distance, newCellValue, newDistance );

                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // 4D evaluation: the hash is split into four 8-bit fields for the jitter direction.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
    {
        float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;

        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );
        wcBase *= int32v( FnPrimes::W );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    float32v wcf = wcfBase;
                    int32v wc = wcBase;

                    for( int wi = 0; wi < 3; wi++ )
                    {
                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );

                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
                        wd = FS_FMulAdd_f32( wd, invMag, wcf );

                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );

                        this->InsertCell( value, distance, newCellValue, newDistance );

                        wcf += float32v( 1 );
                        wc += int32v( FnPrimes::W );
                    }
                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // Inserts one candidate cell into the distance-ordered value/distance arrays.
    //
    // Slots 0..mValueIndex are kept sorted by ascending distance: at each slot the
    // closer of (candidate, stored) stays, and the displaced entry is carried on
    // to the next slot. The walk stops once slot mValueIndex has been updated,
    // because later slots are never read by the callers.
    //
    // The previous inline loops stopped on `i > mValueIndex`, which touched slot
    // mValueIndex + 1; with mValueIndex == kMaxDistanceCount - 1 that indexed one
    // element past the end of both arrays. Stopping on `>=` yields the same
    // value[mValueIndex] while staying in bounds.
    FS_INLINE void InsertCell( std::array<float32v, kMaxDistanceCount>& value, std::array<float32v, kMaxDistanceCount>& distance, float32v newCellValue, float32v newDistance ) const
    {
        for( int i = 0; ; i++ )
        {
            mask32v closer = newDistance < distance[i];

            float32v localDistance = distance[i];
            float32v localCellValue = value[i];

            distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
            value[i] = FS_Select_f32( closer, newCellValue, value[i] );

            if( i >= mValueIndex )
            {
                break;
            }

            // Carry whichever entry lost this slot on to the next one.
            newDistance = FS_Select_f32( closer, localDistance, newDistance );
            newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
        }
    }
};
// SIMD implementation of FastNoise::CellularDistance for 2, 3 and 4 dimensions.
//
// Each Gen overload visits the 3^N block of cells starting one cell below the
// converted input coordinate, derives a jittered feature point per cell from a
// hash, and keeps the kMaxDistanceCount smallest distances in ascending order.
// GetReturn then combines the distances at mDistanceIndex0 / mDistanceIndex1
// according to mReturnType.
template<typename FS>
class FS_T<FastNoise::CellularDistance, FS> : public virtual FastNoise::CellularDistance, public FS_T<FastNoise::Cellular, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D evaluation: the hash is split into two 16-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
std::array<float32v, kMaxDistanceCount> distance;
distance.fill( float32v( INFINITY ) );
// Start one cell below the converted coordinate.
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
// Offset from the input point to the current cell origin.
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
// Pre-multiply by the hashing primes so stepping a cell is a single add.
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for ( int yi = 0; yi < 3; yi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
// Normalise the jitter direction and scale it by the jitter amount.
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
// Branch-free sorted insert, walking from the largest slot down: each slot
// takes the smaller of itself and the candidate, but never less than the
// slot below it (which is still the pre-insert value at this point).
for( int i = kMaxDistanceCount - 1; i > 0; i-- )
{
distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
}
distance[0] = FS_Min_f32( distance[0], newDistance );
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return GetReturn( distance );
}
// 3D evaluation: the hash is split into three 10-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
std::array<float32v, kMaxDistanceCount> distance;
distance.fill( float32v( INFINITY ) );
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
// Same branch-free sorted insert as the 2D overload.
for( int i = kMaxDistanceCount - 1; i > 0; i-- )
{
distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
}
distance[0] = FS_Min_f32( distance[0], newDistance );
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return GetReturn( distance );
}
// 4D evaluation: the hash is split into four 8-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
std::array<float32v, kMaxDistanceCount> distance;
distance.fill( float32v( INFINITY ) );
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
wcBase *= int32v( FnPrimes::W );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
float32v wcf = wcfBase;
int32v wc = wcBase;
for( int wi = 0; wi < 3; wi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
wd = FS_FMulAdd_f32( wd, invMag, wcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
// Same branch-free sorted insert as the 2D overload.
for( int i = kMaxDistanceCount - 1; i > 0; i-- )
{
distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
}
distance[0] = FS_Min_f32( distance[0], newDistance );
wcf += float32v( 1 );
wc += int32v( FnPrimes::W );
}
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return GetReturn( distance );
}
// Combines the two selected distances according to mReturnType.
// The array is taken by non-const reference because the Euclidean branch
// rewrites the selected slots in place.
FS_INLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const
{
if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean )
{
// d * 1/sqrt(d): presumably CalcDistance returns a squared distance in this
// mode and the square root is deferred to here — TODO confirm in Utils.inl.
// NOTE(review): if mDistanceIndex0 == mDistanceIndex1 the same slot is
// scaled twice; confirm whether equal indices are expected.
distance[mDistanceIndex0] *= FS_InvSqrt_f32( distance[mDistanceIndex0] );
distance[mDistanceIndex1] *= FS_InvSqrt_f32( distance[mDistanceIndex1] );
}
switch( mReturnType )
{
// Unknown enum values fall through to the Index0 behaviour.
default:
case ReturnType::Index0:
{
return distance[mDistanceIndex0];
}
case ReturnType::Index0Add1:
{
return distance[mDistanceIndex0] + distance[mDistanceIndex1];
}
case ReturnType::Index0Sub1:
{
return distance[mDistanceIndex0] - distance[mDistanceIndex1];
}
case ReturnType::Index0Mul1:
{
return distance[mDistanceIndex0] * distance[mDistanceIndex1];
}
case ReturnType::Index0Div1:
{
// Division is done as a multiply by FS_Reciprocal_f32 of the second distance.
return distance[mDistanceIndex0] * FS_Reciprocal_f32( distance[mDistanceIndex1] );
}
}
}
};
// SIMD implementation of FastNoise::CellularLookup for 2, 3 and 4 dimensions.
//
// Each Gen overload visits the 3^N block of cells starting one cell below the
// converted input coordinate, derives a jittered feature point per cell from a
// hash, and remembers the position of the closest feature point. The mLookup
// source is then sampled at that position scaled by mLookupFreq.
template<typename FS>
class FS_T<FastNoise::CellularLookup, FS> : public virtual FastNoise::CellularLookup, public FS_T<FastNoise::Cellular, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D evaluation: the hash is split into two 16-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
float32v distance( FLT_MAX );
// Not initialised here: they are first written by the selects below, which
// relies on the first candidate comparing closer than FLT_MAX.
float32v cellX, cellY;
// Start one cell below the converted coordinate.
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
// Offset from the input point to the current cell origin.
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
// Pre-multiply by the hashing primes so stepping a cell is a single add.
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
// Normalise the jitter direction and scale it by the jitter amount.
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
// xd/yd are relative to the input point, so adding x/y gives the feature
// point's position in input space.
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
// The lookup source is sampled with seed - (-1), a different seed from the one
// used for the cell hashing above.
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
}
// 3D evaluation: the hash is split into three 10-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
float32v distance( FLT_MAX );
// First written by the selects below (see the 2D overload).
float32v cellX, cellY, cellZ;
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
cellZ = FS_Select_f32( closer, zd + z, cellZ );
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
}
// 4D evaluation: the hash is split into four 8-bit fields for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
float32v distance( FLT_MAX );
// First written by the selects below (see the 2D overload).
float32v cellX, cellY, cellZ, cellW;
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
wcBase *= int32v( FnPrimes::W );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
float32v wcf = wcfBase;
int32v wc = wcBase;
for( int wi = 0; wi < 3; wi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
wd = FS_FMulAdd_f32( wd, invMag, wcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
cellZ = FS_Select_f32( closer, zd + z, cellZ );
cellW = FS_Select_f32( closer, wd + w, cellW );
wcf += float32v( 1 );
wc += int32v( FnPrimes::W );
}
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
}
};

View File

@ -0,0 +1,37 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Abstract base for domain-warp nodes. Holds the generator to sample after
// warping, a warp amplitude (either a constant or another generator) and a
// scalar warp frequency. The warp itself is implemented by the SIMD subclasses.
class DomainWarp : public virtual Generator
{
public:
// Attach the generator that is sampled at the warped position.
void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
// Drive the warp amplitude from another generator.
void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); }
// Use a constant warp amplitude. The float is converted to a fresh HybridSource
// and assigned over the member.
void SetWarpAmplitude( float value ) { mWarpAmplitude = value; }
// Multiplier applied to the input position before it is fed to the warp.
void SetWarpFrequency( float value ) { mWarpFrequency = value; }
protected:
GeneratorSource mSource;
HybridSource mWarpAmplitude = 1.0f;
float mWarpFrequency = 0.5f;
// NOTE(review): FASTNOISE_METADATA_ABSTRACT is defined elsewhere; judging by the
// constructor and the extra closing brace below, it opens a nested Metadata struct.
FASTNOISE_METADATA_ABSTRACT( Generator )
// Registers the node's group and its editable members. The defaults passed here
// (1.0f, 0.5f) mirror the member initialisers above.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Domain Warp" );
this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
this->AddHybridSource( "Warp Amplitude", 1.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude );
this->AddVariable( "Warp Frequency", 0.5f, &DomainWarp::SetWarpFrequency );
}
};
};
// Concrete domain-warp node type. It adds no members of its own; its metadata
// reuses the DomainWarp metadata constructor unchanged.
class DomainWarpGradient : public virtual DomainWarp
{
FASTNOISE_METADATA( DomainWarp )
// Inherit the DomainWarp metadata constructor (same group and variables).
using DomainWarp::Metadata::Metadata;
};
};
}

View File

@ -0,0 +1,181 @@
#include "FastSIMD/InlInclude.h"
#include "DomainWarp.h"
#include "Utils.inl"
template<typename FS>
class FS_T<FastNoise::DomainWarp, FS> : public virtual FastNoise::DomainWarp, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared 2D/3D/4D implementation: warp the position once, then sample the
    // source at the warped position.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // Amplitude is sampled at the un-warped position.
        float32v warpAmplitude = this->GetSourceValue( mWarpAmplitude, seed, pos... );
        float32v warpFrequency( mWarpFrequency );

        // Warp() receives the frequency-scaled position by value and accumulates
        // the displacement into the local `pos` copies through its reference outputs.
        Warp( seed, warpAmplitude, (pos * warpFrequency)..., pos... );

        return this->GetSourceValue( mSource, seed, pos... );
    }

public:
    // Read-only accessors for nodes that drive the warp themselves.
    float GetWarpFrequency() const { return mWarpFrequency; }
    const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
    const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }

    // Warp primitives for 2, 3 and 4 dimensions. The x/y/z/w inputs are passed by
    // value; the *Out references are the coordinates to be displaced.
    virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0;
    virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0;
    virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0;
};
// SIMD implementation of the gradient domain warp. Each Warp() overload hashes
// the corners of the lattice cell containing the input position, unpacks one
// value per axis from every corner hash, interpolates those values across the
// cell, and adds the centred, amplitude-scaled result to the output coordinates.
template<typename FS>
class FS_T<FastNoise::DomainWarpGradient, FS> : public virtual FastNoise::DomainWarpGradient, public FS_T<FastNoise::DomainWarp, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
public:
// 2D warp: four cell corners, two 16-bit values per corner hash.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
{
// Cell origin (floor of the position)...
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
// ...converted to integers and pre-multiplied by the per-axis primes used by the hash.
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
// The neighbouring corner is one prime step further along each axis.
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
// From here on xs/ys hold the interpolation weights: the fractional position
// inside the cell passed through FnUtils::InterpHermite.
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
// For corner (_x,_y): hash it, then split the hash into a low and a high
// 16-bit field, producing the locals x<_x><_y> and y<_x><_y>.
#define GRADIENT_COORD( _x, _y )\
int32v hash##_x##_y = FnUtils::HashPrimesHB(seed, x##_x, y##_y );\
float32v x##_x##_y = FS_Converti32_f32( hash##_x##_y & int32v( 0xffff ) );\
float32v y##_x##_y = FS_Converti32_f32( (hash##_x##_y >> 16) & int32v( 0xffff ) );
GRADIENT_COORD( 0, 0 );
GRADIENT_COORD( 1, 0 );
GRADIENT_COORD( 0, 1 );
GRADIENT_COORD( 1, 1 );
#undef GRADIENT_COORD
// Scale factor applied after centring: warpAmp divided by half the 16-bit field range.
float32v normalise = warpAmp * float32v( 1.0f / (0xffff / 2.0f) );
// Bilinear blend of the corner values, centred by subtracting half the field
// range, scaled, and accumulated onto the output coordinate.
xOut = FS_FMulAdd_f32( FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f ), normalise, yOut );
}
// 3D warp: eight cell corners, three 10-bit values per corner hash.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
{
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
// xs/ys/zs become the interpolation weights inside the cell.
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
zs = FnUtils::InterpHermite( z - zs );
// For corner (_x,_y,_z): hash it and unpack three 10-bit fields (bits 0-9,
// 10-19, 20-29) into x/y/z<_x><_y><_z>.
#define GRADIENT_COORD( _x, _y, _z )\
int32v hash##_x##_y##_z = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z );\
float32v x##_x##_y##_z = FS_Converti32_f32( hash##_x##_y##_z & int32v( 0x3ff ) );\
float32v y##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
float32v z##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
GRADIENT_COORD( 0, 0, 0 );
GRADIENT_COORD( 1, 0, 0 );
GRADIENT_COORD( 0, 1, 0 );
GRADIENT_COORD( 1, 1, 0 );
GRADIENT_COORD( 0, 0, 1 );
GRADIENT_COORD( 1, 0, 1 );
GRADIENT_COORD( 0, 1, 1 );
GRADIENT_COORD( 1, 1, 1 );
#undef GRADIENT_COORD
// Bilinear blend on the z0 face (?0z) and on the z1 face (?1z)...
float32v x0z = FnUtils::Lerp( FnUtils::Lerp( x000, x100, xs ), FnUtils::Lerp( x010, x110, xs ), ys );
float32v y0z = FnUtils::Lerp( FnUtils::Lerp( y000, y100, xs ), FnUtils::Lerp( y010, y110, xs ), ys );
float32v z0z = FnUtils::Lerp( FnUtils::Lerp( z000, z100, xs ), FnUtils::Lerp( z010, z110, xs ), ys );
float32v x1z = FnUtils::Lerp( FnUtils::Lerp( x001, x101, xs ), FnUtils::Lerp( x011, x111, xs ), ys );
float32v y1z = FnUtils::Lerp( FnUtils::Lerp( y001, y101, xs ), FnUtils::Lerp( y011, y111, xs ), ys );
float32v z1z = FnUtils::Lerp( FnUtils::Lerp( z001, z101, xs ), FnUtils::Lerp( z011, z111, xs ), ys );
// ...then blend the two faces along z, centre on half the 10-bit range, scale and accumulate.
float32v normalise = warpAmp * float32v( 1.0f / (0x3ff / 2.0f) );
xOut = FS_FMulAdd_f32( FnUtils::Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, yOut );
zOut = FS_FMulAdd_f32( FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, zOut );
}
// 4D warp: sixteen cell corners, four 8-bit values per corner hash.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
{
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
float32v ws = FS_Floor_f32( w );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
int32v w1 = w0 + int32v( FnPrimes::W );
// xs/ys/zs/ws become the interpolation weights inside the cell.
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
zs = FnUtils::InterpHermite( z - zs );
ws = FnUtils::InterpHermite( w - ws );
// For corner (_x,_y,_z,_w): hash it and unpack the four bytes of the hash
// into x/y/z/w<_x><_y><_z><_w>.
#define GRADIENT_COORD( _x, _y, _z, _w )\
int32v hash##_x##_y##_z##_w = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
float32v x##_x##_y##_z##_w = FS_Converti32_f32( hash##_x##_y##_z##_w & int32v( 0xff ) );\
float32v y##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
float32v z##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
float32v w##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
GRADIENT_COORD( 0, 0, 0, 0 );
GRADIENT_COORD( 1, 0, 0, 0 );
GRADIENT_COORD( 0, 1, 0, 0 );
GRADIENT_COORD( 1, 1, 0, 0 );
GRADIENT_COORD( 0, 0, 1, 0 );
GRADIENT_COORD( 1, 0, 1, 0 );
GRADIENT_COORD( 0, 1, 1, 0 );
GRADIENT_COORD( 1, 1, 1, 0 );
GRADIENT_COORD( 0, 0, 0, 1 );
GRADIENT_COORD( 1, 0, 0, 1 );
GRADIENT_COORD( 0, 1, 0, 1 );
GRADIENT_COORD( 1, 1, 0, 1 );
GRADIENT_COORD( 0, 0, 1, 1 );
GRADIENT_COORD( 1, 0, 1, 1 );
GRADIENT_COORD( 0, 1, 1, 1 );
GRADIENT_COORD( 1, 1, 1, 1 );
#undef GRADIENT_COORD
// Trilinear blend (x, then y, then z) of the eight corners with w index 0 (?0w)...
float32v x0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0000, x1000, xs ), FnUtils::Lerp( x0100, x1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0010, x1010, xs ), FnUtils::Lerp( x0110, x1110, xs ), ys ), zs );
float32v y0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0000, y1000, xs ), FnUtils::Lerp( y0100, y1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0010, y1010, xs ), FnUtils::Lerp( y0110, y1110, xs ), ys ), zs );
float32v z0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0000, z1000, xs ), FnUtils::Lerp( z0100, z1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0010, z1010, xs ), FnUtils::Lerp( z0110, z1110, xs ), ys ), zs );
float32v w0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0000, w1000, xs ), FnUtils::Lerp( w0100, w1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0010, w1010, xs ), FnUtils::Lerp( w0110, w1110, xs ), ys ), zs );
// ...and of the eight corners with w index 1 (?1w).
float32v x1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0001, x1001, xs ), FnUtils::Lerp( x0101, x1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0011, x1011, xs ), FnUtils::Lerp( x0111, x1111, xs ), ys ), zs );
float32v y1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0001, y1001, xs ), FnUtils::Lerp( y0101, y1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0011, y1011, xs ), FnUtils::Lerp( y0111, y1111, xs ), ys ), zs );
float32v z1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0001, z1001, xs ), FnUtils::Lerp( z0101, z1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0011, z1011, xs ), FnUtils::Lerp( z0111, z1111, xs ), ys ), zs );
float32v w1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0001, w1001, xs ), FnUtils::Lerp( w0101, w1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0011, w1011, xs ), FnUtils::Lerp( w0111, w1111, xs ), ys ), zs );
// Final blend along w, centred on half the 8-bit range, scaled and accumulated.
float32v normalise = warpAmp * float32v( 1.0f / (0xff / 2.0f) );
xOut = FS_FMulAdd_f32( FnUtils::Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f ), normalise, yOut );
zOut = FS_FMulAdd_f32( FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f ), normalise, zOut );
wOut = FS_FMulAdd_f32( FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f ), normalise, wOut );
}
};

View File

@ -0,0 +1,26 @@
#pragma once
#include "Fractal.h"
#include "DomainWarp.h"
namespace FastNoise
{
// Fractal node whose source must be a DomainWarp. It reuses the
// Fractal<DomainWarp> parameters (gain, octaves, lacunarity) and only changes
// the metadata: the source is labelled "Domain Warp Source" and the node is
// additionally listed under the "Domain Warp" group.
class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp>
{
FASTNOISE_METADATA( Fractal<DomainWarp> )
Metadata( const char* className ) : Fractal<DomainWarp>::Metadata( className, "Domain Warp Source" )
{
groups.push_back( "Domain Warp" );
}
};
};
// Second fractal domain-warp node type. Its declaration and metadata are
// identical to DomainWarpFractalProgressive; the two differ only in their SIMD
// implementations. (The spelling "Independant" is part of the public class name.)
class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp>
{
FASTNOISE_METADATA( Fractal<DomainWarp> )
Metadata( const char* className ) : Fractal<DomainWarp>::Metadata( className, "Domain Warp Source" )
{
groups.push_back( "Domain Warp" );
}
};
};
}

View File

@ -0,0 +1,71 @@
#include "FastSIMD/InlInclude.h"
#include "DomainWarpFractal.h"
template<typename FS>
class FS_T<FastNoise::DomainWarpFractalProgressive, FS> : public virtual FastNoise::DomainWarpFractalProgressive, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Applies the attached warp once per octave. `pos` is both the warp input
    // and the warp output, so every octave starts from the position produced
    // by the previous one.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        auto* warpNode = this->GetSourceSIMD( mSource );

        // Amplitude and gain are sampled before any warping has been applied.
        float32v amplitude = float32v( mFractalBounding ) * this->GetSourceValue( warpNode->GetWarpAmplitude(), seed, pos... );
        float32v frequency = float32v( warpNode->GetWarpFrequency() );
        int32v octaveSeed = seed;
        float32v gainPerOctave = this->GetSourceValue( mGain, seed, pos... );
        float32v lacunarityPerOctave( mLacunarity );

        // The first octave always runs, whatever mOctaves is.
        warpNode->Warp( octaveSeed, amplitude, (pos * frequency)..., pos... );

        int octave = 1;
        while( octave < mOctaves )
        {
            octaveSeed -= int32v( -1 ); // subtracting -1 advances the seed by one
            frequency *= lacunarityPerOctave;
            amplitude *= gainPerOctave;
            warpNode->Warp( octaveSeed, amplitude, (pos * frequency)..., pos... );
            ++octave;
        }

        // Sample the warp node's own source with the original (un-incremented) seed.
        return this->GetSourceValue( warpNode->GetWarpSource(), seed, pos... );
    }
};
// Applies the attached warp once per octave, always feeding the warp with the
// ORIGINAL position: the lambda receives two copies of the coordinates, one
// that stays untouched (noisePos) and one that accumulates the displacement
// (warpPos).
template<typename FS>
class FS_T<FastNoise::DomainWarpFractalIndependant, FS> : public virtual FastNoise::DomainWarpFractalIndependant, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// Immediately-invoked lambda: expanding `pos...` twice at the call site is what
// yields the two independent copies of the coordinate pack.
return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
{
auto* warp = this->GetSourceSIMD( mSource );
// Amplitude and gain are sampled at the un-warped position.
float32v amp = float32v( mFractalBounding ) * this->GetSourceValue( warp->GetWarpAmplitude(), seed, noisePos... );
float32v freq = float32v( warp->GetWarpFrequency() );
int32v seedInc = seed;
float32v gain = this->GetSourceValue( mGain, seed, noisePos... );
float32v lacunarity( mLacunarity );
// First octave always runs; noisePos is passed by value to Warp, only warpPos is modified.
warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
for( int i = 1; i < mOctaves; i++ )
{
// Subtracting -1 advances the seed by one for the next octave.
seedInc -= int32v( -1 );
freq *= lacunarity;
amp *= gain;
warp->Warp( seedInc, amp, (noisePos * freq)..., warpPos... );
}
// Sample the warp node's own source at the accumulated position, with the original seed.
return this->GetSourceValue( warp->GetWarpSource(), seed, warpPos... );
} ( pos..., pos... );
}
};

View File

@ -0,0 +1,103 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Common state for fractal nodes: a source of type T plus gain, octave count
// and lacunarity. mFractalBounding caches the reciprocal of the summed octave
// amplitudes and is refreshed whenever gain or octave count changes.
template<typename T = Generator>
class Fractal : public virtual Generator
{
public:
    void SetSource( SmartNodeArg<T> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Constant gain.
    void SetGain( float value )
    {
        mGain = value;
        CalculateFractalBounding();
    }

    // Generator-driven gain; the constant is reset to 1 first, so the bounding
    // is computed for a gain of 1.
    void SetGain( SmartNodeArg<> gen )
    {
        mGain = 1.0f;
        this->SetSourceMemberVariable( mGain, gen );
        CalculateFractalBounding();
    }

    void SetOctaveCount( int32_t value )
    {
        mOctaves = value;
        CalculateFractalBounding();
    }

    void SetLacunarity( float value )
    {
        mLacunarity = value;
    }

protected:
    GeneratorSourceT<T> mSource;
    HybridSource mGain = 0.5f;
    int32_t mOctaves = 3;
    float mLacunarity = 2.0f;
    float mFractalBounding = 1.0f / 1.75f;

    // Recomputes mFractalBounding = 1 / (1 + g + g^2 + ...) over mOctaves terms,
    // where g is the absolute value of the constant gain.
    virtual void CalculateFractalBounding()
    {
        float gainMagnitude = std::abs( mGain.constant );
        float octaveAmplitude = gainMagnitude;
        float amplitudeSum = 1.0f;

        int32_t octave = 1;
        while( octave < mOctaves )
        {
            amplitudeSum += octaveAmplitude;
            octaveAmplitude *= gainMagnitude;
            ++octave;
        }

        mFractalBounding = 1.0f / amplitudeSum;
    }

    FASTNOISE_METADATA_ABSTRACT( Generator )

        // sourceName lets derived nodes relabel the source input.
        Metadata( const char* className, const char* sourceName = "Source" ) : Generator::Metadata( className )
        {
            groups.push_back( "Fractal" );
            this->AddGeneratorSource( sourceName, &Fractal::SetSource );
            this->AddHybridSource( "Gain", 0.5f, &Fractal::SetGain, &Fractal::SetGain );
            this->AddVariable( "Octaves", 3, &Fractal::SetOctaveCount, 2, 16 );
            this->AddVariable( "Lacunarity", 2.0f, &Fractal::SetLacunarity );
        }
    };
};
// Fractal node type with no extra parameters; it reuses the Fractal<> metadata
// constructor unchanged. The octave combination lives in the SIMD implementation.
class FractalFBm : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node type with no extra parameters; it reuses the Fractal<> metadata
// constructor unchanged. The octave combination lives in the SIMD implementation.
class FractalBillow : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node type with no extra parameters; it reuses the Fractal<> metadata
// constructor unchanged. The octave combination lives in the SIMD implementation.
class FractalRidged : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node with an additional per-octave weight. mWeightBounding caches
// 2 / (1 + 1/w + 1/w^2 + ...) over mOctaves terms, where w is mWeightAmp.
class FractalRidgedMulti : public virtual Fractal<>
{
public:
    void SetWeightAmplitude( float value )
    {
        mWeightAmp = value;
        CalculateFractalBounding();
    }

protected:
    float mWeightAmp = 2.0f;
    float mWeightBounding = 2.0f / 1.75f;

    // Refreshes the base-class bounding, then the weight bounding.
    void CalculateFractalBounding() override
    {
        Fractal::CalculateFractalBounding();

        float octaveWeight = 1.0f;
        float weightSum = octaveWeight;

        int32_t octave = 1;
        while( octave < mOctaves )
        {
            octaveWeight *= mWeightAmp;
            weightSum += 1.0f / octaveWeight;
            ++octave;
        }

        mWeightBounding = 2.0f / weightSum;
    }

    FASTNOISE_METADATA( Fractal )

        // Adds the weight amplitude on top of the inherited fractal variables.
        Metadata( const char* className ) : Fractal::Metadata( className )
        {
            this->AddVariable( "Weight Amplitude", 2.0f, &FractalRidgedMulti::SetWeightAmplitude );
        }
    };
};
}

View File

@ -0,0 +1,128 @@
#include "FastSIMD/InlInclude.h"
#include "Fractal.h"
// SIMD-level glue for Fractal<T>: combines the Fractal<T> parameters with the
// SIMD Generator implementation. It adds nothing itself; the concrete fractal
// specialisations derive from it.
template<typename FS, typename T>
class FS_T<FastNoise::Fractal<T>, FS> : public virtual FastNoise::Fractal<T>, public FS_T<FastNoise::Generator, FS>
{
};
template<typename FS>
class FS_T<FastNoise::FractalFBm, FS> : public virtual FastNoise::FractalFBm, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Sums mOctaves samples of the source. Each further octave uses the next
    // seed, a position multiplied by the lacunarity and an amplitude multiplied
    // by the gain; the total is scaled by mFractalBounding.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v gainPerOctave = this->GetSourceValue( mGain , seed, pos... );
        float32v total = this->GetSourceValue( mSource, seed, pos... );

        float32v lacunarityPerOctave( mLacunarity );
        float32v amplitude( 1 );

        int octave = 1;
        while( octave < mOctaves )
        {
            seed -= int32v( -1 ); // subtracting -1 advances the seed by one
            amplitude *= gainPerOctave;

            // The position pack is scaled in place, so the scaling compounds per octave.
            float32v octaveValue = this->GetSourceValue( mSource, seed, (pos *= lacunarityPerOctave)... );
            total += octaveValue * amplitude;

            ++octave;
        }

        return total * float32v( mFractalBounding );
    }
};
template<typename FS>
class FS_T<FastNoise::FractalBillow, FS> : public virtual FastNoise::FractalBillow, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Same octave scheme as the other fractals, but every sample is mapped to
    // |sample| * 2 - 1 before it is accumulated.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v total = FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) ) * float32v( 2 ) - float32v( 1 );
        float32v gainPerOctave = this->GetSourceValue( mGain, seed, pos... );

        float32v lacunarityPerOctave( mLacunarity );
        float32v amplitude( 1 );

        int octave = 1;
        while( octave < mOctaves )
        {
            seed -= int32v( -1 ); // subtracting -1 advances the seed by one
            amplitude *= gainPerOctave;

            // The position pack is scaled in place, so the scaling compounds per octave.
            float32v octaveValue = FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarityPerOctave)... ) ) * float32v( 2 ) - float32v( 1 );
            total += octaveValue * amplitude;

            ++octave;
        }

        return total * float32v( mFractalBounding );
    }
};
template<typename FS>
class FS_T<FastNoise::FractalRidged, FS> : public virtual FastNoise::FractalRidged, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Every sample is mapped to 1 - |sample|. The first octave seeds the total,
    // later octaves are SUBTRACTED from it, and the result is returned without
    // applying mFractalBounding.
    template<typename... P>
    FS_INLINE float32v GenT(int32v seed, P... pos) const
    {
        float32v total = float32v( 1 ) - FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
        float32v gainPerOctave = this->GetSourceValue( mGain, seed, pos... );

        float32v lacunarityPerOctave( mLacunarity );
        float32v amplitude( 1 );

        int octave = 1;
        while( octave < mOctaves )
        {
            seed -= int32v( -1 ); // subtracting -1 advances the seed by one
            amplitude *= gainPerOctave;

            // The position pack is scaled in place, so the scaling compounds per octave.
            float32v octaveValue = float32v( 1 ) - FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarityPerOctave)... ) );
            total -= octaveValue * amplitude;

            ++octave;
        }

        return total;
    }
};
template<typename FS>
class FS_T<FastNoise::FractalRidgedMulti, FS> : public virtual FastNoise::FractalRidgedMulti, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Ridged multi-fractal: every sample is mapped to 1 - |sample|, and each
    // octave is modulated by the clamped, gain-scaled value of the previous
    // octave. Octave i is divided by mWeightAmp^i before being summed, and the
    // total is normalised with the precomputed mWeightBounding.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v offset( 1 );
        float32v sum = offset - FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
        float32v gain = this->GetSourceValue( mGain, seed, pos... ) * float32v( 6 );
        float32v lacunarity( mLacunarity );
        float32v amp = sum;

        float32v weightAmp( mWeightAmp );
        float32v weight = weightAmp;

        for( int i = 1; i < mOctaves; i++ )
        {
            // The previous octave's value, scaled by the gain and clamped to [0, 1],
            // becomes this octave's amplitude.
            amp *= gain;
            amp = FS_Min_f32( FS_Max_f32( amp, float32v( 0 ) ), float32v( 1 ) );

            seed -= int32v( -1 ); // subtracting -1 advances the seed by one
            float32v value = offset - FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ));

            value *= amp;
            amp = value;

            // The running weight total is not needed here: normalisation uses
            // mWeightBounding, so the unused accumulator has been dropped.
            sum += value * FS_Reciprocal_f32( weight );
            weight *= weightAmp;
        }

        return sum * float32v( mWeightBounding ) - offset;
    }
};

View File

@ -0,0 +1,149 @@
#pragma once
#include <cassert>
#include <cmath>
#include "FastNoise/FastNoiseMetadata.h"
namespace FastNoise
{
// Coordinate axes. Count is not an axis: it is the number of axes and is used
// to size per-dimension arrays.
enum class Dim
{
X, Y, Z, W,
Count
};
// Selects the distance metric passed to distance-based nodes.
// NOTE(review): the exact formula for each enumerator (in particular Hybrid) is
// implemented elsewhere and is not visible in this header.
enum class DistanceFunction
{
Euclidean,
EuclideanSquared,
Manhattan,
Hybrid,
};
// Running [min, max] range of generated values. A default-constructed range is
// empty (min = +inf, max = -inf), so folding it into another range is a no-op.
struct OutputMinMax
{
    float min = INFINITY;
    float max = -INFINITY;

    // Widen the range so that it contains a single sample.
    OutputMinMax& operator <<( float sample )
    {
        const float lowest = fminf( min, sample );
        const float highest = fmaxf( max, sample );
        min = lowest;
        max = highest;
        return *this;
    }

    // Widen the range so that it contains another range.
    OutputMinMax& operator <<( const OutputMinMax& other )
    {
        const float lowest = fminf( min, other.min );
        const float highest = fmaxf( max, other.max );
        min = lowest;
        max = highest;
        return *this;
    }
};
// Common storage for a node input of generator type T.
template<typename T>
struct BaseSource
{
using Type = T;
// Owning reference to the attached node.
SmartNode<T> base;
// Type-erased pointer to the attached node's SIMD implementation; filled in by
// Generator::SetSourceMemberVariable and null while nothing is attached.
void* simdGeneratorPtr = nullptr;
protected:
// Protected so that only the derived source types can be instantiated.
BaseSource() = default;
};
// A node input that must be another generator (there is no constant fallback).
template<typename T>
struct GeneratorSourceT : BaseSource<T>
{ };
// A node input that is either a generator or, when none is attached, a
// constant. The constructor is deliberately implicit so that a source can be
// initialised or assigned directly from a float.
template<typename T>
struct HybridSourceT : BaseSource<T>
{
    float constant;

    HybridSourceT( float f = 0.0f )
        : constant( f )
    {
    }
};
// Abstract interface shared by every noise node. The bulk generation entry
// points are implemented once per SIMD level by the FS_T<Generator, FS>
// specialisation.
class Generator
{
public:
using Metadata = FastNoise::Metadata;
friend Metadata;
virtual ~Generator() = default;
// SIMD level this node was instantiated for.
virtual FastSIMD::eLevel GetSIMDLevel() const = 0;
virtual const Metadata* GetMetadata() const = 0;
// Fill noiseOut with xSize * ySize values sampled on an integer grid starting
// at (xStart, yStart); grid coordinates are multiplied by frequency.
virtual OutputMinMax GenUniformGrid2D( float* noiseOut,
int32_t xStart, int32_t yStart,
int32_t xSize, int32_t ySize,
float frequency, int32_t seed ) const = 0;
// 3D counterpart: xSize * ySize * zSize values.
virtual OutputMinMax GenUniformGrid3D( float* noiseOut,
int32_t xStart, int32_t yStart, int32_t zStart,
int32_t xSize, int32_t ySize, int32_t zSize,
float frequency, int32_t seed ) const = 0;
// Sample `count` arbitrary positions; each offset is added to the matching
// coordinate array.
virtual OutputMinMax GenPositionArray2D( float* noiseOut, int32_t count,
const float* xPosArray, const float* yPosArray,
float xOffset, float yOffset, int32_t seed ) const = 0;
virtual OutputMinMax GenPositionArray3D( float* noiseOut, int32_t count,
const float* xPosArray, const float* yPosArray, const float* zPosArray,
float xOffset, float yOffset, float zOffset, int32_t seed ) const = 0;
// Fill noiseOut with xSize * ySize values intended to tile in both directions.
virtual OutputMinMax GenTileable2D( float* noiseOut,
int32_t xSize, int32_t ySize,
float frequency, int32_t seed ) const = 0;
protected:
// Attach `gen` to one of this node's source members. Asserts that the node is
// non-null and was built for the same SIMD level, stores it, and caches the
// pointer to its SIMD implementation in the member.
template<typename T>
void SetSourceMemberVariable( BaseSource<T>& memberVariable, SmartNodeArg<T> gen )
{
static_assert( std::is_base_of_v<Generator, T> );
assert( gen.get() );
assert( GetSIMDLevel() == gen->GetSIMDLevel() ); // Ensure that all SIMD levels match
memberVariable.base = gen;
SetSourceSIMDPtr( dynamic_cast<Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
}
private:
// Implemented per SIMD level: writes the SIMD-typed pointer for `base` to *simdPtr.
virtual void SetSourceSIMDPtr( Generator* base, void** simdPtr ) = 0;
};
using GeneratorSource = GeneratorSourceT<Generator>;
using HybridSource = HybridSourceT<Generator>;
// Fixed-size array holding one T per coordinate axis (Dim::Count entries),
// indexable with operator[].
template<typename T>
struct PerDimensionVariable
{
    using Type = T;

    T varArray[(int)Dim::Count];

    // Initialises every axis with the same value (converted from U to T).
    template<typename U = T>
    PerDimensionVariable( U value = 0 )
    {
        for( size_t axis = 0; axis < (size_t)Dim::Count; ++axis )
        {
            varArray[axis] = value;
        }
    }

    T& operator[]( size_t i ) { return varArray[i]; }

    const T& operator[]( size_t i ) const { return varArray[i]; }
};
}

View File

@ -0,0 +1,343 @@
#include <cassert>
#include <cstring>
#include "FastSIMD/InlInclude.h"
#include "Generator.h"
#ifdef FS_SIMD_CLASS
#pragma warning( disable:4250 )
#endif
template<typename FS>
class FS_T<FastNoise::Generator, FS> : public virtual FastNoise::Generator
{
FASTSIMD_DECLARE_FS_TYPES;
public:
// Per-vector sample functions that every SIMD node implements for 2D and 3D.
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const = 0;
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const = 0;
// 4D is optional: by default the w coordinate is dropped and the 3D overload is used.
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const { return Gen( seed, x, y, z ); };
// Helper for nodes that implement a single variadic GenT( seed, pos... ):
// expands to the three Gen overrides (2D, 3D, 4D), each forwarding to GenT.
#define FASTNOISE_IMPL_GEN_T\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const override { return GenT( seed, x, y ); }\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
// Reports the SIMD level of the FS type this class template was instantiated with.
FastSIMD::eLevel GetSIMDLevel() const final
{
return FS::SIMD_Level;
}
// Pointer type that is stored, type-erased, in BaseSource::simdGeneratorPtr.
using VoidPtrStorageType = FS_T<Generator, FS>*;

// Resolves `base` to its SIMD implementation for this FS level and stores the
// result, type-erased, in *simdPtr. Asserts if `base` is not such an implementation.
void SetSourceSIMDPtr( Generator* base, void** simdPtr ) final
{
    VoidPtrStorageType simdGenerator = dynamic_cast<VoidPtrStorageType>( base );
    assert( simdGenerator );
    *simdPtr = static_cast<void*>( simdGenerator );
}
// Evaluates a hybrid source: the attached generator if there is one, otherwise
// the constant broadcast to a vector.
template<typename T, typename... POS>
FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos ) const
{
    if( !memberVariable.simdGeneratorPtr )
    {
        return float32v( memberVariable.constant );
    }

    return reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr )->Gen( seed, pos... );
}
// Evaluates a mandatory generator source. A generator must already be attached
// (asserted).
template<typename T, typename... POS>
FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos ) const
{
    assert( memberVariable.simdGeneratorPtr );

    return reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr )->Gen( seed, pos... );
}
// Returns the attached source as its typed SIMD implementation, FS_T<T, FS>,
// so that T-specific members can be called on it. A generator must already be
// attached (asserted).
template<typename T>
FS_INLINE const FS_T<T, FS>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable ) const
{
    assert( memberVariable.simdGeneratorPtr );

    VoidPtrStorageType storedGenerator = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );
    return static_cast<FS_T<T, FS>*>( storedGenerator );
}
// Fills noiseOut with xSize * ySize values sampled on the integer grid starting
// at (xStart, yStart), x varying fastest; grid coordinates are multiplied by
// `frequency`. Requires xSize >= FS_Size_32() (asserted).
// Returns the value range when FASTNOISE_CALC_MIN_MAX is enabled, otherwise a
// default (empty) OutputMinMax.
FastNoise::OutputMinMax GenUniformGrid2D( float* noiseOut, int32_t xStart, int32_t yStart, int32_t xSize, int32_t ySize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( xStart );
    int32v yIdx( yStart );

    float32v freqV( frequency );

    int32v xSizeV( xSize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );

    // Widen before multiplying so the product cannot overflow int32_t.
    size_t totalValues = (size_t)xSize * (size_t)ySize;
    size_t index = 0;

    xIdx += int32v::FS_Incremented();

    // Written as an addition so the bound cannot wrap around when totalValues
    // is smaller than one vector. The last (possibly partial) vector is left
    // for DoRemaining.
    while( index + FS_Size_32() < totalValues )
    {
        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;

        float32v gen = Gen( int32v( seed ), xPos, yPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += FS_Size_32();
        xIdx += int32v( FS_Size_32() );

        // Lanes that ran past the end of the row move to the start of the next one.
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );
    }

    float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
    float32v yPos = FS_Converti32_f32( yIdx ) * freqV;

    float32v gen = Gen( int32v( seed ), xPos, yPos );

    return DoRemaining( noiseOut, totalValues, index, min, max, gen );
}
// Fills noiseOut with xSize * ySize * zSize values sampled on the integer grid
// starting at (xStart, yStart, zStart), x varying fastest, then y, then z; grid
// coordinates are multiplied by `frequency`. Requires xSize >= FS_Size_32()
// (asserted).
// Returns the value range when FASTNOISE_CALC_MIN_MAX is enabled, otherwise a
// default (empty) OutputMinMax.
FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int32_t xStart, int32_t yStart, int32_t zStart, int32_t xSize, int32_t ySize, int32_t zSize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( xStart );
    int32v yIdx( yStart );
    int32v zIdx( zStart );

    float32v freqV( frequency );

    int32v xSizeV( xSize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );
    int32v ySizeV( ySize );
    int32v yMax = ySizeV + yIdx + int32v( -1 );

    // Widen before multiplying: a cubic volume overflows int32_t well within
    // practical sizes (e.g. 1300^3).
    size_t totalValues = (size_t)xSize * (size_t)ySize * (size_t)zSize;
    size_t index = 0;

    xIdx += int32v::FS_Incremented();

    // Written as an addition so the bound cannot wrap around when totalValues
    // is smaller than one vector. The last (possibly partial) vector is left
    // for DoRemaining.
    while( index + FS_Size_32() < totalValues )
    {
        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
        float32v zPos = FS_Converti32_f32( zIdx ) * freqV;

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += FS_Size_32();
        xIdx += int32v( FS_Size_32() );

        // Lanes past the end of a row move to the next row...
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );

        // ...and lanes past the last row move to the next slice.
        mask32v yReset = yIdx > yMax;
        zIdx = FS_MaskedIncrement_i32( zIdx, yReset );
        yIdx = FS_MaskedSub_i32( yIdx, ySizeV, yReset );
    }

    float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
    float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
    float32v zPos = FS_Converti32_f32( zIdx ) * freqV;

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );

    return DoRemaining( noiseOut, totalValues, index, min, max, gen );
}
// Samples `count` positions (xPosArray[i] + xOffset, yPosArray[i] + yOffset)
// and writes the results to noiseOut. A non-positive count does nothing and
// returns a default (empty) OutputMinMax.
// Returns the value range when FASTNOISE_CALC_MIN_MAX is enabled.
// NOTE(review): the final step still issues a full-width FS_Load_f32 at
// `index`, so when fewer than FS_Size_32() positions remain it presumably reads
// past the end of the position arrays - confirm that callers pad their inputs.
FastNoise::OutputMinMax GenPositionArray2D( float* noiseOut, int32_t count, const float* xPosArray, const float* yPosArray, float xOffset, float yOffset, int32_t seed ) const final
{
    if( count <= 0 )
    {
        return FastNoise::OutputMinMax();
    }

    float32v min( INFINITY );
    float32v max( -INFINITY );

    // Do the loop-bound arithmetic in a signed 64-bit type: mixing the signed
    // count with an unsigned vector size would wrap around when count is
    // smaller than one vector and send the loop far past the buffers.
    const int64_t vectorSize = (int64_t)FS_Size_32();
    const int64_t total = count;

    size_t index = 0;
    while( (int64_t)index + vectorSize < total )
    {
        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );

        float32v gen = Gen( int32v( seed ), xPos, yPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif
        index += FS_Size_32();
    }

    // Last (possibly partial) vector; DoRemaining stores only what is left.
    float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
    float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );

    float32v gen = Gen( int32v( seed ), xPos, yPos );

    return DoRemaining( noiseOut, count, index, min, max, gen );
}
// Samples `count` positions (x/y/zPosArray[i] plus the matching offset) and
// writes the results to noiseOut. A non-positive count does nothing and returns
// a default (empty) OutputMinMax.
// Returns the value range when FASTNOISE_CALC_MIN_MAX is enabled.
// NOTE(review): the final step still issues a full-width FS_Load_f32 at
// `index`, so when fewer than FS_Size_32() positions remain it presumably reads
// past the end of the position arrays - confirm that callers pad their inputs.
FastNoise::OutputMinMax GenPositionArray3D( float* noiseOut, int32_t count, const float* xPosArray, const float* yPosArray, const float* zPosArray, float xOffset, float yOffset, float zOffset, int32_t seed ) const final
{
    if( count <= 0 )
    {
        return FastNoise::OutputMinMax();
    }

    float32v min( INFINITY );
    float32v max( -INFINITY );

    // Cast the vector size to a signed type as well: widening only `count`
    // does not help if FS_Size_32() is a 64-bit unsigned value, because the
    // subtraction would then still be carried out unsigned and wrap around.
    const int64_t vectorSize = (int64_t)FS_Size_32();
    const int64_t total = count;

    int32_t index = 0;
    while( index + vectorSize < total )
    {
        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
        float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif
        index += FS_Size_32();
    }

    // Last (possibly partial) vector; DoRemaining stores only what is left.
    float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
    float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
    float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );

    return DoRemaining( noiseOut, count, index, min, max, gen );
}
// Fills noiseOut with xSize * ySize values that tile in x and y. Each grid
// coordinate is turned into an angle and mapped onto a circle
// (cos -> x/y, sin -> z/w), and the 4D Gen overload is sampled there.
// Requires xSize >= FS_Size_32() (asserted).
// Returns the value range when FASTNOISE_CALC_MIN_MAX is enabled, otherwise a
// default (empty) OutputMinMax.
FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int32_t xSize, int32_t ySize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( 0 );
    int32v yIdx( 0 );

    int32v xSizeV( xSize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );

    // Widen before multiplying so the product cannot overflow int32_t.
    size_t totalValues = (size_t)xSize * (size_t)ySize;
    size_t index = 0;

    // 1 / (2 * pi): size * pi2Recip is the radius of a circle whose
    // circumference equals the grid size along that axis.
    float pi2Recip( 0.15915493667f );
    float xSizePi = (float)xSize * pi2Recip;
    float ySizePi = (float)ySize * pi2Recip;
    float32v xFreq = float32v( frequency * xSizePi );
    float32v yFreq = float32v( frequency * ySizePi );
    float32v xMul = float32v( 1 / xSizePi );
    float32v yMul = float32v( 1 / ySizePi );

    xIdx += int32v::FS_Incremented();

    // Written as an addition so the bound cannot wrap around when totalValues
    // is smaller than one vector. The last (possibly partial) vector is left
    // for DoRemaining.
    while( index + FS_Size_32() < totalValues )
    {
        float32v xF = FS_Converti32_f32( xIdx ) * xMul;
        float32v yF = FS_Converti32_f32( yIdx ) * yMul;

        float32v xPos = FS_Cos_f32( xF ) * xFreq;
        float32v yPos = FS_Cos_f32( yF ) * yFreq;
        float32v zPos = FS_Sin_f32( xF ) * xFreq;
        float32v wPos = FS_Sin_f32( yF ) * yFreq;

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += FS_Size_32();
        xIdx += int32v( FS_Size_32() );

        // Lanes that ran past the end of the row move to the start of the next one.
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );
    }

    float32v xF = FS_Converti32_f32( xIdx ) * xMul;
    float32v yF = FS_Converti32_f32( yIdx ) * yMul;

    float32v xPos = FS_Cos_f32( xF ) * xFreq;
    float32v yPos = FS_Cos_f32( yF ) * yFreq;
    float32v zPos = FS_Sin_f32( xF ) * xFreq;
    float32v wPos = FS_Sin_f32( yF ) * yFreq;

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );

    return DoRemaining( noiseOut, totalValues, index, min, max, gen );
}
private:
// Stores the final vector of a bulk generation call and produces the result
// range.
//   noiseOut / totalValues : output buffer and its length in floats
//   index                  : first element that has not been written yet
//   min / max              : per-lane range accumulated by the caller
//   finalGen               : values generated for the elements from `index` on
// Only the elements that are still missing are written; at most one vector may
// be outstanding (asserted). When FASTNOISE_CALC_MIN_MAX is disabled the
// returned range is a default (empty) OutputMinMax.
static FS_INLINE FastNoise::OutputMinMax DoRemaining( float* noiseOut, size_t totalValues, size_t index, float32v min, float32v max, float32v finalGen )
{
    FastNoise::OutputMinMax minMax;
    size_t remaining = totalValues - index;

    // More than one vector outstanding would make the memcpy below read past
    // the end of finalGen; this also catches index > totalValues (wrap-around).
    assert( remaining <= FS_Size_32() );

    if( remaining == FS_Size_32() )
    {
        FS_Store_f32( &noiseOut[index], finalGen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, finalGen );
        max = FS_Max_f32( max, finalGen );
#endif
    }
    else
    {
        // Partial vector: copy only the lanes that still fit into the buffer.
        std::memcpy( &noiseOut[index], &finalGen, remaining * sizeof( float ) );

#if FASTNOISE_CALC_MIN_MAX
        // A plain loop (not do/while) so nothing is read from noiseOut when no
        // element remains.
        for( ; index < totalValues; ++index )
        {
            minMax << noiseOut[index];
        }
#endif
    }

#if FASTNOISE_CALC_MIN_MAX
    // Reduce the per-lane range accumulated from the full vectors.
    float* minP = reinterpret_cast<float*>(&min);
    float* maxP = reinterpret_cast<float*>(&max);
    for( size_t i = 0; i < FS_Size_32(); i++ )
    {
        minMax << FastNoise::OutputMinMax{ minP[i], maxP[i] };
    }
#endif

    return minMax;
}
};

View File

@ -0,0 +1,321 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Modifier node holding a source generator and a scalar scale factor. How the
// scale is applied is defined by the SIMD implementation, which is not part of
// this header.
class DomainScale : public virtual Generator
{
public:
void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
void SetScale( float value ) { mScale = value; }
protected:
GeneratorSource mSource;
float mScale = 1.0f;
FASTNOISE_METADATA( Generator )
// Registers the node under "Modifiers" with its source and its scale
// (default 1.0f, matching the member initialiser).
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Modifiers" );
this->AddGeneratorSource( "Source", &DomainScale::SetSource );
this->AddVariable( "Scale", 1.0f, &DomainScale::SetScale );
}
};
};
// Modifier node: holds a source generator and one offset per dimension.
// Each offset can be a constant or be driven by another generator.
class DomainOffset : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets a constant offset for dimension D.
    template<Dim D>
    void SetOffset( float value )
    {
        mOffset[static_cast<int>( D )] = value;
    }

    // Sets a generator-driven offset for dimension D.
    template<Dim D>
    void SetOffset( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mOffset[static_cast<int>( D )], gen );
    }

protected:
    GeneratorSource mSource;
    PerDimensionVariable<HybridSource> mOffset;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and per-dimension "Offset".
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainOffset::SetSource );
            this->AddPerDimensionHybridSource( "Offset", 0.0f,
                []( DomainOffset* node ) { return std::ref( node->mOffset ); } );
        }
    };
};
// Modifier node: holds a source generator and a 3x3 rotation matrix built
// from yaw, pitch and roll angles.
class DomainRotate : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Each angle setter caches the angle's cosine/sine (value is passed
    // straight to cosf/sinf) and then rebuilds the matrix.
    void SetYaw( float value )
    {
        mYawCos = cosf( value );
        mYawSin = sinf( value );
        CalculateRotation();
    }

    void SetPitch( float value )
    {
        mPitchCos = cosf( value );
        mPitchSin = sinf( value );
        CalculateRotation();
    }

    void SetRoll( float value )
    {
        mRollCos = cosf( value );
        mRollSin = sinf( value );
        CalculateRotation();
    }

protected:
    GeneratorSource mSource;

    // Cached cos/sin of each angle; the defaults correspond to a zero angle.
    float mYawCos = 1.0f;
    float mYawSin = 0.0f;
    float mPitchCos = 1.0f;
    float mPitchSin = 0.0f;
    float mRollCos = 1.0f;
    float mRollSin = 0.0f;

    // Rotation matrix rows (X, Y, Z) with columns a, b, c; identity by default.
    float mXa = 1.0f;
    float mXb = 0.0f;
    float mXc = 0.0f;
    float mYa = 0.0f;
    float mYb = 1.0f;
    float mYc = 0.0f;
    float mZa = 0.0f;
    float mZb = 0.0f;
    float mZc = 1.0f;

    // Recomputes all nine matrix entries from the cached cos/sin values.
    void CalculateRotation()
    {
        // X row
        mXa = mYawCos * mPitchCos;
        mXb = mYawCos * mPitchSin * mRollSin - mYawSin * mRollCos;
        mXc = mYawCos * mPitchSin * mRollCos + mYawSin * mRollSin;

        // Y row
        mYa = mYawSin * mPitchCos;
        mYb = mYawSin * mPitchSin * mRollSin + mYawCos * mRollCos;
        mYc = mYawSin * mPitchSin * mRollCos - mYawCos * mRollSin;

        // Z row
        mZa = -mPitchSin;
        mZb = mPitchCos * mRollSin;
        mZc = mPitchCos * mRollCos;
    }

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and the three angles.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainRotate::SetSource );
            this->AddVariable( "Yaw", 0.0f, &DomainRotate::SetYaw );
            this->AddVariable( "Pitch", 0.0f, &DomainRotate::SetPitch );
            this->AddVariable( "Roll", 0.0f, &DomainRotate::SetRoll );
        }
    };
};
// Modifier node: holds a source generator and an integer seed offset.
class SeedOffset : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Stores the seed offset (1 until set).
    void SetOffset( int32_t value )
    {
        mOffset = value;
    }

protected:
    GeneratorSource mSource;
    int32_t mOffset = 1;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and "Seed Offset" variable.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &SeedOffset::SetSource );
            this->AddVariable( "Seed Offset", 1, &SeedOffset::SetOffset );
        }
    };
};
// Modifier node: holds a source generator plus an input range
// [From Min, From Max] and an output range [To Min, To Max].
class Remap : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets all four range bounds at once.
    void SetRemap( float fromMin, float fromMax, float toMin, float toMax )
    {
        mFromMin = fromMin;
        mFromMax = fromMax;
        mToMin = toMin;
        mToMax = toMax;
    }

protected:
    GeneratorSource mSource;
    float mFromMin = -1.0f;
    float mFromMax = 1.0f;
    float mToMin = 0.0f;
    float mToMax = 1.0f;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers"; each bound is registered as its own
        // variable whose setter writes the matching member directly.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &Remap::SetSource );

            this->AddVariable( "From Min", -1.0f,
                []( Remap* node, float value ) { node->mFromMin = value; } );

            this->AddVariable( "From Max", 1.0f,
                []( Remap* node, float value ) { node->mFromMax = value; } );

            this->AddVariable( "To Min", 0.0f,
                []( Remap* node, float value ) { node->mToMin = value; } );

            this->AddVariable( "To Max", 1.0f,
                []( Remap* node, float value ) { node->mToMax = value; } );
        }
    };
};
// Modifier node: holds a source generator and the [Min, Max] value range
// used when converting the source output to a packed colour.
class ConvertRGBA8 : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets both range bounds at once.
    void SetMinMax( float min, float max )
    {
        mMin = min;
        mMax = max;
    }

protected:
    GeneratorSource mSource;
    float mMin = -1.0f;
    float mMax = 1.0f;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers"; "Min" and "Max" are registered as
        // separate variables whose setters write the matching member directly.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &ConvertRGBA8::SetSource );

            this->AddVariable( "Min", -1.0f,
                []( ConvertRGBA8* node, float value ) { node->mMin = value; } );

            this->AddVariable( "Max", 1.0f,
                []( ConvertRGBA8* node, float value ) { node->mMax = value; } );
        }
    };
};
// Modifier node: holds a source generator, a step multiplier and a
// smoothness amount, together with reciprocals derived from them.
class Terrace : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Stores the multiplier and its reciprocal.
    void SetMultiplier( float multiplier )
    {
        mMultiplier = multiplier;
        mMultiplierRecip = 1.0f / multiplier;
    }

    // Stores the smoothness; the derived factor is only refreshed for a
    // non-zero value, which avoids dividing by zero.
    void SetSmoothness( float smoothness )
    {
        mSmoothness = smoothness;

        if( mSmoothness != 0.0f )
        {
            mSmoothnessRecip = 1.0f + 1.0f / smoothness;
        }
    }

protected:
    GeneratorSource mSource;
    float mMultiplier = 1.0f;
    float mMultiplierRecip = 1.0f;
    float mSmoothness = 0.0f;
    float mSmoothnessRecip = 0.0f;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source, "Multiplier" and "Smoothness".
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &Terrace::SetSource );
            this->AddVariable( "Multiplier", 1.0f, &Terrace::SetMultiplier );
            this->AddVariable( "Smoothness", 0.0f, &Terrace::SetSmoothness );
        }
    };
};
// Modifier node: holds a source generator and one scale factor per dimension.
class DomainAxisScale : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets the scale factor for dimension D.
    template<Dim D>
    void SetScale( float value )
    {
        mScale[static_cast<int>( D )] = value;
    }

protected:
    GeneratorSource mSource;
    // NOTE(review): the metadata below advertises a default of 1.0f; confirm that
    // PerDimensionVariable<float> initialises mScale to the same value.
    PerDimensionVariable<float> mScale;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and per-dimension "Scale".
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainAxisScale::SetSource );
            this->AddPerDimensionVariable( "Scale", 1.0f,
                []( DomainAxisScale* node ) { return std::ref( node->mScale ); } );
        }
    };
};
// Modifier node: holds a source generator and the position to use for an
// additional dimension, given either as a constant or as another generator.
class AddDimension : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Constant position for the new dimension.
    void SetNewDimensionPosition( float value )
    {
        mNewDimensionPosition = value;
    }

    // Generator-driven position for the new dimension.
    void SetNewDimensionPosition( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mNewDimensionPosition, gen );
    }

protected:
    GeneratorSource mSource;
    HybridSource mNewDimensionPosition;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and the hybrid
        // "New Dimension Position" input (both setter overloads are passed).
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &AddDimension::SetSource );
            this->AddHybridSource( "New Dimension Position", 0.0f,
                &AddDimension::SetNewDimensionPosition,
                &AddDimension::SetNewDimensionPosition );
        }
    };
};
// Modifier node: holds a source generator and the dimension to drop
// before the source is sampled.
class RemoveDimension : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Selects which dimension is removed (Y until set).
    void SetRemoveDimension( Dim dimension )
    {
        mRemoveDimension = dimension;
    }

protected:
    GeneratorSource mSource;
    Dim mRemoveDimension = Dim::Y;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source and the
        // "Remove Dimension" enum with the labels X, Y, Z and W.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &RemoveDimension::SetSource );
            this->AddVariableEnum( "Remove Dimension", Dim::Y, &RemoveDimension::SetRemoveDimension,
                "X", "Y", "Z", "W" );
        }
    };
};
// Modifier node with a single source generator and no other parameters.
class GeneratorCache : public virtual Generator
{
public:
    // Assigns the generator this node reads from.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

protected:
    GeneratorSource mSource;

    FASTNOISE_METADATA( Generator )

        // Lists the node under "Modifiers" and registers its source.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &GeneratorCache::SetSource );
        }
    };
};
}

View File

@ -0,0 +1,277 @@
#include "FastSIMD/InlInclude.h"
#include "Modifiers.h"

#include <cstring>
// SIMD implementation of DomainScale: multiplies every coordinate by mScale
// before sampling the source.
template<typename FS>
class FS_T<FastNoise::DomainScale, FS> : public virtual FastNoise::DomainScale, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // Broadcast the scalar factor once and reuse it for each coordinate.
        const float32v scale( mScale );

        return this->GetSourceValue( mSource, seed, (pos * scale)... );
    }
};
// SIMD implementation of DomainOffset: adds the per-dimension offset to each
// coordinate, then samples the source at the shifted position.
template<typename FS>
class FS_T<FastNoise::DomainOffset, FS> : public virtual FastNoise::DomainOffset, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // The position is passed twice: "original" stays untouched so every offset
        // source is sampled at the unshifted position, while "shifted" accumulates
        // the offsets one dimension at a time.
        auto sampleShifted = [this, seed]( std::remove_reference_t<P>... original, std::remove_reference_t<P>... shifted )
        {
            size_t dim = 0;
            ((shifted += this->GetSourceValue( mOffset[dim++], seed, original... )), ...);

            return this->GetSourceValue( mSource, seed, shifted... );
        };

        return sampleShifted( pos..., pos... );
    }
};
// SIMD implementation of DomainRotate: rotates the input position with the
// cached yaw/pitch/roll terms before sampling the source.
template<typename FS>
class FS_T<FastNoise::DomainRotate, FS> : public virtual FastNoise::DomainRotate, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D: when pitch and roll contribute nothing, rotate in the plane using yaw
    // only; otherwise defer to the 3D overload with z = 0.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        if( mPitchSin == 0.0f && mRollSin == 0.0f )
        {
            const float32v yawSin( mYawSin );
            const float32v yawCos( mYawCos );

            return this->GetSourceValue( mSource, seed,
                FS_FNMulAdd_f32( y, yawSin, x * yawCos ),
                FS_FMulAdd_f32( x, yawSin, y * yawCos ) );
        }

        return Gen( seed, x, y, float32v( 0 ) );
    }

    // 3D: multiply (x, y, z) by the 3x3 matrix stored in mXa..mZc, one row per output coordinate.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        return this->GetSourceValue( mSource, seed,
            FS_FMulAdd_f32( x, float32v( mXa ),
                FS_FMulAdd_f32( y, float32v( mXb ), z * float32v( mXc ) ) ),
            FS_FMulAdd_f32( x, float32v( mYa ),
                FS_FMulAdd_f32( y, float32v( mYb ), z * float32v( mYc ) ) ),
            FS_FMulAdd_f32( x, float32v( mZa ),
                FS_FMulAdd_f32( y, float32v( mZb ), z * float32v( mZc ) ) ) );
    }

    // 4D: the position is forwarded unchanged.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        // No rotation for 4D yet
        return this->GetSourceValue( mSource, seed, x, y, z, w );
    }
};
// SIMD implementation of SeedOffset: samples the source with mOffset added to the seed.
template<typename FS>
class FS_T<FastNoise::SeedOffset, FS> : public virtual FastNoise::SeedOffset, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        int32v shiftedSeed = seed + int32v( mOffset );

        return this->GetSourceValue( mSource, shiftedSeed, pos... );
    }
};
// SIMD implementation of Remap: linearly maps the source output from
// [mFromMin, mFromMax] onto [mToMin, mToMax].
template<typename FS>
class FS_T<FastNoise::Remap, FS> : public virtual FastNoise::Remap, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v source = this->GetSourceValue( mSource, seed, pos... );

        // Position of the value inside the input range (0 at mFromMin, 1 at mFromMax).
        float32v fraction = ( source - float32v( mFromMin ) ) / float32v( mFromMax - mFromMin );

        // Stretch to the output range and shift to its lower bound.
        return float32v( mToMin ) + ( fraction * float32v( mToMax - mToMin ) );
    }
};
// SIMD implementation of ConvertRGBA8: turns the source output into a packed
// 32-bit value whose top byte is 255 and whose three lower bytes all carry the
// same 0..255 level, returned bit-cast into the float vector.
template<typename FS>
class FS_T<FastNoise::ConvertRGBA8, FS> : public virtual FastNoise::ConvertRGBA8, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v level = this->GetSourceValue( mSource, seed, pos... );

        // Clamp into [mMin, mMax], then rescale that range onto 0..255.
        level = FS_Min_f32( level, float32v( mMax ) );
        level = FS_Max_f32( level, float32v( mMin ) );
        level -= float32v( mMin );
        level *= float32v( 255.0f / (mMax - mMin) );

        int32v channel = FS_Convertf32_i32( level );

        // Start from the top byte set to 255 and replicate the level into the lower three bytes.
        int32v packed = int32v( 255 << 24 );
        packed |= channel;
        packed |= channel << 8;
        packed |= channel << 16;

        return FS_Casti32_f32( packed );
    }
};
// SIMD implementation of Terrace: scales the source output by mMultiplier,
// rounds it to whole steps, optionally blends the step edges when mSmoothness
// is non-zero, and scales back by mMultiplierRecip.
template<typename FS>
class FS_T<FastNoise::Terrace, FS> : public virtual FastNoise::Terrace, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v scaled = this->GetSourceValue( mSource, seed, pos... );
        scaled *= float32v( mMultiplier );

        float32v stepped = FS_Round_f32( scaled );

        if( mSmoothness != 0.0f )
        {
            const float32v half( 0.5f );

            // Signed distance from the value to its rounded step; remember the sign,
            // then work with the magnitude.
            float32v delta = stepped - scaled;
            mask32v deltaNegative = delta < float32v( 0 );

            delta = FS_Abs_f32( delta );
            delta = half - delta;
            delta *= float32v( mSmoothnessRecip );
            delta = FS_Min_f32( delta, half );

            // Re-apply the remembered sign to the adjustment.
            delta = FS_Select_f32( deltaNegative, half - delta, delta - half );

            stepped += delta;
        }

        return stepped * float32v( mMultiplierRecip );
    }
};
// SIMD implementation of DomainAxisScale: multiplies each coordinate by the
// scale stored for its dimension, then samples the source.
template<typename FS>
class FS_T<FastNoise::DomainAxisScale, FS> : public virtual FastNoise::DomainAxisScale, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // The comma fold visits the coordinates left to right, so "axis" walks
        // through mScale in dimension order.
        size_t axis = 0;
        ((pos *= float32v( mScale[axis++] )), ...);

        return this->GetSourceValue( mSource, seed, pos... );
    }
};
// SIMD implementation of AddDimension: appends one extra coordinate, taken from
// mNewDimensionPosition, before sampling the source.
template<typename FS>
class FS_T<FastNoise::AddDimension, FS> : public virtual FastNoise::AddDimension, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        if constexpr( sizeof...(P) != (size_t)FastNoise::Dim::Count )
        {
            // Room for another dimension: evaluate its position at the incoming
            // coordinates and pass it along as the last coordinate.
            return this->GetSourceValue( mSource, seed, pos..., this->GetSourceValue( mNewDimensionPosition, seed, pos... ) );
        }
        else
        {
            // Already at the maximum dimension count: forward unchanged.
            return this->GetSourceValue( mSource, seed, pos... );
        }
    }
};
// SIMD implementation of RemoveDimension: samples the source with the selected
// coordinate left out.
template<typename FS>
class FS_T<FastNoise::RemoveDimension, FS> : public virtual FastNoise::RemoveDimension, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D input is forwarded unchanged.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        return this->GetSourceValue( mSource, seed, x, y );
    }

    // 3D input: drop X, Y or Z; any other selection forwards all three coordinates.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        if( mRemoveDimension == FastNoise::Dim::X )
        {
            return this->GetSourceValue( mSource, seed, y, z );
        }
        if( mRemoveDimension == FastNoise::Dim::Y )
        {
            return this->GetSourceValue( mSource, seed, x, z );
        }
        if( mRemoveDimension == FastNoise::Dim::Z )
        {
            return this->GetSourceValue( mSource, seed, x, y );
        }

        return this->GetSourceValue( mSource, seed, x, y, z );
    }

    // 4D input: drop X, Y, Z or W; any other selection forwards all four coordinates.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        if( mRemoveDimension == FastNoise::Dim::X )
        {
            return this->GetSourceValue( mSource, seed, y, z, w );
        }
        if( mRemoveDimension == FastNoise::Dim::Y )
        {
            return this->GetSourceValue( mSource, seed, x, z, w );
        }
        if( mRemoveDimension == FastNoise::Dim::Z )
        {
            return this->GetSourceValue( mSource, seed, x, y, w );
        }
        if( mRemoveDimension == FastNoise::Dim::W )
        {
            return this->GetSourceValue( mSource, seed, x, y, z );
        }

        return this->GetSourceValue( mSource, seed, x, y, z, w );
    }
};
// SIMD implementation of GeneratorCache: keeps the most recent source
// evaluation in thread-local storage and returns it again when the next request
// uses the same source generator, the same seed and the same position.
template<typename FS>
class FS_T<FastNoise::GeneratorCache, FS> : public virtual FastNoise::GeneratorCache, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        thread_local static void* CachedGenerator = nullptr;
        // Raw bytes of the seed vector the cached value was produced with. The
        // source output depends on the seed, so it has to be part of the cache key;
        // a byte buffer sidesteps any alignment requirement of the SIMD type.
        thread_local static unsigned char CachedSeed[sizeof( int32v )] = {};
        thread_local static float32v CachedValue;
        thread_local static float32v CachedPos[sizeof...( P )];
        // TLS is not always aligned, so use FS_Load/FS_Store to access SIMD types

        float32v arrayPos[] = { pos... };

        bool isSame = (CachedGenerator == mSource.simdGeneratorPtr);
        isSame &= (std::memcmp( CachedSeed, &seed, sizeof( int32v ) ) == 0);

        for( size_t i = 0; i < sizeof...( P ); i++ )
        {
            isSame &= !FS_AnyMask_bool( arrayPos[i] != FS_Load_f32( &CachedPos[i] ) );
        }

        if( !isSame )
        {
            float32v value = this->GetSourceValue( mSource, seed, pos... );

            // Record the whole key only after the source has been evaluated: the
            // source may itself contain a cache node sharing these thread-local
            // slots, and writing everything together afterwards keeps the stored
            // key and value consistent with each other.
            CachedGenerator = mSource.simdGeneratorPtr;
            std::memcpy( CachedSeed, &seed, sizeof( int32v ) );
            FS_Store_f32( &CachedValue, value );

            for( size_t i = 0; i < sizeof...(P); i++ )
            {
                FS_Store_f32( &CachedPos[i], arrayPos[i] );
            }

            return value;
        }

        return FS_Load_f32( &CachedValue );
    }
};

View File

@ -0,0 +1,16 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Perlin noise node. It declares no parameters of its own; the metadata only
// lists it under the "Coherent Noise" group.
class Perlin : public virtual Generator
{
    FASTNOISE_METADATA( Generator )

        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Coherent Noise" );
        }
    };
};
}

View File

@ -0,0 +1,109 @@
#include "FastSIMD/InlInclude.h"
#include "Perlin.h"
#include "Utils.inl"
// SIMD implementation of the Perlin node with 2D, 3D and 4D Gen overloads.
// All three follow the same recipe: floor the position to find the lattice cell,
// hash every cell corner, take the gradient dot product at each corner and blend
// the results with quintic-interpolated weights.
template<typename FS>
class FS_T<FastNoise::Perlin, FS> : public virtual FastNoise::Perlin, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D: blends the 4 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
// Cell origin (floored position).
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
// Integer cell coordinates pre-multiplied by the per-axis primes passed to HashPrimes;
// x1/y1 are the neighbouring cell one step along each axis.
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
// Offset from the cell origin (xf0/yf0); xs/ys are reused to hold the same fraction.
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
// Offset from the opposite corner.
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
// Turn the fractions into blend weights (used as the last argument of Lerp below).
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
// Blend along x, then along y. The leading constant scales the result
// (presumably to normalise the output range -- TODO confirm).
return float32v( 0.579106986522674560546875f ) * FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0 ), xf0, yf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1 ), xf0, yf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
}
// 3D: same scheme over the 8 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
// Cell origin (floored position).
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
// Prime-multiplied cell coordinates and their +1 neighbours.
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
// Offsets from the cell origin and from the opposite corner.
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
float32v zf0 = zs = z - zs;
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
float32v zf1 = zf0 - float32v( 1 );
// Blend weights.
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
zs = FnUtils::InterpQuintic( zs );
// Blend along x, then y, then z; the result is scaled by the leading constant.
return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
}
// 4D: same scheme over the 16 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
// Cell origin (floored position).
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
float32v ws = FS_Floor_f32( w );
// Prime-multiplied cell coordinates and their +1 neighbours.
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
int32v w1 = w0 + int32v( FnPrimes::W );
// Offsets from the cell origin and from the opposite corner.
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
float32v zf0 = zs = z - zs;
float32v wf0 = ws = w - ws;
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
float32v zf1 = zf0 - float32v( 1 );
float32v wf1 = wf0 - float32v( 1 );
// Blend weights.
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
zs = FnUtils::InterpQuintic( zs );
ws = FnUtils::InterpQuintic( ws );
// Blend along x, y, z and finally w. The scale constant is the same value the 3D overload uses.
return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
}
};

View File

@ -0,0 +1,27 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Simplex noise node. It declares no parameters of its own; the metadata only
// lists it under the "Coherent Noise" group.
class Simplex : public virtual Generator
{
    FASTNOISE_METADATA( Generator )

        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Coherent Noise" );
        }
    };
};
// OpenSimplex2 noise node. It declares no parameters of its own; the metadata
// only lists it under the "Coherent Noise" group.
class OpenSimplex2 : public virtual Generator
{
    FASTNOISE_METADATA( Generator )

        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Coherent Noise" );
        }
    };
};
}

View File

@ -0,0 +1,373 @@
#include "FastSIMD/InlInclude.h"
#include "Simplex.h"
#include "Utils.inl"
// SIMD implementation of the Simplex node with 2D, 3D and 4D Gen overloads.
// Each overload locates the simplex cell containing the point, derives the
// offsets to that simplex's vertices, and sums one contribution per vertex:
// the gradient dot product at the vertex weighted by a falloff term raised to
// the fourth power.
// NOTE(review): FS_FNMulAdd_f32( a, b, c ) is read here as c - a * b, and the
// FS_Masked* / FS_NMasked* helpers as applying the operation only where the
// mask is set / not set -- confirm against the FastSIMD function list.
template<typename FS>
class FS_T<FastNoise::Simplex, FS> : public virtual FastNoise::Simplex, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D: three vertices per simplex.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
// F2 is applied to the input before flooring, G2 when mapping the cell origin back.
const float SQRT3 = 1.7320508075688772935274463415059f;
const float F2 = 0.5f * (SQRT3 - 1.0f);
const float G2 = (3.0f - SQRT3) / 6.0f;
// Shift the input by F2 * (x + y) and floor to get the cell origin.
float32v f = float32v( F2 ) * (x + y);
float32v x0 = FS_Floor_f32( x + f );
float32v y0 = FS_Floor_f32( y + f );
// Prime-multiplied integer cell coordinates for hashing.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
// x0/y0 become the offset from the first vertex to the input point.
float32v g = float32v( G2 ) * (x0 + y0);
x0 = x - (x0 - g);
y0 = y - (y0 - g);
// i1 is set where x0 > y0; it decides along which axis the second vertex is stepped.
mask32v i1 = x0 > y0;
//mask32v j1 = ~i1; //NMasked funcs
// Offsets to the second vertex (stepped along x where i1 is set, along y otherwise)...
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
// ...and to the third vertex (stepped along both axes).
float32v x2 = x0 + float32v( G2 * 2 - 1 );
float32v y2 = y0 + float32v( G2 * 2 - 1 );
// Falloff per vertex: 0.5 minus the squared distance, clamped at zero, then raised to the 4th power.
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, float32v( 0.5f ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, float32v( 0.5f ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, float32v( 0.5f ) ) );
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
// Gradient dot product at each vertex; the hash input steps the same way as the vertex offsets.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
// Weighted sum of the three contributions, scaled by a constant
// (presumably normalising the output range -- TODO confirm).
return float32v( 38.283687591552734375f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
}
// 3D: four vertices per simplex.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
const float F3 = 1.0f / 3.0f;
const float G3 = 1.0f / 2.0f;
// Shift every coordinate by F3 * (x + y + z), then split into cell origin and in-cell fraction.
float32v s = float32v( F3 ) * (x + y + z);
x += s;
y += s;
z += s;
float32v x0 = FS_Floor_f32( x );
float32v y0 = FS_Floor_f32( y );
float32v z0 = FS_Floor_f32( z );
float32v xi = x - x0;
float32v yi = y - y0;
float32v zi = z - z0;
// Prime-multiplied integer cell coordinates for hashing.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
// Pairwise ordering of the in-cell fractions; these pick the traversal order of the vertices.
mask32v x_ge_y = xi >= yi;
mask32v y_ge_z = yi >= zi;
mask32v x_ge_z = xi >= zi;
// x0/y0/z0 become the offset from the first vertex to the input point.
float32v g = float32v( G3 ) * (xi + yi + zi);
x0 = xi - g;
y0 = yi - g;
z0 = zi - g;
// Step masks for the second vertex (i1/j1/k1) and the third vertex (i2/j2/k2).
// k2 is consumed through the NMasked variants below.
mask32v i1 = x_ge_y & x_ge_z;
mask32v j1 = FS_BitwiseAndNot_m32( y_ge_z, x_ge_y );
mask32v k1 = FS_BitwiseAndNot_m32( ~x_ge_z, y_ge_z );
mask32v i2 = x_ge_y | x_ge_z;
mask32v j2 = ~x_ge_y | y_ge_z;
mask32v k2 = x_ge_z & y_ge_z; //NMasked
// Offsets to the second, third and fourth vertex.
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G3 );
float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G3 );
float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G3 );
float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G3 * 2 );
float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G3 * 2 );
float32v z2 = FS_NMaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G3 * 2 );
float32v x3 = x0 + float32v( G3 * 3 - 1 );
float32v y3 = y0 + float32v( G3 * 3 - 1 );
float32v z3 = z0 + float32v( G3 * 3 - 1 );
// Falloff per vertex: 0.6 minus the squared distance, clamped at zero, then raised to the 4th power.
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, float32v( 0.6f ) ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, float32v( 0.6f ) ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, float32v( 0.6f ) ) ) );
float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, float32v( 0.6f ) ) ) );
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t3 = FS_Max_f32( t3, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
t3 *= t3; t3 *= t3;
// Gradient dot product at each vertex; the hash inputs use the same step masks as the offsets.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k ), x0, y0, z0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ), FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ) ), x1, y1, z1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
// Weighted sum of the four contributions, scaled by a constant.
return float32v( 32.69428253173828125f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, n3 * t3 ) ) );
}
// 4D: five vertices per simplex.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
const float SQRT5 = 2.236067977499f;
const float F4 = (SQRT5 - 1.0f) / 4.0f;
const float G4 = (5.0f - SQRT5) / 20.0f;
// Shift every coordinate by F4 * (x + y + z + w), then split into cell origin and in-cell fraction.
float32v s = float32v( F4 ) * (x + y + z + w);
x += s;
y += s;
z += s;
w += s;
float32v x0 = FS_Floor_f32( x );
float32v y0 = FS_Floor_f32( y );
float32v z0 = FS_Floor_f32( z );
float32v w0 = FS_Floor_f32( w );
float32v xi = x - x0;
float32v yi = y - y0;
float32v zi = z - z0;
float32v wi = w - w0;
// Prime-multiplied integer cell coordinates for hashing.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
int32v l = FS_Convertf32_i32( w0 ) * int32v( FnPrimes::W );
// x0..w0 become the offset from the first vertex to the input point.
float32v g = float32v( G4 ) * (xi + yi + zi + wi);
x0 = xi - g;
y0 = yi - g;
z0 = zi - g;
w0 = wi - g;
// Rank each coordinate by pairwise comparison: every comparison increments
// the rank of exactly one of the two coordinates involved.
int32v rankx( 0 );
int32v ranky( 0 );
int32v rankz( 0 );
int32v rankw( 0 );
mask32v x_ge_y = x0 >= y0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_y );
ranky = FS_MaskedIncrement_i32( ranky, ~x_ge_y );
mask32v x_ge_z = x0 >= z0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_z );
rankz = FS_MaskedIncrement_i32( rankz, ~x_ge_z );
mask32v x_ge_w = x0 >= w0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~x_ge_w );
mask32v y_ge_z = y0 >= z0;
ranky = FS_MaskedIncrement_i32( ranky, y_ge_z );
rankz = FS_MaskedIncrement_i32( rankz, ~y_ge_z );
mask32v y_ge_w = y0 >= w0;
ranky = FS_MaskedIncrement_i32( ranky, y_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~y_ge_w );
mask32v z_ge_w = z0 >= w0;
rankz = FS_MaskedIncrement_i32( rankz, z_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~z_ge_w );
// Step masks per vertex: vertex 1 steps only the coordinate with rank above 2,
// vertex 2 those above 1, vertex 3 those above 0.
mask32v i1 = rankx > int32v( 2 );
mask32v j1 = ranky > int32v( 2 );
mask32v k1 = rankz > int32v( 2 );
mask32v l1 = rankw > int32v( 2 );
mask32v i2 = rankx > int32v( 1 );
mask32v j2 = ranky > int32v( 1 );
mask32v k2 = rankz > int32v( 1 );
mask32v l2 = rankw > int32v( 1 );
mask32v i3 = rankx > int32v( 0 );
mask32v j3 = ranky > int32v( 0 );
mask32v k3 = rankz > int32v( 0 );
mask32v l3 = rankw > int32v( 0 );
// Offsets to vertices 1 through 4.
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G4 );
float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G4 );
float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G4 );
float32v w1 = FS_MaskedSub_f32( w0, float32v( 1 ), l1 ) + float32v( G4 );
float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G4 * 2 );
float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G4 * 2 );
float32v z2 = FS_MaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G4 * 2 );
float32v w2 = FS_MaskedSub_f32( w0, float32v( 1 ), l2 ) + float32v( G4 * 2 );
float32v x3 = FS_MaskedSub_f32( x0, float32v( 1 ), i3 ) + float32v( G4 * 3 );
float32v y3 = FS_MaskedSub_f32( y0, float32v( 1 ), j3 ) + float32v( G4 * 3 );
float32v z3 = FS_MaskedSub_f32( z0, float32v( 1 ), k3 ) + float32v( G4 * 3 );
float32v w3 = FS_MaskedSub_f32( w0, float32v( 1 ), l3 ) + float32v( G4 * 3 );
float32v x4 = x0 + float32v( G4 * 4 - 1 );
float32v y4 = y0 + float32v( G4 * 4 - 1 );
float32v z4 = z0 + float32v( G4 * 4 - 1 );
float32v w4 = w0 + float32v( G4 * 4 - 1 );
// Falloff per vertex: 0.6 minus the squared distance, clamped at zero, then raised to the 4th power.
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, FS_FNMulAdd_f32( w0, w0, float32v( 0.6f ) ) ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, FS_FNMulAdd_f32( w1, w1, float32v( 0.6f ) ) ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, FS_FNMulAdd_f32( w2, w2, float32v( 0.6f ) ) ) ) );
float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, FS_FNMulAdd_f32( w3, w3, float32v( 0.6f ) ) ) ) );
float32v t4 = FS_FNMulAdd_f32( x4, x4, FS_FNMulAdd_f32( y4, y4, FS_FNMulAdd_f32( z4, z4, FS_FNMulAdd_f32( w4, w4, float32v( 0.6f ) ) ) ) );
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t3 = FS_Max_f32( t3, float32v( 0 ) );
t4 = FS_Max_f32( t4, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
t3 *= t3; t3 *= t3;
t4 *= t4; t4 *= t4;
// Gradient dot product at each vertex; the hash inputs use the same step masks as the offsets.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l1 ) ), x1, y1, z1, w1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l2 ) ), x2, y2, z2, w2 );
float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i3 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j3 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k3 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
// Weighted sum of the five contributions, scaled by a constant.
return float32v( 27.f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, FS_FMulAdd_f32( n3, t3, n4 * t4 ) ) ) );
}
};
// OpenSimplex2 generator for the SIMD level selected by FS.
// Implements the 2D and 3D Gen() overloads. Both sum per-lattice-point gradient
// contributions, each weighted by max( r - distance^2, 0 )^4, and apply a fixed output scale.
template<typename FS>
class FS_T<FastNoise::OpenSimplex2, FS> : public virtual FastNoise::OpenSimplex2, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D noise at (x, y) for the given seed; sums the contributions of three lattice vertices.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
const float SQRT3 = 1.7320508075f;
// F2 skews the input onto the lattice grid, G2 undoes that skew.
const float F2 = 0.5f * (SQRT3 - 1.0f);
const float G2 = (3.0f - SQRT3) / 6.0f;
// Skew the input and floor it to find the base corner of the containing cell.
float32v f = float32v( F2 ) * (x + y);
float32v x0 = FS_Floor_f32( x + f );
float32v y0 = FS_Floor_f32( y + f );
// Integer cell coordinates, pre-multiplied by the per-axis primes consumed by HashPrimes.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
// Unskew the base corner; x0/y0 then hold the offset from that corner to the input point.
float32v g = float32v( G2 ) * (x0 + y0);
x0 = x - (x0 - g);
y0 = y - (y0 - g);
// Set where the x offset exceeds the y offset: the middle vertex steps along x there, along y elsewhere.
mask32v i1 = x0 > y0;
// No inverted mask (j1 = ~i1) is built; the NMasked* helpers act where i1 is false instead.
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
// Offset to the far vertex (one step along both axes).
float32v x2 = x0 + float32v( (G2 * 2) - 1 );
float32v y2 = y0 + float32v( (G2 * 2) - 1 );
// Falloff per vertex: max( 0.5 - squared distance, 0 ) raised to the 4th power.
float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
// Gradient dot products at the base, middle and far vertices (hash of each vertex picks the gradient).
float32v n0 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
// Weighted sum times a fixed scale (presumably normalises the output range - confirm).
return float32v( 49.918426513671875f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
}
// 3D noise at (x, y, z) for the given seed; two passes of two lattice points each.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
// Working coordinates: each is (2/3) * (x + y + z) minus the original axis value.
float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
float32v xr = f - x;
float32v yr = f - y;
float32v zr = f - z;
float32v val( 0 );
// Runs exactly twice: the loop breaks once i == 1; before the second pass the
// coordinates are shifted by 0.5 and the seed is bit-inverted.
for( size_t i = 0; ; i++ )
{
// Nearest integer lattice point and the offset from it to the sample.
float32v v0xr = FS_Round_f32( xr );
float32v v0yr = FS_Round_f32( yr );
float32v v0zr = FS_Round_f32( zr );
float32v d0xr = xr - v0xr;
float32v d0yr = yr - v0yr;
float32v d0zr = zr - v0zr;
float32v score0xr = FS_Abs_f32( d0xr );
float32v score0yr = FS_Abs_f32( d0yr );
float32v score0zr = FS_Abs_f32( d0zr );
// Choose exactly one axis, the one with the largest absolute offset (x wins ties, then y, else z).
mask32v dir0xr = FS_Max_f32( score0yr, score0zr ) <= score0xr;
mask32v dir0yr = FS_BitwiseAndNot_m32( FS_Max_f32( score0zr, score0xr ) <= score0yr, dir0xr );
mask32v dir0zr = ~(dir0xr | dir0yr);
// Second lattice point: move one unit along the chosen axis. The bit expression
// 1.0f | ( -1.0f & d ) keeps the sign bit of d on the bit pattern of 1.0f, i.e. +/-1 with the sign of d.
float32v v1xr = FS_MaskedAdd_f32( v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ), dir0xr );
float32v v1yr = FS_MaskedAdd_f32( v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ), dir0yr );
float32v v1zr = FS_MaskedAdd_f32( v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ), dir0zr );
float32v d1xr = xr - v1xr;
float32v d1yr = yr - v1yr;
float32v d1zr = zr - v1zr;
// Lattice coordinates of both points, pre-multiplied by the per-axis primes for hashing.
int32v hv0xr = FS_Convertf32_i32( v0xr ) * int32v( FnPrimes::X );
int32v hv0yr = FS_Convertf32_i32( v0yr ) * int32v( FnPrimes::Y );
int32v hv0zr = FS_Convertf32_i32( v0zr ) * int32v( FnPrimes::Z );
int32v hv1xr = FS_Convertf32_i32( v1xr ) * int32v( FnPrimes::X );
int32v hv1yr = FS_Convertf32_i32( v1yr ) * int32v( FnPrimes::Y );
int32v hv1zr = FS_Convertf32_i32( v1zr ) * int32v( FnPrimes::Z );
// Falloff per point: max( 0.6 - squared distance, 0 ) raised to the 4th power.
float32v t0 = FS_FNMulAdd_f32( d0zr, d0zr, FS_FNMulAdd_f32( d0yr, d0yr, FS_FNMulAdd_f32( d0xr, d0xr, float32v( 0.6f ) ) ) );
float32v t1 = FS_FNMulAdd_f32( d1zr, d1zr, FS_FNMulAdd_f32( d1yr, d1yr, FS_FNMulAdd_f32( d1xr, d1xr, float32v( 0.6f ) ) ) );
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
// Gradient contribution of both points, accumulated into val.
float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
val = FS_FMulAdd_f32( v0, t0, FS_FMulAdd_f32( v1, t1, val ) );
if( i == 1 )
{
break;
}
// Set up the second pass: half-unit shift on every axis and an inverted seed.
xr += float32v( 0.5f );
yr += float32v( 0.5f );
zr += float32v( 0.5f );
seed = ~seed;
}
// Fixed output scale (presumably normalises the output range - confirm).
return float32v( 32.69428253173828125f ) * val;
}
};

View File

@ -0,0 +1,306 @@
#pragma once
#include "FastSIMD/InlInclude.h"
#include <climits>
namespace FastNoise
{
// Per-axis integer multipliers. Generators multiply integer lattice coordinates by
// these before passing them to the hash helpers (e.g. Utils::HashPrimes).
namespace Primes
{
static constexpr int32_t X = 501125321;
static constexpr int32_t Y = 1136930381;
static constexpr int32_t Z = 1720413743;
static constexpr int32_t W = 1066037191;
// The same four values, indexable by axis number (0 = X ... 3 = W).
static constexpr int32_t Lookup[] = { X,Y,Z,W };
}
template<typename FS>
struct Utils
{
using float32v = typename FS::float32v;
using int32v = typename FS::int32v;
using mask32v = typename FS::mask32v;
static constexpr float ROOT2 = 1.4142135623730950488f;
static constexpr float ROOT3 = 1.7320508075688772935f;
// 2D gradient dot product for SIMD levels below AVX2.
// Maps the hash to an index ( (hash & 0x3FFFFF) * 4/3, converted to int ) and decodes
// individual index bits into a gradient instead of using a table lookup.
// "Bit-N" in the comments below names a bit by its value (Bit-4 is index & 4).
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );
// Bit-4 = Choose X Y ordering
mask32v xy;
if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
{
xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
}
else
{
// Move the bit into the sign position; below SSE4.1 it is additionally shifted right by 31
// (presumably to fill the whole lane for a bitwise select - confirm against FS_Select_f32).
xy = index << 29;
if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
{
xy >>= 31;
}
}
// a/b are fX/fY, swapped where xy is set.
float32v a = FS_Select_f32( xy, fY, fX );
float32v b = FS_Select_f32( xy, fX, fY );
// Bit-1 = b flip sign
b ^= FS_Casti32_f32( index << 31 );
// Bit-2 = Mul a by 2 or Root3
mask32v aMul2;
if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
{
aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
}
else
{
aMul2 = (index << 30) >> 31;
}
a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
// b zero value if a mul 2
b = FS_NMask_f32( b, aMul2 );
// Bit-8 = Flip sign of a + b
return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
}
// 2D gradient dot product, AVX2 variant.
// Derives the same index as the other overloads ( (hash & 0x3FFFFF) * 4/3 ), looks the
// gradient components up in two 8-entry tables via a lane permute, and flips the sign
// of the result when index bit 3 (value 8) is set.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    int32v maskedHash = hash & int32v( 0x3FFFFF );
    int32v index = FS_Convertf32_i32( FS_Converti32_f32( maskedHash ) * float32v( 1.3333333333333333f ) );

    // Gradient component tables; the permute selects one entry per lane using index.
    float32v tableX( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 );
    float32v tableY( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 );
    float32v gX = _mm256_permutevar8x32_ps( tableX, index );
    float32v gY = _mm256_permutevar8x32_ps( tableY, index );

    // Index bit 3 moved into the float sign position.
    float32v signFlip = FS_Casti32_f32( (index >> 3) << 31 );
    float32v dot = FS_FMulAdd_f32( gX, fX, fY * gY );
    return dot ^ signFlip;
}
// 2D gradient dot product, AVX512 variant.
// Uses 16-entry tables, so no separate sign flip is applied after the lookup:
// entries 8-15 are the negated entries 0-7.
template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    int32v maskedHash = hash & int32v( 0x3FFFFF );
    int32v index = FS_Convertf32_i32( FS_Converti32_f32( maskedHash ) * float32v( 1.3333333333333333f ) );

    float32v tableX( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 );
    float32v tableY( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 );
    float32v gX = _mm512_permutexvar_ps( index, tableX );
    float32v gY = _mm512_permutexvar_ps( index, tableY );

    return FS_FMulAdd_f32( gX, fX, fY * gY );
}
// 2D gradient dot product for SIMD levels below AVX2.
// Three hash bits pick one of the eight gradients listed below: bit 0 negates fX,
// bit 1 negates fY, bit 2 chooses which of the two is scaled by (1 + ROOT2).
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
// ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
// ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )
// bit1/bit2 hold hash bit 0 / hash bit 1 moved into the sign position.
int32v bit1 = (hash << 31);
int32v bit2 = (hash >> 1) << 31;
mask32v bit4;
if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
{
bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
}
else
{
// Hash bit 2 moved into the sign position; below SSE4.1 it is additionally shifted right by 31
// (presumably to fill the whole lane for a bitwise select - confirm against FS_Select_f32).
bit4 = hash << 29;
if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
{
bit4 >>= 31;
}
}
// XOR with the sign-position bits negates the inputs where the hash bit is set.
fX ^= FS_Casti32_f32( bit1 );
fY ^= FS_Casti32_f32( bit2 );
// a receives the (1 + ROOT2) weight, b the weight of 1; bit4 swaps which input is which.
float32v a = FS_Select_f32( bit4, fY, fX );
float32v b = FS_Select_f32( bit4, fX, fY );
return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
}
// 2D gradient dot product, AVX2 variant.
// The hash is used directly as the permute index into two 8-entry gradient tables.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    float32v tableX( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 );
    float32v tableY( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 );

    float32v gX = _mm256_permutevar8x32_ps( tableX, hash );
    float32v gY = _mm256_permutevar8x32_ps( tableY, hash );

    // gX * fX + gY * fY
    return FS_FMulAdd_f32( gX, fX, fY * gY );
}
// 2D gradient dot product, AVX512 variant.
// Same eight gradients as the AVX2 tables, repeated twice to fill the 16-entry tables.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    float32v tableX( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 );
    float32v tableY( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 );

    float32v gX = _mm512_permutexvar_ps( hash, tableX );
    float32v gY = _mm512_permutexvar_ps( hash, tableY );

    // gX * fX + gY * fY
    return FS_FMulAdd_f32( gX, fX, fY * gY );
}
// 3D gradient dot product for every SIMD level except AVX512.
// Selects two of the three inputs from hash bits 0, 2 and 3 (hash & 13), optionally
// negates each (hash bits 0 and 1) and returns their sum.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
{
int32v hasha13 = hash & int32v( 13 );
// u = fX when hash bit 3 is clear (hasha13 < 8), otherwise fY
float32v u = FS_Select_f32( hasha13 < int32v( 8 ), fX, fY );
// v = fY when hash bits 2 and 3 are both clear (hasha13 < 2),
// else fX when hasha13 == 12 (bits 2 and 3 set, bit 0 clear), else fZ
float32v v = FS_Select_f32( hasha13 == int32v( 12 ), fX, fZ );
v = FS_Select_f32( hasha13 < int32v( 2 ), fY, v );
// h1: hash bit 0 in the sign position, negates u when set
// h2: hash bit 1 in the sign position, negates v when set
float32v h1 = FS_Casti32_f32( hash << 31 );
float32v h2 = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );
// apply the signs and add the two terms
return ( u ^ h1 ) + ( v ^ h2 );
}
// 3D gradient dot product, AVX512 variant.
// The hash indexes three 16-entry tables holding the X, Y and Z gradient components.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
{
    float32v tableX( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 );
    float32v tableY( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v tableZ( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 );

    float32v gX = _mm512_permutexvar_ps( hash, tableX );
    float32v gY = _mm512_permutexvar_ps( hash, tableY );
    float32v gZ = _mm512_permutexvar_ps( hash, tableZ );

    // gX * fX + ( fY * gY + fZ * gZ )
    float32v yzSum = FS_FMulAdd_f32( fY, gY, fZ * gZ );
    return FS_FMulAdd_f32( gX, fX, yzSum );
}
// 4D gradient dot product for every SIMD level except AVX512.
// Hash bits 3-4 (p) decide which one of the four inputs is left out; the three
// remaining inputs are each optionally negated (hash bits 0-2) and summed.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
{
// p is one of 0, 8, 16, 24.
int32v p = hash & int32v( 3 << 3 );
// a = fX unless p == 0, in which case fY.
float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );
float32v b;
// b = fY when hash bit 4 is set (p is 16 or 24), otherwise fZ.
if constexpr( FS::SIMD_Level <= FastSIMD::Level_SSE2 )
{
b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );
}
else
{
// Hash bit 4 moved into the sign position is passed as the mask directly
// (presumably the select only inspects the sign bit at these levels - confirm).
b = FS_Select_f32( hash << 27, fY, fZ );
}
// c = fZ only when p == 24, otherwise fW.
float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );
// Hash bits 0, 1 and 2 moved into the sign position; XOR-ing negates a, b and c respectively.
float32v aSign = FS_Casti32_f32( hash << 31 );
float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );
return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
}
// 4D gradient dot product, AVX512 variant.
// Each gradient component is picked by a two-source permute: the hash selects an
// entry from the pair of 16-entry tables given for that component.
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
{
    float32v xFirst( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v xSecond( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 );
    float32v gX = _mm512_permutex2var_ps( xFirst, hash, xSecond );

    float32v yFirst( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 );
    float32v ySecond( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 );
    float32v gY = _mm512_permutex2var_ps( yFirst, hash, ySecond );

    float32v zFirst( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 );
    float32v zSecond( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 );
    float32v gZ = _mm512_permutex2var_ps( zFirst, hash, zSecond );

    float32v wFirst( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 );
    float32v wSecond( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 );
    float32v gW = _mm512_permutex2var_ps( wFirst, hash, wSecond );

    // gX * fX + ( fY * gY + ( fZ * gZ + fW * gW ) )
    float32v zwSum = FS_FMulAdd_f32( fZ, gZ, fW * gW );
    float32v yzwSum = FS_FMulAdd_f32( fY, gY, zwSum );
    return FS_FMulAdd_f32( gX, fX, yzwSum );
}
// Hashes a seed together with any number of prime-multiplied lattice coordinates.
// The coordinates are XOR-ed together and into the seed, the result is multiplied by
// 0x27d4eb2d, and finally XOR-ed with itself shifted right by 15.
// At least one coordinate must be passed (the XOR fold has no empty-pack value).
template<typename SIMD = FS, typename... P>
FS_INLINE static int32v HashPrimes( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);
    mixed *= int32v( 0x27d4eb2d );

    int32v shifted = mixed >> 15;
    return shifted ^ mixed;
}
// Variant of HashPrimes without the final shift-and-XOR step: the coordinates are
// XOR-ed together and into the seed, and the result is multiplied by 0x27d4eb2d.
// At least one coordinate must be passed (the XOR fold has no empty-pack value).
template<typename SIMD = FS, typename... P>
FS_INLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);
    mixed *= int32v( 0x27d4eb2d );
    return mixed;
}
// Pseudo-random float for a lattice point: the coordinates are XOR-ed together and
// into the seed, the result is multiplied by ( itself * 0x27d4eb2d ), converted to
// float and scaled by 1 / INT_MAX.
// At least one coordinate must be passed (the XOR fold has no empty-pack value).
template<typename SIMD = FS, typename... P>
FS_INLINE static float32v GetValueCoord( int32v seed, P... primedPos )
{
    int32v mixed = seed;
    mixed ^= (primedPos ^ ...);

    int32v factor = mixed * int32v( 0x27d4eb2d );
    mixed *= factor;

    float32v asFloat = FS_Converti32_f32( mixed );
    return asFloat * float32v( 1.0f / (float)INT_MAX );
}
// Linear interpolation: a + t * ( b - a ), evaluated as a single multiply-add.
template<typename SIMD = FS>
FS_INLINE static float32v Lerp( float32v a, float32v b, float32v t )
{
    float32v span = b - a;
    return FS_FMulAdd_f32( t, span, a );
}
// Cubic smoothing curve: t^2 * ( 3 - 2t ).
template<typename SIMD = FS>
FS_INLINE static float32v InterpHermite( float32v t )
{
    float32v tSquared = t * t;
    // 3 - 2t, computed as -( t * 2 ) + 3
    float32v linearTerm = FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
    return tSquared * linearTerm;
}
// Quintic smoothing curve: t^3 * ( t * ( 6t - 15 ) + 10 ).
template<typename SIMD = FS>
FS_INLINE static float32v InterpQuintic( float32v t )
{
    // 6t - 15
    float32v inner = FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 ));
    // t * ( 6t - 15 ) + 10
    float32v polynomial = FS_FMulAdd_f32( t, inner, float32v( 10 ) );
    float32v tCubed = t * t * t;
    return tCubed * polynomial;
}
// Distance of the offset vector ( dX, d... ) under the chosen metric.
// distFunc selects the metric; unknown values fall through to Euclidean.
// dX is the first component, d... are the remaining components (any count the folds accept).
template<typename SIMD = FS, typename... P>
FS_INLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
{
switch( distFunc )
{
default:
case DistanceFunction::Euclidean:
{
// Sum of squares, then sqrt computed as InvSqrt( s ) * s.
// NOTE(review): FS_InvSqrt_f32 is documented as approximate, and for s == 0 this
// product may not evaluate to 0 - confirm how callers treat a zero distance.
float32v distSqr = dX * dX;
((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
return FS_InvSqrt_f32( distSqr ) * distSqr;
}
case DistanceFunction::EuclideanSquared:
{
// Sum of squares without the square root.
float32v distSqr = dX * dX;
((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);
return distSqr;
}
case DistanceFunction::Manhattan:
{
// Sum of absolute values.
// NOTE(review): this '+' fold has no empty-pack value, so the function needs at
// least one component besides dX to compile.
float32v dist = FS_Abs_f32( dX );
dist += (FS_Abs_f32( d ) + ...);
return dist;
}
case DistanceFunction::Hybrid:
{
// Per component: square plus absolute value, summed over all components.
float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);
return both;
}
}
}
};
}
using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
namespace FnPrimes = FastNoise::Primes;

View File

@ -0,0 +1,16 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Value noise node. Declares no members of its own beyond the metadata below.
class Value : public virtual Generator
{
// NOTE(review): FASTNOISE_METADATA is defined elsewhere; it presumably opens a nested
// Metadata struct derived from Generator's metadata, which the first "};" below closes - confirm.
FASTNOISE_METADATA( Generator )
// Registers this node under the "Coherent Noise" group.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Coherent Noise" );
}
};
};
}

View File

@ -0,0 +1,88 @@
#include "FastSIMD/InlInclude.h"
#include "Value.h"
#include "Utils.inl"
// Value noise generator for the SIMD level selected by FS.
// Every Gen() overload floors the input to find the containing lattice cell, fetches a
// pseudo-random value for each cell corner (FnUtils::GetValueCoord) and blends the
// corners with Hermite-smoothed weights: along x first, then y, then z, then w.
template<typename FS>
class FS_T<FastNoise::Value, FS> : public virtual FastNoise::Value, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D value noise at (x, y).
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );

        // Corner coordinates pre-multiplied by the per-axis primes; "1" is the next cell over.
        int32v x0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v y0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v x1 = x0 + int32v( FnPrimes::X );
        int32v y1 = y0 + int32v( FnPrimes::Y );

        // Smoothed position inside the cell.
        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );

        float32v edgeY0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0 ), FnUtils::GetValueCoord( seed, x1, y0 ), tx );
        float32v edgeY1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1 ), FnUtils::GetValueCoord( seed, x1, y1 ), tx );

        return FnUtils::Lerp( edgeY0, edgeY1, ty );
    }

    // 3D value noise at (x, y, z).
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );
        float32v cellZ = FS_Floor_f32( z );

        int32v x0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v y0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v z0 = FS_Convertf32_i32( cellZ ) * int32v( FnPrimes::Z );
        int32v x1 = x0 + int32v( FnPrimes::X );
        int32v y1 = y0 + int32v( FnPrimes::Y );
        int32v z1 = z0 + int32v( FnPrimes::Z );

        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );
        float32v tz = FnUtils::InterpHermite( z - cellZ );

        // Blend along x on each of the four cell edges parallel to the x axis.
        float32v edgeY0Z0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0 ), FnUtils::GetValueCoord( seed, x1, y0, z0 ), tx );
        float32v edgeY1Z0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0 ), FnUtils::GetValueCoord( seed, x1, y1, z0 ), tx );
        float32v edgeY0Z1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1 ), FnUtils::GetValueCoord( seed, x1, y0, z1 ), tx );
        float32v edgeY1Z1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1 ), FnUtils::GetValueCoord( seed, x1, y1, z1 ), tx );

        // Then along y on the two z faces, and finally along z.
        float32v faceZ0 = FnUtils::Lerp( edgeY0Z0, edgeY1Z0, ty );
        float32v faceZ1 = FnUtils::Lerp( edgeY0Z1, edgeY1Z1, ty );

        return FnUtils::Lerp( faceZ0, faceZ1, tz );
    }

    // 4D value noise at (x, y, z, w).
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );
        float32v cellZ = FS_Floor_f32( z );
        float32v cellW = FS_Floor_f32( w );

        int32v x0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v y0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v z0 = FS_Convertf32_i32( cellZ ) * int32v( FnPrimes::Z );
        int32v w0 = FS_Convertf32_i32( cellW ) * int32v( FnPrimes::W );
        int32v x1 = x0 + int32v( FnPrimes::X );
        int32v y1 = y0 + int32v( FnPrimes::Y );
        int32v z1 = z0 + int32v( FnPrimes::Z );
        int32v w1 = w0 + int32v( FnPrimes::W );

        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );
        float32v tz = FnUtils::InterpHermite( z - cellZ );
        float32v tw = FnUtils::InterpHermite( w - cellW );

        // Blend along x on each of the eight cell edges parallel to the x axis.
        float32v edgeY0Z0W0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w0 ), tx );
        float32v edgeY1Z0W0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w0 ), tx );
        float32v edgeY0Z1W0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w0 ), tx );
        float32v edgeY1Z1W0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w0 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w0 ), tx );
        float32v edgeY0Z0W1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z0, w1 ), tx );
        float32v edgeY1Z0W1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z0, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z0, w1 ), tx );
        float32v edgeY0Z1W1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y0, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y0, z1, w1 ), tx );
        float32v edgeY1Z1W1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, x0, y1, z1, w1 ), FnUtils::GetValueCoord( seed, x1, y1, z1, w1 ), tx );

        // Then along y ...
        float32v faceZ0W0 = FnUtils::Lerp( edgeY0Z0W0, edgeY1Z0W0, ty );
        float32v faceZ1W0 = FnUtils::Lerp( edgeY0Z1W0, edgeY1Z1W0, ty );
        float32v faceZ0W1 = FnUtils::Lerp( edgeY0Z0W1, edgeY1Z0W1, ty );
        float32v faceZ1W1 = FnUtils::Lerp( edgeY0Z1W1, edgeY1Z1W1, ty );

        // ... along z ...
        float32v cubeW0 = FnUtils::Lerp( faceZ0W0, faceZ1W0, tz );
        float32v cubeW1 = FnUtils::Lerp( faceZ0W1, faceZ1W1, tz );

        // ... and finally along w.
        return FnUtils::Lerp( cubeW0, cubeW1, tw );
    }
};

View File

@ -0,0 +1,83 @@
#pragma once
#include <type_traits>
#include <tuple>
#include <stdexcept>
#include "FastSIMD/FunctionList.h"
template<typename T, size_t Size>
class VecN;
// Recursion terminator for VecN: holds no value. Its constructor and iteration
// helpers accept any arguments and do nothing, which ends the inherited
// constructor / ForEach / ForEachR chains of VecN<T, S>.
template<typename T>
class VecN<T, 0>
{
protected:
// Swallows whatever arguments the derived constructor forwards.
template<typename... A>
constexpr VecN( A... ) {}
// End of the ForEach chain: nothing left to visit.
template<typename... A>
void ForEach( A... ) const {}
// End of the ForEachR chain: nothing left to visit.
template<typename... A>
void ForEachR( A... ) const {}
};
// Fixed-size vector of S values of type T, built by recursive inheritance:
// VecN<T, S> stores element S - 1 and derives from VecN<T, S - 1>, which stores the rest.
template<typename T, size_t S>
class VecN : public VecN<T, S - 1>
{
public:
    static constexpr size_t Size = S;

    // Compile-time index of the element stored by this level of the hierarchy.
    // Declared once only: a second typedef of the same name in the same class scope
    // is a member redeclaration, which conforming compilers reject.
    typedef std::integral_constant<size_t, Size - 1> Index;

    // Value-initialises every element.
    constexpr VecN() : Base(), value() {}

    // Element-wise construction: every level receives the full argument list and
    // keeps the argument at its own Index. At least Size arguments are required.
    template<typename... A>
    constexpr VecN( A... args ) :
        Base( args... ),
        value( std::get<Index::value>( std::make_tuple( args... ) ) )
    {
    }

    // Mutable access to element I (I < Size).
    template<size_t I>
    FS_INLINE std::enable_if_t<(I < Size), T&> At()
    {
        return VecN<T, I + 1>::value;
    }

    // Read-only access to element I (I < Size); returns a copy.
    template<size_t I>
    FS_INLINE std::enable_if_t<(I < Size), T> At() const
    {
        return VecN<T, I + 1>::value;
    }

    // Out-of-range access (I >= Size): always throws std::out_of_range.
    template<size_t I>
    FS_INLINE std::enable_if_t<(I >= Size), T&> At() const
    {
        throw std::out_of_range( "Index out of range" );
    }

    // Calls func( Index, element, other.At<Index>()... ) for every element,
    // from element 0 up to element Size - 1.
    template<typename F, typename... A>
    FS_INLINE void ForEach( F&& func, A&&... other )
    {
        Base::ForEach( func, other... );
        func( Index(), value, (other.template At<Index::value>())... );
    }

    // Same as ForEach, but from element Size - 1 down to element 0.
    template<typename F, typename... A>
    FS_INLINE void ForEachR( F&& func, A&&... other )
    {
        func( Index(), value, (other.template At<Index::value>())... );
        Base::ForEachR( func, other... );
    }

protected:
    typedef VecN<T, Size - 1> Base;

    // Element Size - 1.
    T value;
};

View File

@ -0,0 +1,52 @@
#pragma once
#include <cstdint>
#include "FastSIMD_Config.h"
namespace FastSIMD
{
    // Underlying integer type for SIMD level flags and combinations of them.
    typedef uint32_t Level_BitFlags;

    // One bit per SIMD feature level, so levels can be OR-ed into a Level_BitFlags set.
    enum eLevel : Level_BitFlags
    {
        Level_Null   = 0,       // Uninitialised
        Level_Scalar = 1 << 0,  // 80386 instruction set (Not SIMD)
        Level_SSE    = 1 << 1,  // SSE (XMM) supported by CPU (not testing for O.S. support)
        Level_SSE2   = 1 << 2,  // SSE2
        Level_SSE3   = 1 << 3,  // SSE3
        Level_SSSE3  = 1 << 4,  // Supplementary SSE3 (SSSE3)
        Level_SSE41  = 1 << 5,  // SSE4.1
        Level_SSE42  = 1 << 6,  // SSE4.2
        Level_AVX    = 1 << 7,  // AVX supported by CPU and operating system
        Level_AVX2   = 1 << 8,  // AVX2
        Level_AVX512 = 1 << 9,  // AVX512, AVX512DQ supported by CPU and operating system

        Level_NEON   = 1 << 16, // ARM NEON
    };

    // Set of levels enabled at compile time: the flag of every level whose
    // FASTSIMD_COMPILE_* switch (FastSIMD_Config.h) is non-zero.
    const Level_BitFlags COMPILED_SIMD_LEVELS =
        (FASTSIMD_COMPILE_SCALAR ? Level_Scalar : 0) |
        (FASTSIMD_COMPILE_SSE    ? Level_SSE    : 0) |
        (FASTSIMD_COMPILE_SSE2   ? Level_SSE2   : 0) |
        (FASTSIMD_COMPILE_SSE3   ? Level_SSE3   : 0) |
        (FASTSIMD_COMPILE_SSSE3  ? Level_SSSE3  : 0) |
        (FASTSIMD_COMPILE_SSE41  ? Level_SSE41  : 0) |
        (FASTSIMD_COMPILE_SSE42  ? Level_SSE42  : 0) |
        (FASTSIMD_COMPILE_AVX    ? Level_AVX    : 0) |
        (FASTSIMD_COMPILE_AVX2   ? Level_AVX2   : 0) |
        (FASTSIMD_COMPILE_AVX512 ? Level_AVX512 : 0) |
        (FASTSIMD_COMPILE_NEON   ? Level_NEON   : 0) ;

    // Declared here, defined elsewhere.
    // NOTE(review): presumably reports the highest level the running CPU supports - confirm.
    eLevel CPUMaxSIMDLevel();

    // Declared here, defined elsewhere.
    // NOTE(review): presumably creates a T for the best level not above maxSIMDLevel,
    // with Level_Null meaning "no cap" - confirm against the definition.
    template<typename T>
    T* New( eLevel maxSIMDLevel = Level_Null );

    // Declared here, defined elsewhere.
    // NOTE(review): presumably creates the T implementation built for SIMD_LEVEL - confirm.
    template<typename T, eLevel SIMD_LEVEL>
    T* ClassFactory();

// Declares the static member Supported_SIMD_Levels, initialised with the given level flags.
#define FASTSIMD_LEVEL_SUPPORT( ... ) \
    static const FastSIMD::Level_BitFlags Supported_SIMD_Levels = __VA_ARGS__
}

View File

@ -0,0 +1,29 @@
#pragma once
// INTPTR_MAX / INT64_MAX are used by FASTSIMD_64BIT below. Include their header here
// so this file does not depend on what the includer happened to pull in first
// (inside an #if, undefined names would silently evaluate to 0 == 0).
#include <cstdint>

// Target architecture family: exactly one of FASTSIMD_x86 / FASTSIMD_ARM is 1.
#if defined(__arm__) || defined(__aarch64__)
#define FASTSIMD_x86 0
#define FASTSIMD_ARM 1
#else
#define FASTSIMD_x86 1
#define FASTSIMD_ARM 0
#endif

// True when pointers are 64 bits wide.
#define FASTSIMD_64BIT (INTPTR_MAX == INT64_MAX)

// Per-level compile switches: 1 builds the implementation for that level, 0 leaves it out.
#define FASTSIMD_COMPILE_SCALAR (!(FASTSIMD_x86 && FASTSIMD_64BIT)) // Don't compile for x86 64bit since CPU is guaranteed SSE2 support
#define FASTSIMD_COMPILE_SSE (FASTSIMD_x86 & 000) // Not supported
#define FASTSIMD_COMPILE_SSE2 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE3 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSSE3 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE41 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE42 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_AVX (FASTSIMD_x86 & 000) // Not supported
#define FASTSIMD_COMPILE_AVX2 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_AVX512 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_NEON (FASTSIMD_ARM & 1 )

// Whether fused multiply-add is used (consumed elsewhere in FastSIMD).
#define FASTSIMD_USE_FMA 1
// NOTE(review): consumer not visible in this chunk - confirm what this switch generates.
#define FASTSIMD_CONFIG_GENERATE_CONSTANTS 0

View File

@ -0,0 +1,821 @@
#pragma once
#include <cinttypes>
#include <type_traits>
#include <memory>
#include "FastSIMD/FastSIMD.h"
// FS_VECTORCALL: calling-convention decoration applied to FastSIMD functions.
// FS_INLINE: inlining decoration applied to FastSIMD functions.
#ifdef _MSC_VER
// MSVC: __vectorcall is left out when _M_IX86_FP is defined and below 2
// (a 32-bit x86 build targeting less than SSE2).
#if defined( _M_IX86_FP ) && _M_IX86_FP < 2
#define FS_VECTORCALL
#else
#define FS_VECTORCALL __vectorcall
#endif
#define FS_INLINE __forceinline
#else
// Other compilers: no special calling convention, forced inlining through the attribute.
#define FS_VECTORCALL
#define FS_INLINE __attribute__((always_inline)) inline
#endif
// When NDEBUG is not defined (debug builds), fall back to a plain 'inline' hint
// instead of forced inlining.
#ifndef NDEBUG
#undef FS_INLINE
#define FS_INLINE inline
#endif
/// <summary>
/// Number of 32 width elements that will fit into a vector
/// </summary>
/// <remarks>
/// Compile time constant
/// </remarks>
/// <code>
/// size_t FS_Size_32()
/// </code>
#define FS_Size_32() FS::template VectorSize<32>
// Vector builders
/// <summary>
/// Vector with values incrementing from 0 based on element index {0, 1, 2, 3...}
/// </summary>
/// <code>
/// example: int32v::FS_Incremented()
/// </code>
#define FS_Incremented() Incremented()
// Load
/// <summary>
/// Copies sizeof(float32v) bytes from given memory location into float32v
/// </summary>
/// <remarks>
/// Memory does not need to be aligned
/// </remarks>
/// <code>
/// float32v FS_Load_f32( void const* ptr )
/// </code>
#define FS_Load_f32( ... ) FS::Load_f32( __VA_ARGS__ )
/// <summary>
/// Copies sizeof(int32v) bytes from given memory location into int32v
/// </summary>
/// <remarks>
/// Memory does not need to be aligned
/// </remarks>
/// <code>
/// int32v FS_Load_i32( void const* ptr )
/// </code>
#define FS_Load_i32( ... ) FS::Load_i32( __VA_ARGS__ )
// Store
/// <summary>
/// Copies all elements of float32v to given memory location
/// </summary>
/// <code>
/// void FS_Store_f32( void* ptr, float32v f )
/// </code>
#define FS_Store_f32( ... ) FS::Store_f32( __VA_ARGS__ )
/// <summary>
/// Copies all elements of int32v to given memory location
/// </summary>
/// <code>
/// void FS_Store_i32( void* ptr, int32v i )
/// </code>
#define FS_Store_i32( ... ) FS::Store_i32( __VA_ARGS__ )
// Cast
/// <summary>
/// Bitwise cast int to float
/// </summary>
/// <code>
/// float32v FS_Casti32_f32( int32v i )
/// </code>
#define FS_Casti32_f32( ... ) FS::Casti32_f32( __VA_ARGS__ )
/// <summary>
/// Bitwise cast float to int
/// </summary>
/// <code>
/// int32v FS_Castf32_i32( float32v f )
/// </code>
#define FS_Castf32_i32( ... ) FS::Castf32_i32( __VA_ARGS__ )
// Convert
/// <summary>
/// Convert int to float
/// </summary>
/// <remarks>
/// Rounding: truncate
/// </remarks>
/// <code>
/// float32v FS_Converti32_f32( int32v i )
/// </code>
#define FS_Converti32_f32( ... ) FS::Converti32_f32( __VA_ARGS__ )
/// <summary>
/// Convert float to int
/// </summary>
/// <code>
/// int32v FS_Convertf32_i32( float32v f )
/// </code>
#define FS_Convertf32_i32( ... ) FS::Convertf32_i32( __VA_ARGS__ )
// Select
/// <summary>
/// return ( m ? a : b )
/// </summary>
/// <code>
/// float32v FS_Select_f32( mask32v m, float32v a, float32v b )
/// </code>
#define FS_Select_f32( ... ) FS::Select_f32( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : b )
/// </summary>
/// <code>
/// int32v FS_Select_i32( mask32v m, int32v a, int32v b )
/// </code>
#define FS_Select_i32( ... ) FS::Select_i32( __VA_ARGS__ )
// Min, Max
/// <summary>
/// return ( a < b ? a : b )
/// </summary>
/// <code>
/// float32v FS_Min_f32( float32v a, float32v b )
/// </code>
#define FS_Min_f32( ... ) FS::Min_f32( __VA_ARGS__ )
/// <summary>
/// return ( a > b ? a : b )
/// </summary>
/// <code>
/// float32v FS_Max_f32( float32v a, float32v b )
/// </code>
#define FS_Max_f32( ... ) FS::Max_f32( __VA_ARGS__ )
/// <summary>
/// return ( a < b ? a : b )
/// </summary>
/// <code>
/// int32v FS_Min_i32( int32v a, int32v b )
/// </code>
#define FS_Min_i32( ... ) FS::Min_i32( __VA_ARGS__ )
/// <summary>
/// return ( a > b ? a : b )
/// </summary>
/// <code>
/// int32v FS_Max_i32( int32v a, int32v b )
/// </code>
#define FS_Max_i32( ... ) FS::Max_i32( __VA_ARGS__ )
// Bitwise
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// float32v FS_BitwiseAndNot_f32( float32v a, float32v b )
/// </code>
#define FS_BitwiseAndNot_f32( ... ) FS::BitwiseAndNot_f32( __VA_ARGS__ )
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// int32v FS_BitwiseAndNot_i32( int32v a, int32v b )
/// </code>
#define FS_BitwiseAndNot_i32( ... ) FS::BitwiseAndNot_i32( __VA_ARGS__ )
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// mask32v FS_BitwiseAndNot_m32( mask32v a, mask32v b )
/// </code>
#define FS_BitwiseAndNot_m32( ... ) FastSIMD::BitwiseAndNot_m32<FS>( __VA_ARGS__ )
/// <summary>
/// return ZeroExtend( a >> b )
/// </summary>
/// <code>
/// float32v FS_BitwiseShiftRightZX_f32( float32v a, int32_t b )
/// </code>
#define FS_BitwiseShiftRightZX_f32( ... ) FS::BitwiseShiftRightZX_f32( __VA_ARGS__ )
/// <summary>
/// return ZeroExtend( a >> b )
/// </summary>
/// <code>
/// int32v FS_BitwiseShiftRightZX_i32( int32v a, int32_t b )
/// </code>
#define FS_BitwiseShiftRightZX_i32( ... ) FS::BitwiseShiftRightZX_i32( __VA_ARGS__ )
// Abs
/// <summary>
/// return ( a < 0 ? -a : a )
/// </summary>
/// <code>
/// float32v FS_Abs_f32( float32v a )
/// </code>
#define FS_Abs_f32( ... ) FS::Abs_f32( __VA_ARGS__ )
/// <summary>
/// return ( a < 0 ? -a : a )
/// </summary>
/// <code>
/// int32v FS_Abs_i32( int32v a )
/// </code>
#define FS_Abs_i32( ... ) FS::Abs_i32( __VA_ARGS__ )
// Float math
/// <summary>
/// return sqrt( a )
/// </summary>
/// <code>
/// float32v FS_Sqrt_f32( float32v a )
/// </code>
#define FS_Sqrt_f32( ... ) FS::Sqrt_f32( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( 1.0 / sqrt( a ) )
/// </summary>
/// <code>
/// float32v FS_InvSqrt_f32( float32v a )
/// </code>
#define FS_InvSqrt_f32( ... ) FS::InvSqrt_f32( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( 1.0 / a )
/// </summary>
/// <code>
/// float32v FS_Reciprocal_f32( float32v a )
/// </code>
#define FS_Reciprocal_f32( ... ) FS::Reciprocal_f32( __VA_ARGS__ )
// Floor, Ceil, Round
/// <summary>
/// return floor( a )
/// </summary>
/// <remarks>
/// Rounding: Towards negative infinity
/// </remarks>
/// <code>
/// float32v FS_Floor_f32( float32v a )
/// </code>
#define FS_Floor_f32( ... ) FS::Floor_f32( __VA_ARGS__ )
/// <summary>
/// return ceil( a )
/// </summary>
/// <remarks>
/// Rounding: Towards positive infinity
/// </remarks>
/// <code>
/// float32v FS_Ceil_f32( float32v a )
/// </code>
#define FS_Ceil_f32( ... ) FS::Ceil_f32( __VA_ARGS__ )
/// <summary>
/// return round( a )
/// </summary>
/// <remarks>
/// Rounding: Banker's rounding
/// </remarks>
/// <code>
/// float32v FS_Round_f32( float32v a )
/// </code>
#define FS_Round_f32( ... ) FS::Round_f32( __VA_ARGS__ )
// Trig
/// <summary>
/// return APPROXIMATE( cos( a ) )
/// </summary>
/// <code>
/// float32v FS_Cos_f32( float32v a )
/// </code>
#define FS_Cos_f32( ... ) FastSIMD::Cos_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( sin( a ) )
/// </summary>
/// <code>
/// float32v FS_Sin_f32( float32v a )
/// </code>
#define FS_Sin_f32( ... ) FastSIMD::Sin_f32<FS>( __VA_ARGS__ )
// Math
/// <summary>
/// return pow( v, pow )
/// </summary>
/// <code>
/// float32v FS_Pow_f32( float32v v, float32v pow )
/// </code>
#define FS_Pow_f32( ... ) FastSIMD::Pow_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return log( a )
/// </summary>
/// <remarks>
/// a <= 0 returns 0
/// </remarks>
/// <code>
/// float32v FS_Log_f32( float32v a )
/// </code>
#define FS_Log_f32( ... ) FastSIMD::Log_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return exp( a )
/// </summary>
/// <remarks>
/// a will be clamped to -88.376, 88.376
/// </remarks>
/// <code>
/// float32v FS_Exp_f32( float32v a )
/// </code>
#define FS_Exp_f32( ... ) FastSIMD::Exp_f32<FS>( __VA_ARGS__ )
// Mask
/// <summary>
/// return ( m ? a : 0 )
/// </summary>
/// <code>
/// int32v FS_Mask_i32( int32v a, mask32v m )
/// </code>
#define FS_Mask_i32( ... ) FS::Mask_i32( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : 0 )
/// </summary>
/// <code>
/// float32v FS_Mask_f32( float32v a, mask32v m )
/// </code>
#define FS_Mask_f32( ... ) FS::Mask_f32( __VA_ARGS__ )
/// <summary>
/// return ( m ? 0 : a )
/// </summary>
/// <code>
/// int32v FS_NMask_i32( int32v a, mask32v m )
/// </code>
#define FS_NMask_i32( ... ) FS::NMask_i32( __VA_ARGS__ )
/// <summary>
/// return ( m ? 0 : a )
/// </summary>
/// <code>
/// float32v FS_NMask_f32( float32v a, mask32v m )
/// </code>
#define FS_NMask_f32( ... ) FS::NMask_f32( __VA_ARGS__ )
/// <summary>
/// return m.contains( true )
/// </summary>
/// <code>
/// bool FS_AnyMask_bool( mask32v m )
/// </code>
#define FS_AnyMask_bool( ... ) FS::AnyMask_bool( __VA_ARGS__ )
// FMA
/// <summary>
/// return ( (a * b) + c )
/// </summary>
/// <code>
/// float32v FS_FMulAdd_f32( float32v a, float32v b, float32v c )
/// </code>
#define FS_FMulAdd_f32( ... ) FastSIMD::FMulAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( -(a * b) + c )
/// </summary>
/// <code>
/// float32v FS_FNMulAdd_f32( float32v a, float32v b, float32v c )
/// </code>
#define FS_FNMulAdd_f32( ... ) FastSIMD::FNMulAdd_f32<FS>( __VA_ARGS__ )
// Masked float
/// <summary>
/// return ( m ? (a + b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedAdd_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedAdd_f32( ... ) FastSIMD::MaskedAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedSub_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedSub_f32( ... ) FastSIMD::MaskedSub_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a * b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedMul_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedMul_f32( ... ) FastSIMD::MaskedMul_f32<FS>( __VA_ARGS__ )
// Masked int32
/// <summary>
/// return ( m ? (a + b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedAdd_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedAdd_i32( ... ) FastSIMD::MaskedAdd_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedSub_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedSub_i32( ... ) FastSIMD::MaskedSub_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a * b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedMul_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedMul_i32( ... ) FastSIMD::MaskedMul_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a + 1) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedIncrement_i32( int32v a, mask32v m )
/// </code>
#define FS_MaskedIncrement_i32( ... ) FastSIMD::MaskedIncrement_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - 1) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedDecrement_i32( int32v a, mask32v m )
/// </code>
#define FS_MaskedDecrement_i32( ... ) FastSIMD::MaskedDecrement_i32<FS>( __VA_ARGS__ )
// NMasked float
/// <summary>
/// return ( m ? a : (a + b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedAdd_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedAdd_f32( ... ) FastSIMD::NMaskedAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a - b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedSub_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedSub_f32( ... ) FastSIMD::NMaskedSub_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a * b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedMul_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedMul_f32( ... ) FastSIMD::NMaskedMul_f32<FS>( __VA_ARGS__ )
// NMasked int32
/// <summary>
/// return ( m ? a : (a + b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedAdd_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedAdd_i32( ... ) FastSIMD::NMaskedAdd_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a - b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedSub_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedSub_i32( ... ) FastSIMD::NMaskedSub_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a * b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedMul_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedMul_i32( ... ) FastSIMD::NMaskedMul_i32<FS>( __VA_ARGS__ )
namespace FastSIMD
{
//FMA
/// Multiply-add built from the vector type's own operators: returns ( (a * b) + c ).
template<typename FS>
FS_INLINE typename FS::float32v FMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
{
    typename FS::float32v product = a * b;
    return product + c;
}
/// Negated multiply-add built from the vector type's own operators: returns ( -(a * b) + c ).
template<typename FS>
FS_INLINE typename FS::float32v FNMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
{
    typename FS::float32v negatedProduct = -(a * b);
    return negatedProduct + c;
}
// Masked float
/// Masked add: returns ( m ? (a + b) : a ).
/// The addend is zeroed with Mask_f32 in lanes where the mask is not set.
template<typename FS>
FS_INLINE typename FS::float32v MaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    typename FS::float32v addend = FS::Mask_f32( b, m );
    return a + addend;
}
/// Masked subtract: returns ( m ? (a - b) : a ).
/// The subtrahend is zeroed with Mask_f32 in lanes where the mask is not set.
template<typename FS>
FS_INLINE typename FS::float32v MaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    typename FS::float32v subtrahend = FS::Mask_f32( b, m );
    return a - subtrahend;
}
/// Masked multiply: returns ( m ? (a * b) : a ).
///
/// The product is blended into `a` bitwise: where the mask is set the result is
/// ( a ^ a ^ (a * b) ) == (a * b); elsewhere the masked term is zero and the
/// result is ( a ^ 0 ) == a. Multiplying `a` by Mask_f32( b, m ) instead would
/// multiply unmasked lanes by zero and return 0 rather than `a`.
template<typename FS>
FS_INLINE typename FS::float32v MaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    return a ^ FS::Mask_f32( a ^ (a * b), m );
}
// Masked int32
/// Masked add: returns ( m ? (a + b) : a ).
/// The addend is zeroed with Mask_i32 in lanes where the mask is not set.
template<typename FS>
FS_INLINE typename FS::int32v MaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v addend = FS::Mask_i32( b, m );
    return a + addend;
}
/// Masked subtract: returns ( m ? (a - b) : a ).
/// The subtrahend is zeroed with Mask_i32 in lanes where the mask is not set.
template<typename FS>
FS_INLINE typename FS::int32v MaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v subtrahend = FS::Mask_i32( b, m );
    return a - subtrahend;
}
/// Masked multiply: returns ( m ? (a * b) : a ).
///
/// The multiplier is turned into ( m ? b : 1 ) by masking (b - 1) and adding the
/// 1 back, so unmasked lanes are multiplied by one and keep `a`. Multiplying by
/// Mask_i32( b, m ) directly would multiply unmasked lanes by zero and return 0.
template<typename FS>
FS_INLINE typename FS::int32v MaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v one( 1 );
    typename FS::int32v multiplier = FS::Mask_i32( b - one, m ) + one;
    return a * multiplier;
}
// NMasked float
/// Inverse-masked add: returns ( m ? a : (a + b) ).
/// The addend is zeroed with NMask_f32 in lanes where the mask is set.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    typename FS::float32v addend = FS::NMask_f32( b, m );
    return a + addend;
}
/// Inverse-masked subtract: returns ( m ? a : (a - b) ).
/// The subtrahend is zeroed with NMask_f32 in lanes where the mask is set.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    typename FS::float32v subtrahend = FS::NMask_f32( b, m );
    return a - subtrahend;
}
/// Inverse-masked multiply: returns ( m ? a : (a * b) ).
///
/// The product is blended into `a` bitwise: where the mask is clear the result
/// is ( a ^ a ^ (a * b) ) == (a * b); where it is set the NMask'd term is zero
/// and the result is ( a ^ 0 ) == a. Multiplying `a` by NMask_f32( b, m ) instead
/// would multiply masked lanes by zero and return 0 rather than `a`.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    return a ^ FS::NMask_f32( a ^ (a * b), m );
}
// NMasked int32
/// Inverse-masked add: returns ( m ? a : (a + b) ).
/// The addend is zeroed with NMask_i32 in lanes where the mask is set.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v addend = FS::NMask_i32( b, m );
    return a + addend;
}
/// Inverse-masked subtract: returns ( m ? a : (a - b) ).
/// The subtrahend is zeroed with NMask_i32 in lanes where the mask is set.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v subtrahend = FS::NMask_i32( b, m );
    return a - subtrahend;
}
/// Inverse-masked multiply: returns ( m ? a : (a * b) ).
///
/// The multiplier is turned into ( m ? 1 : b ) by NMask-ing (b - 1) and adding
/// the 1 back, so masked lanes are multiplied by one and keep `a`. Multiplying
/// by NMask_i32( b, m ) directly would multiply masked lanes by zero and return 0.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    typename FS::int32v one( 1 );
    typename FS::int32v multiplier = FS::NMask_i32( b - one, m ) + one;
    return a * multiplier;
}
/// Masked increment, returns ( m ? (a + 1) : a ).
/// Overload selected when mask32v is the same type as int32v: the mask is
/// subtracted directly from the value.
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
    typename FS::int32v incremented = a - m;
    return incremented;
}

/// Masked increment, returns ( m ? (a + 1) : a ).
/// Overload selected when mask32v is a distinct type: implemented as a masked
/// subtraction of -1.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
    typename FS::int32v minusOne( -1 );
    return MaskedSub_i32<FS>( a, minusOne, m );
}
/// Masked decrement, returns ( m ? (a - 1) : a ).
/// Overload selected when mask32v is the same type as int32v: the mask is added
/// directly to the value.
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
    typename FS::int32v decremented = a + m;
    return decremented;
}

/// Masked decrement, returns ( m ? (a - 1) : a ).
/// Overload selected when mask32v is a distinct type: implemented as a masked
/// addition of -1.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
    typename FS::int32v minusOne( -1 );
    return MaskedAdd_i32<FS>( a, minusOne, m );
}
// Bitwise
/// Mask and-not. Overload selected when mask32v is the same type as int32v:
/// forwards to the level's integer BitwiseAndNot_i32( a, b ).
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
{
    typename FS::mask32v combined = FS::BitwiseAndNot_i32( a, b );
    return combined;
}

/// Mask and-not. Overload selected when mask32v is a distinct type:
/// returns ( a & ~b ) using the mask type's own operators.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
{
    typename FS::mask32v invertedB = ~b;
    return a & invertedB;
}
// Trig
/// Per-lane cosine approximation.
///
/// The input is made non-negative and reduced to one period, folded into the
/// first quadrant, evaluated with a short even polynomial, and finally the sign
/// is restored for the quadrants where it has to be negative.
/// NOTE(review): accuracy is bounded by the 3-term polynomial; the exact error
/// bound is not stated in this file.
template<typename FS>
FS_INLINE typename FS::float32v Cos_f32( typename FS::float32v value )
{
typedef typename FS::int32v int32v;
typedef typename FS::float32v float32v;
typedef typename FS::mask32v mask32v;
// The sign is dropped first, then whole periods are removed:
// 0.1591549 ~= 1 / (2*pi) and 6.283185 ~= 2*pi.
value = FS_Abs_f32( value );
value -= FS_Floor_f32( value * float32v( 0.1591549f ) ) * float32v( 6.283185f );
// Quadrant tests against pi/2, pi and 3*pi/2.
mask32v geHalfPi = value >= float32v( 1.570796f );
mask32v geHalfPi2 = value >= float32v( 3.141593f );
mask32v geHalfPi3 = value >= float32v( 4.7123889f );
// XOR-blend: lanes with value >= pi/2 become ( pi - value ), others keep value.
// ( '-' binds tighter than '^', so the masked term is value ^ (pi - value). )
float32v cosAngle = value ^ FS_Mask_f32( ( value ^ float32v( 3.141593f ) - value ), geHalfPi );
// Lanes with value >= pi get their sign bit flipped; the sign is discarded by the squaring below.
cosAngle = cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), geHalfPi2 );
// XOR-blend: lanes with value >= 3*pi/2 become ( 2*pi - value ).
cosAngle = cosAngle ^ FS_Mask_f32( cosAngle ^ ( float32v( 6.283185f ) - value ), geHalfPi3 );
// Polynomial in the squared folded angle s: 0.99940307 + s * ( -0.49558072 + s * 0.03679168 ).
cosAngle *= cosAngle;
cosAngle = FS_FMulAdd_f32( cosAngle, FS_FMulAdd_f32( cosAngle, float32v( 0.03679168f ), float32v( -0.49558072f ) ), float32v( 0.99940307f ) );
// Flip the sign bit for lanes in [pi/2, 3*pi/2), i.e. geHalfPi and not geHalfPi3.
return cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), FS_BitwiseAndNot_m32( geHalfPi, geHalfPi3 ) );
}
/// Per-lane sine, computed as Cos_f32( 1.570796 - value ), where 1.570796 ~= pi/2.
template<typename FS>
FS_INLINE typename FS::float32v Sin_f32( typename FS::float32v value )
{
    typename FS::float32v shifted = typename FS::float32v( 1.570796f ) - value;
    return Cos_f32<FS>( shifted );
}
/// Per-lane exponential approximation.
///
/// The input is clamped to +/-88.3762626647949, split as x = g + n*log(2),
/// exp(g) is evaluated with a polynomial and the result is scaled by 2^n, which
/// is built directly in the float exponent bits.
/// NOTE(review): the constants match the single-precision Cephes expf routine; confirm if the provenance matters.
template<typename FS>
FS_INLINE typename FS::float32v Exp_f32( typename FS::float32v x )
{
typedef typename FS::int32v int32v;
typedef typename FS::float32v float32v;
// Clamp the input so that building 2^n below stays within the float exponent range.
x = FS_Min_f32( x, float32v( 88.3762626647949f ) );
x = FS_Max_f32( x, float32v( -88.3762626647949f ) );
/* express exp(x) as exp(g + n*log(2)) */
// fx = x * log2(e) + 0.5, then floored: n = round( x / log(2) ).
float32v fx = x * float32v( 1.44269504088896341f );
fx += float32v( 0.5f );
float32v flr = FS_Floor_f32( fx );
// Subtract 1 in lanes where the floored value compares greater than fx.
fx = FS_MaskedSub_f32( flr, float32v( 1 ), flr > fx );
// Remove n*log(2) in two steps; 0.693359375 + (-2.12194440e-4) ~= log(2).
x -= fx * float32v( 0.693359375f );
x -= fx * float32v( -2.12194440e-4f );
// Horner evaluation of the polynomial in the reduced argument.
float32v y( 1.9875691500E-4f );
y *= x;
y += float32v( 1.3981999507E-3f );
y *= x;
y += float32v( 8.3334519073E-3f );
y *= x;
y += float32v( 4.1665795894E-2f );
y *= x;
y += float32v( 1.6666665459E-1f );
y *= x;
y += float32v( 5.0000001201E-1f );
// exp(g) ~= 1 + g + g^2 * poly(g)
y *= x * x;
y += x + float32v( 1 );
/* build 2^n */
int32v i = FS_Convertf32_i32( fx );
// another two AVX2 instructions
// Add the exponent bias (0x7f) and shift into the exponent field (bit 23 upwards).
i += int32v( 0x7f );
i <<= 23;
float32v pow2n = FS_Casti32_f32( i );
return y * pow2n;
}
/// Per-lane natural logarithm approximation.
///
/// The float is split into exponent `e` and mantissa, the mantissa is reduced
/// around 1, log(1 + x) is evaluated with a polynomial and e*log(2) is added
/// back. log(2) is applied in two parts (-2.12194440e-4 and 0.693359375) to
/// limit rounding error.
///
/// Lanes that fail the test ( x > 0 ) return 0.
template<typename FS>
FS_INLINE typename FS::float32v Log_f32( typename FS::float32v x )
{
    typedef typename FS::int32v int32v;
    typedef typename FS::float32v float32v;
    typedef typename FS::mask32v mask32v;

    // Remember which lanes hold a positive input; everything else is zeroed at the end.
    mask32v validMask = x > float32v( 0 );

    x = FS_Max_f32( x, FS_Casti32_f32( int32v( 0x00800000 ) ) ); /* cut off denormalized stuff */

    // can be done with AVX2
    // Raw biased exponent: the float bits shifted down past the 23 mantissa bits.
    int32v i = FS_BitwiseShiftRightZX_i32( FS_Castf32_i32( x ), 23 );

    /* keep only the fractional part */
    // Clear the exponent bits and OR in the bit pattern of 0.5, giving a value in [0.5, 1).
    x &= FS_Casti32_f32( int32v( ~0x7f800000 ) );
    x |= float32v( 0.5f );

    // this is again another AVX2 instruction
    // Remove the exponent bias; +1 compensates for the mantissa being scaled to [0.5, 1).
    i -= int32v( 0x7f );
    float32v e = FS_Converti32_f32( i );
    e += float32v( 1 );

    // For mantissas below sqrt(0.5): double the mantissa and lower the exponent by one.
    mask32v mask = x < float32v( 0.707106781186547524f );
    x = FS_MaskedAdd_f32( x, x, mask );
    x -= float32v( 1 );
    e = FS_MaskedSub_f32( e, float32v( 1 ), mask );

    // Horner evaluation of the polynomial in the reduced argument.
    float32v y = float32v( 7.0376836292E-2f );
    y *= x;
    y += float32v( -1.1514610310E-1f );
    y *= x;
    y += float32v( 1.1676998740E-1f );
    y *= x;
    y += float32v( -1.2420140846E-1f );
    y *= x;
    y += float32v( 1.4249322787E-1f );
    y *= x;
    y += float32v( -1.6668057665E-1f );
    y *= x;
    y += float32v( 2.0000714765E-1f );
    y *= x;
    y += float32v( -2.4999993993E-1f );
    y *= x;
    y += float32v( 3.3333331174E-1f );
    y *= x;

    float32v xx = x * x;
    y *= xx;

    // Low part of e*log(2). This term is ADDED to the polynomial: multiplying
    // here would scale the polynomial by the exponent and wipe it out entirely
    // whenever e == 0.
    y += e * float32v( -2.12194440e-4f );
    y -= xx * float32v( 0.5f );

    x += y;
    // High part of e*log(2).
    x += e * float32v( 0.693359375f );

    return FS_Mask_f32( x, validMask );
}
/// Per-lane power function, computed as Exp_f32( pow * Log_f32( value ) ).
template<typename FS>
FS_INLINE typename FS::float32v Pow_f32( typename FS::float32v value, typename FS::float32v pow )
{
    typename FS::float32v logValue = Log_f32<FS>( value );
    return Exp_f32<FS>( pow * logValue );
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include "FunctionList.h"
// Forward declaration of the class template parameterised on a class (CLASS)
// and a SIMD level implementation (FS); no definition is provided in this header.
template<typename CLASS, typename FS>
class FS_T;
// Introduces the aliases float32v, int32v and mask32v for the vector types of
// the template parameter named FS that is in scope at the point of use. The
// last alias has no trailing ';', so the macro is written as a statement:
//     FASTSIMD_DECLARE_FS_TYPES;
#define FASTSIMD_DECLARE_FS_TYPES \
using float32v = typename FS::float32v;\
using int32v = typename FS::int32v;\
using mask32v = typename FS::mask32v

View File

@ -0,0 +1,37 @@
#pragma once
#include "FastSIMD.h"
namespace FastSIMD
{
// Compile-time list of SIMD levels.
// Primary template: reached for the empty list (any non-empty list matches the
// HEAD/TAIL specialisation below), so both queries terminate with Level_Null.
template<eLevel... T>
struct SIMDTypeContainer
{
static constexpr eLevel MinimumCompiled = Level_Null;
template<eLevel L>
static constexpr eLevel GetNextCompiledAfter = Level_Null;
};
// Recursive case: HEAD is the first level of the list, TAIL the remainder.
template<eLevel HEAD, eLevel... TAIL>
struct SIMDTypeContainer<HEAD, TAIL...>
{
// First level in list order whose bit is set in COMPILED_SIMD_LEVELS.
static constexpr eLevel MinimumCompiled = (HEAD & COMPILED_SIMD_LEVELS) != 0 ? HEAD : SIMDTypeContainer<TAIL...>::MinimumCompiled;
// First compiled level that follows L in the list, or Level_Null when there is none
// (or when L is not in the list).
template<eLevel L>
static constexpr eLevel GetNextCompiledAfter = (L == HEAD) ? SIMDTypeContainer<TAIL...>::MinimumCompiled : SIMDTypeContainer<TAIL...>::template GetNextCompiledAfter<L>;
};
// The list of levels the queries above walk, in this order.
using SIMDTypeList = SIMDTypeContainer<
Level_Scalar,
Level_SSE,
Level_SSE2,
Level_SSE3,
Level_SSSE3,
Level_SSE41,
Level_SSE42,
Level_AVX,
Level_AVX2,
Level_AVX512,
Level_NEON>;
}

95
deps/FastNoise2/src/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,95 @@
# The sources in this directory use C++17 features (e.g. std::is_same_v), so
# request C++17 for the targets defined here and make it a hard requirement:
# without CMAKE_CXX_STANDARD_REQUIRED CMake may silently fall back to an older
# standard when the compiler does not support the requested one.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# FastSIMD public headers: every .h below ../include/FastSIMD, followed by the
# .inl files that sit next to the sources and the .inl files from the public
# include tree.
file(GLOB_RECURSE FastSIMD_headers "../include/FastSIMD/*.h")
file(GLOB FastSIMD_inline "FastSIMD/*.inl")
file(GLOB_RECURSE FastSIMD_include_inl "../include/FastSIMD/*.inl")
list(APPEND FastSIMD_headers
  ${FastSIMD_inline}
  ${FastSIMD_include_inl}
)

# FastSIMD internal headers: .h files followed by .inl files from FastSIMD/Internal.
file(GLOB_RECURSE FastSIMD_internal_headers "FastSIMD/Internal/*.h")
file(GLOB_RECURSE FastSIMD_internal_inl "FastSIMD/Internal/*.inl")
list(APPEND FastSIMD_internal_headers ${FastSIMD_internal_inl})

# One translation unit per SIMD level plus the common FastSIMD.cpp.
set(FastSIMD_sources
  FastSIMD/FastSIMD.cpp
  FastSIMD/FastSIMD_Level_AVX2.cpp
  FastSIMD/FastSIMD_Level_AVX512.cpp
  FastSIMD/FastSIMD_Level_NEON.cpp
  FastSIMD/FastSIMD_Level_Scalar.cpp
  FastSIMD/FastSIMD_Level_SSE2.cpp
  FastSIMD/FastSIMD_Level_SSE3.cpp
  FastSIMD/FastSIMD_Level_SSE41.cpp
  FastSIMD/FastSIMD_Level_SSE42.cpp
  FastSIMD/FastSIMD_Level_SSSE3.cpp
)
# FastNoise top-level headers (.h followed by .inl, non-recursive).
file(GLOB FastNoise_headers "../include/FastNoise/*.h")
file(GLOB FastNoise_inl "../include/FastNoise/*.inl")
list(APPEND FastNoise_headers ${FastNoise_inl})

# Generator headers (.h followed by .inl, recursive).
file(GLOB_RECURSE FastNoise_generators_headers "../include/FastNoise/Generators/*.h")
file(GLOB_RECURSE FastNoise_generators_inl "../include/FastNoise/Generators/*.inl")
list(APPEND FastNoise_generators_headers ${FastNoise_generators_inl})

# The only FastNoise translation unit listed for the library.
set(FastNoise_source
  FastNoise/FastNoiseMetadata.cpp
)
# IDE folder layout for the files collected above.
source_group("SIMD" FILES
  ${FastSIMD_headers}
  ${FastSIMD_sources}
)
source_group("SIMD\\internals" FILES ${FastSIMD_internal_headers})
source_group("FastNoise" FILES
  ${FastNoise_headers}
  ${FastNoise_source}
)
source_group("FastNoise\\Generators" FILES ${FastNoise_generators_headers})
# The FastNoise library: FastNoise and FastSIMD sources, with the headers
# listed as well so that they are attached to the target.
add_library(FastNoise
  ${FastNoise_headers}
  ${FastNoise_source}
  ${FastNoise_generators_headers}
  ${FastSIMD_headers}
  ${FastSIMD_internal_headers}
  ${FastSIMD_sources}
)

# The public headers use C++17 constructs, so every target that links against
# FastNoise has to be compiled as C++17 or newer as well. Declaring the
# requirement on the target propagates it to consumers.
target_compile_features(FastNoise PUBLIC cxx_std_17)

# Export the install lists to the directory that added this one.
set(install_targets ${install_targets} FastNoise PARENT_SCOPE)
set(install_fastnoise_headers ${FastNoise_headers} PARENT_SCOPE)
set(install_fastsimd_headers ${FastSIMD_headers} PARENT_SCOPE)

# Public include root; SYSTEM so that consumers treat the headers as system headers.
target_include_directories(FastNoise SYSTEM PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
  $<INSTALL_INTERFACE:include>
)
# Compiler-specific options for the whole target, plus the instruction-set flag
# each FastSIMD level file is compiled with.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
  target_compile_options(FastNoise PRIVATE /GL- /GS- /fp:fast)

  # 32-bit builds only: explicit /arch for the scalar and SSE level files.
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    set_source_files_properties(
      FastSIMD/FastSIMD_Level_Scalar.cpp
      PROPERTIES COMPILE_FLAGS "/arch:SSE")
    set_source_files_properties(
      FastSIMD/FastSIMD_Level_SSE2.cpp
      FastSIMD/FastSIMD_Level_SSE3.cpp
      FastSIMD/FastSIMD_Level_SSSE3.cpp
      FastSIMD/FastSIMD_Level_SSE41.cpp
      FastSIMD/FastSIMD_Level_SSE42.cpp
      PROPERTIES COMPILE_FLAGS "/arch:SSE2")
  endif()

  set_source_files_properties(
    FastSIMD/FastSIMD_Level_AVX2.cpp
    PROPERTIES COMPILE_FLAGS "/arch:AVX2")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_AVX512.cpp
    PROPERTIES COMPILE_FLAGS "/arch:AVX512")

elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(Clang|GNU|AppleClang)$")
  # MSVC is also set for a Clang front end that emulates cl, hence the MSVC-style options.
  if(MSVC)
    target_compile_options(FastNoise PRIVATE /GS- /fp:fast)
  else()
    target_compile_options(FastNoise PRIVATE "-ffast-math")
  endif()

  # 32-bit builds only: explicit flags for the scalar and SSE2 level files.
  if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR "${CMAKE_CXX_FLAGS}" MATCHES "-m32")
    set_source_files_properties(
      FastSIMD/FastSIMD_Level_Scalar.cpp
      PROPERTIES COMPILE_FLAGS "-msse")
    set_source_files_properties(
      FastSIMD/FastSIMD_Level_SSE2.cpp
      PROPERTIES COMPILE_FLAGS "-msse2")
  endif()

  set_source_files_properties(
    FastSIMD/FastSIMD_Level_SSE3.cpp
    PROPERTIES COMPILE_FLAGS "-msse3")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_SSSE3.cpp
    PROPERTIES COMPILE_FLAGS "-mssse3")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_SSE41.cpp
    PROPERTIES COMPILE_FLAGS "-msse4.1")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_SSE42.cpp
    PROPERTIES COMPILE_FLAGS "-msse4.2")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_AVX2.cpp
    PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
  set_source_files_properties(
    FastSIMD/FastSIMD_Level_AVX512.cpp
    PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512dq -mfma")
endif()

127
deps/FastNoise2/src/FastNoise/Base64.h vendored Normal file
View File

@ -0,0 +1,127 @@
#pragma once
#include <cstring>
#include <string>
#include <vector>
#include <cstdint>
namespace FastNoise
{
/** https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594
* The MIT License (MIT)
* Copyright (c) 2016 tomykaira
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
namespace Base64
{
/// Encodes a byte buffer as padded Base64 text.
///
/// @param data  Bytes to encode; may be empty.
/// @return      Base64 string whose length is a multiple of 4, padded with '='.
///              An empty input yields an empty string.
static std::string Encode( const std::vector<uint8_t>& data )
{
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3',
        '4', '5', '6', '7', '8', '9', '+', '/'
    };

    const size_t in_len = data.size();
    const size_t out_len = 4 * ((in_len + 2) / 3);
    std::string ret( out_len, '\0' );
    char* p = &ret[0];

    // Full 3-byte groups. The bound is written as `i + 2 < in_len` because
    // `in_len - 2` wraps around for inputs shorter than two bytes and would
    // let the loop run far past the end of both buffers.
    size_t i = 0;
    for( ; i + 2 < in_len; i += 3 )
    {
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
        *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
        *p++ = sEncodingTable[data[i + 2] & 0x3F];
    }

    // Trailing 1 or 2 bytes, padded with '=' up to a full group of 4 characters.
    if( i < in_len )
    {
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        if( i + 1 == in_len )
        {
            *p++ = sEncodingTable[((data[i] & 0x3) << 4)];
            *p++ = '=';
        }
        else
        {
            *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
            *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2)];
        }
        *p++ = '=';
    }

    return ret;
}
/// Decodes padded Base64 text into bytes.
///
/// @param input  NUL-terminated Base64 string.
/// @return       The decoded bytes. An empty vector is returned when `input` is
///               null or empty, when its length is not a multiple of 4, or
///               when it contains a character outside the Base64 alphabet.
///               '=' is accepted in any position and decodes as zero bits.
static std::vector<uint8_t> Decode( const char* input )
{
    // Maps a byte to its 6-bit value; 64 marks bytes outside the alphabet.
    static constexpr unsigned char kDecodingTable[] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
        64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
        64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    };

    if( !input ) return {};

    const size_t in_len = std::strlen( input );
    // The empty string is rejected here as well: the padding checks below
    // index input[in_len - 1] and input[in_len - 2].
    if( in_len == 0 || in_len % 4 != 0 ) return {};

    size_t out_len = in_len / 4 * 3;
    if( input[in_len - 1] == '=' ) out_len--;
    if( input[in_len - 2] == '=' ) out_len--;

    std::vector<uint8_t> out( out_len );

    for( size_t i = 0, j = 0; i < in_len; i += 4 )
    {
        // Pack four 6-bit values into one 24-bit group, first character highest.
        uint32_t triple = 0;
        for( size_t k = 0; k < 4; k++ )
        {
            const char ch = input[i + k];
            uint32_t sextet = 0;

            if( ch != '=' )
            {
                // Index through unsigned char: plain char may be signed, and a
                // negative index would read in front of the table.
                sextet = kDecodingTable[static_cast<unsigned char>( ch )];
                if( sextet > 63 ) return {}; // not a Base64 character
            }

            triple = (triple << 6) | sextet;
        }

        if( j < out_len ) out[j++] = (triple >> 2 * 8) & 0xFF;
        if( j < out_len ) out[j++] = (triple >> 1 * 8) & 0xFF;
        if( j < out_len ) out[j++] = (triple >> 0 * 8) & 0xFF;
    }

    return out;
}
};
}

View File

@ -0,0 +1,390 @@
#include "FastNoise/FastNoiseMetadata.h"
#include "Base64.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <unordered_set>
using namespace FastNoise;
std::vector<const Metadata*> Metadata::sMetadataClasses;
// Builds a NodeData for the given metadata: one entry per declared member,
// each initialised to its default (variables and hybrid values) or to a null
// node link. A null metadata pointer leaves every member list untouched.
NodeData::NodeData( const Metadata* data )
{
    metadata = data;

    if( !metadata )
    {
        return;
    }

    for( const auto& variable : metadata->memberVariables )
    {
        variables.push_back( variable.valueDefault );
    }

    // One unset link per node member.
    for( size_t i = 0; i < metadata->memberNodes.size(); i++ )
    {
        nodes.push_back( nullptr );
    }

    // Hybrids start without a node and with their default value.
    for( const auto& hybrid : metadata->memberHybrids )
    {
        hybrids.emplace_back( nullptr, hybrid.valueDefault );
    }
}
// Appends `value` to the stream as sizeof( T ) bytes, least-significant byte first.
template<typename T>
void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
{
    for( size_t shift = 0; shift < sizeof( T ) * 8; shift += 8 )
    {
        const uint8_t byte = (uint8_t)(value >> shift);
        dataStream.push_back( byte );
    }
}
// Recursively serialises `nodeData` and everything it links to into `dataStream`.
//
// Layout written per node: the 16-bit metadata id, every member variable, every
// member node (recursively), then every hybrid as a one-byte tag (0 = value,
// 1 = node) followed by the value or the nested node. A node that has already
// been written is emitted as the 16-bit marker UINT16_MAX followed by its
// 16-bit reference id instead.
//
// nodeData      Node to serialise; must not be null.
// fixUp         When true, links that would create a cycle and links rejected
//               by the member's set function are reset to null in `nodeData`.
// dataStream    Receives the serialised bytes.
// referenceIds  Nodes written so far, mapped to their reference id. Ids are
//               assigned after a node's children, in order of completion.
// dependancies  Nodes on the path from the root to this node. Taken by value so
//               that every branch of the recursion sees only its own ancestors.
//
// Returns false when the node does not match its metadata, when a required
// member node is missing, or when a link is rejected during fix up. The
// contents of `dataStream` are unspecified in that case.
bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint8_t>& dataStream, std::unordered_map<const NodeData*, uint16_t>& referenceIds, std::unordered_set<const NodeData*> dependancies = {} )
{
    const Metadata* metadata = nodeData->metadata;

    if( !metadata ||
        nodeData->variables.size() != metadata->memberVariables.size() ||
        nodeData->nodes.size() != metadata->memberNodes.size() ||
        nodeData->hybrids.size() != metadata->memberHybrids.size() )
    {
        assert( 0 ); // Member size mismatch with metadata
        return false;
    }

    if( fixUp )
    {
        // Break cycles: drop any link that points back at this node or one of its ancestors.
        dependancies.insert( nodeData );

        for( auto& node : nodeData->nodes )
        {
            if( dependancies.find( node ) != dependancies.end() )
            {
                node = nullptr;
            }
        }
        for( auto& hybrid : nodeData->hybrids )
        {
            if( dependancies.find( hybrid.first ) != dependancies.end() )
            {
                hybrid.first = nullptr;
            }
        }
    }

    auto reference = referenceIds.find( nodeData );

    if( reference != referenceIds.end() )
    {
        // The marker has to occupy exactly two bytes because the reader consumes
        // it as a uint16_t. UINT16_MAX itself is an int on common C libraries,
        // which would emit four bytes and desynchronise the stream.
        AddToDataStream( dataStream, static_cast<uint16_t>( UINT16_MAX ) );
        AddToDataStream( dataStream, reference->second );
        return true;
    }

    AddToDataStream( dataStream, metadata->id );

    for( size_t i = 0; i < metadata->memberVariables.size(); i++ )
    {
        AddToDataStream( dataStream, nodeData->variables[i].i );
    }

    for( size_t i = 0; i < metadata->memberNodes.size(); i++ )
    {
        if( fixUp && nodeData->nodes[i] )
        {
            // Trial run on throwaway generators: check that the member accepts this kind of node.
            std::unique_ptr<Generator> gen( metadata->NodeFactory() );
            SmartNode<> node( nodeData->nodes[i]->metadata->NodeFactory() );

            if( !metadata->memberNodes[i].setFunc( gen.get(), node ) )
            {
                nodeData->nodes[i] = nullptr;
                return false;
            }
        }

        // Member nodes are mandatory: a missing link fails the serialisation.
        if( !nodeData->nodes[i] || !SerialiseNodeDataInternal( nodeData->nodes[i], fixUp, dataStream, referenceIds, dependancies ) )
        {
            return false;
        }
    }

    for( size_t i = 0; i < metadata->memberHybrids.size(); i++ )
    {
        if( !nodeData->hybrids[i].first )
        {
            // Tag 0: plain value, written through the integer view of the value union.
            AddToDataStream( dataStream, (uint8_t)0 );

            Metadata::MemberVariable::ValueUnion v = nodeData->hybrids[i].second;

            AddToDataStream( dataStream, v.i );
        }
        else
        {
            if( fixUp )
            {
                // Same trial run as for member nodes.
                std::unique_ptr<Generator> gen( metadata->NodeFactory() );
                std::shared_ptr<Generator> node( nodeData->hybrids[i].first->metadata->NodeFactory() );

                if( !metadata->memberHybrids[i].setNodeFunc( gen.get(), node ) )
                {
                    nodeData->hybrids[i].first = nullptr;
                    return false;
                }
            }

            // Tag 1: nested node.
            AddToDataStream( dataStream, (uint8_t)1 );
            if( !SerialiseNodeDataInternal( nodeData->hybrids[i].first, fixUp, dataStream, referenceIds, dependancies ) )
            {
                return false;
            }
        }
    }

    // Register this node only once it is fully written, so later occurrences become references.
    referenceIds.emplace( nodeData, (uint16_t)referenceIds.size() );

    return true;
}
// Serialises the node tree rooted at `nodeData` and returns it as Base64 text.
// Returns an empty string when the tree cannot be serialised. `fixUp` is
// forwarded unchanged to SerialiseNodeDataInternal.
std::string Metadata::SerialiseNodeData( NodeData* nodeData, bool fixUp )
{
    std::unordered_map<const NodeData*, uint16_t> writtenNodes;
    std::vector<uint8_t> bytes;

    const bool ok = SerialiseNodeDataInternal( nodeData, fixUp, bytes, writtenNodes );

    return ok ? Base64::Encode( bytes ) : std::string();
}
// Reads one T from `dataStream` at byte offset `idx` and advances `idx` past it.
//
// The bytes are copied with memcpy instead of being read through a cast
// pointer: `idx` is an arbitrary byte offset, so the data is generally not
// aligned for T. T must be trivially copyable.
//
// Returns false, leaving `idx` and `value` untouched, when fewer than
// sizeof( T ) bytes remain.
template<typename T>
bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T& value )
{
    // Written as a subtraction so that a large idx cannot wrap the comparison.
    if( idx > dataStream.size() || dataStream.size() - idx < sizeof( T ) )
    {
        return false;
    }

    std::memcpy( &value, dataStream.data() + idx, sizeof( T ) );

    idx += sizeof( T );
    return true;
}
// Rebuilds one generator, and recursively everything it links to, from the
// byte stream, starting at `serialIdx` (advanced past the consumed bytes).
//
// serialisedNodeData  Decoded byte stream.
// serialIdx           Read position, updated as bytes are consumed.
// referenceNodes      Generators completed so far, keyed by reference id (the
//                     number of generators completed before them).
// level               SIMD level passed on to NodeFactory.
//
// Returns a null node when the stream is truncated, names an unknown node or
// reference id, carries a hybrid tag other than 0 or 1, or when a member
// rejects the node read for it.
SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::unordered_map<uint16_t, SmartNode<>>& referenceNodes, FastSIMD::eLevel level = FastSIMD::Level_Null )
{
uint16_t nodeId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
{
return nullptr;
}
// UINT16_MAX in place of a node id marks a back-reference to an already built generator.
if( nodeId == UINT16_MAX )
{
uint16_t referenceId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )
{
return nullptr;
}
auto refNode = referenceNodes.find( referenceId );
if( refNode == referenceNodes.end() )
{
return nullptr;
}
return refNode->second;
}
const Metadata* metadata = Metadata::GetMetadataClass( nodeId );
if( !metadata )
{
return nullptr;
}
// NOTE(review): the factory result is used without a null check - confirm NodeFactory cannot return null for `level`.
SmartNode<> generator( metadata->NodeFactory( level ) );
// Member variables: one raw value each, in metadata order.
for( const auto& var : metadata->memberVariables )
{
Metadata::MemberVariable::ValueUnion v;
if( !GetFromDataStream( serialisedNodeData, serialIdx, v ) )
{
return nullptr;
}
var.setFunc( generator.get(), v );
}
// Member nodes: each one is a nested node and is mandatory.
for( const auto& node : metadata->memberNodes )
{
SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
if( !nodeGen || !node.setFunc( generator.get(), nodeGen ) )
{
return nullptr;
}
}
// Hybrids: a one-byte tag selects between a nested node (1) and a float value (0).
for( const auto& hybrid : metadata->memberHybrids )
{
uint8_t isGenerator;
if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
{
return nullptr;
}
if( isGenerator )
{
SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
if( !nodeGen || !hybrid.setNodeFunc( generator.get(), nodeGen ) )
{
return nullptr;
}
}
else
{
float v;
if( !GetFromDataStream( serialisedNodeData, serialIdx, v ) )
{
return nullptr;
}
hybrid.setValueFunc( generator.get(), v );
}
}
// Register only after all children, so the id equals the number of generators completed before this one.
referenceNodes.emplace( (uint16_t)referenceNodes.size(), generator );
return generator;
}
// Decodes the Base64 text and rebuilds the generator tree it describes for the
// requested SIMD level. The result of DeserialiseSmartNodeInternal is returned
// unchanged, including a null node on failure.
SmartNode<> Metadata::DeserialiseSmartNode( const char* serialisedBase64NodeData, FastSIMD::eLevel level )
{
    const std::vector<uint8_t> decoded = Base64::Decode( serialisedBase64NodeData );

    std::unordered_map<uint16_t, SmartNode<>> nodesById;
    size_t cursor = 0;

    return DeserialiseSmartNodeInternal( decoded, cursor, nodesById, level );
}
// Rebuilds one NodeData, and recursively everything it links to, from the byte
// stream, starting at `serialIdx` (advanced past the consumed bytes).
//
// serialisedNodeData  Decoded byte stream.
// nodeDataOut         Receives ownership of every NodeData that was completed;
//                     children are appended before their parent.
// serialIdx           Read position, updated as bytes are consumed.
// referenceNodes      Nodes completed so far, keyed by reference id (the number
//                     of nodes completed before them).
//
// Returns a non-owning pointer to the new node, or nullptr when the stream is
// truncated, names an unknown node or reference id, or carries a hybrid tag
// other than 0 or 1. Children completed before a failure stay in `nodeDataOut`.
NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut, size_t& serialIdx, std::unordered_map<uint16_t, NodeData*>& referenceNodes )
{
uint16_t nodeId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
{
return nullptr;
}
// UINT16_MAX in place of a node id marks a back-reference to an already built node.
if( nodeId == UINT16_MAX )
{
uint16_t referenceId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )
{
return nullptr;
}
auto refNode = referenceNodes.find( referenceId );
if( refNode == referenceNodes.end() )
{
return nullptr;
}
return refNode->second;
}
const Metadata* metadata = Metadata::GetMetadataClass( nodeId );
if( !metadata )
{
return nullptr;
}
// The constructor sizes variables, nodes and hybrids from the metadata, so the loops below read exactly one entry per declared member.
std::unique_ptr<NodeData> nodeData( new NodeData( metadata ) );
for( auto& var : nodeData->variables )
{
if( !GetFromDataStream( serialisedNodeData, serialIdx, var ) )
{
return nullptr;
}
}
// Member nodes: each one is a nested node and is mandatory.
for( auto& node : nodeData->nodes )
{
node = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx, referenceNodes );
if( !node )
{
return nullptr;
}
}
// Hybrids: a one-byte tag selects between a nested node (1) and a stored value (0).
for( auto& hybrid : nodeData->hybrids )
{
uint8_t isGenerator;
if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
{
return nullptr;
}
if( isGenerator )
{
hybrid.first = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx, referenceNodes );
if( !hybrid.first )
{
return nullptr;
}
}
else
{
if( !GetFromDataStream( serialisedNodeData, serialIdx, hybrid.second ) )
{
return nullptr;
}
}
}
// Register only after all children, then hand ownership to nodeDataOut and return the raw pointer.
referenceNodes.emplace( (uint16_t)referenceNodes.size(), nodeData.get() );
return nodeDataOut.emplace_back( std::move( nodeData ) ).get();
}
// Decodes the Base64 text and rebuilds the NodeData tree it describes.
// Ownership of every created node is appended to `nodeDataOut`. The result of
// DeserialiseNodeDataInternal is returned unchanged, including nullptr on failure.
NodeData* Metadata::DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut )
{
    const std::vector<uint8_t> decoded = Base64::Decode( serialisedBase64NodeData );

    std::unordered_map<uint16_t, NodeData*> nodesById;
    size_t cursor = 0;

    return DeserialiseNodeDataInternal( decoded, nodeDataOut, cursor, nodesById );
}
// Per-class boilerplate, emitted once for every class named by the build list
// included below. For a class CLASS it defines:
//   - a global constant metadata object g<CLASS>Metadata, constructed with the
//     class name as a string,
//   - CLASS::GetMetadata(), returning the address of that object,
//   - CLASS::Metadata::NodeFactory( level ), forwarding to FastSIMD::New<CLASS>( level ).
#define FASTSIMD_BUILD_CLASS2( CLASS ) \
const CLASS::Metadata g ## CLASS ## Metadata( #CLASS );\
const FastNoise::Metadata* CLASS::GetMetadata() const\
{\
return &g ## CLASS ## Metadata;\
}\
Generator* CLASS::Metadata::NodeFactory( FastSIMD::eLevel l ) const\
{\
return FastSIMD::New<CLASS>( l );\
}
// Extra level of indirection: the argument is macro-expanded here before
// FASTSIMD_BUILD_CLASS2 applies # and ## to it.
// NOTE(review): presumably needed because the build list passes FASTNOISE_CLASS( X ) - confirm against FastNoise_BuildList.inl.
#define FASTSIMD_BUILD_CLASS( CLASS ) FASTSIMD_BUILD_CLASS2( CLASS )
// In this translation unit FASTNOISE_CLASS( X ) is just the plain class name X.
#define FASTNOISE_CLASS( CLASS ) CLASS
#define FASTSIMD_INCLUDE_HEADER_ONLY
#include "FastNoise/FastNoise_BuildList.inl"

View File

@ -0,0 +1,17 @@
#include "FS_Class.inl"
#ifdef FASTSIMD_INCLUDE_CHECK
#include __FILE__
#endif
#include "FS_Class.inl"
#pragma once
// Example FastSIMD class declaration with two externally callable functions.
// NOTE(review): FASTSIMD_CLASS_DECLARATION, FASTSIMD_CLASS_SETUP and
// FS_EXTERNAL_FUNC are defined outside this file (presumably by FS_Class.inl);
// their exact expansion is not visible here.
FASTSIMD_CLASS_DECLARATION( Example )
{
// The argument is the set of SIMD levels OR-ed together: AVX2, SSE4.1, SSE2 and scalar.
FASTSIMD_CLASS_SETUP( FastSIMD::Level_AVX2 | FastSIMD::Level_SSE41 | FastSIMD::Level_SSE2 | FastSIMD::Level_Scalar );
public:
FS_EXTERNAL_FUNC( void DoStuff( int* data ) );
FS_EXTERNAL_FUNC( void DoArray( int* data0, int* data1, int size ) );
};

View File

@ -0,0 +1,125 @@
#define FASTSIMD_INTELLISENSE
#include "Example.h"
//template<typename T>// Generic function, used if no specialised function found
//FS_CLASS( Example ) < T, FS_SIMD_CLASS::SIMD_Level >::FS_CLASS( Example )()
//{
// int test = 1;
//
// test += test;
//}
// Generic DoStuff: builds an int32v from the value 1 and stores it to `data`.
// NOTE(review): assumes `data` has room for one full int32v - confirm with callers.
template<typename F, FastSIMD::ELevel S> // Generic function, used if no specialised function found
void FS_CLASS( Example )<F, S>::DoStuff( int* data )
{
int32v a = int32v( 1 );
FS_Store_i32( data, a );
}
//template<typename CLASS_T, typename SIMD_T> // Different function for level SSE2 or AVX2
//void FS_CLASS( Example )::DoStuff( int* data )
//{
// int32v a = _mm_loadu_si128( reinterpret_cast<__m128i const*>(data) );
//
// a += _mm_set_epi32( 2, 3, 4, 5 );
//
// a -= _mm_castps_si128( FS_VecZero_f32( ) );
//
// FS_Store_i32( data, a );
//}
//
//
//template<typename CLASS_T, FastSIMD::Level LEVEL_T>
//void FS_CLASS( Example )::DoArray( int* data0, int* data1, int size )
//{
// for ( int i = 0; i < size; i += FS_VectorSize_i32() )
// {
// int32v a = FS_Load_i32( &data0[i] );
// int32v b = FS_Load_i32( &data1[i] );
//
// a *= b;
//
// a <<= 1;
//
// a -= FS_VecZero_i32();
//
// (~a);
//
// FS_Store_i32( &data0[i], a );
// }
//}
// Generic DoArray: walks both arrays one vector (int32v::FS_Size() elements) at
// a time and overwrites data0 with ( ( data0 + data1 ) << 1 ) * data1.
// NOTE(review): `i` is size_t while `size` is int, so a negative `size` is
// compared as a very large unsigned value - confirm callers never pass one.
// NOTE(review): there is no tail handling; presumably `size` is expected to be
// a multiple of the vector size - confirm.
template<typename F, FastSIMD::ELevel S>
void FS_CLASS( Example )<F, S>::DoArray( int* data0, int* data1, int size )
{
for ( size_t i = 0; i < size; i += int32v::FS_Size() )
{
int32v a = FS_Load_i32( &data0[i] );
int32v b = FS_Load_i32( &data1[i] );
a += b;
a <<= 1;
a *= b;
// Subtracts the vector returned by FS_Zero().
a -= int32v::FS_Zero();
// The result of the complement is discarded; `a` itself is not modified by this statement.
(~a);
FS_Store_i32( &data0[i], a );
}
}
// Partial specialisation of the Example class for FastSIMD::Level_AVX2. It
// derives from the Level_Null instantiation and provides its own DoArray,
// which differs from the generic one only in the shift amount (2 instead of 1).
template<typename T_FS>
class FS_CLASS( Example )<T_FS, FastSIMD::Level_AVX2> : public FS_CLASS( Example )<T_FS, FastSIMD::Level_Null>
{
//typedef FastSIMD_AVX2 T_FS;
FASTSIMD_CLASS_SETUP( FastSIMD::COMPILED_SIMD_LEVELS );
public:
// Overwrites data0 with ( ( data0 + data1 ) << 2 ) * data1, one vector at a time.
// NOTE(review): `i` is size_t while `size` is int - confirm callers never pass a negative size.
void DoArray( int* data0, int* data1, int size )
{
for ( size_t i = 0; i < size; i += int32v::FS_Size() )
{
int32v a = FS_Load_i32( &data0[i] );
int32v b = FS_Load_i32( &data1[i] );
//a += gfhfdghdfgh();
a += b;
a <<= 2;
a *= b;
a -= int32v::FS_Zero();
// The result of the complement is discarded.
(~a);
FS_Store_i32( &data0[i], a );
}
}
};
//
//template<typename T>
//typename std::enable_if<(T::SIMD_Level <= 1)>::type FS_CLASS( Example )<T, FS_SIMD_CLASS::SIMD_Level>::DoArray( int* data0, int* data1, int size )
//{
// for ( int i = 0; i < size; i += FS_VectorSize_i32() )
// {
// int32v a = FS_Load_i32( &data0[i] );
// int32v b = FS_Load_i32( &data1[i] );
//
// a += b;
//
// a <<= 1;
//
// a -= FS_VecZero_i32();
//
// (~a);
//
// FS_Store_i32( &data0[i], a );
// }
//}

View File

@ -0,0 +1,239 @@
#include "FastSIMD/FastSIMD.h"
#include <algorithm>
#include <cstdint>
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "FastSIMD/TypeList.h"
// Cache for the result of FastSIMD::CPUMaxSIMDLevel(); Level_Null means "not detected yet".
static FastSIMD::eLevel simdLevel = FastSIMD::Level_Null;
// Compile-time check that SIMDTypeList::MinimumCompiled is one of the levels enabled in COMPILED_SIMD_LEVELS.
static_assert(FastSIMD::SIMDTypeList::MinimumCompiled & FastSIMD::COMPILED_SIMD_LEVELS, "FASTSIMD_FALLBACK_SIMD_LEVEL is not a compiled SIMD level, check FastSIMD_Config.h");
#if FASTSIMD_x86
// Define interface to cpuid instruction.
// Runs CPUID for leaf `functionnumber` with the sub-leaf register (ecx) fixed to 0
// and copies the four result registers into `output`.
// input: eax = functionnumber, ecx = 0
// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
// Three implementations are selected by the preprocessor: GNU-style inline asm,
// the __cpuidex intrinsic, or MASM-style inline asm as a last resort.
static void cpuid( int output[4], int functionnumber )
{
#if defined( __GNUC__ ) || defined( __clang__ ) // use inline assembly, Gnu/AT&T syntax
int a, b, c, d;
// Constraints bind a/b/c/d to eax/ebx/ecx/edx; inputs are eax = leaf, ecx = 0.
__asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0) : );
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
#elif defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) // Microsoft or Intel compiler, intrin.h included
__cpuidex( output, functionnumber, 0 ); // intrinsic function for CPUID
#else // unknown platform. try inline assembly with masm/intel syntax
__asm
{
mov eax, functionnumber
xor ecx, ecx
cpuid;
mov esi, output
mov[esi], eax
mov[esi + 4], ebx
mov[esi + 8], ecx
mov[esi + 12], edx
}
#endif
}
// Define interface to xgetbv instruction
// Executes XGETBV with ecx = ctr and returns the 64-bit result, assembled from
// edx (high 32 bits) and eax (low 32 bits) in the inline-assembly branches.
static int64_t xgetbv( int ctr )
{
#if (defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000) || (defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic
return _xgetbv( ctr ); // intrinsic function for XGETBV
#elif defined( __GNUC__ ) // use inline assembly, Gnu/AT&T syntax
uint32_t a, d;
__asm("xgetbv" : "=a"(a), "=d"(d) : "c"(ctr) : );
return a | (uint64_t( d ) << 32);
#else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS syntax
uint32_t a, d;
__asm {
mov ecx, ctr
// The instruction is emitted as raw opcode bytes (0f 01 d0) rather than by mnemonic.
_emit 0x0f
_emit 0x01
_emit 0xd0; // xgetbv
mov a, eax
mov d, edx
}
return a | (uint64_t( d ) << 32);
#endif
}
#endif
// Probes the CPU (and, for AVX/AVX512, the OS-enabled register state) and
// returns the highest SIMD level found.
// The result is built in a local variable so the caller can publish it in one
// write instead of exposing intermediate levels through the cache.
static FastSIMD::eLevel DetectCPUMaxSIMDLevel()
{
    FastSIMD::eLevel level = FastSIMD::Level_Null;

#if FASTSIMD_x86
    int abcd[4] = { 0, 0, 0, 0 }; // cpuid results: eax, ebx, ecx, edx

#if !FASTSIMD_64BIT
    level = FastSIMD::Level_Scalar; // default value

    cpuid( abcd, 0 ); // call cpuid function 0
    if ( abcd[0] == 0 )
        return level; // no further cpuid function supported

    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
    if ( (abcd[3] & (1 << 0)) == 0 )
        return level; // no floating point
    if ( (abcd[3] & (1 << 23)) == 0 )
        return level; // no MMX
    if ( (abcd[3] & (1 << 15)) == 0 )
        return level; // no conditional move
    if ( (abcd[3] & (1 << 24)) == 0 )
        return level; // no FXSAVE
    if ( (abcd[3] & (1 << 25)) == 0 )
        return level; // no SSE
    level = FastSIMD::Level_SSE;
    // 1: SSE supported

    if ( (abcd[3] & (1 << 26)) == 0 )
        return level; // no SSE2
#else
    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
#endif

    level = FastSIMD::Level_SSE2; // default value for 64bit
    // 2: SSE2 supported

    if ( (abcd[2] & (1 << 0)) == 0 )
        return level; // no SSE3
    level = FastSIMD::Level_SSE3;
    // 3: SSE3 supported

    if ( (abcd[2] & (1 << 9)) == 0 )
        return level; // no SSSE3
    level = FastSIMD::Level_SSSE3;
    // 4: SSSE3 supported

    if ( (abcd[2] & (1 << 19)) == 0 )
        return level; // no SSE4.1
    level = FastSIMD::Level_SSE41;
    // 5: SSE4.1 supported

    if ( (abcd[2] & (1 << 23)) == 0 )
        return level; // no POPCNT
    if ( (abcd[2] & (1 << 20)) == 0 )
        return level; // no SSE4.2
    level = FastSIMD::Level_SSE42;
    // 6: SSE4.2 supported

    if ( (abcd[2] & (1 << 26)) == 0 )
        return level; // no XSAVE
    if ( (abcd[2] & (1 << 27)) == 0 )
        return level; // no OSXSAVE
    if ( (abcd[2] & (1 << 28)) == 0 )
        return level; // no AVX

    const uint64_t osbv = xgetbv( 0 );
    if ( (osbv & 6) != 6 )
        return level; // AVX not enabled in O.S.
    level = FastSIMD::Level_AVX;
    // 7: AVX supported

    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
    if ( (abcd[1] & (1 << 5)) == 0 )
        return level; // no AVX2
    level = FastSIMD::Level_AVX2;
    // 8: AVX2 supported

    if ( (osbv & (0xE0)) != 0xE0 )
        return level; // AVX512 not enabled in O.S.
    if ( (abcd[1] & (1 << 16)) == 0 )
        return level; // no AVX512

    cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
    if ( (abcd[0] & 0x60) != 0x60 )
        return level; // no AVX512
    // 9: AVX512 supported

    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
    // Unsigned literal: shifting a signed 1 into bit 31 overflows int.
    if ( (abcd[1] & (1u << 31)) == 0 )
        return level; // no AVX512VL
    // 10: AVX512VL supported

    if ( (abcd[1] & 0x40020000) != 0x40020000 )
        return level; // no AVX512BW, AVX512DQ
    level = FastSIMD::Level_AVX512;
    // 11: AVX512BW & AVX512DQ supported
#endif

#if FASTSIMD_ARM
    level = FastSIMD::Level_NEON;
#endif

    return level;
}

// Returns the highest SIMD level available on the running CPU.
// The first result above Level_Null is cached in `simdLevel` and returned
// directly on later calls. The cache is written only with the final detected
// level, never with an intermediate step of the detection sequence.
FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
{
    if ( simdLevel > Level_Null )
    {
        return simdLevel;
    }

    simdLevel = DetectCPUMaxSIMDLevel();
    return simdLevel;
}
// Recursively walks the compiled SIMD levels starting at SIMD_LEVEL and returns
// an instance of CLASS_T built for the highest level that is both supported by
// CLASS_T and not above maxSIMDLevel. Returns nullptr when no level qualifies.
// Higher levels are tried first (via the recursive call); the current level is
// only used when no higher level produced an instance.
template<typename CLASS_T, FastSIMD::eLevel SIMD_LEVEL>
CLASS_T* SIMDLevelSelector( FastSIMD::eLevel maxSIMDLevel )
{
    constexpr bool kClassSupportsLevel = ( CLASS_T::Supported_SIMD_Levels & SIMD_LEVEL ) != 0;

    if constexpr( !kClassSupportsLevel && SIMD_LEVEL == FastSIMD::Level_Null )
    {
        // End of the level list: nothing left to try.
        return nullptr;
    }
    else
    {
        CLASS_T* higherLevelInstance = SIMDLevelSelector<CLASS_T, FastSIMD::SIMDTypeList::GetNextCompiledAfter<SIMD_LEVEL>>( maxSIMDLevel );

        if constexpr( kClassSupportsLevel )
        {
            if( higherLevelInstance == nullptr && SIMD_LEVEL <= maxSIMDLevel )
            {
                return FastSIMD::ClassFactory<CLASS_T, SIMD_LEVEL>();
            }
        }

        return higherLevelInstance;
    }
}
// Creates a CLASS_T instance for the best usable SIMD level.
// maxSIMDLevel == Level_Null means "use whatever the CPU supports"; any other
// value is clamped so it never exceeds the level reported by CPUMaxSIMDLevel().
// The result comes from SIMDLevelSelector and may be nullptr if no level qualifies.
template<typename CLASS_T>
CLASS_T* FastSIMD::New( eLevel maxSIMDLevel )
{
    static_assert(( CLASS_T::Supported_SIMD_Levels & FastSIMD::SIMDTypeList::MinimumCompiled ), "MinimumCompiled SIMD Level must be supported by this class" );

    const eLevel cpuLevel = CPUMaxSIMDLevel();

    // No explicit request, or a request above what the CPU offers: fall back to the CPU level.
    if( maxSIMDLevel == Level_Null || cpuLevel < maxSIMDLevel )
    {
        maxSIMDLevel = cpuLevel;
    }

    return SIMDLevelSelector<CLASS_T, SIMDTypeList::MinimumCompiled>( maxSIMDLevel );
}
// In this translation unit FASTSIMD_BUILD_CLASS( CLASS ) expands to an explicit
// instantiation of FastSIMD::New for CLASS. FastSIMD_BuildList.inl is then
// included with FASTSIMD_INCLUDE_HEADER_ONLY defined.
#define FASTSIMD_BUILD_CLASS( CLASS ) \
template CLASS* FastSIMD::New( FastSIMD::eLevel );
#define FASTSIMD_INCLUDE_HEADER_ONLY
#include "FastSIMD_BuildList.inl"

View File

@ -0,0 +1,10 @@
// List of classes to build with FastSIMD.
// This file must only be included after FASTSIMD_BUILD_CLASS has been defined;
// including it anywhere else is rejected by the #error below.
#pragma once
#ifndef FASTSIMD_BUILD_CLASS
#error Do not include this file
#endif
// The Example class is left out of the build list.
//#include "Example/Example.inl"
//FASTSIMD_BUILD_CLASS( Example )
#include "FastNoise/FastNoise_BuildList.inl"

View File

@ -0,0 +1,17 @@
// Translation unit for the AVX2 level. When FASTSIMD_COMPILE_AVX2 is non-zero it
// defines FS_SIMD_CLASS as FastSIMD::AVX2 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_AVX2
// To compile AVX2 support enable AVX(2) code generation compiler flags for this file
// NOTE(review): the guard tests __AVX__ rather than __AVX2__; it is left as-is so
// configurations that build today are not rejected.
#ifndef __AVX__
#ifdef _MSC_VER
#error To compile AVX2 set C++ code generation to use /arch:AVX2 on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
#else
#error To compile AVX2 add build command "-march=core-avx2" on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
#endif
#endif
#include "Internal/AVX.h"
#define FS_SIMD_CLASS FastSIMD::AVX2
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,17 @@
// Translation unit for the AVX512 level. When FASTSIMD_COMPILE_AVX512 is non-zero
// it defines FS_SIMD_CLASS as FastSIMD::AVX512 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_AVX512
// To compile AVX512 support enable AVX512 code generation compiler flags for this file
// The build is stopped with an explanatory error when __AVX512DQ__ is not defined.
#ifndef __AVX512DQ__
#ifdef _MSC_VER
#error To compile AVX512 set C++ code generation to use /arch:AVX512 on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
#else
#error To compile AVX512 add build command "-mavx512f -mavx512dq" on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
#endif
#endif
#include "Internal/AVX512.h"
#define FS_SIMD_CLASS FastSIMD::AVX512
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the NEON level. When FASTSIMD_COMPILE_NEON is non-zero it
// defines FS_SIMD_CLASS as FastSIMD::NEON and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_NEON
#include "Internal/NEON.h"
#define FS_SIMD_CLASS FastSIMD::NEON
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the SSE2 level. When FASTSIMD_COMPILE_SSE2 is non-zero it
// defines FS_SIMD_CLASS as FastSIMD::SSE2 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE2
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE2
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the SSE3 level. When FASTSIMD_COMPILE_SSE3 is non-zero it
// defines FS_SIMD_CLASS as FastSIMD::SSE3 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE3
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE3
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the SSE4.1 level. When FASTSIMD_COMPILE_SSE41 is non-zero
// it defines FS_SIMD_CLASS as FastSIMD::SSE41 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE41
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE41
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the SSE4.2 level. When FASTSIMD_COMPILE_SSE42 is non-zero
// it defines FS_SIMD_CLASS as FastSIMD::SSE42 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE42
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE42
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the SSSE3 level. When FASTSIMD_COMPILE_SSSE3 is non-zero
// it defines FS_SIMD_CLASS as FastSIMD::SSSE3 and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSSE3
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSSE3
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
// Translation unit for the scalar level. When FASTSIMD_COMPILE_SCALAR is non-zero
// it defines FS_SIMD_CLASS as FastSIMD::Scalar and includes SourceBuilder.inl;
// otherwise the file is empty.
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SCALAR
#include "Internal/Scalar.h"
#define FS_SIMD_CLASS FastSIMD::Scalar
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,448 @@
#pragma once
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "VecTools.h"
namespace FastSIMD
{
// Eight packed 32-bit floats stored in one 256-bit register (__m256).
// Arithmetic and bitwise compound-assignment operators act lane by lane.
// Comparison operators return a __m256i holding the per-lane result of
// _mm256_cmp_ps reinterpreted as integers.
// Comparisons are const-qualified so they can also be used on const operands.
struct AVX_f32x8
{
    FASTSIMD_INTERNAL_TYPE_SET( AVX_f32x8, __m256 );

    // Lane i holds the value i (0.0f in lane 0 up to 7.0f in lane 7).
    FS_INLINE static AVX_f32x8 Incremented()
    {
        return _mm256_set_ps( 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
    }

    // Broadcasts f to all eight lanes.
    FS_INLINE explicit AVX_f32x8( float f )
    {
        *this = _mm256_set1_ps( f );
    }

    // f0 goes to lane 0, f7 to lane 7 (_mm256_set_ps takes the highest lane first).
    FS_INLINE explicit AVX_f32x8( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7 )
    {
        *this = _mm256_set_ps( f7, f6, f5, f4, f3, f2, f1, f0 );
    }

    FS_INLINE AVX_f32x8& operator+=( const AVX_f32x8& rhs )
    {
        *this = _mm256_add_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator-=( const AVX_f32x8& rhs )
    {
        *this = _mm256_sub_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator*=( const AVX_f32x8& rhs )
    {
        *this = _mm256_mul_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator/=( const AVX_f32x8& rhs )
    {
        *this = _mm256_div_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator&=( const AVX_f32x8& rhs )
    {
        *this = _mm256_and_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator|=( const AVX_f32x8& rhs )
    {
        *this = _mm256_or_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX_f32x8& operator^=( const AVX_f32x8& rhs )
    {
        *this = _mm256_xor_ps( *this, rhs );
        return *this;
    }

    // Bitwise NOT: XOR with an all-ones pattern.
    FS_INLINE AVX_f32x8 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // Comparing zero with zero yields all-ones lanes without loading a constant.
        const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
#else
        const __m256i neg1 = _mm256_set1_epi32( -1 );
#endif
        return _mm256_xor_ps( *this, _mm256_castsi256_ps( neg1 ) );
    }

    // Negation: flips the sign bit (bit 31) of every lane.
    FS_INLINE AVX_f32x8 operator-() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m256i minInt = _mm256_slli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 31 );
#else
        const __m256i minInt = _mm256_set1_epi32( 0x80000000 );
#endif
        return _mm256_xor_ps( *this, _mm256_castsi256_ps( minInt ) );
    }

    FS_INLINE __m256i operator==( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_EQ_OS ) );
    }

    FS_INLINE __m256i operator!=( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_NEQ_OS ) );
    }

    FS_INLINE __m256i operator>( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GT_OS ) );
    }

    FS_INLINE __m256i operator<( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LT_OS ) );
    }

    FS_INLINE __m256i operator>=( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GE_OS ) );
    }

    FS_INLINE __m256i operator<=( const AVX_f32x8& rhs ) const
    {
        return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LE_OS ) );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX_f32x8 )
// Eight packed 32-bit signed integers stored in one 256-bit register (__m256i).
// Compound-assignment operators act lane by lane; comparisons return a vector
// whose lanes are the per-lane result of the corresponding _mm256_cmp*_epi32.
// Comparisons are const-qualified so they can also be used on const operands.
struct AVX2_i32x8
{
    FASTSIMD_INTERNAL_TYPE_SET( AVX2_i32x8, __m256i );

    // Lane i holds the value i (0 in lane 0 up to 7 in lane 7).
    FS_INLINE static AVX2_i32x8 Incremented()
    {
        return _mm256_set_epi32( 7, 6, 5, 4, 3, 2, 1, 0 );
    }

    // Broadcasts f to all eight lanes.
    FS_INLINE explicit AVX2_i32x8( int32_t f )
    {
        *this = _mm256_set1_epi32( f );
    }

    // i0 goes to lane 0, i7 to lane 7 (_mm256_set_epi32 takes the highest lane first).
    FS_INLINE explicit AVX2_i32x8( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7 )
    {
        *this = _mm256_set_epi32( i7, i6, i5, i4, i3, i2, i1, i0 );
    }

    FS_INLINE AVX2_i32x8& operator+=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_add_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX2_i32x8& operator-=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_sub_epi32( *this, rhs );
        return *this;
    }

    // Keeps the low 32 bits of each lane product.
    FS_INLINE AVX2_i32x8& operator*=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_mullo_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX2_i32x8& operator&=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_and_si256( *this, rhs );
        return *this;
    }

    FS_INLINE AVX2_i32x8& operator|=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_or_si256( *this, rhs );
        return *this;
    }

    FS_INLINE AVX2_i32x8& operator^=( const AVX2_i32x8& rhs )
    {
        *this = _mm256_xor_si256( *this, rhs );
        return *this;
    }

    // Arithmetic (sign-extending) right shift of every lane.
    FS_INLINE AVX2_i32x8& operator>>=( int32_t rhs )
    {
        *this = _mm256_srai_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX2_i32x8& operator<<=( int32_t rhs )
    {
        *this = _mm256_slli_epi32( *this, rhs );
        return *this;
    }

    // Bitwise NOT: XOR with an all-ones pattern.
    FS_INLINE AVX2_i32x8 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // Comparing zero with zero yields all-ones lanes without loading a constant.
        const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
#else
        const __m256i neg1 = _mm256_set1_epi32( -1 );
#endif
        return _mm256_xor_si256( *this, neg1 );
    }

    // Negation computed as 0 - value.
    FS_INLINE AVX2_i32x8 operator-() const
    {
        return _mm256_sub_epi32( _mm256_setzero_si256(), *this );
    }

    FS_INLINE AVX2_i32x8 operator==( const AVX2_i32x8& rhs ) const
    {
        return _mm256_cmpeq_epi32( *this, rhs );
    }

    FS_INLINE AVX2_i32x8 operator>( const AVX2_i32x8& rhs ) const
    {
        return _mm256_cmpgt_epi32( *this, rhs );
    }

    // a < b is evaluated as b > a; only a greater-than compare is used here.
    FS_INLINE AVX2_i32x8 operator<( const AVX2_i32x8& rhs ) const
    {
        return _mm256_cmpgt_epi32( rhs, *this );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( AVX2_i32x8, int32_t )
// Static function set for the AVX/AVX2 levels, operating on 256-bit vectors:
// float32v = AVX_f32x8, int32v = AVX2_i32x8, mask32v = AVX2_i32x8.
// Every function is a thin wrapper around the intrinsic(s) it names.
template<eLevel LEVEL_T>
class AVX_T
{
public:
static_assert( LEVEL_T >= Level_AVX && LEVEL_T <= Level_AVX2, "Cannot create template with unsupported SIMD level" );
static constexpr eLevel SIMD_Level = LEVEL_T;
// 256 divided by the element size; the default ElementSize of 8 gives 32.
template<size_t ElementSize = 8>
static constexpr size_t VectorSize = 256 / ElementSize;
typedef AVX_f32x8 float32v;
typedef AVX2_i32x8 int32v;
typedef AVX2_i32x8 mask32v;
// Load
// Both loads use the unaligned (loadu) intrinsics.
FS_INLINE static float32v Load_f32( void const* p )
{
return _mm256_loadu_ps( reinterpret_cast<float const*>(p) );
}
FS_INLINE static int32v Load_i32( void const* p )
{
return _mm256_loadu_si256( reinterpret_cast<__m256i const*>(p) );
}
// Store
// Both stores use the unaligned (storeu) intrinsics.
FS_INLINE static void Store_f32( void* p, float32v a )
{
_mm256_storeu_ps( reinterpret_cast<float*>(p), a );
}
FS_INLINE static void Store_i32( void* p, int32v a )
{
_mm256_storeu_si256( reinterpret_cast<__m256i*>(p), a );
}
// Cast
// Reinterprets the bits as the other type; no value conversion takes place.
FS_INLINE static float32v Casti32_f32( int32v a )
{
return _mm256_castsi256_ps( a );
}
FS_INLINE static int32v Castf32_i32( float32v a )
{
return _mm256_castps_si256( a );
}
// Convert
// Numeric conversion between int32 and float lanes.
FS_INLINE static float32v Converti32_f32( int32v a )
{
return _mm256_cvtepi32_ps( a );
}
FS_INLINE static int32v Convertf32_i32( float32v a )
{
return _mm256_cvtps_epi32( a );
}
// Select
// Per lane: returns a where the mask lane's top bit is set, otherwise b.
FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
{
return _mm256_blendv_ps( b, a, _mm256_castsi256_ps( m ) );
}
// Integer select goes through the float blend by casting all three operands.
FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
{
return _mm256_castps_si256( _mm256_blendv_ps( _mm256_castsi256_ps( b ), _mm256_castsi256_ps( a ), _mm256_castsi256_ps( m ) ) );
}
// Min, Max
FS_INLINE static float32v Min_f32( float32v a, float32v b )
{
return _mm256_min_ps( a, b );
}
FS_INLINE static float32v Max_f32( float32v a, float32v b )
{
return _mm256_max_ps( a, b );
}
FS_INLINE static int32v Min_i32( int32v a, int32v b )
{
return _mm256_min_epi32( a, b );
}
FS_INLINE static int32v Max_i32( int32v a, int32v b )
{
return _mm256_max_epi32( a, b );
}
// Bitwise
// Computes a & ~b: the andnot intrinsics invert their FIRST operand, hence (b, a).
FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
{
return _mm256_andnot_ps( b, a );
}
FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
{
return _mm256_andnot_si256( b, a );
}
// Logical right shift (zero fill) of the 32-bit lanes.
FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
{
return Casti32_f32( _mm256_srli_epi32( Castf32_i32( a ), b ) );
}
FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
{
return _mm256_srli_epi32( a, b );
}
// Abs
// Float abs clears the sign bit by ANDing every lane with 0x7FFFFFFF.
FS_INLINE static float32v Abs_f32( float32v a )
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m256i intMax = _mm256_srli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 1 );
#else
const __m256i intMax = _mm256_set1_epi32( 0x7FFFFFFF );
#endif
return _mm256_and_ps( a, _mm256_castsi256_ps( intMax ) );
}
FS_INLINE static int32v Abs_i32( int32v a )
{
return _mm256_abs_epi32( a );
}
// Float math
FS_INLINE static float32v Sqrt_f32( float32v a )
{
return _mm256_sqrt_ps( a );
}
// rsqrt/rcp are the approximate-result intrinsics, not exact division/sqrt.
FS_INLINE static float32v InvSqrt_f32( float32v a )
{
return _mm256_rsqrt_ps( a );
}
FS_INLINE static float32v Reciprocal_f32( float32v a )
{
return _mm256_rcp_ps( a );
}
// Floor, Ceil, Round
// All three use _mm256_round_ps with an explicit rounding direction and _MM_FROUND_NO_EXC.
FS_INLINE static float32v Floor_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Ceil_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Round_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
//Mask
// Mask_* computes a & m; NMask_* computes a & ~m.
FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
{
return a & m;
}
FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
{
return _mm256_and_ps( a, _mm256_castsi256_ps( m ) );
}
FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
{
return _mm256_andnot_si256( m, a );
}
FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
{
return _mm256_andnot_ps( _mm256_castsi256_ps( m ), a );
}
// True when at least one lane of m has its top bit set (movemask result is non-zero).
FS_INLINE static bool AnyMask_bool( mask32v m )
{
return _mm256_movemask_ps( _mm256_castsi256_ps( m ) );
}
};
// Concrete level types. Each typedef exists only when its level is enabled
// through the matching FASTSIMD_COMPILE_* switch.
#if FASTSIMD_COMPILE_AVX
typedef AVX_T<Level_AVX> AVX;
#endif
#if FASTSIMD_COMPILE_AVX2
typedef AVX_T<Level_AVX2> AVX2;
// Fused multiply-add specialisations, compiled only when FASTSIMD_USE_FMA is set.
#if FASTSIMD_USE_FMA
// (a * b) + c in a single fused instruction.
template<>
FS_INLINE AVX2::float32v FMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
{
return _mm256_fmadd_ps( a, b, c );
}
// -(a * b) + c in a single fused instruction.
template<>
FS_INLINE AVX2::float32v FNMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
{
return _mm256_fnmadd_ps( a, b, c );
}
#endif
#endif
}

View File

@ -0,0 +1,516 @@
#pragma once
#include <immintrin.h>
#include "VecTools.h"
namespace FastSIMD
{
// Sixteen packed 32-bit floats stored in one 512-bit register (__m512).
// Arithmetic and bitwise compound-assignment operators act lane by lane.
// Comparison operators return a 16-bit mask (__mmask16) with one bit per lane.
// Comparisons are const-qualified so they can also be used on const operands.
struct AVX512_f32x16
{
    FASTSIMD_INTERNAL_TYPE_SET( AVX512_f32x16, __m512 );

    // Lane i holds the value i (0.0f in lane 0 up to 15.0f in lane 15).
    FS_INLINE static AVX512_f32x16 Incremented()
    {
        return _mm512_set_ps( 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
    }

    // Broadcasts f to all sixteen lanes.
    FS_INLINE explicit AVX512_f32x16( float f )
    {
        *this = _mm512_set1_ps( f );
    }

    // f0 goes to lane 0, f15 to lane 15 (_mm512_set_ps takes the highest lane first).
    FS_INLINE explicit AVX512_f32x16( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7, float f8, float f9, float f10, float f11, float f12, float f13, float f14, float f15 )
    {
        *this = _mm512_set_ps( f15, f14, f13, f12, f11, f10, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0 );
    }

    FS_INLINE AVX512_f32x16& operator+=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_add_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator-=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_sub_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator*=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_mul_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator/=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_div_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator&=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_and_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator|=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_or_ps( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_f32x16& operator^=( const AVX512_f32x16& rhs )
    {
        *this = _mm512_xor_ps( *this, rhs );
        return *this;
    }

    // Bitwise NOT: XOR with an all-ones pattern.
    FS_INLINE AVX512_f32x16 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // AVX-512 integer compares only produce mask registers, so the all-ones
        // vector is generated with ternary logic instead: truth table 0xFF yields
        // 1 for every bit regardless of the inputs.
        const __m512i zero = _mm512_setzero_si512();
        const __m512i neg1 = _mm512_ternarylogic_epi32( zero, zero, zero, 0xFF );
#else
        const __m512i neg1 = _mm512_set1_epi32( -1 );
#endif
        return _mm512_xor_ps( *this, _mm512_castsi512_ps( neg1 ) );
    }

    // Negation: flips the sign bit (bit 31) of every lane.
    FS_INLINE AVX512_f32x16 operator-() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // All-ones via ternary logic (see operator~), shifted so only bit 31 remains.
        const __m512i zero = _mm512_setzero_si512();
        const __m512i minInt = _mm512_slli_epi32( _mm512_ternarylogic_epi32( zero, zero, zero, 0xFF ), 31 );
#else
        const __m512i minInt = _mm512_set1_epi32( 0x80000000 );
#endif
        return _mm512_xor_ps( *this, _mm512_castsi512_ps( minInt ) );
    }

    FS_INLINE __mmask16 operator==( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_EQ_OS );
    }

    FS_INLINE __mmask16 operator!=( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_NEQ_OS );
    }

    FS_INLINE __mmask16 operator>( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_GT_OS );
    }

    FS_INLINE __mmask16 operator<( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_LT_OS );
    }

    FS_INLINE __mmask16 operator>=( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_GE_OS );
    }

    FS_INLINE __mmask16 operator<=( const AVX512_f32x16& rhs ) const
    {
        return _mm512_cmp_ps_mask( *this, rhs, _CMP_LE_OS );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX512_f32x16 )
// Sixteen packed 32-bit signed integers stored in one 512-bit register (__m512i).
// Compound-assignment operators act lane by lane; comparison operators return
// a 16-bit mask (__mmask16) with one bit per lane.
// Comparisons are const-qualified so they can also be used on const operands.
struct AVX512_i32x16
{
    FASTSIMD_INTERNAL_TYPE_SET( AVX512_i32x16, __m512i );

    // Lane i holds the value i (0 in lane 0 up to 15 in lane 15).
    FS_INLINE static AVX512_i32x16 Incremented()
    {
        return _mm512_set_epi32( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 );
    }

    // Broadcasts i to all sixteen lanes.
    FS_INLINE explicit AVX512_i32x16( int32_t i )
    {
        *this = _mm512_set1_epi32( i );
    }

    // i0 goes to lane 0, i15 to lane 15 (_mm512_set_epi32 takes the highest lane first).
    FS_INLINE explicit AVX512_i32x16( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7, int32_t i8, int32_t i9, int32_t i10, int32_t i11, int32_t i12, int32_t i13, int32_t i14, int32_t i15 )
    {
        *this = _mm512_set_epi32( i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0 );
    }

    FS_INLINE AVX512_i32x16& operator+=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_add_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_i32x16& operator-=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_sub_epi32( *this, rhs );
        return *this;
    }

    // Keeps the low 32 bits of each lane product.
    FS_INLINE AVX512_i32x16& operator*=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_mullo_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_i32x16& operator&=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_and_si512( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_i32x16& operator|=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_or_si512( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_i32x16& operator^=( const AVX512_i32x16& rhs )
    {
        *this = _mm512_xor_si512( *this, rhs );
        return *this;
    }

    // Arithmetic (sign-extending) right shift of every lane.
    FS_INLINE AVX512_i32x16& operator>>=( int32_t rhs )
    {
        *this = _mm512_srai_epi32( *this, rhs );
        return *this;
    }

    FS_INLINE AVX512_i32x16& operator<<=( int32_t rhs )
    {
        *this = _mm512_slli_epi32( *this, rhs );
        return *this;
    }

    // Bitwise NOT: XOR with an all-ones pattern.
    FS_INLINE AVX512_i32x16 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // AVX-512 integer compares only produce mask registers, so the all-ones
        // vector is generated with ternary logic instead: truth table 0xFF yields
        // 1 for every bit regardless of the inputs.
        const __m512i zero = _mm512_setzero_si512();
        const __m512i neg1 = _mm512_ternarylogic_epi32( zero, zero, zero, 0xFF );
#else
        const __m512i neg1 = _mm512_set1_epi32( -1 );
#endif
        return _mm512_xor_si512( *this, neg1 );
    }

    // Negation computed as 0 - value.
    FS_INLINE AVX512_i32x16 operator-() const
    {
        return _mm512_sub_epi32( _mm512_setzero_si512(), *this );
    }

    FS_INLINE __mmask16 operator==( const AVX512_i32x16& rhs ) const
    {
        return _mm512_cmpeq_epi32_mask( *this, rhs );
    }

    FS_INLINE __mmask16 operator>( const AVX512_i32x16& rhs ) const
    {
        return _mm512_cmpgt_epi32_mask( *this, rhs );
    }

    FS_INLINE __mmask16 operator<( const AVX512_i32x16& rhs ) const
    {
        return _mm512_cmplt_epi32_mask( *this, rhs );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( AVX512_i32x16, int32_t )
// Static function set for the AVX512 level, operating on 512-bit vectors:
// float32v = AVX512_f32x16, int32v = AVX512_i32x16, mask32v = __mmask16
// (one mask bit per lane rather than a full vector).
// Every function is a thin wrapper around the intrinsic(s) it names.
template<eLevel LEVEL_T>
class AVX512_T
{
public:
static_assert( LEVEL_T == Level_AVX512, "Cannot create template with unsupported SIMD level" );
static constexpr eLevel SIMD_Level = LEVEL_T;
// 512 divided by the element size; the default ElementSize of 8 gives 64.
template<size_t ElementSize = 8>
static constexpr size_t VectorSize = 512 / ElementSize;
typedef AVX512_f32x16 float32v;
typedef AVX512_i32x16 int32v;
typedef __mmask16 mask32v;
// Load
// Both loads use the unaligned (loadu) intrinsics.
FS_INLINE static float32v Load_f32( void const* p )
{
return _mm512_loadu_ps( p );
}
FS_INLINE static int32v Load_i32( void const* p )
{
return _mm512_loadu_si512( p );
}
// Store
// Both stores use the unaligned (storeu) intrinsics.
FS_INLINE static void Store_f32( void* p, float32v a )
{
_mm512_storeu_ps( p, a );
}
FS_INLINE static void Store_i32( void* p, int32v a )
{
_mm512_storeu_si512( p, a );
}
// Cast
// Reinterprets the bits as the other type; no value conversion takes place.
FS_INLINE static float32v Casti32_f32( int32v a )
{
return _mm512_castsi512_ps( a );
}
FS_INLINE static int32v Castf32_i32( float32v a )
{
return _mm512_castps_si512( a );
}
// Convert
// Numeric conversion between int32 and float lanes.
FS_INLINE static float32v Converti32_f32( int32v a )
{
return _mm512_cvtepi32_ps( a );
}
FS_INLINE static int32v Convertf32_i32( float32v a )
{
return _mm512_cvtps_epi32( a );
}
// Select
// Per lane: returns a where the mask bit is set, otherwise b.
FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
{
return _mm512_mask_blend_ps( m, b, a );
}
FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
{
return _mm512_mask_blend_epi32( m, b, a );
}
// Min, Max
FS_INLINE static float32v Min_f32( float32v a, float32v b )
{
return _mm512_min_ps( a, b );
}
FS_INLINE static float32v Max_f32( float32v a, float32v b )
{
return _mm512_max_ps( a, b );
}
FS_INLINE static int32v Min_i32( int32v a, int32v b )
{
return _mm512_min_epi32( a, b );
}
FS_INLINE static int32v Max_i32( int32v a, int32v b )
{
return _mm512_max_epi32( a, b );
}
// Bitwise
// Computes a & ~b: the andnot intrinsics invert their FIRST operand, hence (b, a).
FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
{
return _mm512_andnot_ps( b, a );
}
FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
{
return _mm512_andnot_si512( b, a );
}
// Logical right shift (zero fill) of the 32-bit lanes.
FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
{
return Casti32_f32( _mm512_srli_epi32( Castf32_i32( a ), b ) );
}
FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
{
return _mm512_srli_epi32( a, b );
}
// Abs
FS_INLINE static float32v Abs_f32( float32v a )
{
return _mm512_abs_ps( a );
}
FS_INLINE static int32v Abs_i32( int32v a )
{
return _mm512_abs_epi32( a );
}
// Float math
FS_INLINE static float32v Sqrt_f32( float32v a )
{
return _mm512_sqrt_ps( a );
}
// rsqrt14/rcp14 are the approximate-result intrinsics, not exact division/sqrt.
FS_INLINE static float32v InvSqrt_f32( float32v a )
{
return _mm512_rsqrt14_ps( a );
}
FS_INLINE static float32v Reciprocal_f32( float32v a )
{
return _mm512_rcp14_ps( a );
}
// Floor, Ceil, Round
// All three use _mm512_roundscale_ps with an explicit rounding direction and _MM_FROUND_NO_EXC.
FS_INLINE static float32v Floor_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Ceil_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Round_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
//Mask
// Mask_* keeps the lanes whose mask bit is set and zeroes the rest (maskz move);
// NMask_* does the same with the inverted mask.
FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
{
return _mm512_maskz_mov_epi32( m, a );
}
FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
{
return _mm512_maskz_mov_ps( m, a );
}
FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
{
return _mm512_maskz_mov_epi32( ~m, a );
}
FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
{
return _mm512_maskz_mov_ps( ~m, a );
}
// True when any mask bit is set (non-zero mask converts to true).
FS_INLINE static bool AnyMask_bool( mask32v m )
{
return m;
}
};
// Concrete AVX512 level type plus the function specialisations that use
// AVX-512 fused and write-masked instructions. Compiled only when
// FASTSIMD_COMPILE_AVX512 is enabled.
#if FASTSIMD_COMPILE_AVX512
typedef AVX512_T<Level_AVX512> AVX512;

#if FASTSIMD_USE_FMA
// (a * b) + c in a single fused instruction.
template<>
FS_INLINE AVX512::float32v FMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
{
    return _mm512_fmadd_ps( a, b, c );
}

// -(a * b) + c in a single fused instruction.
template<>
FS_INLINE AVX512::float32v FNMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
{
    return _mm512_fnmadd_ps( a, b, c );
}
#endif

// Masked float
// Lanes whose mask bit is set receive (a op b); all other lanes keep a.
template<>
FS_INLINE AVX512::float32v MaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_add_ps( a, m, a, b );
}

template<>
FS_INLINE AVX512::float32v MaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_sub_ps( a, m, a, b );
}

template<>
FS_INLINE AVX512::float32v MaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_mul_ps( a, m, a, b );
}

// Masked int32
template<>
FS_INLINE AVX512::int32v MaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_add_epi32( a, m, a, b );
}

template<>
FS_INLINE AVX512::int32v MaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_sub_epi32( a, m, a, b );
}

template<>
FS_INLINE AVX512::int32v MaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_mullo_epi32( a, m, a, b );
}

// NMasked float
// Same as the Masked* functions with the mask inverted: lanes whose mask bit
// is CLEAR receive (a op b); all other lanes keep a.
template<>
FS_INLINE AVX512::float32v NMaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_add_ps( a, ~m, a, b );
}

template<>
FS_INLINE AVX512::float32v NMaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_sub_ps( a, ~m, a, b );
}

template<>
FS_INLINE AVX512::float32v NMaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    return _mm512_mask_mul_ps( a, ~m, a, b );
}

// NMasked int32
template<>
FS_INLINE AVX512::int32v NMaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_add_epi32( a, ~m, a, b );
}

template<>
FS_INLINE AVX512::int32v NMaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_sub_epi32( a, ~m, a, b );
}

// Uses mullo (low 32 bits of each 32x32 product), matching MaskedMul_i32.
// _mm512_mask_mul_epi32 would instead multiply only the even lanes into
// 64-bit results, which is not a per-lane int32 multiply.
template<>
FS_INLINE AVX512::int32v NMaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_mullo_epi32( a, ~m, a, b );
}
#endif
}

View File

@ -0,0 +1,424 @@
#pragma once
#include <arm_neon.h>
#include "VecTools.h"
// Four packed 32-bit floats stored in one 128-bit NEON register (float32x4_t).
// Compound-assignment operators act lane by lane.
struct NEON_f32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( NEON_f32x4, float32x4_t );

    // Number of float lanes in the vector.
    constexpr FS_INLINE static uint8_t Size()
    {
        return 4;
    }

    // All lanes set to zero.
    FS_INLINE static NEON_f32x4 Zero()
    {
        return vdupq_n_f32( 0 );
    }

    // Lane i holds the value i (0.0f .. 3.0f).
    FS_INLINE static NEON_f32x4 Incremented()
    {
        alignas(16) const float lanes[4]{ 0.0f, 1.0f, 2.0f, 3.0f };
        return vld1q_f32( lanes );
    }

    // Broadcasts f to all four lanes.
    FS_INLINE explicit NEON_f32x4( float f )
    {
        *this = vdupq_n_f32( f );
    }

    // f0 goes to lane 0, f3 to lane 3.
    FS_INLINE explicit NEON_f32x4( float f0, float f1, float f2, float f3 )
    {
        alignas(16) const float lanes[4]{ f0, f1, f2, f3 };
        *this = vld1q_f32( lanes );
    }

    FS_INLINE NEON_f32x4& operator+=( const NEON_f32x4& rhs )
    {
        return *this = vaddq_f32( *this, rhs );
    }

    FS_INLINE NEON_f32x4& operator-=( const NEON_f32x4& rhs )
    {
        return *this = vsubq_f32( *this, rhs );
    }

    FS_INLINE NEON_f32x4& operator*=( const NEON_f32x4& rhs )
    {
        return *this = vmulq_f32( *this, rhs );
    }

    // Division is computed as a * (1/b): a reciprocal estimate of rhs is refined
    // with two Newton-Raphson steps and then multiplied in. The result is an
    // approximation; fewer refinement steps may be enough for some uses.
    FS_INLINE NEON_f32x4& operator/=( const NEON_f32x4& rhs )
    {
        float32x4_t inverse = vrecpeq_f32( rhs );

        for( int step = 0; step < 2; ++step )
        {
            inverse = vmulq_f32( vrecpsq_f32( rhs, inverse ), inverse );
        }

        return *this = vmulq_f32( *this, inverse );
    }

    FS_INLINE NEON_f32x4 operator-() const
    {
        return vnegq_f32( *this );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( NEON_f32x4 )
// Wrapper around a NEON int32x4_t (four packed signed 32-bit integers) providing
// the compound-assignment operators that FASTSIMD_INTERNAL_OPERATORS_INT builds
// the binary operators from.
struct NEON_i32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( NEON_i32x4, int32x4_t );

    // Number of int32 lanes in the vector.
    constexpr FS_INLINE static uint8_t Size()
    {
        return 4;
    }

    // All lanes set to 0.
    FS_INLINE static NEON_i32x4 Zero()
    {
        return vdupq_n_s32( 0 );
    }

    // Lanes set to { 0, 1, 2, 3 }.
    FS_INLINE static NEON_i32x4 Incremented()
    {
        alignas(16) const int32_t f[4]{ 0, 1, 2, 3 };
        return vld1q_s32( f );
    }

    // Broadcast a single value to every lane.
    FS_INLINE explicit NEON_i32x4( int32_t i )
    {
        *this = vdupq_n_s32( i );
    }

    // Per-lane initialisation; i0 ends up in lane 0.
    FS_INLINE explicit NEON_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
    {
        alignas(16) const int32_t f[4]{ i0, i1, i2, i3 };
        *this = vld1q_s32( f );
    }

    FS_INLINE NEON_i32x4& operator+=( const NEON_i32x4& rhs )
    {
        *this = vaddq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator-=( const NEON_i32x4& rhs )
    {
        *this = vsubq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator*=( const NEON_i32x4& rhs )
    {
        *this = vmulq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator&=( const NEON_i32x4& rhs )
    {
        *this = vandq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator|=( const NEON_i32x4& rhs )
    {
        *this = vorrq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator^=( const NEON_i32x4& rhs )
    {
        *this = veorq_s32( *this, rhs );
        return *this;
    }

    // Arithmetic right shift by a runtime count.
    // vshrq_n_s32 only accepts a compile-time immediate, so the shift is done with
    // vshlq_s32 and a negated per-lane count (a negative count shifts right).
    FS_INLINE NEON_i32x4& operator>>=( const int32_t rhs )
    {
        *this = vshlq_s32( *this, vdupq_n_s32( -rhs ) );
        return *this;
    }

    // Left shift by a runtime count (vshlq_n_s32 would require an immediate).
    FS_INLINE NEON_i32x4& operator<<=( const int32_t rhs )
    {
        *this = vshlq_s32( *this, vdupq_n_s32( rhs ) );
        return *this;
    }

    FS_INLINE NEON_i32x4 operator~() const
    {
        return vmvnq_s32( *this );
    }

    FS_INLINE NEON_i32x4 operator-() const
    {
        return vnegq_s32( *this );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( NEON_i32x4, int32_t )
// Static function set implementing the FastSIMD vector interface on 128-bit NEON
// registers: four float lanes (float32v) or four int32 lanes (int32v).
// A mask (mask32v) is an int32 vector whose lanes are all-ones or all-zero.
template<FastSIMD::eLevel LEVEL_T>
class FastSIMD_NEON_T
{
public:
    static const FastSIMD::eLevel SIMD_Level = LEVEL_T;
    static const size_t VectorSize = 128 / 8;

    typedef NEON_f32x4 float32v;
    typedef NEON_i32x4 int32v;
    typedef NEON_i32x4 mask32v;

    // Load

    FS_INLINE static float32v Load_f32( void const* p )
    {
        return vld1q_f32( reinterpret_cast<float const*>(p) );
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        return vld1q_s32( reinterpret_cast<int32_t const*>(p) );
    }

    // Store

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        vst1q_f32( reinterpret_cast<float*>(p), a );
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        vst1q_s32( reinterpret_cast<int32_t*>(p), a );
    }

    // Cast (bit pattern preserved)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return vreinterpretq_f32_s32( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return vreinterpretq_s32_f32( a );
    }

    // Convert (value preserved)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return vcvtq_f32_s32( a );
    }

    // Rounds to nearest, matching the SSE (_mm_cvtps_epi32) and Scalar (nearbyint)
    // back-ends; vcvtq_s32_f32 would truncate toward zero instead.
    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        return vcvtnq_s32_f32( a );
    }

    // Comparisons: NEON compares yield uint32x4_t, reinterpreted to the int32 mask type.

    FS_INLINE static mask32v Equal_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vceqq_f32( a, b ) );
    }

    FS_INLINE static mask32v GreaterThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcgtq_f32( a, b ) );
    }

    FS_INLINE static mask32v LessThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcltq_f32( a, b ) );
    }

    FS_INLINE static mask32v GreaterEqualThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcgeq_f32( a, b ) );
    }

    FS_INLINE static mask32v LessEqualThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcleq_f32( a, b ) );
    }

    FS_INLINE static mask32v Equal_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vceqq_s32( a, b ) );
    }

    FS_INLINE static mask32v GreaterThan_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vcgtq_s32( a, b ) );
    }

    FS_INLINE static mask32v LessThan_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vcltq_s32( a, b ) );
    }

    // Select: per lane, `a` where the mask is set and `b` where it is clear.
    // vbslq takes the value for set mask bits as its second operand.

    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        return vbslq_f32( vreinterpretq_u32_s32( m ), a, b );
    }

    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return vbslq_s32( vreinterpretq_u32_s32( m ), a, b );
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return vminq_f32( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return vmaxq_f32( a, b );
    }

    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return vminq_s32( a, b );
    }

    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return vmaxq_s32( a, b );
    }

    // Bitwise (float variants operate on the raw lane bit patterns)

    FS_INLINE static float32v BitwiseAnd_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vandq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseOr_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vorrq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseXor_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( veorq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseNot_f32( float32v a )
    {
        return vreinterpretq_f32_s32( vmvnq_s32( vreinterpretq_s32_f32( a ) ) );
    }

    // a & ~b (vbicq computes "bit clear": first operand AND NOT second operand).
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vbicq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return vbicq_s32( a, b );
    }

    // Abs

    FS_INLINE static float32v Abs_f32( float32v a )
    {
        return vabsq_f32( a );
    }

    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return vabsq_s32( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return vsqrtq_f32( a );
    }

    // Hardware reciprocal square root estimate (not refined).
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        return vrsqrteq_f32( a );
    }

    // Floor, Ceil, Round use the directed-rounding instructions directly.

    // Round toward negative infinity.
    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return vrndmq_f32( a );
    }

    // Round toward positive infinity.
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return vrndpq_f32( a );
    }

    // Round to nearest, ties to even.
    FS_INLINE static float32v Round_f32( float32v a )
    {
        return vrndnq_f32( a );
    }

    // Mask: zero the lanes whose mask is clear.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        return a & m;
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        return BitwiseAnd_f32( a, vreinterpretq_f32_s32( m ) );
    }
};
#if FASTSIMD_COMPILE_NEON
// Concrete NEON back-end. The class template defined in this header is
// FastSIMD_NEON_T; FastSIMD_SSE_T does not exist here.
// NOTE(review): the SSE and Scalar headers spell level enumerators `Level_*`
// (e.g. Level_SSE41); confirm that `ELevel_NEON` is the name declared in FastSIMD.h.
typedef FastSIMD_NEON_T<FastSIMD::ELevel_NEON> FastSIMD_NEON;
#endif

View File

@ -0,0 +1,541 @@
#pragma once
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "VecTools.h"
namespace FastSIMD
{
// Wrapper around an SSE __m128 (four packed 32-bit floats). Supplies the
// compound-assignment operators that FASTSIMD_INTERNAL_OPERATORS_FLOAT builds the
// binary operators from, plus comparisons that return an integer lane mask.
struct SSE_f32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( SSE_f32x4, __m128 );

    // Lanes set to { 0, 1, 2, 3 } (_mm_set_ps takes the highest lane first).
    FS_INLINE static SSE_f32x4 Incremented()
    {
        return _mm_set_ps( 3.0f, 2.0f, 1.0f, 0.0f );
    }

    // Broadcast a single value to every lane.
    FS_INLINE explicit SSE_f32x4( float f )
    {
        *this = _mm_set1_ps( f );
    }

    // Per-lane initialisation; f0 ends up in lane 0.
    FS_INLINE explicit SSE_f32x4( float f0, float f1, float f2, float f3 )
    {
        *this = _mm_set_ps( f3, f2, f1, f0 );
    }

    FS_INLINE SSE_f32x4& operator+=( const SSE_f32x4& rhs )
    {
        *this = _mm_add_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator-=( const SSE_f32x4& rhs )
    {
        *this = _mm_sub_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator*=( const SSE_f32x4& rhs )
    {
        *this = _mm_mul_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator/=( const SSE_f32x4& rhs )
    {
        *this = _mm_div_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator&=( const SSE_f32x4& rhs )
    {
        *this = _mm_and_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator|=( const SSE_f32x4& rhs )
    {
        *this = _mm_or_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator^=( const SSE_f32x4& rhs )
    {
        *this = _mm_xor_ps( *this, rhs );
        return *this;
    }

    // Bitwise complement: XOR with an all-ones pattern.
    FS_INLINE SSE_f32x4 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128i neg1 = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
#else
        const __m128i neg1 = _mm_set1_epi32( -1 );
#endif
        return _mm_xor_ps( *this, _mm_castsi128_ps( neg1 ) );
    }

    // Negation: flip the sign bit of every lane.
    FS_INLINE SSE_f32x4 operator-() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // Compare zero with zero to obtain all-ones, then shift to leave only the sign bit.
        // Comparing an undefined register against zero would not reliably be all-ones.
        const __m128i minInt = _mm_slli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 31 );
#else
        const __m128i minInt = _mm_set1_epi32( 0x80000000 );
#endif
        return _mm_xor_ps( *this, _mm_castsi128_ps( minInt ) );
    }

    // Comparisons return the per-lane result mask reinterpreted as __m128i.

    FS_INLINE __m128i operator==( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmpeq_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator!=( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmpneq_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator>( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmpgt_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator<( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmplt_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator>=( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmpge_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator<=( const SSE_f32x4& rhs ) const
    {
        return _mm_castps_si128( _mm_cmple_ps( *this, rhs ) );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( SSE_f32x4 )
// Wrapper around an SSE __m128i treated as four packed signed 32-bit integers.
// LEVEL_T picks between the SSE2 emulation and the SSE4.1 instruction for the
// lane-wise multiply.
template<eLevel LEVEL_T>
struct SSE_i32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( SSE_i32x4, __m128i );

    // Lanes set to { 0, 1, 2, 3 } (_mm_set_epi32 takes the highest lane first).
    FS_INLINE static SSE_i32x4 Incremented()
    {
        return _mm_set_epi32( 3, 2, 1, 0 );
    }

    // Broadcast a single value to every lane.
    FS_INLINE explicit SSE_i32x4( int32_t i )
    {
        vector = _mm_set1_epi32( i );
    }

    // Per-lane initialisation; i0 ends up in lane 0.
    FS_INLINE explicit SSE_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
    {
        vector = _mm_set_epi32( i3, i2, i1, i0 );
    }

    FS_INLINE SSE_i32x4& operator+=( const SSE_i32x4& rhs )
    {
        vector = _mm_add_epi32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator-=( const SSE_i32x4& rhs )
    {
        vector = _mm_sub_epi32( vector, rhs.vector );
        return *this;
    }

    // Pre-SSE4.1 multiply: two widening multiplies (lanes 0/2, then lanes 1/3),
    // after which the low 32 bits of each product are interleaved back together.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
    {
        const __m128i evenProducts = _mm_mul_epu32( vector, rhs.vector );
        const __m128i oddProducts = _mm_mul_epu32( _mm_srli_si128( vector, 4 ), _mm_srli_si128( rhs.vector, 4 ) );
        const __m128i evenLow = _mm_shuffle_epi32( evenProducts, _MM_SHUFFLE( 0, 0, 2, 0 ) );
        const __m128i oddLow = _mm_shuffle_epi32( oddProducts, _MM_SHUFFLE( 0, 0, 2, 0 ) );
        vector = _mm_unpacklo_epi32( evenLow, oddLow );
        return *this;
    }

    // SSE4.1 and later: single-instruction lane-wise multiply.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
    {
        vector = _mm_mullo_epi32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator&=( const SSE_i32x4& rhs )
    {
        vector = _mm_and_si128( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator|=( const SSE_i32x4& rhs )
    {
        vector = _mm_or_si128( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator^=( const SSE_i32x4& rhs )
    {
        vector = _mm_xor_si128( vector, rhs.vector );
        return *this;
    }

    // Arithmetic (sign-extending) right shift of every lane.
    FS_INLINE SSE_i32x4& operator>>=( int32_t rhs )
    {
        vector = _mm_srai_epi32( vector, rhs );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator<<=( int32_t rhs )
    {
        vector = _mm_slli_epi32( vector, rhs );
        return *this;
    }

    // Bitwise complement: XOR with an all-ones pattern.
    FS_INLINE SSE_i32x4 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128i allOnes = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
#else
        const __m128i allOnes = _mm_set1_epi32( -1 );
#endif
        return _mm_xor_si128( vector, allOnes );
    }

    // Negation computed as 0 - value.
    FS_INLINE SSE_i32x4 operator-() const
    {
        const __m128i zero = _mm_setzero_si128();
        return _mm_sub_epi32( zero, vector );
    }

    // Comparisons return the per-lane result mask.

    FS_INLINE SSE_i32x4 operator==( const SSE_i32x4& rhs )
    {
        return _mm_cmpeq_epi32( vector, rhs.vector );
    }

    FS_INLINE SSE_i32x4 operator>( const SSE_i32x4& rhs )
    {
        return _mm_cmpgt_epi32( vector, rhs.vector );
    }

    FS_INLINE SSE_i32x4 operator<( const SSE_i32x4& rhs )
    {
        return _mm_cmplt_epi32( vector, rhs.vector );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( SSE_i32x4, int32_t )
// Static function set implementing the FastSIMD vector interface on 128-bit SSE
// registers. LEVEL_T selects, via enable_if overloads, between emulated paths and
// the single-instruction forms available from SSSE3 / SSE4.1 onward.
template<eLevel LEVEL_T>
class SSE_T
{
public:
    static_assert( LEVEL_T >= Level_SSE && LEVEL_T <= Level_SSE42, "Cannot create template with unsupported SIMD level" );

    static constexpr eLevel SIMD_Level = LEVEL_T;

    // Number of elements of ElementSize bits held in one 128-bit register.
    template<size_t ElementSize = 8>
    static constexpr size_t VectorSize = 128 / ElementSize;

    typedef SSE_f32x4 float32v;
    typedef SSE_i32x4<LEVEL_T> int32v;
    typedef SSE_i32x4<LEVEL_T> mask32v;

    // Load (unaligned)

    FS_INLINE static float32v Load_f32( void const* p )
    {
        return _mm_loadu_ps( reinterpret_cast<float const*>(p) );
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        return _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );
    }

    // Store (unaligned)

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        _mm_storeu_ps( reinterpret_cast<float*>(p), a );
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        _mm_storeu_si128( reinterpret_cast<__m128i*>(p), a );
    }

    // Cast (bit pattern preserved)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return _mm_castsi128_ps( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return _mm_castps_si128( a );
    }

    // Convert (value preserved)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return _mm_cvtepi32_ps( a );
    }

    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        return _mm_cvtps_epi32( a );
    }

    // Select: per lane, `a` where the mask is set and `b` where it is clear.

    // Pre-SSE4.1: b ^ (m & (a ^ b)) yields a where m is all-ones and b where m is zero.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        __m128 mf = _mm_castsi128_ps( m );

        return _mm_xor_ps( b, _mm_and_ps( mf, _mm_xor_ps( a, b ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        return _mm_blendv_ps( b, a, _mm_castsi128_ps( m ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return _mm_xor_si128( b, _mm_and_si128( m, _mm_xor_si128( a, b ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return _mm_castps_si128( _mm_blendv_ps( _mm_castsi128_ps( b ), _mm_castsi128_ps( a ), _mm_castsi128_ps( m ) ) );
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return _mm_min_ps( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return _mm_max_ps( a, b );
    }

    // Pre-SSE4.1 integer min/max are built from a compare and a select.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return Select_i32( a < b, a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return _mm_min_epi32( a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return Select_i32( a > b, a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return _mm_max_epi32( a, b );
    }

    // Bitwise

    // a & ~b (the andnot intrinsics complement their FIRST operand, hence the swap).
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return _mm_andnot_ps( b, a );
    }

    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return _mm_andnot_si128( b, a );
    }

    // Logical (zero-filling) right shift of the lane bit patterns.
    FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
    {
        return Casti32_f32( _mm_srli_epi32( Castf32_i32( a ), b ) );
    }

    FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
    {
        return _mm_srli_epi32( a, b );
    }

    // Abs

    // Clears the sign bit of every lane.
    FS_INLINE static float32v Abs_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128i intMax = _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 1 );
#else
        const __m128i intMax = _mm_set1_epi32( 0x7FFFFFFF );
#endif
        return _mm_and_ps( a, _mm_castsi128_ps( intMax ) );
    }

    // Pre-SSSE3: (a ^ sign) - sign, where sign is all-ones for negative lanes.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSSE3)>* = nullptr>
    FS_INLINE static int32v Abs_i32( int32v a )
    {
        __m128i signMask = _mm_srai_epi32( a, 31 );
        return _mm_sub_epi32( _mm_xor_si128( a, signMask ), signMask );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSSE3)>* = nullptr>
    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return _mm_abs_epi32( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return _mm_sqrt_ps( a );
    }

    // Hardware approximation of 1 / sqrt(a).
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        return _mm_rsqrt_ps( a );
    }

    // Hardware approximation of 1 / a.
    FS_INLINE static float32v Reciprocal_f32( float32v a )
    {
        return _mm_rcp_ps( a );
    }

    // Floor, Ceil, Round: http://dss.stephanierct.com/DevBlog/?p=8

    // Pre-SSE4.1: truncate through int32, then subtract 1 where truncation rounded up.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Floor_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
#else
        const __m128 f1 = _mm_set1_ps( 1.0f );
#endif
        __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );

        return _mm_sub_ps( fval, _mm_and_ps( _mm_cmplt_ps( a, fval ), f1 ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
    }

    // Pre-SSE4.1: truncate through int32, then add 1 where truncation rounded down.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
#else
        const __m128 f1 = _mm_set1_ps( 1.0f );
#endif
        __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );

        __m128 cmp = _mm_cmplt_ps( fval, a );
        return _mm_add_ps( fval, _mm_and_ps( cmp, f1 ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
    }

    // Pre-SSE4.1: add 0.5 carrying the sign of `a`, then truncate through int32.
    // An unreachable second implementation that followed the return was removed.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Round_f32( float32v a )
    {
        // Isolate the sign bit of each lane so it can be OR-ed onto 0.5.
        __m128 aSign = _mm_and_ps( a, _mm_castsi128_ps( int32v( 0x80000000 ) ) );

        return _mm_cvtepi32_ps( _mm_cvttps_epi32( a + float32v(_mm_or_ps( aSign, float32v( 0.5f ) ) ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Round_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
    }

    // Mask: zero the lanes whose mask is clear. NMask: zero the lanes whose mask is set.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        return a & m;
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        return _mm_and_ps( a, _mm_castsi128_ps( m ) );
    }

    FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
    {
        return _mm_andnot_si128( m, a );
    }

    FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
    {
        return _mm_andnot_ps( _mm_castsi128_ps( m ), a );
    }

    // True when the top (sign) bit of at least one mask lane is set.
    FS_INLINE static bool AnyMask_bool( mask32v m )
    {
        return _mm_movemask_ps( _mm_castsi128_ps( m ) );
    }
};
// Concrete back-end aliases: one SSE_T instantiation per SSE feature level, each
// declared only when its FASTSIMD_COMPILE_* switch is enabled.
#if FASTSIMD_COMPILE_SSE
typedef SSE_T<Level_SSE> SSE;
#endif
#if FASTSIMD_COMPILE_SSE2
typedef SSE_T<Level_SSE2> SSE2;
#endif
#if FASTSIMD_COMPILE_SSE3
typedef SSE_T<Level_SSE3> SSE3;
#endif
#if FASTSIMD_COMPILE_SSSE3
typedef SSE_T<Level_SSSE3> SSSE3;
#endif
#if FASTSIMD_COMPILE_SSE41
typedef SSE_T<Level_SSE41> SSE41;
#endif
#if FASTSIMD_COMPILE_SSE42
typedef SSE_T<Level_SSE42> SSE42;
#endif
}

View File

@ -0,0 +1,429 @@
#pragma once
#include "VecTools.h"
#include <algorithm>
#include <cmath>
#include <cstring>
namespace FastSIMD
{
// Reinterprets the object representation of `a` as an OUT (a bit cast, not a value
// conversion), e.g. float <-> int32_t.
//
// Implemented with memcpy because reading the inactive member of a union is
// undefined behaviour in C++. When the sizes differ only the common prefix is
// copied and any remaining bytes of the result are zero.
template<typename OUT, typename IN>
OUT ScalarCast( IN a )
{
    OUT result{};
    std::memcpy( &result, &a, sizeof( OUT ) < sizeof( IN ) ? sizeof( OUT ) : sizeof( IN ) );
    return result;
}
// Single-float stand-in for a SIMD float vector, used by the scalar fallback.
// Bitwise operators work on the float's 32-bit pattern via ScalarCast.
struct Scalar_Float
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Float, float );

    // With a single lane the "incremented" vector is just { 0 }.
    FS_INLINE static Scalar_Float Incremented()
    {
        return 0.0f;
    }

    FS_INLINE Scalar_Float& operator+=( const Scalar_Float& rhs )
    {
        vector = vector + rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator-=( const Scalar_Float& rhs )
    {
        vector = vector - rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator*=( const Scalar_Float& rhs )
    {
        vector = vector * rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator/=( const Scalar_Float& rhs )
    {
        vector = vector / rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator&=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits & rhsBits );
        return *this;
    }

    FS_INLINE Scalar_Float& operator|=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits | rhsBits );
        return *this;
    }

    FS_INLINE Scalar_Float& operator^=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits ^ rhsBits );
        return *this;
    }

    // Complement of the 32-bit pattern.
    FS_INLINE Scalar_Float operator~() const
    {
        const int32_t bits = ScalarCast<int32_t, float>( vector );
        return ScalarCast<float>( ~bits );
    }

    FS_INLINE Scalar_Float operator-() const
    {
        return -vector;
    }

    // Comparisons yield a plain bool, the scalar equivalent of a lane mask.

    FS_INLINE bool operator==( const Scalar_Float& rhs )
    {
        return vector == rhs.vector;
    }

    FS_INLINE bool operator!=( const Scalar_Float& rhs )
    {
        return vector != rhs.vector;
    }

    FS_INLINE bool operator>( const Scalar_Float& rhs )
    {
        return vector > rhs.vector;
    }

    FS_INLINE bool operator<( const Scalar_Float& rhs )
    {
        return vector < rhs.vector;
    }

    FS_INLINE bool operator>=( const Scalar_Float& rhs )
    {
        return vector >= rhs.vector;
    }

    FS_INLINE bool operator<=( const Scalar_Float& rhs )
    {
        return vector <= rhs.vector;
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( Scalar_Float )
// Single-int32 stand-in for a SIMD integer vector, used by the scalar fallback.
struct Scalar_Int
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Int, int32_t );

    // With a single lane the "incremented" vector is just { 0 }.
    FS_INLINE static Scalar_Int Incremented()
    {
        return 0;
    }

    FS_INLINE Scalar_Int& operator+=( const Scalar_Int& rhs )
    {
        vector = vector + rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator-=( const Scalar_Int& rhs )
    {
        vector = vector - rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator*=( const Scalar_Int& rhs )
    {
        vector = vector * rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator&=( const Scalar_Int& rhs )
    {
        vector = vector & rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator|=( const Scalar_Int& rhs )
    {
        vector = vector | rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator^=( const Scalar_Int& rhs )
    {
        vector = vector ^ rhs.vector;
        return *this;
    }

    // Shift using the built-in signed >> operator.
    FS_INLINE Scalar_Int& operator>>=( int32_t rhs )
    {
        vector = vector >> rhs;
        return *this;
    }

    FS_INLINE Scalar_Int& operator<<=( int32_t rhs )
    {
        vector = vector << rhs;
        return *this;
    }

    FS_INLINE Scalar_Int operator~() const
    {
        return ~vector;
    }

    FS_INLINE Scalar_Int operator-() const
    {
        return -vector;
    }

    // Comparisons yield a plain bool, the scalar equivalent of a lane mask.

    FS_INLINE bool operator==( const Scalar_Int& rhs )
    {
        return vector == rhs.vector;
    }

    FS_INLINE bool operator>( const Scalar_Int& rhs )
    {
        return vector > rhs.vector;
    }

    FS_INLINE bool operator<( const Scalar_Int& rhs )
    {
        return vector < rhs.vector;
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( Scalar_Int, int32_t )
// Single-bool stand-in for a SIMD lane mask, used by the scalar fallback.
// The "bitwise" operators are implemented with logical operations on the bool.
struct Scalar_Mask
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Mask, bool );

    // Mask inversion.
    FS_INLINE Scalar_Mask operator~() const
    {
        return !vector;
    }

    FS_INLINE Scalar_Mask& operator&=( const Scalar_Mask& rhs )
    {
        vector = vector && rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Mask& operator|=( const Scalar_Mask& rhs )
    {
        vector = vector || rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Mask operator&( const Scalar_Mask& rhs )
    {
        return vector && rhs.vector;
    }

    FS_INLINE Scalar_Mask operator|( const Scalar_Mask& rhs )
    {
        return vector || rhs.vector;
    }
};
// Static function set implementing the FastSIMD vector interface with one element
// per "vector": the portable fallback that uses no SIMD instructions.
class Scalar
{
public:
    static constexpr eLevel SIMD_Level = FastSIMD::Level_Scalar;

    // Number of elements of ElementSize bits held in one 32-bit "register".
    template<size_t ElementSize = 8>
    static constexpr size_t VectorSize = 32 / ElementSize;

    typedef Scalar_Float float32v;
    typedef Scalar_Int int32v;
    typedef Scalar_Mask mask32v;

    // Load

    FS_INLINE static float32v Load_f32( void const* p )
    {
        const float32v* source = static_cast<const float32v*>( p );
        return *source;
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        const int32v* source = static_cast<const int32v*>( p );
        return *source;
    }

    // Store

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        float32v* destination = static_cast<float32v*>( p );
        *destination = a;
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        int32v* destination = static_cast<int32v*>( p );
        *destination = a;
    }

    // Cast (bit pattern preserved)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return ScalarCast<float, int32_t>( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return ScalarCast<int32_t, float>( a );
    }

    // Convert (value preserved)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return static_cast<float>(a);
    }

    // Rounds using the current floating-point rounding mode before narrowing.
    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        return static_cast<int32_t>(nearbyint( a ));
    }

    // Select: `a` when the mask is set, otherwise `b`.

    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        if( m )
        {
            return a;
        }
        return b;
    }

    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        if( m )
        {
            return a;
        }
        return b;
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return fminf( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return fmaxf( a, b );
    }

    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return std::min( a, b );
    }

    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return std::max( a, b );
    }

    // Bitwise

    // a & ~b on the float bit patterns.
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return Casti32_f32( Castf32_i32( a ) & ~Castf32_i32( b ) );
    }

    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return a & ~b;
    }

    // Logical (zero-filling) right shift, done on the unsigned representation.
    FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
    {
        return Casti32_f32( int32_t( uint32_t( Castf32_i32( a ) ) >> b ) );
    }

    FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
    {
        return int32_t( uint32_t( a ) >> b );
    }

    // Abs

    FS_INLINE static float32v Abs_f32( float32v a )
    {
        return fabsf( a );
    }

    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return abs( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return sqrtf( a );
    }

    // Approximate 1 / sqrt(a): integer bit-trick initial guess followed by one
    // Newton-Raphson refinement step.
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        const float halfInput = 0.5f * (float)a;
        a = Casti32_f32( 0x5f3759df - ((int32_t)Castf32_i32( a ) >> 1) );
        a *= (1.5f - halfInput * (float)a * (float)a);
        return a;
    }

    // Approximate 1 / a: estimate x^-0.5 with an integer bit trick, then square it.
    FS_INLINE static float32v Reciprocal_f32( float32v a )
    {
        // pow( pow(x,-0.5), 2 ) = pow( x, -1 ) = 1.0 / x
        a = Casti32_f32( (0xbe6eb3beU - (int32_t)Castf32_i32( a )) >> 1 );
        const float32v inverseRoot = a;
        return inverseRoot * inverseRoot;
    }

    // Floor, Ceil, Round

    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return floorf( a );
    }

    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return ceilf( a );
    }

    FS_INLINE static float32v Round_f32( float32v a )
    {
        return nearbyintf( a );
    }

    // Mask keeps the value when the mask is set; NMask keeps it when the mask is clear.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        if( m )
        {
            return a;
        }
        return int32v(0);
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        if( m )
        {
            return a;
        }
        return float32v(0);
    }

    FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
    {
        if( m )
        {
            return int32v(0);
        }
        return a;
    }

    FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
    {
        if( m )
        {
            return float32v(0);
        }
        return a;
    }

    FS_INLINE static bool AnyMask_bool( mask32v m )
    {
        return m;
    }
};
}

View File

@ -0,0 +1,22 @@
#pragma once
#include "FastSIMD/FastSIMD.h"
#include "FastSIMD/TypeList.h"
template<typename CLASS, typename FS>
class FS_T;
template<typename CLASS, FastSIMD::eLevel LEVEL>
CLASS* FastSIMD::ClassFactory()
{
if constexpr( ( CLASS::Supported_SIMD_Levels & LEVEL & FastSIMD::COMPILED_SIMD_LEVELS ) != 0 )
{
static_assert( std::is_base_of_v<CLASS, FS_T<CLASS, FS_SIMD_CLASS>> );
return new FS_T<CLASS, FS_SIMD_CLASS>;
}
return nullptr;
}
// Explicitly instantiates FastSIMD::ClassFactory for CLASS at the SIMD level of the
// back-end currently selected by FS_SIMD_CLASS.
#define FASTSIMD_BUILD_CLASS( CLASS ) \
template CLASS* FastSIMD::ClassFactory<CLASS, FS_SIMD_CLASS::SIMD_Level>();
#include "../FastSIMD_BuildList.inl"

View File

@ -0,0 +1,66 @@
#pragma once
#include <cinttypes>
#include "FastSIMD/FastSIMD.h"
#include "FastSIMD/FunctionList.h"
// Boilerplate for a wrapper CLASS around a raw vector TYPE. Declares:
//   - the storage member `vector`,
//   - a default constructor (leaves `vector` uninitialised),
//   - an implicit converting constructor and an assignment operator from TYPE,
//   - an implicit conversion back to TYPE.
#define FASTSIMD_INTERNAL_TYPE_SET( CLASS, TYPE ) \
TYPE vector; \
FS_INLINE CLASS() { } \
FS_INLINE CLASS( const TYPE& v ) : vector(v) {}; \
FS_INLINE CLASS& operator = ( const TYPE& v ) { vector = v; return *this; } \
FS_INLINE operator TYPE() const { return vector; }
// Defines the free binary operator `TYPE OPERATOR TYPE2` in terms of the matching
// compound assignment: the left operand is taken by value, modified with
// OPERATOREQUALS and returned.
#define FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, OPERATOR, OPERATOREQUALS ) \
FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs ) \
{ \
lhs OPERATOREQUALS rhs; \
return lhs; \
}
// Same as FASTSIMD_INTERNAL_OPERATOR, but the generated operator is a template over
// the SIMD level `L`, for wrapper types that are themselves templated on the level.
#define FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE, TYPE2, OPERATOR, OPERATOREQUALS ) \
template<FastSIMD::eLevel L> \
FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs ) \
{ \
lhs OPERATOREQUALS rhs; \
return lhs; \
}
// Generates the binary operators + - * / & | ^ for a float wrapper TYPE.
// TYPE must provide the corresponding compound-assignment operators.
#define FASTSIMD_INTERNAL_OPERATORS_FLOAT( TYPE ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, /, /= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= )
// Generates the binary operators + - * / & | ^ for a float wrapper template TYPE<L>,
// where L is the SIMD level template parameter introduced by the generated operator.
#define FASTSIMD_INTERNAL_OPERATORS_FLOAT_TEMPLATED( TYPE ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, /, /= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= )
// Generates the binary operators + - * & | ^ for an integer wrapper TYPE, plus the
// shifts >> and << whose right operand has type TYPE2 (the shift count).
// TYPE must provide the corresponding compound-assignment operators.
#define FASTSIMD_INTERNAL_OPERATORS_INT( TYPE, TYPE2 ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, >>, >>= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, <<, <<= )
// Generates the binary operators + - * & | ^ and the shifts >> << for an integer
// wrapper template TYPE<L>, where L is the SIMD level template parameter introduced
// by the generated operator and TYPE2 is the shift-count type.
#define FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( TYPE, TYPE2 ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, >>, >>= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, <<, <<= )

File diff suppressed because it is too large Load Diff

View File

@ -1,318 +0,0 @@
/*
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef CPU_FEATURES_H
#define CPU_FEATURES_H
#include <sys/cdefs.h>
#include <stdint.h>
__BEGIN_DECLS
/* CPU architecture of the current process, as returned by
 * android_getCpuFamily(). Values are consecutive, starting at zero.
 */
typedef enum {
    ANDROID_CPU_FAMILY_UNKNOWN = 0,
    ANDROID_CPU_FAMILY_ARM     = 1,
    ANDROID_CPU_FAMILY_X86     = 2,
    ANDROID_CPU_FAMILY_MIPS    = 3,
    ANDROID_CPU_FAMILY_ARM64   = 4,
    ANDROID_CPU_FAMILY_X86_64  = 5,
    ANDROID_CPU_FAMILY_MIPS64  = 6,
    ANDROID_CPU_FAMILY_MAX     = 7  /* do not remove */
} AndroidCpuFamily;
/* Return the CPU family of the current process.
*
* Note that this matches the bitness of the current process. I.e. when
* running a 32-bit binary on a 64-bit capable CPU, this will return the
* 32-bit CPU family value.
*/
extern AndroidCpuFamily android_getCpuFamily(void);
/* Return a bitmap describing a set of optional CPU features that are
* supported by the current device's CPU. The exact bit-flags returned
* depend on the value returned by android_getCpuFamily(). See the
* documentation for the ANDROID_CPU_*_FEATURE_* flags below for details.
*/
extern uint64_t android_getCpuFeatures(void);
/* The list of feature flags for ANDROID_CPU_FAMILY_ARM that can be
* recognized by the library (see note below for 64-bit ARM). Value details
* are:
*
* VFPv2:
* CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs
* support these instructions. VFPv2 is a subset of VFPv3 so this will
* be set whenever VFPv3 is set too.
*
* ARMv7:
* CPU supports the ARMv7-A basic instruction set.
* This feature is mandated by the 'armeabi-v7a' ABI.
*
* VFPv3:
* CPU supports the VFPv3-D16 instruction set, providing hardware FPU
* support for single and double precision floating point registers.
* Note that only 16 FPU registers are available by default, unless
* the D32 bit is set too. This feature is also mandated by the
* 'armeabi-v7a' ABI.
*
* VFP_D32:
* CPU VFP optional extension that provides 32 FPU registers,
* instead of 16. Note that ARM mandates this feature if the 'NEON'
* feature is implemented by the CPU.
*
* NEON:
* CPU FPU supports "ARM Advanced SIMD" instructions, also known as
* NEON. Note that this mandates the VFP_D32 feature as well, per the
* ARM Architecture specification.
*
* VFP_FP16:
* Half-width floating precision VFP extension. If set, the CPU
* supports instructions to perform floating-point operations on
* 16-bit registers. This is part of the VFPv4 specification, but
* not mandated by any Android ABI.
*
* VFP_FMA:
* Fused multiply-accumulate VFP instructions extension. Also part of
* the VFPv4 specification, but not mandated by any Android ABI.
*
* NEON_FMA:
* Fused multiply-accumulate NEON instructions extension. Optional
* extension from the VFPv4 specification, but not mandated by any
* Android ABI.
*
* IDIV_ARM:
* Integer division available in ARM mode. Only available
* on recent CPUs (e.g. Cortex-A15).
*
* IDIV_THUMB2:
* Integer division available in Thumb-2 mode. Only available
* on recent CPUs (e.g. Cortex-A15).
*
* iWMMXt:
* Optional extension that adds MMX registers and operations to an
* ARM CPU. This is only available on a few XScale-based CPU designs
* sold by Marvell. Pretty rare in practice.
*
* AES:
* CPU supports AES instructions. These instructions are only
* available for 32-bit applications running on ARMv8 CPU.
*
* CRC32:
* CPU supports CRC32 instructions. These instructions are only
* available for 32-bit applications running on ARMv8 CPU.
*
* SHA2:
* CPU supports SHA2 instructions. These instructions are only
* available for 32-bit applications running on ARMv8 CPU.
*
* SHA1:
* CPU supports SHA1 instructions. These instructions are only
* available for 32-bit applications running on ARMv8 CPU.
*
* PMULL:
* CPU supports 64-bit PMULL and PMULL2 instructions. These
* instructions are only available for 32-bit applications
* running on ARMv8 CPU.
*
* If you want to tell the compiler to generate code that targets one of
* the feature set above, you should probably use one of the following
* flags (for more details, see technical note at the end of this file):
*
* -mfpu=vfp
* -mfpu=vfpv2
* These are equivalent and tell GCC to use VFPv2 instructions for
* floating-point operations. Use this if you want your code to
* run on *some* ARMv6 devices, and any ARMv7-A device supported
* by Android.
*
* Generated code requires VFPv2 feature.
*
* -mfpu=vfpv3-d16
* Tell GCC to use VFPv3 instructions (using only 16 FPU registers).
* This should be generic code that runs on any CPU that supports the
* 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this.
*
* Generated code requires VFPv3 feature.
*
* -mfpu=vfpv3
* Tell GCC to use VFPv3 instructions with 32 FPU registers.
* Generated code requires VFPv3|VFP_D32 features.
*
* -mfpu=neon
* Tell GCC to use VFPv3 instructions with 32 FPU registers, and
* also support NEON intrinsics (see <arm_neon.h>).
* Generated code requires VFPv3|VFP_D32|NEON features.
*
* -mfpu=vfpv4-d16
* Generated code requires VFPv3|VFP_FP16|VFP_FMA features.
*
* -mfpu=vfpv4
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features.
*
* -mfpu=neon-vfpv4
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA
* features.
*
* -mcpu=cortex-a7
* -mcpu=cortex-a15
* Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|
* NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2
* This flag implies -mfpu=neon-vfpv4.
*
* -mcpu=iwmmxt
* Allows the use of iWMMXt intrinsics with GCC.
*
* IMPORTANT NOTE: These flags should only be tested when
* android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM, i.e. this is a
* 32-bit process.
*
* When running a 64-bit ARM process on an ARMv8 CPU,
* android_getCpuFeatures() will return a different set of bitflags
*/
/* Single-bit masks for the 32-bit ARM feature set; test them against the
 * value returned by android_getCpuFeatures(). */
enum {
    ANDROID_CPU_ARM_FEATURE_ARMv7       = 0x00001, /* bit 0  */
    ANDROID_CPU_ARM_FEATURE_VFPv3       = 0x00002, /* bit 1  */
    ANDROID_CPU_ARM_FEATURE_NEON        = 0x00004, /* bit 2  */
    ANDROID_CPU_ARM_FEATURE_LDREX_STREX = 0x00008, /* bit 3  */
    ANDROID_CPU_ARM_FEATURE_VFPv2       = 0x00010, /* bit 4  */
    ANDROID_CPU_ARM_FEATURE_VFP_D32     = 0x00020, /* bit 5  */
    ANDROID_CPU_ARM_FEATURE_VFP_FP16    = 0x00040, /* bit 6  */
    ANDROID_CPU_ARM_FEATURE_VFP_FMA     = 0x00080, /* bit 7  */
    ANDROID_CPU_ARM_FEATURE_NEON_FMA    = 0x00100, /* bit 8  */
    ANDROID_CPU_ARM_FEATURE_IDIV_ARM    = 0x00200, /* bit 9  */
    ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = 0x00400, /* bit 10 */
    ANDROID_CPU_ARM_FEATURE_iWMMXt      = 0x00800, /* bit 11 */
    ANDROID_CPU_ARM_FEATURE_AES         = 0x01000, /* bit 12 */
    ANDROID_CPU_ARM_FEATURE_PMULL       = 0x02000, /* bit 13 */
    ANDROID_CPU_ARM_FEATURE_SHA1        = 0x04000, /* bit 14 */
    ANDROID_CPU_ARM_FEATURE_SHA2        = 0x08000, /* bit 15 */
    ANDROID_CPU_ARM_FEATURE_CRC32       = 0x10000, /* bit 16 */
};
/* Feature bits reported by android_getCpuFeatures() when
 * android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM64:
 *
 * FP:     CPU has Floating-point unit.
 * ASIMD:  CPU has Advanced SIMD unit.
 * AES:    CPU supports AES instructions.
 * CRC32:  CPU supports CRC32 instructions.
 * SHA2:   CPU supports SHA2 instructions.
 * SHA1:   CPU supports SHA1 instructions.
 * PMULL:  CPU supports 64-bit PMULL and PMULL2 instructions.
 */
enum {
    ANDROID_CPU_ARM64_FEATURE_FP    = 0x01, /* bit 0 */
    ANDROID_CPU_ARM64_FEATURE_ASIMD = 0x02, /* bit 1 */
    ANDROID_CPU_ARM64_FEATURE_AES   = 0x04, /* bit 2 */
    ANDROID_CPU_ARM64_FEATURE_PMULL = 0x08, /* bit 3 */
    ANDROID_CPU_ARM64_FEATURE_SHA1  = 0x10, /* bit 4 */
    ANDROID_CPU_ARM64_FEATURE_SHA2  = 0x20, /* bit 5 */
    ANDROID_CPU_ARM64_FEATURE_CRC32 = 0x40, /* bit 6 */
};
/* Feature bits reported by android_getCpuFeatures() when
 * android_getCpuFamily() returns ANDROID_CPU_FAMILY_X86 or
 * ANDROID_CPU_FAMILY_X86_64.
 */
enum {
    ANDROID_CPU_X86_FEATURE_SSSE3  = 0x01, /* bit 0 */
    ANDROID_CPU_X86_FEATURE_POPCNT = 0x02, /* bit 1 */
    ANDROID_CPU_X86_FEATURE_MOVBE  = 0x04, /* bit 2 */
    ANDROID_CPU_X86_FEATURE_SSE4_1 = 0x08, /* bit 3 */
    ANDROID_CPU_X86_FEATURE_SSE4_2 = 0x10, /* bit 4 */
};
/* Feature bits reported by android_getCpuFeatures() when
 * android_getCpuFamily() returns ANDROID_CPU_FAMILY_MIPS or
 * ANDROID_CPU_FAMILY_MIPS64:
 *
 * R6:   CPU executes MIPS Release 6 instructions natively, and
 *       supports obsoleted R1..R5 instructions only via kernel traps.
 * MSA:  CPU supports Mips SIMD Architecture instructions.
 */
enum {
    ANDROID_CPU_MIPS_FEATURE_R6  = 0x1, /* bit 0 */
    ANDROID_CPU_MIPS_FEATURE_MSA = 0x2, /* bit 1 */
};
/* Return the number of CPU cores detected on this device. */
extern int android_getCpuCount(void);
/* The following is used to force the CPU count and features
* mask in sandboxed processes. Under 4.1 and higher, these processes
* cannot access /proc, which is the only way to get information from
* the kernel about the current hardware (at least on ARM).
*
* It _must_ be called only once, and before any android_getCpuXXX
* function, any other case will fail.
*
* This function returns 1 on success, and 0 on failure.
*/
extern int android_setCpu(int cpu_count,
uint64_t cpu_features);
#ifdef __arm__
/* Retrieve the ARM 32-bit CPUID value from the kernel.
* Note that this cannot work on sandboxed processes under 4.1 and
* higher, unless you called android_setCpuArm() before.
*/
extern uint32_t android_getCpuIdArm(void);
/* An ARM-specific variant of android_setCpu() that also allows you
* to set the ARM CPUID field.
*/
extern int android_setCpuArm(int cpu_count,
uint64_t cpu_features,
uint32_t cpu_id);
#endif
__END_DECLS
#endif /* CPU_FEATURES_H */

View File

@ -1,556 +0,0 @@
// FastNoiseSIMD.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
#include "FastNoiseSIMD.h"
#include <assert.h>
#include <stdlib.h>
#include <algorithm>
#include <cstdint>
#ifdef FN_COMPILE_NO_SIMD_FALLBACK
#define SIMD_LEVEL_H FN_NO_SIMD_FALLBACK
#include "FastNoiseSIMD_internal.h"
#endif
#ifdef FN_COMPILE_SSE2
#define SIMD_LEVEL_H FN_SSE2
#include "FastNoiseSIMD_internal.h"
#endif
#ifdef FN_COMPILE_SSE41
#define SIMD_LEVEL_H FN_SSE41
#include "FastNoiseSIMD_internal.h"
#endif
#ifdef FN_COMPILE_AVX2
#define SIMD_LEVEL_H FN_AVX2
#include "FastNoiseSIMD_internal.h"
#endif
#ifdef FN_COMPILE_AVX512
#define SIMD_LEVEL_H FN_AVX512
#include "FastNoiseSIMD_internal.h"
#endif
#ifdef FN_COMPILE_NEON
#define SIMD_LEVEL_H FN_NEON
#include "FastNoiseSIMD_internal.h"
#endif
// CPUid
#ifdef _WIN32
#include <intrin.h>
#elif defined(FN_ARM)
#if !defined(__aarch64__) && !defined(FN_IOS)
#include "ARM/cpu-features.h"
#endif
#else
#include <cpuid.h>
#include "inttypes.h"
#endif
// Cached SIMD level shared by every FastNoiseSIMD object. A negative value
// means "not detected yet": GetSIMDLevel() then stores the result of
// GetFastestSIMD() here on first use.
int FastNoiseSIMD::s_currentSIMDLevel = -1;
#ifdef FN_ARM
// ARM build: picks the SIMD level to use on the running CPU.
// Returns FN_NEON or FN_NO_SIMD_FALLBACK.
int GetFastestSIMD()
{
#if defined(__aarch64__) || defined(FN_IOS)
// AArch64 and iOS builds report NEON without any runtime probe.
return FN_NEON;
#else
// Other ARM builds ask the Android cpu-features API at run time.
if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM)
{
auto cpuFeatures = android_getCpuFeatures();
// The return below is the body of the NEON test; when FN_USE_FMA is defined
// the NEON_FMA test is nested inside it, so both bits must be set.
if (cpuFeatures & ANDROID_CPU_ARM_FEATURE_NEON)
#ifdef FN_USE_FMA
if (cpuFeatures & ANDROID_CPU_ARM_FEATURE_NEON_FMA)
#endif
return FN_NEON;
}
// Not the ARM family, or a required feature bit is missing.
return FN_NO_SIMD_FALLBACK;
#endif
}
#else
// Thin per-compiler wrappers around the CPUID and XGETBV instructions, used
// by the x86 GetFastestSIMD() below.
#ifdef _WIN32
// Runs CPUID for leaf x, sub-leaf 0, writing the four result registers to out.
void cpuid(int32_t out[4], int32_t x) {
__cpuidex(out, x, 0);
}
// Returns the extended control register selected by x.
uint64_t xgetbv(unsigned int x) {
return _xgetbv(x);
}
#else
// Runs CPUID for leaf x, sub-leaf 0, writing the four result registers to out.
void cpuid(int32_t out[4], int32_t x) {
__cpuid_count(x, 0, out[0], out[1], out[2], out[3]);
}
// Returns the extended control register selected by index: the instruction
// takes the index in ECX and yields the value split across EDX:EAX.
uint64_t xgetbv(unsigned int index) {
uint32_t eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((uint64_t)edx << 32) | eax;
}
// Register index passed to xgetbv() by GetFastestSIMD(); defined here for the
// non-Windows branch only (the _WIN32 branch relies on <intrin.h> for it --
// NOTE(review): confirm).
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif
// x86 build: probes CPUID / XGETBV and returns the highest FN_* SIMD level the
// running CPU (and OS) supports, falling back one step at a time:
// FN_AVX512 -> FN_AVX2 -> FN_SSE41 -> FN_SSE2 -> FN_NO_SIMD_FALLBACK.
int GetFastestSIMD()
{
    // Probe sequence follows https://github.com/Mysticial/FeatureDetector
    int regs[4]; // EAX, EBX, ECX, EDX of the most recent cpuid() call

    cpuid(regs, 0);
    const int highestLeaf = regs[0];
    if (highestLeaf < 0x00000001)
        return FN_NO_SIMD_FALLBACK;

    cpuid(regs, 0x00000001);

    // SSE2: leaf 1, EDX bit 26
    const bool hasSSE2 = (regs[3] & 1 << 26) != 0;
    if (!hasSSE2)
        return FN_NO_SIMD_FALLBACK;

    // SSE4.1: leaf 1, ECX bit 19
    const bool hasSSE41 = (regs[2] & 1 << 19) != 0;
    if (!hasSSE41)
        return FN_SSE2;

    // AVX needs all three leaf-1 ECX bits (26, 27, 28) ...
    const bool cpuXSaveSupport = (regs[2] & 1 << 26) != 0;
    const bool osAVXSupport = (regs[2] & 1 << 27) != 0;
    const bool cpuAVXSupport = (regs[2] & 1 << 28) != 0;
    if (!(cpuXSaveSupport && osAVXSupport && cpuAVXSupport))
        return FN_SSE41;

    // ... and bits 1 and 2 set in the XGETBV feature mask. XGETBV is only
    // executed once the three bits above have been confirmed.
    const uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    if ((xcrFeatureMask & 0x6) != 0x6)
        return FN_SSE41;

    // AVX2 / FMA3 need CPUID leaf 7 to exist
    if (highestLeaf < 0x00000007)
        return FN_SSE41;

#ifdef FN_USE_FMA
    // FMA3: leaf 1, ECX bit 12 -- must be read before regs is overwritten by leaf 7
    const bool cpuFMA3Support = (regs[2] & 1 << 12) != 0;
#else
    const bool cpuFMA3Support = true;
#endif

    cpuid(regs, 0x00000007);

    // AVX2: leaf 7, EBX bit 5
    const bool cpuAVX2Support = (regs[1] & 1 << 5) != 0;
    if (!(cpuFMA3Support && cpuAVX2Support))
        return FN_SSE41;

    // AVX512: leaf 7, EBX bit 16, plus mask 0xe6 in the XGETBV feature mask
    const bool cpuAVX512Support = (regs[1] & 1 << 16) != 0;
    const bool osAVX512Support = (xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0xe6) == 0xe6;
    if (!(cpuAVX512Support && osAVX512Support))
        return FN_AVX2;

    return FN_AVX512;
}
#endif
// Factory: heap-allocates the implementation class for the best SIMD level
// that is both compiled in (FN_COMPILE_*) and not above s_currentSIMDLevel.
// Levels are tried from highest to lowest; the caller receives a raw pointer
// created with `new`.
// NOTE(review): if neither FN_COMPILE_SSE2, FN_COMPILE_NEON nor
// FN_COMPILE_NO_SIMD_FALLBACK yields an unconditional return, control can
// reach the closing brace without returning a value.
FastNoiseSIMD* FastNoiseSIMD::NewFastNoiseSIMD(int seed)
{
// Make sure s_currentSIMDLevel has been detected before it is compared below.
GetSIMDLevel();
#ifdef FN_COMPILE_NEON
// The level test only exists when a fallback class is compiled; otherwise the
// NEON return is unconditional.
#ifdef FN_COMPILE_NO_SIMD_FALLBACK
if (s_currentSIMDLevel >= FN_NEON)
#endif
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_NEON)(seed);
#endif
#ifdef FN_COMPILE_AVX512
if (s_currentSIMDLevel >= FN_AVX512)
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX512)(seed);
#endif
#ifdef FN_COMPILE_AVX2
if (s_currentSIMDLevel >= FN_AVX2)
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX2)(seed);
#endif
#ifdef FN_COMPILE_SSE41
if (s_currentSIMDLevel >= FN_SSE41)
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE41)(seed);
#endif
#ifdef FN_COMPILE_SSE2
// As with NEON: without a compiled fallback the SSE2 return is unconditional.
#ifdef FN_COMPILE_NO_SIMD_FALLBACK
if (s_currentSIMDLevel >= FN_SSE2)
#endif
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE2)(seed);
#endif
#ifdef FN_COMPILE_NO_SIMD_FALLBACK
return new FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_NO_SIMD_FALLBACK)(seed);
#endif
}
// Returns the SIMD level in use. A negative cached value means no level has
// been chosen yet, in which case GetFastestSIMD() is run once and its result
// is cached.
int FastNoiseSIMD::GetSIMDLevel()
{
    const bool notDetectedYet = s_currentSIMDLevel < 0;
    if (notDetectedYet)
    {
        s_currentSIMDLevel = GetFastestSIMD();
    }

    return s_currentSIMDLevel;
}
// Releases a float array. With FN_ALIGNED_SETS defined and a SIMD level above
// FN_NO_SIMD_FALLBACK the array is released with _aligned_free() (Windows) or
// free(); in every other case it is released with delete[].
// NOTE(review): the choice depends only on s_currentSIMDLevel, so it matches
// GetEmptySet() only when that function took one of its SIMD-class branches
// for the same level -- verify for builds where a detected level has no
// compiled class at or below it.
void FastNoiseSIMD::FreeNoiseSet(float* floatArray)
{
#ifdef FN_ALIGNED_SETS
// Make sure s_currentSIMDLevel has been detected before it is compared below.
GetSIMDLevel();
if (s_currentSIMDLevel > FN_NO_SIMD_FALLBACK)
#ifdef _WIN32
_aligned_free(floatArray);
#else
free(floatArray);
#endif
else
#endif
// Body of the `else` above when FN_ALIGNED_SETS is defined; otherwise the
// only statement of the function.
delete[] floatArray;
}
// Rounds `size` up to the element count required by the active SIMD level.
//
// With FN_ALIGNED_SETS defined, the request is forwarded to the highest
// compiled implementation class whose level does not exceed
// s_currentSIMDLevel. Without FN_ALIGNED_SETS, or when no compiled class
// matches, `size` is returned unchanged.
//
// The SSE4.1 branch makes builds that compile FN_COMPILE_SSE41 without
// FN_COMPILE_SSE2 behave like every other SIMD build; previously such a build
// fell through and returned the unrounded size even though an SSE4.1 class
// was in use.
int FastNoiseSIMD::AlignedSize(int size)
{
#ifdef FN_ALIGNED_SETS
    // Make sure s_currentSIMDLevel has been detected before it is compared.
    GetSIMDLevel();

#ifdef FN_COMPILE_NEON
    if (s_currentSIMDLevel >= FN_NEON)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_NEON)::AlignedSize(size);
#endif

#ifdef FN_COMPILE_AVX512
    if (s_currentSIMDLevel >= FN_AVX512)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX512)::AlignedSize(size);
#endif

#ifdef FN_COMPILE_AVX2
    if (s_currentSIMDLevel >= FN_AVX2)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX2)::AlignedSize(size);
#endif

#ifdef FN_COMPILE_SSE41
    if (s_currentSIMDLevel >= FN_SSE41)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE41)::AlignedSize(size);
#endif

#ifdef FN_COMPILE_SSE2
    if (s_currentSIMDLevel >= FN_SSE2)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE2)::AlignedSize(size);
#endif
#endif
    return size;
}
// Allocates an uninitialised float array of at least `size` elements; release
// it with FreeNoiseSet().
//
// With FN_ALIGNED_SETS defined, allocation is delegated to the highest
// compiled implementation class whose level does not exceed
// s_currentSIMDLevel. Otherwise the array comes from plain `new float[size]`.
//
// The SSE4.1 branch fixes an allocator mismatch: in a build with
// FN_COMPILE_SSE41 but without FN_COMPILE_SSE2, the function used to fall
// through to `new float[]` while FreeNoiseSet() (level > FN_NO_SIMD_FALLBACK)
// released the array with free() / _aligned_free().
float* FastNoiseSIMD::GetEmptySet(int size)
{
#ifdef FN_ALIGNED_SETS
    // Make sure s_currentSIMDLevel has been detected before it is compared.
    GetSIMDLevel();

#ifdef FN_COMPILE_NEON
    if (s_currentSIMDLevel >= FN_NEON)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_NEON)::GetEmptySet(size);
#endif

#ifdef FN_COMPILE_AVX512
    if (s_currentSIMDLevel >= FN_AVX512)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX512)::GetEmptySet(size);
#endif

#ifdef FN_COMPILE_AVX2
    if (s_currentSIMDLevel >= FN_AVX2)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_AVX2)::GetEmptySet(size);
#endif

#ifdef FN_COMPILE_SSE41
    if (s_currentSIMDLevel >= FN_SSE41)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE41)::GetEmptySet(size);
#endif

#ifdef FN_COMPILE_SSE2
    if (s_currentSIMDLevel >= FN_SSE2)
        return FastNoiseSIMD_internal::FASTNOISE_SIMD_CLASS(FN_SSE2)::GetEmptySet(size);
#endif
#endif
    return new float[size];
}
// Heap-allocates a FastNoiseVectorSet and fills it with every integer
// coordinate of an xSize * ySize * zSize grid (see FillVectorSet).
// The caller receives a raw pointer created with `new`.
FastNoiseVectorSet* FastNoiseSIMD::GetVectorSet(int xSize, int ySize, int zSize)
{
    FastNoiseVectorSet* const result = new FastNoiseVectorSet();

    FillVectorSet(result, xSize, ySize, zSize);

    return result;
}
// Resizes `vectorSet` to xSize*ySize*zSize entries and writes every integer
// grid coordinate into it, with z varying fastest and x slowest.
// Also resets vectorSet->sampleScale to 0. `vectorSet` must not be null.
void FastNoiseSIMD::FillVectorSet(FastNoiseVectorSet* vectorSet, int xSize, int ySize, int zSize)
{
    assert(vectorSet);

    vectorSet->SetSize(xSize*ySize*zSize);
    vectorSet->sampleScale = 0;

    int slot = 0;
    for (int x = 0; x < xSize; ++x)
    {
        const float fx = float(x);
        for (int y = 0; y < ySize; ++y)
        {
            const float fy = float(y);
            for (int z = 0; z < zSize; ++z, ++slot)
            {
                vectorSet->xSet[slot] = fx;
                vectorSet->ySet[slot] = fy;
                vectorSet->zSet[slot] = float(z);
            }
        }
    }
}
// Heap-allocates a FastNoiseVectorSet and fills it with the sampling grid
// produced by FillSamplingVectorSet for the given scale and dimensions.
// The caller receives a raw pointer created with `new`.
FastNoiseVectorSet* FastNoiseSIMD::GetSamplingVectorSet(int sampleScale, int xSize, int ySize, int zSize)
{
    FastNoiseVectorSet* const result = new FastNoiseVectorSet();

    FillSamplingVectorSet(result, sampleScale, xSize, ySize, zSize);

    return result;
}
// Fills `vectorSet` with a coarse grid whose points are spaced
// (1 << sampleScale) apart along each axis.
//
// A sampleScale of zero or less delegates to FillVectorSet (full-resolution
// grid). Otherwise the requested dimensions are recorded in sampleSizeX/Y/Z,
// each dimension is rounded up to a multiple of the spacing, and one extra
// point per axis is added. z varies fastest and x slowest.
// `vectorSet` must not be null.
void FastNoiseSIMD::FillSamplingVectorSet(FastNoiseVectorSet* vectorSet, int sampleScale, int xSize, int ySize, int zSize)
{
    assert(vectorSet);

    if (sampleScale <= 0)
    {
        FillVectorSet(vectorSet, xSize, ySize, zSize);
        return;
    }

    vectorSet->sampleSizeX = xSize;
    vectorSet->sampleSizeY = ySize;
    vectorSet->sampleSizeZ = zSize;

    const int spacing = 1 << sampleScale;
    const int lowBits = spacing - 1;

    // Number of grid points needed to span `extent`: round up to a multiple
    // of the spacing, divide by it, then add the closing point.
    auto gridPoints = [=](int extent) -> int
    {
        if (extent & lowBits)
            extent = (extent & ~lowBits) + spacing;
        return (extent >> sampleScale) + 1;
    };

    const int xCount = gridPoints(xSize);
    const int yCount = gridPoints(ySize);
    const int zCount = gridPoints(zSize);

    vectorSet->SetSize(xCount*yCount*zCount);
    vectorSet->sampleScale = sampleScale;

    int slot = 0;
    for (int x = 0; x < xCount; ++x)
    {
        const float fx = float(x*spacing);
        for (int y = 0; y < yCount; ++y)
        {
            const float fy = float(y*spacing);
            for (int z = 0; z < zCount; ++z, ++slot)
            {
                vectorSet->xSet[slot] = fx;
                vectorSet->ySet[slot] = fy;
                vectorSet->zSet[slot] = float(z*spacing);
            }
        }
    }
}
// Allocates a set via GetEmptySet(xSize, ySize, zSize), fills it with noise of
// the currently selected type (see FillNoiseSet) and returns it.
float* FastNoiseSIMD::GetNoiseSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)
{
    float* const result = GetEmptySet(xSize, ySize, zSize);

    FillNoiseSet(result, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);

    return result;
}
// Dispatches on m_noiseType to the matching Fill<Type>Set overload that takes
// a start position and dimensions. An unrecognised noise type leaves
// `noiseSet` untouched.
void FastNoiseSIMD::FillNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)
{
    switch (m_noiseType)
    {
    case Value:
        FillValueSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case ValueFractal:
        FillValueFractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case Perlin:
        FillPerlinSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case PerlinFractal:
        FillPerlinFractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case Simplex:
        FillSimplexSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case SimplexFractal:
        FillSimplexFractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case WhiteNoise:
        FillWhiteNoiseSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case Cellular:
        FillCellularSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case Cubic:
        FillCubicSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    case CubicFractal:
        FillCubicFractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);
        return;

    default:
        return;
    }
}
// Dispatches on m_noiseType to the matching Fill<Type>Set overload that takes
// a vector set and an offset. An unrecognised noise type leaves `noiseSet`
// untouched.
void FastNoiseSIMD::FillNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset, float yOffset, float zOffset)
{
    switch (m_noiseType)
    {
    case Value:
        FillValueSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case ValueFractal:
        FillValueFractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case Perlin:
        FillPerlinSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case PerlinFractal:
        FillPerlinFractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case Simplex:
        FillSimplexSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case SimplexFractal:
        FillSimplexFractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case WhiteNoise:
        FillWhiteNoiseSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case Cellular:
        FillCellularSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case Cubic:
        FillCubicSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    case CubicFractal:
        FillCubicFractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset);
        return;

    default:
        return;
    }
}
// Allocates a set via GetEmptySet(xSize, ySize, zSize), fills it through
// FillSampledNoiseSet with the given sample scale and returns it.
float* FastNoiseSIMD::GetSampledNoiseSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int sampleScale)
{
    float* const result = GetEmptySet(xSize, ySize, zSize);

    FillSampledNoiseSet(result, xStart, yStart, zStart, xSize, ySize, zSize, sampleScale);

    return result;
}
// Defines FastNoiseSIMD::Get<f>Set(...): allocates a set with
// GetEmptySet(xSize, ySize, zSize), fills it by calling Fill<f>Set with the
// same arguments, and returns the allocated pointer.
// `f` is the noise-type name pasted into both function names.
#define GET_SET(f) \
float* FastNoiseSIMD::Get##f##Set(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier)\
{\
float* noiseSet = GetEmptySet(xSize, ySize, zSize);\
\
Fill##f##Set(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier);\
\
return noiseSet;\
}
// One Get<Type>Set definition per noise type.
GET_SET(WhiteNoise)
GET_SET(Value)
GET_SET(ValueFractal)
GET_SET(Perlin)
GET_SET(PerlinFractal)
GET_SET(Simplex)
GET_SET(SimplexFractal)
GET_SET(Cellular)
GET_SET(Cubic)
GET_SET(CubicFractal)
// Returns the reciprocal of the summed octave amplitudes
// 1 + gain + gain^2 + ... + gain^(octaves-1).
// For octaves <= 1 the sum is just 1, so the result is 1.0f.
float FastNoiseSIMD::CalculateFractalBounding(int octaves, float gain)
{
    float amplitudeSum = 1.0f;  // first octave always contributes 1
    float octaveAmplitude = gain;

    int octave = 1;
    while (octave < octaves)
    {
        amplitudeSum += octaveAmplitude;
        octaveAmplitude *= gain;
        ++octave;
    }

    return 1.0f / amplitudeSum;
}
// Stores the two cellular distance indices in ascending order (index0 gets the
// smaller argument), each clamped to the range [0, FN_CELLULAR_INDEX_MAX].
void FastNoiseSIMD::SetCellularDistance2Indicies(int cellularDistanceIndex0, int cellularDistanceIndex1)
{
    const int smaller = std::min(cellularDistanceIndex0, cellularDistanceIndex1);
    const int larger = std::max(cellularDistanceIndex0, cellularDistanceIndex1);

    m_cellularDistanceIndex0 = std::min(std::max(smaller, 0), FN_CELLULAR_INDEX_MAX);
    m_cellularDistanceIndex1 = std::min(std::max(larger, 0), FN_CELLULAR_INDEX_MAX);
}
// Releases the coordinate storage and marks the set as empty (size = -1).
// Only xSet is passed to FreeNoiseSet; ySet and zSet are just cleared.
void FastNoiseVectorSet::Free()
{
    size = -1;

    FastNoiseSIMD::FreeNoiseSet(xSet);

    xSet = nullptr;
    ySet = nullptr;
    zSet = nullptr;
}
// Discards any existing storage, records the new element count, and makes one
// allocation of three aligned-size lanes: xSet points at its start, ySet one
// lane further, zSet two lanes further.
void FastNoiseVectorSet::SetSize(int _size)
{
    Free();
    size = _size;

    const int laneLength = FastNoiseSIMD::AlignedSize(size);
    float* const storage = FastNoiseSIMD::GetEmptySet(laneLength * 3);

    xSet = storage;
    ySet = xSet + laneLength;
    zSet = ySet + laneLength;
}

View File

@ -1,365 +0,0 @@
// FastNoiseSIMD.h
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// VERSION: 0.7.0
#ifndef FASTNOISE_SIMD_H
#define FASTNOISE_SIMD_H
#if defined(__arm__) || defined(__aarch64__)
#define FN_ARM
//#define FN_IOS
#define FN_COMPILE_NEON
#else
// Comment out lines to not compile for certain instruction sets
#define FN_COMPILE_SSE2
#define FN_COMPILE_SSE41
// To compile AVX2 set C++ code generation to use /arch:AVX(2) on FastNoiseSIMD_avx2.cpp
// Note: This does not break support for pre AVX CPUs, AVX code is only run if support is detected
// #define FN_COMPILE_AVX2
// Only the latest compilers will support this
// #define FN_COMPILE_AVX512
// Using FMA instructions with AVX(51)2/NEON provides a small performance increase but can cause
// minute variations in noise output compared to other SIMD levels due to higher calculation precision
// Intel compiler will always generate FMA instructions, use /Qfma- or -no-fma to disable
#define FN_USE_FMA
#endif
// Using aligned sets of memory for float arrays allows faster storing of SIMD data
// Comment out to allow unaligned float arrays to be used as sets
#define FN_ALIGNED_SETS
// SSE2/NEON support is guaranteed on 64bit CPUs so no fallback is needed
#if !(defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) || defined(FN_IOS)) || defined(_DEBUG)
#define FN_COMPILE_NO_SIMD_FALLBACK
#endif
/*
Tested Compilers:
-MSVC v120/v140
-Intel 16.0
-GCC 4.7 Linux
-Clang MacOSX
CPU instruction support:
SSE2
Intel Pentium 4 - 2001
AMD Opteron/Athlon - 2003
SSE4.1
Intel Penryn - 2007
AMD Bulldozer - Q4 2011
AVX
Intel Sandy Bridge - Q1 2011
AMD Bulldozer - Q4 2011
AVX2
Intel Haswell - Q2 2013
AMD Carrizo - Q2 2015
FMA3
Intel Haswell - Q2 2013
AMD Piledriver - 2012
AVX-512F
Intel Skylake-X - Q2 2017
*/
struct FastNoiseVectorSet;
class FastNoiseSIMD
{
public:
enum NoiseType { Value, ValueFractal, Perlin, PerlinFractal, Simplex, SimplexFractal, WhiteNoise, Cellular, Cubic, CubicFractal };
enum FractalType { FBM, Billow, RigidMulti };
enum PerturbType { None, Gradient, GradientFractal, Normalise, Gradient_Normalise, GradientFractal_Normalise };
enum CellularDistanceFunction { Euclidean, Manhattan, Natural };
enum CellularReturnType { CellValue, Distance, Distance2, Distance2Add, Distance2Sub, Distance2Mul, Distance2Div, NoiseLookup, Distance2Cave };
// Creates new FastNoiseSIMD for the highest supported instruction set of the CPU
static FastNoiseSIMD* NewFastNoiseSIMD(int seed = 1337);
// Returns highest detected level of CPU support
// 5: ARM NEON
// 4: AVX-512F
// 3: AVX2 & FMA3
// 2: SSE4.1
// 1: SSE2
// 0: Fallback, no SIMD support
static int GetSIMDLevel(void);
// Sets the SIMD level for newly created FastNoiseSIMD objects
// 5: ARM NEON
// 4: AVX-512F
// 3: AVX2 & FMA3
// 2: SSE4.1
// 1: SSE2
// 0: Fallback, no SIMD support
// -1: Auto-detect fastest supported (Default)
// Caution: Setting this manually can cause crashes on CPUs that do not support that level
// Caution: Changing this after creating FastNoiseSIMD objects has undefined behaviour
static void SetSIMDLevel(int level) { s_currentSIMDLevel = level; }
// Free a noise set from memory
static void FreeNoiseSet(float* noiseSet);
// Create an empty (aligned) noise set for use with FillNoiseSet()
static float* GetEmptySet(int size);
// Create an empty (aligned) noise set for use with FillNoiseSet()
static float* GetEmptySet(int xSize, int ySize, int zSize) { return GetEmptySet(xSize*ySize*zSize); }
// Rounds the size up to the nearest aligned size for the current SIMD level
static int AlignedSize(int size);
// Returns seed used for all noise types
int GetSeed(void) const { return m_seed; }
// Sets seed used for all noise types
// Default: 1337
void SetSeed(int seed) { m_seed = seed; }
// Sets frequency for all noise types
// Default: 0.01
void SetFrequency(float frequency) { m_frequency = frequency; }
// Sets noise return type of (Get/Fill)NoiseSet()
// Default: Simplex
void SetNoiseType(NoiseType noiseType) { m_noiseType = noiseType; }
// Sets scaling factor for individual axis
// Defaults: 1.0
void SetAxisScales(float xScale, float yScale, float zScale) { m_xScale = xScale; m_yScale = yScale; m_zScale = zScale; }
// Sets octave count for all fractal noise types
// Default: 3
void SetFractalOctaves(int octaves) { m_octaves = octaves; m_fractalBounding = CalculateFractalBounding(m_octaves, m_gain); }
// Sets octave lacunarity for all fractal noise types
// Default: 2.0
void SetFractalLacunarity(float lacunarity) { m_lacunarity = lacunarity; }
// Sets octave gain for all fractal noise types
// Default: 0.5
void SetFractalGain(float gain) { m_gain = gain; m_fractalBounding = CalculateFractalBounding(m_octaves, m_gain); }
// Sets method for combining octaves in all fractal noise types
// Default: FBM
void SetFractalType(FractalType fractalType) { m_fractalType = fractalType; }
// Sets return type from cellular noise calculations
// Default: Distance
void SetCellularReturnType(CellularReturnType cellularReturnType) { m_cellularReturnType = cellularReturnType; }
// Sets distance function used in cellular noise calculations
// Default: Euclidean
void SetCellularDistanceFunction(CellularDistanceFunction cellularDistanceFunction) { m_cellularDistanceFunction = cellularDistanceFunction; }
// Sets the type of noise used if cellular return type is set to NoiseLookup
// Default: Simplex
void SetCellularNoiseLookupType(NoiseType cellularNoiseLookupType) { m_cellularNoiseLookupType = cellularNoiseLookupType; }
// Sets relative frequency on the cellular noise lookup return type
// Default: 0.2
void SetCellularNoiseLookupFrequency(float cellularNoiseLookupFrequency) { m_cellularNoiseLookupFrequency = cellularNoiseLookupFrequency; }
// Sets the 2 distance indicies used for distance2 return types
// Default: 0, 1
// Note: index0 should be lower than index1
// Both indicies must be >= 0, index1 must be < 4
void SetCellularDistance2Indicies(int cellularDistanceIndex0, int cellularDistanceIndex1);
// Sets the maximum distance a cellular point can move from its grid position
// Setting this high will make artifacts more common
// Default: 0.45
void SetCellularJitter(float cellularJitter) { m_cellularJitter = cellularJitter; }
// Enables position perturbing for all noise types
// Default: None
void SetPerturbType(PerturbType perturbType) { m_perturbType = perturbType; }
// Sets the maximum distance the input position can be perturbed
// Default: 1.0
void SetPerturbAmp(float perturbAmp) { m_perturbAmp = perturbAmp / 511.5f; }
// Set the relative frequency for the perturb gradient
// Default: 0.5
void SetPerturbFrequency(float perturbFrequency) { m_perturbFrequency = perturbFrequency; }
// Sets octave count for perturb fractal types
// Default: 3
void SetPerturbFractalOctaves(int perturbOctaves) { m_perturbOctaves = perturbOctaves; m_perturbFractalBounding = CalculateFractalBounding(m_perturbOctaves, m_perturbGain); }
// Sets octave lacunarity for perturb fractal types
// Default: 2.0
void SetPerturbFractalLacunarity(float perturbLacunarity) { m_perturbLacunarity = perturbLacunarity; }
// Sets octave gain for perturb fractal types
// Default: 0.5
void SetPerturbFractalGain(float perturbGain) { m_perturbGain = perturbGain; m_perturbFractalBounding = CalculateFractalBounding(m_perturbOctaves, m_perturbGain); }
// Sets the length for vectors after perturb normalising
// Default: 1.0
void SetPerturbNormaliseLength(float perturbNormaliseLength) { m_perturbNormaliseLength = perturbNormaliseLength; }
static FastNoiseVectorSet* GetVectorSet(int xSize, int ySize, int zSize);
static FastNoiseVectorSet* GetSamplingVectorSet(int sampleScale, int xSize, int ySize, int zSize);
static void FillVectorSet(FastNoiseVectorSet* vectorSet, int xSize, int ySize, int zSize);
static void FillSamplingVectorSet(FastNoiseVectorSet* vectorSet, int sampleScale, int xSize, int ySize, int zSize);
float* GetNoiseSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
void FillNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
void FillNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f);
float* GetSampledNoiseSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int sampleScale);
virtual void FillSampledNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int sampleScale) = 0;
virtual void FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetWhiteNoiseSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillWhiteNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillWhiteNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetValueSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
float* GetValueFractalSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillValueSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillValueFractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillValueSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
virtual void FillValueFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetPerlinSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
float* GetPerlinFractalSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillPerlinSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillPerlinFractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillPerlinSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
virtual void FillPerlinFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetSimplexSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
float* GetSimplexFractalSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillSimplexSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillSimplexFractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillSimplexSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
virtual void FillSimplexFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetCellularSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillCellularSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillCellularSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
float* GetCubicSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
float* GetCubicFractalSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f);
virtual void FillCubicSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillCubicFractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0;
virtual void FillCubicSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
virtual void FillCubicFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0;
virtual ~FastNoiseSIMD() { }
protected:
int m_seed = 1337;
float m_frequency = 0.01f;
NoiseType m_noiseType = SimplexFractal;
float m_xScale = 1.0f;
float m_yScale = 1.0f;
float m_zScale = 1.0f;
int m_octaves = 3;
float m_lacunarity = 2.0f;
float m_gain = 0.5f;
FractalType m_fractalType = FBM;
float m_fractalBounding;
CellularDistanceFunction m_cellularDistanceFunction = Euclidean;
CellularReturnType m_cellularReturnType = Distance;
NoiseType m_cellularNoiseLookupType = Simplex;
float m_cellularNoiseLookupFrequency = 0.2f;
int m_cellularDistanceIndex0 = 0;
int m_cellularDistanceIndex1 = 1;
float m_cellularJitter = 0.45f;
PerturbType m_perturbType = None;
float m_perturbAmp = 1.0f;
float m_perturbFrequency = 0.5f;
int m_perturbOctaves = 3;
float m_perturbLacunarity = 2.0f;
float m_perturbGain = 0.5f;
float m_perturbFractalBounding;
float m_perturbNormaliseLength = 1.0f;
static int s_currentSIMDLevel;
static float CalculateFractalBounding(int octaves, float gain);
};
// Three parallel float arrays (x, y, z) plus the bookkeeping used by sampled
// vector sets.
//
// The destructor calls Free(), so an instance presumably owns the xSet/ySet/zSet
// buffers (Free() and SetSize() are defined outside this header - confirm there).
// A member-wise copy would leave two objects calling Free() on the same pointers,
// so copy construction and copy assignment are disabled; pass instances around
// by pointer or reference instead.
struct FastNoiseVectorSet
{
public:
    // Element count of each array; -1 until SetSize() has been called.
    int size = -1;
    float* xSet = nullptr;
    float* ySet = nullptr;
    float* zSet = nullptr;

    // Only used for sampled vector sets
    int sampleScale = 0;
    int sampleSizeX = -1;
    int sampleSizeY = -1;
    int sampleSizeZ = -1;

    // Creates an empty set: no buffers, size == -1.
    FastNoiseVectorSet() {}
    // Creates a set and immediately sizes it through SetSize(_size).
    FastNoiseVectorSet(int _size) { SetSize(_size); }

    // Non-copyable: see the ownership note above the struct.
    FastNoiseVectorSet(const FastNoiseVectorSet&) = delete;
    FastNoiseVectorSet& operator=(const FastNoiseVectorSet&) = delete;

    ~FastNoiseVectorSet() { Free(); }

    void Free();
    void SetSize(int _size);
};
#define FN_CELLULAR_INDEX_MAX 3
#define FN_NO_SIMD_FALLBACK 0
#define FN_SSE2 1
#define FN_SSE41 2
#define FN_AVX2 3
#define FN_AVX512 4
#define FN_NEON 5
#endif

View File

@ -1,49 +0,0 @@
// FastNoiseSIMD_avx2.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// Translation unit for the AVX2 SIMD level. Everything below the first include
// is compiled only when FN_COMPILE_AVX2 is defined.
#include "FastNoiseSIMD.h"
// DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC
// To compile AVX2 support enable AVX(2) code generation compiler flags for this file
#ifdef FN_COMPILE_AVX2
// Fail early with a compiler-specific hint when AVX code generation is off.
// NOTE(review): the guard tests __AVX__, not __AVX2__ - confirm that is intended.
#ifndef __AVX__
#ifdef __GNUC__
#error To compile AVX2 add build command "-march=core-avx2" on FastNoiseSIMD_avx2.cpp, or remove "#define FN_COMPILE_AVX2" from FastNoiseSIMD.h
#else
#error To compile AVX2 set C++ code generation to use /arch:AVX(2) on FastNoiseSIMD_avx2.cpp, or remove "#define FN_COMPILE_AVX2" from FastNoiseSIMD.h
#endif
#endif
// SIMD_LEVEL_H must be defined before FastNoiseSIMD_internal.h is included;
// that header errors out without it and #undefs it at its end.
#define SIMD_LEVEL_H FN_AVX2
#include "FastNoiseSIMD_internal.h"
#include <immintrin.h> //AVX2 FMA3
// SIMD_LEVEL is defined before the shared implementation file is pulled in
// (presumably it selects the intrinsics used there - that file is not shown here).
#define SIMD_LEVEL FN_AVX2
#include "FastNoiseSIMD_internal.cpp"
#endif

View File

@ -1,53 +0,0 @@
// FastNoiseSIMD_avx512.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// Translation unit for the AVX512 SIMD level. Everything below the first include
// is compiled only when FN_COMPILE_AVX512 is defined.
#include "FastNoiseSIMD.h"
// DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC
// To compile AVX512 support enable AVX(2) code generation compiler flags for this file
#ifdef FN_COMPILE_AVX512
// Fail early with a compiler-specific hint when AVX code generation is off.
// NOTE(review): the guard tests __AVX__ and the messages suggest AVX2 flags
// ("-march=core-avx2", "/arch:AVX(2)") although this file builds the AVX512
// level - confirm whether an AVX512-specific check was intended.
#ifndef __AVX__
#ifdef __GNUC__
#error To compile AVX512 add build command "-march=core-avx2" on FastNoiseSIMD_avx512.cpp, or remove "#define FN_COMPILE_AVX512" from FastNoiseSIMD.h
#else
#error To compile AVX512 set C++ code generation to use /arch:AVX(2) on FastNoiseSIMD_avx512.cpp, or remove "#define FN_COMPILE_AVX512" from FastNoiseSIMD.h
#endif
#endif
// SIMD_LEVEL_H must be defined before FastNoiseSIMD_internal.h is included;
// that header errors out without it and #undefs it at its end.
#define SIMD_LEVEL_H FN_AVX512
#include "FastNoiseSIMD_internal.h"
// The intrinsics header differs between Windows and other platforms.
#ifdef _WIN32
#include <intrin.h> //AVX512
#else
#include <x86intrin.h> //AVX512
#endif
// SIMD_LEVEL is defined before the shared implementation file is pulled in
// (presumably it selects the intrinsics used there - that file is not shown here).
#define SIMD_LEVEL FN_AVX512
#include "FastNoiseSIMD_internal.cpp"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,77 +0,0 @@
// FastNoiseSIMD_internal.h
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
#ifndef SIMD_LEVEL_H
#error Dont include this file without defining SIMD_LEVEL_H
#else
#define FASTNOISE_SIMD_CLASS2(x) FastNoiseSIMD_L##x
#define FASTNOISE_SIMD_CLASS(level) FASTNOISE_SIMD_CLASS2(level)
// Declares the FastNoiseSIMD subclass for the SIMD level named by SIMD_LEVEL_H.
// FASTNOISE_SIMD_CLASS pastes the level onto "FastNoiseSIMD_L" through a second
// macro, so SIMD_LEVEL_H is expanded before pasting; each including .cpp
// therefore gets its own class name.
namespace FastNoiseSIMD_internal
{
class FASTNOISE_SIMD_CLASS(SIMD_LEVEL_H) : public FastNoiseSIMD
{
public:
// Do not call this, use SetSIMDLevel(int) to have NewFastNoiseSIMD() return the level you want
FASTNOISE_SIMD_CLASS(SIMD_LEVEL_H)(int seed = 1337);
// Static helpers; their definitions are not part of this header.
static float* GetEmptySet(int size);
static int AlignedSize(int size);
// Overrides of the Fill* virtuals declared in FastNoiseSIMD. Each noise type
// has a grid form (start + size) and a FastNoiseVectorSet form; the default
// arguments repeat the values used in the base-class declarations.
void FillSampledNoiseSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, int sampleScale) override;
void FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillWhiteNoiseSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillWhiteNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillValueSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillValueFractalSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillValueSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillValueFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillPerlinSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillPerlinFractalSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillPerlinSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillPerlinFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillSimplexSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillSimplexFractalSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillSimplexSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillSimplexFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillCellularSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillCellularSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillCubicSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillCubicFractalSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override;
void FillCubicSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
void FillCubicFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override;
};
}
#undef SIMD_LEVEL_H
#endif

View File

@ -1,38 +0,0 @@
// FastNoiseSIMD_neon.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// Translation unit for the NEON SIMD level. Everything below the first include
// is compiled only when FN_COMPILE_NEON is defined.
#include "FastNoiseSIMD.h"
#ifdef FN_COMPILE_NEON
// SIMD_LEVEL_H must be defined before FastNoiseSIMD_internal.h is included;
// that header errors out without it and #undefs it at its end.
#define SIMD_LEVEL_H FN_NEON
#include "FastNoiseSIMD_internal.h"
#include <arm_neon.h>
// SIMD_LEVEL is defined before the shared implementation file is pulled in
// (presumably it selects the intrinsics used there - that file is not shown here).
#define SIMD_LEVEL FN_NEON
#include "FastNoiseSIMD_internal.cpp"
#endif

View File

@ -1,41 +0,0 @@
// FastNoiseSIMD_sse2.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// Translation unit for the SSE2 SIMD level. Everything below the first include
// is compiled only when FN_COMPILE_SSE2 is defined.
#include "FastNoiseSIMD.h"
// DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC
// Depending on the compiler this file may need to have SSE2 code generation compiler flags enabled
#ifdef FN_COMPILE_SSE2
// SIMD_LEVEL_H must be defined before FastNoiseSIMD_internal.h is included;
// that header errors out without it and #undefs it at its end.
#define SIMD_LEVEL_H FN_SSE2
#include "FastNoiseSIMD_internal.h"
#include <emmintrin.h> //SSE2
// SIMD_LEVEL is defined before the shared implementation file is pulled in
// (presumably it selects the intrinsics used there - that file is not shown here).
#define SIMD_LEVEL FN_SSE2
#include "FastNoiseSIMD_internal.cpp"
#endif

View File

@ -1,41 +0,0 @@
// FastNoiseSIMD_sse41.cpp
//
// MIT License
//
// Copyright(c) 2017 Jordan Peck
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
// off every 'zix'.)
//
// Translation unit for the SSE4.1 SIMD level. Everything below the first include
// is compiled only when FN_COMPILE_SSE41 is defined.
#include "FastNoiseSIMD.h"
// DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC
// Depending on the compiler this file may need to have SSE4.1 code generation compiler flags enabled
#ifdef FN_COMPILE_SSE41
// SIMD_LEVEL_H must be defined before FastNoiseSIMD_internal.h is included;
// that header errors out without it and #undefs it at its end.
#define SIMD_LEVEL_H FN_SSE41
#include "FastNoiseSIMD_internal.h"
#include <smmintrin.h> //SSE4.1
// SIMD_LEVEL is defined before the shared implementation file is pulled in
// (presumably it selects the intrinsics used there - that file is not shown here).
#define SIMD_LEVEL FN_SSE41
#include "FastNoiseSIMD_internal.cpp"
#endif

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2016 Jordan Peck
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

76
deps/imgui/imgui_stdlib.cpp vendored Normal file
View File

@ -0,0 +1,76 @@
// dear imgui: wrappers for C++ standard library (STL) types (std::string, etc.)
// This is also an example of how you may wrap your own similar types.
// Compatibility:
// - std::string support is only guaranteed to work from C++11.
// If you try to use it pre-C++11, please share your findings (w/ info about compiler/architecture)
// Changelog:
// - v0.10: Initial version. Added InputText() / InputTextMultiline() calls with std::string
#include "imgui.h"
#include "imgui_stdlib.h"
// State handed to InputTextCallback() through ImGuiInputTextCallbackData::UserData
// by the std::string overloads in this file.
struct InputTextCallback_UserData
{
// String being edited; resized by the callback on a resize event.
std::string* Str;
// Optional caller-supplied callback, invoked for every non-resize event (may be null).
ImGuiInputTextCallback ChainCallback;
// Value placed in data->UserData before forwarding to ChainCallback.
void* ChainCallbackUserData;
};
// Internal callback installed by the std::string overloads below.
// - On a resize event it resizes the wrapped std::string to BufTextLen and
//   hands the (possibly new) character buffer back to ImGui; returns 0.
// - On any other event it forwards to the caller's callback, if one was
//   given, and returns that callback's result; otherwise returns 0.
static int InputTextCallback(ImGuiInputTextCallbackData* data)
{
    InputTextCallback_UserData* wrapper = (InputTextCallback_UserData*)data->UserData;

    if (data->EventFlag != ImGuiInputTextFlags_CallbackResize)
    {
        if (!wrapper->ChainCallback)
            return 0;

        // Give the user callback the UserData pointer it originally supplied.
        data->UserData = wrapper->ChainCallbackUserData;
        return wrapper->ChainCallback(data);
    }

    // Resize event. If for some reason we refuse the new length (BufTextLen) and/or
    // capacity (BufSize) we need to set them back to what we want.
    std::string* target = wrapper->Str;
    IM_ASSERT(data->Buf == target->c_str());
    target->resize(data->BufTextLen);
    data->Buf = (char*)target->c_str();
    return 0;
}
bool ImGui::InputText(const char* label, std::string* str, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0);
flags |= ImGuiInputTextFlags_CallbackResize;
InputTextCallback_UserData cb_user_data;
cb_user_data.Str = str;
cb_user_data.ChainCallback = callback;
cb_user_data.ChainCallbackUserData = user_data;
return InputText(label, (char*)str->c_str(), str->capacity() + 1, flags, InputTextCallback, &cb_user_data);
}
bool ImGui::InputTextMultiline(const char* label, std::string* str, const ImVec2& size, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0);
flags |= ImGuiInputTextFlags_CallbackResize;
InputTextCallback_UserData cb_user_data;
cb_user_data.Str = str;
cb_user_data.ChainCallback = callback;
cb_user_data.ChainCallbackUserData = user_data;
return InputTextMultiline(label, (char*)str->c_str(), str->capacity() + 1, size, flags, InputTextCallback, &cb_user_data);
}
bool ImGui::InputTextWithHint(const char* label, const char* hint, std::string* str, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0);
flags |= ImGuiInputTextFlags_CallbackResize;
InputTextCallback_UserData cb_user_data;
cb_user_data.Str = str;
cb_user_data.ChainCallback = callback;
cb_user_data.ChainCallbackUserData = user_data;
return InputTextWithHint(label, hint, (char*)str->c_str(), str->capacity() + 1, flags, InputTextCallback, &cb_user_data);
}

22
deps/imgui/imgui_stdlib.h vendored Normal file
View File

@ -0,0 +1,22 @@
// dear imgui: wrappers for C++ standard library (STL) types (std::string, etc.)
// This is also an example of how you may wrap your own similar types.
// Compatibility:
// - std::string support is only guaranteed to work from C++11.
// If you try to use it pre-C++11, please share your findings (w/ info about compiler/architecture)
// Changelog:
// - v0.10: Initial version. Added InputText() / InputTextMultiline() calls with std::string
#pragma once
#include <string>
// std::string overloads of the ImGui text-input widgets.
// NOTE: this header only includes <string>; IMGUI_API, ImVec2 and the
// ImGuiInputText* types must already be declared by the including file
// (imgui_stdlib.cpp includes "imgui.h" first).
namespace ImGui
{
// ImGui::InputText() with std::string
// Because text input needs dynamic resizing, we need to setup a callback to grow the capacity
// Do not pass ImGuiInputTextFlags_CallbackResize in `flags`: the implementation
// asserts that it is absent and sets it itself. `callback` / `user_data` are
// optional and are forwarded for events other than the resize event.
IMGUI_API bool InputText(const char* label, std::string* str, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL);
IMGUI_API bool InputTextMultiline(const char* label, std::string* str, const ImVec2& size = ImVec2(0, 0), ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL);
IMGUI_API bool InputTextWithHint(const char* label, const char* hint, std::string* str, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL);
}

View File

@ -1,25 +0,0 @@
Copyright (c) 2016, Ansel Sermersheim
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,198 +0,0 @@
/***********************************************************************
*
* Copyright (c) 2015-2020 Ansel Sermersheim
*
* This file is part of CsLibGuarded.
*
* CsLibGuarded is free software, released under the BSD 2-Clause license.
* For license details refer to LICENSE provided with this project.
*
* CopperSpice is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* https://opensource.org/licenses/BSD-2-Clause
*
***********************************************************************/
#ifndef CSLIBGUARDED_PLAIN_GUARDED_H
#define CSLIBGUARDED_PLAIN_GUARDED_H
#include <memory>
#include <mutex>
namespace libguarded
{
/**
\headerfile cs_plain_guarded.h <CsLibGuarded/cs_plain_guarded.h>
This templated class wraps an object and allows only one thread at a
time to access the protected object.
This class will use std::mutex for the internal locking mechanism by
default. Other classes which are useful for the mutex type are
std::recursive_mutex, std::timed_mutex, and
std::recursive_timed_mutex.
The handle returned by the various lock methods is moveable but not
copyable.
*/
template <typename T, typename M = std::mutex>
class plain_guarded
{
private:
class deleter;
public:
using handle = std::unique_ptr<T, deleter>;
/**
Construct a guarded object. This constructor will accept any
number of parameters, all of which are forwarded to the
constructor of T.
*/
template <typename... Us>
plain_guarded(Us &&... data);
/**
Acquire a handle to the protected object. As a side effect, the
protected object will be locked from access by any other
thread. The lock will be automatically released when the handle
is destroyed.
*/
[[nodiscard]] handle lock();
/**
Attempt to acquire a handle to the protected object. Returns a
null handle if the object is already locked. As a side effect,
the protected object will be locked from access by any other
thread. The lock will be automatically released when the handle
is destroyed.
*/
[[nodiscard]] handle try_lock();
/**
Attempt to acquire a handle to the protected object. As a side
effect, the protected object will be locked from access by any
other thread. The lock will be automatically released when the
handle is destroyed.
Returns a null handle if the object is already locked, and does
not become available for locking before the time duration has
elapsed.
Calling this method requires that the underlying mutex type M
supports the try_lock_for method. This is not true if M is the
default std::mutex.
*/
template <class Duration>
[[nodiscard]] handle try_lock_for(const Duration &duration);
/**
Attempt to acquire a handle to the protected object. As a side
effect, the protected object will be locked from access by any other
thread. The lock will be automatically released when the handle is
destroyed.
Returns a null handle if the object is already locked, and does not
become available for locking before reaching the specified timepoint.
Calling this method requires that the underlying mutex type M
supports the try_lock_until method. This is not true if M is the
default std::mutex.
*/
template <class TimePoint>
[[nodiscard]] handle try_lock_until(const TimePoint &timepoint);
private:
T m_obj;
M m_mutex;
};
/**
   Deleter type behind plain_guarded::handle. It carries the
   std::unique_lock that was taken when the handle was created and
   releases it when the handle lets go of the protected object. The
   pointer itself is never freed, since it refers to a member of the
   owning plain_guarded.
*/
template <typename T, typename M>
class plain_guarded<T, M>::deleter
{
  public:
    using pointer = T *;

    /** Take over ownership of an (optionally held) lock. */
    deleter(std::unique_lock<M> lock)
        : m_lock(std::move(lock))
    {
    }

    /** Release the mutex if this deleter still owns it. */
    void operator()(T *)
    {
        if (!m_lock.owns_lock()) {
            return;
        }

        m_lock.unlock();
    }

  private:
    std::unique_lock<M> m_lock;
};
/**
   Build the protected object in place, forwarding every argument
   unchanged to the constructor of T. The mutex is default-initialized.
*/
template <typename T, typename M>
template <typename... Us>
plain_guarded<T, M>::plain_guarded(Us &&... args) : m_obj(std::forward<Us>(args)...)
{
}
/**
   Block until the mutex is acquired, then hand out a handle that
   points at the protected object and owns the lock.
*/
template <typename T, typename M>
auto plain_guarded<T, M>::lock() -> handle
{
    // Constructing the unique_lock is what acquires the mutex.
    std::unique_lock<M> guard(m_mutex);
    T *const target = &m_obj;

    return handle(target, deleter(std::move(guard)));
}
/**
   Non-blocking acquisition. The returned handle points at the
   protected object when the mutex could be taken and is null
   otherwise.
*/
template <typename T, typename M>
auto plain_guarded<T, M>::try_lock() -> handle
{
    std::unique_lock<M> guard(m_mutex, std::try_to_lock);

    // A null target marks the "could not lock" outcome for the caller.
    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
   Timed acquisition bounded by a duration. The duration is passed
   straight to the std::unique_lock constructor; the returned handle
   is null when the lock is not owned afterwards.
*/
template <typename T, typename M>
template <typename Duration>
auto plain_guarded<T, M>::try_lock_for(const Duration &timeout) -> handle
{
    std::unique_lock<M> guard(m_mutex, timeout);

    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
   Timed acquisition bounded by an absolute time point. The time point
   is passed straight to the std::unique_lock constructor; the
   returned handle is null when the lock is not owned afterwards.
*/
template <typename T, typename M>
template <typename TimePoint>
auto plain_guarded<T, M>::try_lock_until(const TimePoint &deadline) -> handle
{
    std::unique_lock<M> guard(m_mutex, deadline);

    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
Deprecated alias for plain_guarded, kept under the former name
"guarded". Using it triggers the deprecation message below; new code
should name plain_guarded directly.
*/
template <typename T, typename M = std::mutex>
using guarded [[deprecated("renamed to plain_guarded")]] = plain_guarded<T, M>;
} // namespace libguarded
#endif

View File

@ -1,234 +0,0 @@
/***********************************************************************
*
* Copyright (c) 2015-2020 Ansel Sermersheim
*
* This file is part of CsLibGuarded.
*
* CsLibGuarded is free software, released under the BSD 2-Clause license.
* For license details refer to LICENSE provided with this project.
*
* CopperSpice is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* https://opensource.org/licenses/BSD-2-Clause
*
***********************************************************************/
#ifndef CSLIBGUARDED_SHARED_GUARDED_H
#define CSLIBGUARDED_SHARED_GUARDED_H
#include <memory>
#include <shared_mutex>
namespace libguarded
{
/**
\headerfile cs_shared_guarded.h <CsLibGuarded/cs_shared_guarded.h>
This templated class wraps an object and allows only one thread at
a time to modify the protected object.
Exclusive (read/write) access is handed out through handle, shared
(read-only, pointer to const T) access through shared_handle.
This class will use std::shared_timed_mutex for the internal
locking mechanism by default. In C++17 the std::shared_mutex class
is also available.
Exclusive locks are taken with std::unique_lock<M>; shared locks are
taken with the lock type L, which defaults to std::shared_lock<M>.
The handle returned by the various lock methods is moveable but not
copyable.
*/
template <typename T, typename M = std::shared_timed_mutex, typename L = std::shared_lock<M>>
class shared_guarded
{
private:
// Deleters for the two handle types; each one owns the lock and
// releases it when the handle is destroyed or reset.
class deleter;
class shared_deleter;
public:
// Handle granting exclusive, mutable access to the protected object.
using handle = std::unique_ptr<T, deleter>;
// Handle granting shared, read-only access to the protected object.
using shared_handle = std::unique_ptr<const T, shared_deleter>;
/**
Construct a guarded object. All arguments are forwarded to the
constructor of T.
*/
template <typename... Us>
shared_guarded(Us &&... data);
// exclusive access
/** Acquire the exclusive lock and return a handle to the object. */
[[nodiscard]] handle lock();
/**
Attempt to acquire the exclusive lock without waiting. Returns a
null handle if the lock could not be acquired.
*/
[[nodiscard]] handle try_lock();
/**
Attempt to acquire the exclusive lock, passing the duration to
std::unique_lock. Returns a null handle if the lock was not acquired.
*/
template <class Duration>
[[nodiscard]] handle try_lock_for(const Duration &duration);
/**
Attempt to acquire the exclusive lock, passing the time point to
std::unique_lock. Returns a null handle if the lock was not acquired.
*/
template <class TimePoint>
[[nodiscard]] handle try_lock_until(const TimePoint &timepoint);
// shared access, note "shared" in method names
/** Acquire a lock of type L and return a read-only handle. */
[[nodiscard]] shared_handle lock_shared() const;
/**
Attempt to acquire a lock of type L without waiting. Returns a null
handle if the lock could not be acquired.
*/
[[nodiscard]] shared_handle try_lock_shared() const;
/**
Attempt to acquire a lock of type L, passing the duration to its
constructor. Returns a null handle if the lock was not acquired.
*/
template <class Duration>
[[nodiscard]] shared_handle try_lock_shared_for(const Duration &duration) const;
/**
Attempt to acquire a lock of type L, passing the time point to its
constructor. Returns a null handle if the lock was not acquired.
*/
template <class TimePoint>
[[nodiscard]] shared_handle try_lock_shared_until(const TimePoint &timepoint) const;
private:
T m_obj;
// mutable so that the const shared-lock methods can lock it.
mutable M m_mutex;
};
/**
   Deleter type behind shared_guarded::handle (exclusive access). It
   carries the std::unique_lock taken when the handle was created and
   releases it when the handle lets go of the protected object. The
   pointer itself is never freed.
*/
template <typename T, typename M, typename L>
class shared_guarded<T, M, L>::deleter
{
  public:
    using pointer = T *;

    /** Take over ownership of an (optionally held) exclusive lock. */
    deleter(std::unique_lock<M> lock)
        : m_lock(std::move(lock))
    {
    }

    /** Release the mutex if this deleter still owns it. */
    void operator()(T *)
    {
        if (!m_lock.owns_lock()) {
            return;
        }

        m_lock.unlock();
    }

  private:
    std::unique_lock<M> m_lock;
};
/**
   Deleter type behind shared_guarded::shared_handle (read-only
   access). It carries the lock of type L taken when the handle was
   created and releases it when the handle lets go of the protected
   object. The pointer itself is never freed.
*/
template <typename T, typename M, typename L>
class shared_guarded<T, M, L>::shared_deleter
{
  public:
    using pointer = const T *;

    /** Take over ownership of an (optionally held) shared lock. */
    shared_deleter(L lock)
        : m_lock(std::move(lock))
    {
    }

    /** Release the lock if this deleter still owns it. */
    void operator()(const T *)
    {
        if (!m_lock.owns_lock()) {
            return;
        }

        m_lock.unlock();
    }

  private:
    L m_lock;
};
/**
   Build the protected object in place, forwarding every argument
   unchanged to the constructor of T. The mutex is default-initialized.
*/
template <typename T, typename M, typename L>
template <typename... Us>
shared_guarded<T, M, L>::shared_guarded(Us &&... args) : m_obj(std::forward<Us>(args)...)
{
}
/**
   Block until the exclusive lock is acquired, then hand out a handle
   that points at the protected object and owns the lock.
*/
template <typename T, typename M, typename L>
auto shared_guarded<T, M, L>::lock() -> handle
{
    // Constructing the unique_lock is what acquires the mutex.
    std::unique_lock<M> guard(m_mutex);
    T *const target = &m_obj;

    return handle(target, deleter(std::move(guard)));
}
/**
   Non-blocking exclusive acquisition. The returned handle points at
   the protected object when the mutex could be taken and is null
   otherwise.
*/
template <typename T, typename M, typename L>
auto shared_guarded<T, M, L>::try_lock() -> handle
{
    std::unique_lock<M> guard(m_mutex, std::try_to_lock);

    // A null target marks the "could not lock" outcome for the caller.
    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
   Timed exclusive acquisition bounded by a duration. The duration is
   passed straight to the std::unique_lock constructor; the returned
   handle is null when the lock is not owned afterwards.
*/
template <typename T, typename M, typename L>
template <typename Duration>
auto shared_guarded<T, M, L>::try_lock_for(const Duration &timeout) -> handle
{
    std::unique_lock<M> guard(m_mutex, timeout);

    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
   Timed exclusive acquisition bounded by an absolute time point. The
   time point is passed straight to the std::unique_lock constructor;
   the returned handle is null when the lock is not owned afterwards.
*/
template <typename T, typename M, typename L>
template <typename TimePoint>
auto shared_guarded<T, M, L>::try_lock_until(const TimePoint &deadline) -> handle
{
    std::unique_lock<M> guard(m_mutex, deadline);

    T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return handle(target, deleter(std::move(guard)));
}
/**
   Acquire a lock of type L on the mutex, then hand out a read-only
   handle that points at the protected object and owns that lock.
*/
template <typename T, typename M, typename L>
auto shared_guarded<T, M, L>::lock_shared() const -> shared_handle
{
    // Constructing L from the mutex is what acquires the lock.
    L guard(m_mutex);
    const T *const target = &m_obj;

    return shared_handle(target, shared_deleter(std::move(guard)));
}
/**
   Non-blocking acquisition of a lock of type L. The returned
   read-only handle points at the protected object when the lock
   could be taken and is null otherwise.
*/
template <typename T, typename M, typename L>
auto shared_guarded<T, M, L>::try_lock_shared() const -> shared_handle
{
    L guard(m_mutex, std::try_to_lock);

    // A null target marks the "could not lock" outcome for the caller.
    const T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return shared_handle(target, shared_deleter(std::move(guard)));
}
/**
   Timed acquisition of a lock of type L bounded by a duration. The
   duration is passed straight to the constructor of L; the returned
   read-only handle is null when the lock is not owned afterwards.
*/
template <typename T, typename M, typename L>
template <typename Duration>
auto shared_guarded<T, M, L>::try_lock_shared_for(const Duration &timeout) const -> shared_handle
{
    L guard(m_mutex, timeout);

    const T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return shared_handle(target, shared_deleter(std::move(guard)));
}
/**
   Timed acquisition of a lock of type L bounded by an absolute time
   point. The time point is passed straight to the constructor of L;
   the returned read-only handle is null when the lock is not owned
   afterwards.
*/
template <typename T, typename M, typename L>
template <typename TimePoint>
auto shared_guarded<T, M, L>::try_lock_shared_until(const TimePoint &deadline) const -> shared_handle
{
    L guard(m_mutex, deadline);

    const T *const target = guard.owns_lock() ? &m_obj : nullptr;

    return shared_handle(target, shared_deleter(std::move(guard)));
}
} // namespace libguarded
#endif

View File

@ -1,76 +0,0 @@
# mini-yaml
[![Build Status](https://travis-ci.org/jimmiebergmann/mini-yaml.svg?branch=master)](https://github.com/jimmiebergmann/mini-yaml#build-status)
Single header YAML 1.0 C++11 serializer/deserializer.
## Quickstart
#### file.txt
```
key: foo bar
list:
  - hello world
  - integer: 123
    boolean: true
```
#### .cpp
```cpp
Yaml::Node root;
Yaml::Parse(root, "file.txt");
// Print all scalars.
std::cout << root["key"].As<std::string>() << std::endl;
std::cout << root["list"][0].As<std::string>() << std::endl;
std::cout << root["list"][1]["integer"].As<int>() << std::endl;
std::cout << root["list"][1]["boolean"].As<bool>() << std::endl;
// Iterate second sequence item.
Yaml::Node & item = root["list"][1];
for(auto it = item.Begin(); it != item.End(); it++)
{
    std::cout << (*it).first << ": " << (*it).second.As<std::string>() << std::endl;
}
```
#### Output
```
foo bar
hello world
123
1
integer: 123
boolean: true
```
See [Best practice](https://github.com/jimmiebergmann/mini-yaml#best-practice).
## Usage
Put [/yaml](https://github.com/jimmiebergmann/mini-yaml/blob/master/yaml) in your project directory and simply #include "[yaml/Yaml.hpp](https://github.com/jimmiebergmann/mini-yaml/blob/master/yaml/Yaml.hpp)".
See [examples/FirstExample.cpp](https://github.com/jimmiebergmann/mini-yaml/blob/master/examples/FirstExample.cpp) for additional examples.
## Best practice
Always use references when accessing node content unless you intend to make a copy. Modifying a copied node won't affect the original node's content.
See example:
```cpp
Yaml::Node root;
Yaml::Node & ref = root; // The content of "root" is not being copied.
ref["key"] = "value"; // Modifying "root" node content.
Yaml::Node copy = root; // The content of "root" is copied to "copy".
// Slow operation if "root" contains a lot of content.
copy["key"] = "value"; // Modifying "copy" node content. "root" is left untouched.
```
## Build status
A build passes only if all tests succeed and no memory leaks are found.
| Branch | Status |
| ------ | ------ |
| master | [![Build Status](https://travis-ci.org/jimmiebergmann/mini-yaml.svg?branch=master)](https://travis-ci.org/jimmiebergmann/mini-yaml) |
| dev | [![Build Status](https://travis-ci.org/jimmiebergmann/mini-yaml.svg?branch=dev)](https://travis-ci.org/jimmiebergmann/mini-yaml)|
## Todo
- Parse/serialize tags(!!type).
- Parse anchors.
- Parse flow sequences/maps.
- Parse complex keys.
- Parse sets.

2773
deps/mini-yaml/Yaml.cpp vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,656 +0,0 @@
/*
* MIT License
*
* Copyright(c) 2018 Jimmie Bergmann
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files(the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/*
YAML documentation:
http://yaml.org/spec/1.0/index.html
https://www.codeproject.com/Articles/28720/YAML-Parser-in-C
*/
#pragma once
#include <algorithm>
#include <cctype>
#include <exception>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
/**
* @brief Namespace wrapping mini-yaml classes.
*
*/
namespace Yaml
{
/**
* @brief Forward declarations.
*
*/
class Node;
/**
* @brief Helper classes and functions.
*
*/
namespace impl
{

    /**
    * @brief Helper functionality, converting string to any data type.
    *        Strings are left untouched.
    *
    * The generic version parses the text with operator>> on a
    * std::stringstream, so T must be default-constructible and
    * stream-extractable.
    *
    */
    template<typename T>
    struct StringConverter
    {
        /**
        * @brief Convert data to T.
        *
        * @param data Text to parse.
        *
        * @return Parsed value. The result is value-initialized first, so a
        *         value-initialized T is returned when the stream never
        *         writes to it (for example when data is empty).
        *
        */
        static T Get(const std::string & data)
        {
            // Value-initialize: if the stream is already at EOF the
            // extraction does not touch the target, and returning an
            // uninitialized T would be undefined behavior.
            T type{};
            std::stringstream ss(data);
            ss >> type;
            return type;
        }

        /**
        * @brief Convert data to T, falling back to a default.
        *
        * @param data         Text to parse.
        * @param defaultValue Value returned when extraction fails.
        *
        * @return Parsed value, or defaultValue if the stream reports failure.
        *
        */
        static T Get(const std::string & data, const T & defaultValue)
        {
            T type{};
            std::stringstream ss(data);
            ss >> type;
            if(ss.fail())
            {
                return defaultValue;
            }
            return type;
        }
    };

    /**
    * @brief String specialization: the text is returned unchanged.
    *
    */
    template<>
    struct StringConverter<std::string>
    {
        /**
        * @brief Return data as is.
        *
        */
        static std::string Get(const std::string & data)
        {
            return data;
        }

        /**
        * @brief Return data, or defaultValue when data is empty.
        *
        */
        static std::string Get(const std::string & data, const std::string & defaultValue)
        {
            if(data.size() == 0)
            {
                return defaultValue;
            }
            return data;
        }
    };

    /**
    * @brief Boolean specialization.
    *        "true", "yes" and "1" (compared case-insensitively) are true,
    *        every other text is false.
    *
    */
    template<>
    struct StringConverter<bool>
    {
        /**
        * @brief Convert data to bool.
        *
        * @return true for "true", "yes" or "1" in any letter case, else false.
        *
        */
        static bool Get(const std::string & data)
        {
            std::string tmpData = data;
            // tolower is only defined for values representable as unsigned
            // char (or EOF); go through unsigned char so bytes >= 0x80 in
            // the input cannot trigger undefined behavior.
            std::transform(tmpData.begin(), tmpData.end(), tmpData.begin(),
                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
            return tmpData == "true" || tmpData == "yes" || tmpData == "1";
        }

        /**
        * @brief Convert data to bool, or return defaultValue when data is empty.
        *
        */
        static bool Get(const std::string & data, const bool & defaultValue)
        {
            if(data.size() == 0)
            {
                return defaultValue;
            }
            return Get(data);
        }
    };

}
/**
* @brief Exception class.
*        Base class of InternalException, ParsingException and
*        OperationException; itself derived from std::runtime_error.
*
*/
class Exception : public std::runtime_error
{
public:
/**
* @brief Enumeration of exception types.
*
*/
enum eType
{
InternalError, ///< Internal error.
ParsingError, ///< Invalid parsing data.
OperationError ///< User operation error.
};
/**
* @brief Constructor.
*
* @param message Exception message.
* @param type Type of exception.
*
*/
Exception(const std::string & message, const eType type);
/**
* @brief Get type of exception.
*
*/
eType Type() const;
/**
* @brief Get message of exception.
*
*/
const char * Message() const;
private:
eType m_Type; ///< Type of exception.
};
/**
* @brief Internal exception class.
*
* @see Exception
*
*/
class InternalException : public Exception
{
public:
/**
* @brief Constructor.
*
* @param message Exception message.
*
*/
InternalException(const std::string & message);
};
/**
* @brief Parsing exception class.
*
* @see Exception
*
*/
class ParsingException : public Exception
{
public:
/**
* @brief Constructor.
*
* @param message Exception message.
*
*/
ParsingException(const std::string & message);
};
/**
* @brief Operation exception class.
*
* @see Exception
*
*/
class OperationException : public Exception
{
public:
/**
* @brief Constructor.
*
* @param message Exception message.
*
*/
OperationException(const std::string & message);
};
/**
* @brief Iterator class.
*        Mutable iterator over the items of a Node; obtained from
*        Node::Begin() and Node::End().
*
*/
class Iterator
{
public:
friend class Node;
/**
* @brief Default constructor.
*
*/
Iterator();
/**
* @brief Copy constructor.
*
*/
Iterator(const Iterator & it);
/**
* @brief Assignment operator.
*
*/
Iterator & operator = (const Iterator & it);
/**
* @brief Destructor.
*
*/
~Iterator();
/**
* @brief Get node of iterator.
* First pair item is the key of map value, empty if type is sequence.
*
*/
std::pair<const std::string &, Node &> operator *();
/**
* @brief Post-increment operator.
*        NOTE(review): declared with the postfix signature but returns a
*        reference to an Iterator rather than a copy; confirm the exact
*        semantics against the implementation.
*
*/
Iterator & operator ++ (int);
/**
* @brief Post-decrement operator.
*        NOTE(review): same return convention as the increment operator.
*
*/
Iterator & operator -- (int);
/**
* @brief Check if iterator is equal to other iterator.
*
*/
bool operator == (const Iterator & it);
/**
* @brief Check if iterator is not equal to other iterator.
*
*/
bool operator != (const Iterator & it);
private:
/**
* @brief Kind of container the iterator refers to.
*
*/
enum eType
{
None,
SequenceType,
MapType
};
eType m_Type; ///< Type of iterator.
void * m_pImp; ///< Implementation of iterator class.
};
/**
* @brief Constant iterator class.
*        Read-only iterator over the items of a Node; obtained from the
*        const overloads of Node::Begin() and Node::End().
*
*/
class ConstIterator
{
public:
friend class Node;
/**
* @brief Default constructor.
*
*/
ConstIterator();
/**
* @brief Copy constructor.
*
*/
ConstIterator(const ConstIterator & it);
/**
* @brief Assignment operator.
*
*/
ConstIterator & operator = (const ConstIterator & it);
/**
* @brief Destructor.
*
*/
~ConstIterator();
/**
* @brief Get node of iterator.
* First pair item is the key of map value, empty if type is sequence.
*
*/
std::pair<const std::string &, const Node &> operator *();
/**
* @brief Post-increment operator.
*        NOTE(review): declared with the postfix signature but returns a
*        reference to a ConstIterator rather than a copy; confirm the exact
*        semantics against the implementation.
*
*/
ConstIterator & operator ++ (int);
/**
* @brief Post-decrement operator.
*        NOTE(review): same return convention as the increment operator.
*
*/
ConstIterator & operator -- (int);
/**
* @brief Check if iterator is equal to other iterator.
*
*/
bool operator == (const ConstIterator & it);
/**
* @brief Check if iterator is not equal to other iterator.
*
*/
bool operator != (const ConstIterator & it);
private:
/**
* @brief Kind of container the iterator refers to.
*
*/
enum eType
{
None,
SequenceType,
MapType
};
eType m_Type; ///< Type of iterator.
void * m_pImp; ///< Implementation of constant iterator class.
};
/**
* @brief Node class.
*        A single YAML value: none, a scalar, a sequence or a map.
*
*/
class Node
{
public:
friend class Iterator;
/**
* @brief Enumeration of node types.
*
*/
enum eType
{
None,
SequenceType,
MapType,
ScalarType
};
/**
* @brief Default constructor.
*
*/
Node();
/**
* @brief Copy constructor.
*
*/
Node(const Node & node);
/**
* @brief Assignment constructors.
* Converts node to scalar type if needed.
*
*/
Node(const std::string & value);
Node(const char * value);
/**
* @brief Destructor.
*
*/
~Node();
/**
* @brief Functions for checking type of node.
*
*/
eType Type() const;
bool IsNone() const;
bool IsSequence() const;
bool IsMap() const;
bool IsScalar() const;
/**
* @brief Completely clear node.
*
*/
void Clear();
/**
* @brief Get node as given template type.
*        The scalar text of the node is converted with
*        impl::StringConverter<T>.
*
*/
template<typename T>
T As() const
{
return impl::StringConverter<T>::Get(AsString());
}
/**
* @brief Get node as given template type.
*        Same conversion as As(), with defaultValue passed on to
*        impl::StringConverter<T> as the fallback result.
*
*/
template<typename T>
T As(const T & defaultValue) const
{
return impl::StringConverter<T>::Get(AsString(), defaultValue);
}
/**
* @brief Get size of node.
* Nodes of type None or Scalar will return 0.
*
*/
size_t Size() const;
// Sequence operators
/**
* @brief Insert sequence item at given index.
* Converts node to sequence type if needed.
* Adding new item to end of sequence if index is larger than sequence size.
*
*/
Node & Insert(const size_t index);
/**
* @brief Add new sequence item to front.
* Converts node to sequence type if needed.
* NOTE(review): the previous comment said "to back", contradicting the
* function name; confirm against the implementation.
*
*/
Node & PushFront();
/**
* @brief Add new sequence item to back.
* Converts node to sequence type if needed.
* NOTE(review): the previous comment said "to front", contradicting the
* function name; confirm against the implementation.
*
*/
Node & PushBack();
/**
* @brief Get sequence/map item.
* Converts node to sequence/map type if needed.
*
* @param index Sequence index. Returns None type Node if index is unknown.
* @param key Map key. Creates a new node if key is unknown.
*
*/
Node & operator [] (const size_t index);
Node & operator [] (const std::string & key);
/**
* @brief Erase item.
* No action if node is not a sequence or map.
*
*/
void Erase(const size_t index);
void Erase(const std::string & key);
/**
* @brief Assignment operators.
*
*/
Node & operator = (const Node & node);
Node & operator = (const std::string & value);
Node & operator = (const char * value);
/**
* @brief Get start iterator.
*
*/
Iterator Begin();
ConstIterator Begin() const;
/**
* @brief Get end iterator.
*
*/
Iterator End();
ConstIterator End() const;
private:
/**
* @brief Get as string. If type is scalar, else empty.
*
*/
const std::string & AsString() const;
void * m_pImp; ///< Implementation of node class.
};
/**
* @brief Parsing functions.
* Populate the given root node with deserialized data.
*
* @param root Root node to populate.
* @param filename Path of input file.
* @param stream Input stream.
* @param string String of input data.
* @param buffer Char array of input data.
* @param size Buffer size.
*
* @throw InternalException An internal error occurred.
* @throw ParsingException Invalid input YAML data.
* @throw OperationException If filename or buffer pointer is invalid.
*
*/
void Parse(Node & root, const char * filename); // input from a file path
void Parse(Node & root, std::iostream & stream); // input from a stream
void Parse(Node & root, const std::string & string); // input from a string
void Parse(Node & root, const char * buffer, const size_t size); // input from a char buffer of given size
/**
* @brief Serialization configuration structure,
* describing output behavior.
*
*/
struct SerializeConfig
{
/**
* @brief Constructor.
*
* @param spaceIndentation Number of spaces per indentation.
* @param scalarMaxLength Maximum length of scalars. Serialized as folded scalars if exceeded.
* Ignored if equal to 0.
* @param sequenceMapNewline Put maps on a new line if parent node is a sequence.
* @param mapScalarNewline Put scalars on a new line if parent node is a map.
*
*/
SerializeConfig(const size_t spaceIndentation = 2,
const size_t scalarMaxLength = 64,
const bool sequenceMapNewline = false,
const bool mapScalarNewline = false);
size_t SpaceIndentation; ///< Number of spaces per indentation.
size_t ScalarMaxLength; ///< Maximum length of scalars. Serialized as folded scalars if exceeded.
bool SequenceMapNewline; ///< Put maps on a new line if parent node is a sequence.
bool MapScalarNewline; ///< Put scalars on a new line if parent node is a map.
};
/**
* @brief Serialization functions.
*        When config is omitted, {2, 64, false, false} is used, which
*        matches the default arguments of the SerializeConfig constructor.
*
* @param root Root node to serialize.
* @param filename Path of output file.
* @param stream Output stream.
* @param string String of output data.
* @param config Serialization configurations.
*
* @throw InternalException An internal error occurred.
* @throw OperationException If filename or buffer pointer is invalid.
* If config is invalid.
*
*/
void Serialize(const Node & root, const char * filename, const SerializeConfig & config = {2, 64, false, false}); // output to a file path
void Serialize(const Node & root, std::iostream & stream, const SerializeConfig & config = {2, 64, false, false}); // output to a stream
void Serialize(const Node & root, std::string & string, const SerializeConfig & config = {2, 64, false, false}); // output to a string
}

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.11)
#cmake_minimum_required(VERSION 2.8.11)
cmake_policy(SET CMP0003 NEW)
cmake_policy(SET CMP0048 NEW)
project(picoquic VERSION 0.0.1 LANGUAGES C CXX)

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11)
#CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11)
CMAKE_POLICY(SET CMP0003 NEW)
cmake_policy(SET CMP0048 NEW)
project(picotls VERSION 0.0.1)

View File

@ -1,7 +1,7 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
TARGETDIR="$BASEDIR/../content/shaders"
TARGETDIR="$BASEDIR/../vk"
GLSL=${GLSL:="glslc"}
rm $TARGETDIR/*.spv

Some files were not shown because too many files have changed in this diff Show More