1
0
Fork 0
Univerxel/src/client/render/vk/Allocator.cpp

363 lines
15 KiB
C++

#include "Allocator.hpp"
#include "PhysicalDeviceInfo.hpp"
#include <TracyVulkan.hpp>
#include <memory.h>
#include <cassert>
using namespace render::vk;
// Memory the host can write through a persistent mapping without explicit flushes.
constexpr auto HOST_EASILY_WRITABLE = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
// Minimum size of one vkAllocateMemory page (256 MiB); smaller requests sub-allocate from it.
constexpr VkDeviceSize MIN_ALLOC_SIZE = 1 << 28;
// Deleter with no owning Allocation; only ever paired with a null memory_area.
const auto NO_DELETER = Allocator::MemoryDeleter(nullptr);
// Returns the empty memory_ptr used to signal allocation failure.
Allocator::memory_ptr Allocator::GetNull() { return Allocator::memory_ptr(nullptr, NO_DELETER); }
/// Builds the allocator for `device`: queries memory properties (chaining the
/// budget structure when VK_EXT_memory_budget is available), then sets up the
/// transfer queue, command pool and command buffer used for staging copies.
/// Aborts (FATAL) when the transfer machinery cannot be created.
Allocator::Allocator(VkDevice device, const PhysicalDeviceInfo &info): physicalDevice(info.device), device(device) {
    if(info.hasMemoryBudget()) {
        properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
        properties2.pNext = &budget;
        budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
    } else {
        LOG_W("No memory budget. Process may go out of memory.");
    }
    updateProperties();
    {
        if (!info.queueIndices.transferFamily.has_value()) {
            LOG_W("No transfer queue family. Using graphics one");
        }
        // Fall back to the graphics family when no dedicated transfer family exists.
        const auto family = info.queueIndices.transferFamily.value_or(info.queueIndices.graphicsFamily.value());
        vkGetDeviceQueue(device, family, 0, &transferQueue);
        VkCommandPoolCreateInfo poolInfo{};
        poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        poolInfo.queueFamilyIndex = family;
        poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
        if (vkCreateCommandPool(device, &poolInfo, ALLOC, &transferPool) != VK_SUCCESS) {
            FATAL("Failed to create transfer pool!");
        }
    }
    {
        VkCommandBufferAllocateInfo allocInfo{};
        allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        allocInfo.commandPool = transferPool;
        allocInfo.commandBufferCount = 1;
        // FIX: the result was previously ignored; a failure here left
        // transferBuffer invalid and every later copyBuffer() undefined.
        if (vkAllocateCommandBuffers(device, &allocInfo, &transferBuffer) != VK_SUCCESS) {
            FATAL("Failed to allocate transfer command buffer!");
        }
        tracyCtx = TracyVkContext(info.device, device, transferQueue, transferBuffer);
    }
}
/// Tears down the transfer machinery (Tracy context, command buffer, pool).
/// Device memory pages themselves are released by ~Allocation when the
/// `allocations` vector is destroyed.
Allocator::~Allocator() {
    TracyVkDestroy(tracyCtx);
    // Free the buffer before destroying the pool that owns it.
    vkFreeCommandBuffers(device, transferPool, 1, &transferBuffer);
    vkDestroyCommandPool(device, transferPool, ALLOC);
    //NOTE: all allocations are delete by ~vector
}
/// Collects pending Tracy GPU timestamps and opens a named profiling zone on
/// the transfer command buffer. When Tracy is disabled these macros expand to
/// nothing, hence the (void)name to silence the unused-parameter warning.
void Allocator::setTracyZone(const char* name) {
    TracyVkCollect(tracyCtx, transferBuffer);
    TracyVkZone(tracyCtx, transferBuffer, name);
    (void)name;
}
/// Creates one VkBuffer in `out` and binds freshly (sub-)allocated memory with
/// `properties` to it. When `requirement.data` is set the payload is uploaded:
/// directly through the persistent mapping for host-visible memory, otherwise
/// via a temporary staging buffer and a device-side copy.
/// Returns the owning memory_ptr, or GetNull() on failure (in which case no
/// buffer handle is leaked).
Allocator::memory_ptr Allocator::createBuffer(const buffer_requirement& requirement, VkMemoryPropertyFlags properties, buffer_info& out) {
    VkBufferCreateInfo bufferInfo{};
    bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferInfo.size = requirement.size;
    bufferInfo.usage = requirement.usage;
    bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    if (vkCreateBuffer(device, &bufferInfo, ALLOC, &out.buffer) != VK_SUCCESS) {
        LOG_E("Failed to create buffer");
        return GetNull();
    }
    out.offset = 0;
    VkMemoryRequirements memRequirements;
    vkGetBufferMemoryRequirements(device, out.buffer, &memRequirements);
    auto memory = allocate(memRequirements, properties);
    if (!memory || vkBindBufferMemory(device, out.buffer, memory->ref, memory->offset) != VK_SUCCESS) {
        LOG_E("Failed to allocate buffer memory");
        // FIX: destroy the orphaned buffer instead of leaking it on this path.
        vkDestroyBuffer(device, out.buffer, ALLOC);
        out.buffer = VK_NULL_HANDLE;
        return GetNull();
    }
    if (requirement.size != 0 && requirement.data != nullptr) {
        if (memory->ptr != nullptr) {
            // Host-visible: write straight through the mapping.
            memory->write(requirement.data, requirement.data_size, requirement.data_offset);
        } else {
            // Device-local: upload through a host-visible staging buffer.
            Allocator::buffer_info stagingBuffer;
            if(auto stagingMemory = createBuffer({requirement.size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT}, HOST_EASILY_WRITABLE, stagingBuffer)) {
                stagingMemory->write(requirement.data, requirement.data_size, requirement.data_offset);
                copyBuffer(stagingBuffer, out, requirement.size);
                vkDestroyBuffer(device, stagingBuffer.buffer, ALLOC); //TODO: move to buffer destructor
            } else {
                FATAL("Cannot allocate staging memory");
                return GetNull();
            }
        }
    }
    return memory;
}
/// Creates several buffers sharing one memory allocation. Buffers are packed
/// back-to-back at the strictest alignment of the set; `out[i].offset` is each
/// buffer's offset inside the shared block. Payloads (where `data != nullptr`)
/// are uploaded directly for host-visible memory, or one by one through a
/// single staging buffer sized to the largest requirement.
/// Returns the owning memory_ptr, or GetNull() on failure (in which case every
/// buffer created so far is destroyed — previously they leaked).
Allocator::memory_ptr Allocator::createBuffers(const std::vector<buffer_requirement>& requirements, VkMemoryPropertyFlags properties, std::vector<buffer_info>& out) {
    assert(!requirements.empty());
    out.resize(requirements.size()+1); // extra sentinel entry used to compute the total size
    // FIX: error paths below used to leak already-created VkBuffer handles.
    auto destroyCreated = [&](size_t count) {
        for (size_t j = 0; j < count; j++)
            vkDestroyBuffer(device, out[j].buffer, ALLOC);
    };
    // Create buffers and merge their memory requirements.
    VkMemoryRequirements memRequirements = {0, 0, UINT32_MAX};
    std::vector<VkDeviceSize> sizes;
    sizes.resize(requirements.size());
    for (size_t i = 0; i < requirements.size(); i++) {
        VkBufferCreateInfo bufferInfo{};
        bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        bufferInfo.size = requirements[i].size;
        bufferInfo.usage = requirements[i].usage;
        bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
        if (vkCreateBuffer(device, &bufferInfo, ALLOC, &out[i].buffer) != VK_SUCCESS) {
            LOG_E("Failed to create buffer");
            destroyCreated(i);
            return GetNull();
        }
        VkMemoryRequirements individualMemRequirements;
        vkGetBufferMemoryRequirements(device, out[i].buffer, &individualMemRequirements);
        // Keep the strictest alignment and the intersection of usable memory types.
        memRequirements.alignment = std::max(memRequirements.alignment, individualMemRequirements.alignment);
        memRequirements.memoryTypeBits &= individualMemRequirements.memoryTypeBits;
        sizes[i] = individualMemRequirements.size;
    }
    // Lay the blocks out back-to-back, aligned.
    auto aligned = [&](VkDeviceSize offset) {
        if (offset % memRequirements.alignment == 0)
            return offset;
        return offset + memRequirements.alignment - (offset % memRequirements.alignment);
    };
    out[0].offset = 0;
    for (size_t i = 1; i < out.size(); i++) {
        out[i].offset = aligned(out[i-1].offset + sizes[i-1]);
    }
    // The sentinel's offset is the aligned end of the last buffer = total size.
    memRequirements.size = out.back().offset;
    out.pop_back();
    // Bind memory
    auto memory = allocate(memRequirements, properties);
    if (!memory) {
        LOG_E("Failed to allocate buffers");
        destroyCreated(requirements.size());
        return GetNull();
    }
    for (size_t i = 0; i < requirements.size(); i++) {
        if (vkBindBufferMemory(device, out[i].buffer, memory->ref, memory->offset + out[i].offset) != VK_SUCCESS) {
            LOG_E("Failed to bind buffer");
            destroyCreated(requirements.size());
            return GetNull();
        }
    }
    // One staging buffer large enough for the biggest payload is reused for all copies.
    VkDeviceSize stagingSize = 0;
    for (auto& requirement: requirements)
        if (requirement.data != nullptr)
            stagingSize = std::max(stagingSize, requirement.size);
    // Copy datas
    if (stagingSize != 0) {
        if (memory->ptr != nullptr) {
            // Host-visible: write each payload through the mapping at its block offset.
            for (size_t i = 0; i < requirements.size(); i++) {
                if (requirements[i].data != nullptr && requirements[i].size != 0) {
                    assert(requirements[i].data_size + requirements[i].data_offset <= requirements[i].size);
                    memory->write(requirements[i].data, requirements[i].data_size, out[i].offset + requirements[i].data_offset);
                }
            }
        } else {
            Allocator::buffer_info stagingBuffer;
            if(auto stagingMemory = createBuffer({stagingSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT}, HOST_EASILY_WRITABLE, stagingBuffer)) {
                for (size_t i = 0; i < requirements.size(); i++) {
                    if (requirements[i].data != nullptr && requirements[i].size != 0) {
                        assert(requirements[i].data_size + requirements[i].data_offset <= requirements[i].size);
                        stagingMemory->write(requirements[i].data, requirements[i].data_size, requirements[i].data_offset);
                        copyBuffer(stagingBuffer, out[i], requirements[i].size);
                    }
                }
                vkDestroyBuffer(device, stagingBuffer.buffer, ALLOC); //TODO: move to buffer destructor
            } else {
                FATAL("Cannot allocate staging memory");
                return GetNull();
            }
        }
    }
    return memory;
}
/// Refreshes the cached physical-device memory properties. Uses the
/// vkGetPhysicalDeviceMemoryProperties2 path (with the budget structure chained
/// in the constructor) when VK_EXT_memory_budget is available, otherwise the
/// plain query — in that case `budget` stays untouched.
void Allocator::updateProperties() {
    if (hasBudget()) {
        vkGetPhysicalDeviceMemoryProperties2(physicalDevice, &properties2);
#if LOG_TRACE
        LOG_T("Available heaps:")
        for (size_t i = 0; i < getProperties().memoryHeapCount; i++) {
            LOG_T('\t' << i << ": " << budget.heapUsage[i] << '/' << budget.heapBudget[i]);
        }
#endif
    } else {
        vkGetPhysicalDeviceMemoryProperties(physicalDevice, &properties);
    }
}
/// Sub-allocates `requirements.size` bytes with the requested property flags.
/// First-fit search over existing pages; when none fits, a new page of at
/// least MIN_ALLOC_SIZE is allocated (shrinking to the exact size if the
/// budget is tight). Returns GetNull() when no suitable memory exists.
Allocator::memory_ptr Allocator::allocate(VkMemoryRequirements requirements, VkMemoryPropertyFlags properties) {
    // Search in existing allocations
    for (auto& alloc: allocations) {
        if ((requirements.memoryTypeBits & (1 << alloc->memoryType)) &&
            (getProperties().memoryTypes[alloc->memoryType].propertyFlags & properties) == properties &&
            alloc->size >= requirements.size // FIX: was `>`, rejecting exact fits
        ) {
            VkDeviceSize start = 0;
            auto aligned = [&](VkDeviceSize offset) {
                if (offset % requirements.alignment == 0)
                    return offset;
                return offset + requirements.alignment - (offset % requirements.alignment);
            };
            auto it = alloc->areas.cbegin();
            // Reserve [start, start+size) in front of `it` and hand out the area.
            auto done = [&] {
                alloc->areas.insert(it, {requirements.size, start});
                return memory_ptr(new memory_area{alloc->memory, requirements.size, start, alloc->ptr != nullptr ? alloc->ptr + start : nullptr}, alloc->deleter);
            };
            while (it != alloc->areas.cend()) {
                // FIX: guard `it->offset >= start` first — alignment can push
                // `start` past the next area, and the unsigned subtraction then
                // underflowed to a huge "gap", handing out overlapping memory.
                // Also accept exact-size gaps (was a strict `>`).
                if (it->offset >= start && it->offset - start >= requirements.size) {
                    return done();
                }
                start = aligned(it->offset + it->size);
                ++it;
            }
            // Tail of the page after the last area (same underflow guard).
            if (alloc->size >= start && alloc->size - start >= requirements.size) {
                return done();
            }
        }
    }
    LOG_T("Need to allocate more");
    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = std::max(MIN_ALLOC_SIZE, requirements.size);
    if (const auto memIdx = findMemory(requirements.memoryTypeBits, properties, allocInfo.allocationSize)) {
        allocInfo.memoryTypeIndex = memIdx.value();
    } else if (const auto memIdx = findMemory(requirements.memoryTypeBits, properties, requirements.size)) {
        // Budget too tight for a full page: allocate only what is needed.
        LOG_W("Memory heavily limited cannot allocate full page");
        allocInfo.allocationSize = requirements.size;
        allocInfo.memoryTypeIndex = memIdx.value();
    } else {
        LOG_E("No suitable memory heap within memory budget");
        LOG_D(requirements.memoryTypeBits << ' ' << properties << ' ' << requirements.size);
        return GetNull();
    }
    VkDeviceMemory memory;
    if (vkAllocateMemory(device, &allocInfo, ALLOC, &memory) != VK_SUCCESS) {
        LOG_E("Failed to allocate memory!");
        return GetNull();
    }
    // Persistently map host-visible+coherent pages so writes need no flush.
    void *ptr = nullptr;
    if ((getProperties().memoryTypes[allocInfo.memoryTypeIndex].propertyFlags & HOST_EASILY_WRITABLE) == HOST_EASILY_WRITABLE) {
        vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &ptr);
    }
    auto allocation = allocations.emplace_back(new Allocation(device, memory, allocInfo.allocationSize, allocInfo.memoryTypeIndex, ptr)).get();
    allocation->areas.push_back({requirements.size, 0});
    return memory_ptr(new memory_area{memory, requirements.size, 0, ptr}, allocation->deleter);
}
/// Records and submits a blocking copy of `size` bytes from src.buffer to
/// dst.buffer on the transfer queue. Intra-buffer offsets are 0 on purpose:
/// buffer_info.offset is the binding offset inside the backing VkDeviceMemory,
/// not an offset inside the VkBuffer itself.
void Allocator::copyBuffer(buffer_info src, buffer_info dst, VkDeviceSize size) {
    //FIXME: assert no out of range
    VkCommandBufferBeginInfo beginInfo{};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    vkBeginCommandBuffer(transferBuffer, &beginInfo);
    VkBufferCopy copyRegion{};
    copyRegion.srcOffset = 0;
    copyRegion.dstOffset = 0;
    copyRegion.size = size;
    vkCmdCopyBuffer(transferBuffer, src.buffer, dst.buffer, 1, &copyRegion);
    vkEndCommandBuffer(transferBuffer);
    VkSubmitInfo submitInfo{};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &transferBuffer;
    vkQueueSubmit(transferQueue, 1, &submitInfo, VK_NULL_HANDLE);
    // Synchronous: block until the copy completes before reusing the buffer.
    vkQueueWaitIdle(transferQueue); //MAYBE: use fences
    vkResetCommandBuffer(transferBuffer, 0);
}
/// Finds a memory type index matching `typeFilter` and `requirement` whose heap
/// can still hold `size` more bytes (counting this allocator's own pages, and
/// the driver-reported budget when VK_EXT_memory_budget is available).
/// Returns an empty optional when no heap qualifies.
std::optional<uint32_t> Allocator::findMemory(uint32_t typeFilter, VkMemoryPropertyFlags requirement, VkDeviceSize size) {
    updateProperties();
#if LOG_TRACE
    LOG_T("Available memory:");
    for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
        LOG_T('\t' << i << ": "
            << getProperties().memoryTypes[i].heapIndex << ' '
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ? "local " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? "visible " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ? "coherent " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ? "cached " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) ? "lazy " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT) ? "protected " : "")
            << getProperties().memoryHeaps[getProperties().memoryTypes[i].heapIndex].size);
    }
#endif
    for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
        if ((typeFilter & (1 << i)) && (getProperties().memoryTypes[i].propertyFlags & requirement) == requirement) {
            // Account for what this allocator already holds in the same type.
            VkDeviceSize usage = size;
            for(const auto& alloc: allocations) {
                if(alloc->memoryType == i)
                    usage += alloc->size;
            }
            const auto heapIndex = getProperties().memoryTypes[i].heapIndex;
            const VkDeviceSize heapSize = getProperties().memoryHeaps[heapIndex].size;
            if (heapSize >= usage && (!hasBudget() || budget.heapBudget[heapIndex] >= budget.heapUsage[heapIndex] + size)) {
                return i;
            } else if (hasBudget()) {
                LOG_T("Out of budget " << usage << '/' << heapSize << " : " << budget.heapUsage[heapIndex] + size << '/' << budget.heapBudget[heapIndex]);
            } else {
                // FIX: the previous log read budget.heapUsage/heapBudget even
                // without the budget extension, i.e. uninitialized memory.
                LOG_T("Out of memory " << usage << '/' << heapSize);
            }
        }
    }
    return {};
}
/// Copies `data_size` bytes from `data` into the mapped area at `write_offset`.
/// The area must be host-mapped (ptr set by vkMapMemory) and large enough.
void Allocator::memory_area::write(const void* data, size_t data_size, size_t write_offset) {
    assert(ptr != nullptr && size >= write_offset + data_size);
    auto *const destination = ptr + write_offset;
    memcpy(destination, data, data_size);
}
/// Releases a sub-allocated area: removes the matching entry (by offset) from
/// the owning Allocation's area list, then deletes the handle. A missing owner
/// or unknown offset is logged but the handle is still freed.
void Allocator::MemoryDeleter::operator()(memory_area* area) {
    assert(area != nullptr && "Deleting null area");
    if(owner != nullptr) {
        for (auto entry = owner->areas.begin(); entry != owner->areas.end(); ++entry) {
            if (entry->offset != area->offset)
                continue;
            assert(entry->size == area->size);
            owner->areas.erase(entry);
            //MAYBE: remove if empty
            delete area;
            return;
        }
    }
    LOG_E("Allocation area not found");
    delete area;
}
/// One vkAllocateMemory page: records its device, handle, size, memory type
/// and optional persistent mapping; the deleter is bound back to this page.
Allocator::Allocation::Allocation(VkDevice device, VkDeviceMemory memory, VkDeviceSize size, uint32_t memoryType, void *ptr):
device(device), memory(memory), size(size), memoryType(memoryType), ptr(ptr), deleter(this) { }
/// Frees the page. Live areas at this point indicate leaked memory_ptrs and
/// are reported; the memory is unmapped (if it was mapped) before being freed.
Allocator::Allocation::~Allocation() {
    if(!areas.empty())
        LOG_E("Freeing " << areas.size() << " floating buffers");
    if(ptr != nullptr)
        vkUnmapMemory(device, memory);
    vkFreeMemory(device, memory, ALLOC);
}