// Univerxel/src/client/render/vk/Allocator.cpp
#include "Allocator.hpp"
#include "PhysicalDeviceInfo.hpp"
#include <memory.h>
#include <cassert>
using namespace render::vk;
Allocator* Allocator::sInstance = nullptr;
// Minimum size of one vkAllocateMemory page (1 << 28 = 256 MiB); smaller requests are sub-allocated from pages.
constexpr VkDeviceSize MIN_ALLOC_SIZE = 1 << 28;
// Deleter with no owning Allocation — used only for the null sentinel below.
const auto NO_DELETER = memory::Deleter(nullptr);
// Returns the "null" memory handle used to signal allocation failure to callers.
memory::ptr memory::GetNull() { return memory::ptr(nullptr, NO_DELETER); }
/// Builds the allocator for `device`: caches sampler capabilities, primes the
/// (optional) VK_EXT_memory_budget query chain, and creates one resettable
/// command pool + primary command buffer for the transfer and graphics queues.
Allocator::Allocator(VkDevice device, const PhysicalDeviceInfo &info): physicalDevice(info.device),
capabilities({info.features.samplerAnisotropy == VK_TRUE ? std::make_optional(info.properties.limits.maxSamplerAnisotropy) : std::nullopt,
info.properties.limits.maxSamplerLodBias}), device(device)
{
    // Chain the budget struct into properties2 so updateProperties() fills both.
    if (info.hasMemoryBudget()) {
        properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
        properties2.pNext = &budget;
        budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
    } else {
        LOG_W("No memory budget. Process may go out of memory.");
    }
    updateProperties();

    // Shared setup (was copy-pasted for both queues): fetch the queue handle,
    // create a resettable command pool on `family`, allocate one primary buffer.
    const auto setup = [&](uint32_t family, VkQueue* queue, VkCommandPool* pool, VkCommandBuffer* buffer, const char* poolError) {
        vkGetDeviceQueue(device, family, 0, queue);
        VkCommandPoolCreateInfo poolInfo{};
        poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        poolInfo.queueFamilyIndex = family;
        poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
        if (vkCreateCommandPool(device, &poolInfo, ALLOC, pool) != VK_SUCCESS) {
            FATAL(poolError);
        }
        VkCommandBufferAllocateInfo allocInfo{};
        allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        allocInfo.commandPool = *pool;
        allocInfo.commandBufferCount = 1;
        // BUGFIX: this result was previously ignored; a failure left a null buffer in use.
        if (vkAllocateCommandBuffers(device, &allocInfo, buffer) != VK_SUCCESS) {
            FATAL("Failed to allocate command buffer!");
        }
    };

    // Transfer: prefer the dedicated transfer family, fall back to graphics.
    if (!info.queueIndices.transferFamily.has_value()) {
        LOG_W("No transfer queue family. Fallback to graphics one");
    }
    setup(info.queueIndices.transferFamily.value_or(info.queueIndices.graphicsFamily.value()),
          &transferQueue, &transferPool, &transferBuffer, "Failed to create transfer pool!");
    // Graphics
    setup(info.queueIndices.graphicsFamily.value(),
          &graphicsQueue, &graphicsPool, &graphicsBuffer, "Failed to create graphics pool!");
}
// Destroys the command buffers/pools created in the constructor.
// Device memory pages are released by ~Allocation when `allocations` is destroyed.
Allocator::~Allocator() {
// Buffers are freed before their owning pools are destroyed.
vkFreeCommandBuffers(device, transferPool, 1, &transferBuffer);
vkFreeCommandBuffers(device, graphicsPool, 1, &graphicsBuffer);
vkDestroyCommandPool(device, transferPool, ALLOC);
vkDestroyCommandPool(device, graphicsPool, ALLOC);
//NOTE: all allocations are delete by ~vector
}
// Refreshes the cached memory properties; when the budget chain was set up in the
// constructor, also refreshes per-heap usage/budget (properties2.pNext -> budget).
void Allocator::updateProperties() {
if (hasBudget()) {
// Fills `properties2` and, through its pNext chain, `budget`.
vkGetPhysicalDeviceMemoryProperties2(physicalDevice, &properties2);
#if LOG_TRACE
LOG_T("Available heaps:")
for (size_t i = 0; i < getProperties().memoryHeapCount; i++) {
LOG_T('\t' << i << ": " << budget.heapUsage[i] << '/' << budget.heapBudget[i]);
}
#endif
} else {
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &properties);
}
}
/// Sub-allocates `requirements.size` bytes with the requested property flags.
/// First-fit scan over existing pages; on miss, allocates a new page of at least
/// MIN_ALLOC_SIZE (falling back to the exact size under memory pressure).
/// Returns memory::GetNull() on failure.
memory::ptr Allocator::allocate(VkMemoryRequirements requirements, VkMemoryPropertyFlags properties, bool optimalTiling) {
    // Search in existing allocations (pages) for a compatible free range.
    for (auto& alloc: allocations) {
        if ((requirements.memoryTypeBits & (1 << alloc->memoryType)) &&
            (getProperties().memoryTypes[alloc->memoryType].propertyFlags & properties) == properties &&
            alloc->size >= requirements.size && alloc->optimalTiling == optimalTiling
        ) {
            VkDeviceSize start = 0;
            // Round `offset` up to the next multiple of the required alignment.
            auto aligned = [&](VkDeviceSize offset) {
                if (offset % requirements.alignment == 0)
                    return offset;
                return offset + requirements.alignment - (offset % requirements.alignment);
            };
            auto it = alloc->areas.cbegin();
            // Record the chosen range (keeps `areas` sorted by offset) and wrap it.
            auto done = [&] {
                alloc->areas.insert(it, {requirements.size, start});
                return memory::ptr(new memory::area{alloc->memory, requirements.size, start, alloc->ptr != nullptr ? static_cast<uint8_t*>(alloc->ptr) + start : nullptr}, alloc->deleter);
            };
            while (it != alloc->areas.cend()) {
                // BUGFIX: was `it->offset - start > requirements.size`, which
                // (a) rejected exact fits and (b) wrapped around (unsigned math)
                // whenever alignment pushed `start` past the next area's offset,
                // handing out overlapping ranges.
                if (start + requirements.size <= it->offset) {
                    return done();
                }
                start = aligned(it->offset + it->size);
                ++it;
            }
            // Tail of the page. BUGFIX: accept exact fits (was a strict `>`).
            if (start + requirements.size <= alloc->size) {
                return done();
            }
        }
    }
    LOG_T("Need to allocate more");
    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    // Allocate whole pages to amortize vkAllocateMemory calls.
    allocInfo.allocationSize = std::max(MIN_ALLOC_SIZE, requirements.size);
    if (const auto memIdx = findMemory(requirements.memoryTypeBits, properties, allocInfo.allocationSize)) {
        allocInfo.memoryTypeIndex = memIdx.value();
    } else if (const auto memIdx = findMemory(requirements.memoryTypeBits, properties, requirements.size)) {
        // A full page exceeds the budget; fall back to an exact-size allocation.
        LOG_W("Memory heavily limited cannot allocate full page");
        allocInfo.allocationSize = requirements.size;
        allocInfo.memoryTypeIndex = memIdx.value();
    } else {
        LOG_E("No suitable memory heap within memory budget");
        LOG_D(requirements.memoryTypeBits << ' ' << properties << ' ' << requirements.size);
        return memory::GetNull();
    }
    VkDeviceMemory memory;
    if (vkAllocateMemory(device, &allocInfo, ALLOC, &memory) != VK_SUCCESS) {
        LOG_E("Failed to allocate memory!");
        return memory::GetNull();
    }
    // Persistently map pages whose type is easily host-writable (presumably
    // HOST_VISIBLE|HOST_COHERENT — see memory::HOST_EASILY_WRITABLE) so areas can memcpy directly.
    void *ptr = nullptr;
    if ((getProperties().memoryTypes[allocInfo.memoryTypeIndex].propertyFlags & memory::HOST_EASILY_WRITABLE) == memory::HOST_EASILY_WRITABLE) {
        vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &ptr);
    } //TODO: allow flushable memory
    auto allocation = allocations.emplace_back(new Allocation(device, memory, allocInfo.allocationSize, allocInfo.memoryTypeIndex, optimalTiling, ptr)).get();
    allocation->areas.push_back({requirements.size, 0});
    return memory::ptr(new memory::area{memory, requirements.size, 0, ptr}, allocation->deleter);
}
// Starts recording a single-use command buffer.
void beginCmd(VkCommandBuffer buffer) {
    VkCommandBufferBeginInfo begin{};
    begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    vkBeginCommandBuffer(buffer, &begin);
}
// Ends recording, submits to `queue`, blocks until completion, then resets the
// buffer so it can be recorded again.
void submitCmd(VkCommandBuffer cmd, VkQueue queue) {
    vkEndCommandBuffer(cmd);

    VkSubmitInfo submit{};
    submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit.commandBufferCount = 1;
    submit.pCommandBuffers = &cmd;
    vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);

    //TODO: parallelize
    vkQueueWaitIdle(queue); //MAYBE: use fences
    vkResetCommandBuffer(cmd, 0);
}
void Allocator::transfer(std::function<void(VkCommandBuffer)> call) {
beginCmd(transferBuffer);
call(transferBuffer);
submitCmd(transferBuffer, transferQueue);
}
// Copies the first `size` bytes of `src` into `dst` (offset 0 on both sides),
// blocking on the transfer queue.
void Allocator::copyBuffer(VkBuffer src, VkBuffer dst, VkDeviceSize size) {
    beginCmd(transferBuffer);
    const VkBufferCopy region{0, 0, size}; // srcOffset, dstOffset, size
    vkCmdCopyBuffer(transferBuffer, src, dst, 1, &region);
    submitCmd(transferBuffer, transferQueue);
}
// Records and submits (blocking, on the graphics queue) a pipeline barrier that
// transitions `image` between layouts. Only the four transitions matched below
// are supported; anything else is a fatal error.
void Allocator::transitionImageLayout(VkImage image, VkFormat format, VkImageLayout oldLayout, VkImageLayout newLayout, uint32_t mipLevels, uint32_t arrayLayers) {
beginCmd(graphicsBuffer);
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
// No queue family ownership transfer.
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = image;
// Transition every mip level and array layer in one barrier.
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = mipLevels;
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.layerCount = arrayLayers;
// Depth targets need the depth aspect (plus stencil when the format has one);
// everything else is treated as a color image.
if (newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (PhysicalDeviceInfo::HasStencilComponent(format)) {
barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
} else {
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
// Pick access masks and pipeline stages for the specific (old, new) pair.
VkPipelineStageFlags sourceStage;
VkPipelineStageFlags destinationStage;
if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
// Fresh image about to receive a copy: nothing to wait on.
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
// Copy finished: make the image readable from fragment shaders.
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
// New depth buffer: ready it for early-fragment depth tests.
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
// New color attachment: ready it for color output.
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
} else {
FATAL("Unsupported layout transition!");
}
vkCmdPipelineBarrier(graphicsBuffer, sourceStage, destinationStage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
submitCmd(graphicsBuffer, graphicsQueue);
}
/// Copies a full mip chain from `src` into `dest` (one region per mip level,
/// single array layer `arrayLayer`), blocking on the transfer queue.
/// `dest` must already be in TRANSFER_DST_OPTIMAL layout.
void Allocator::copyBufferToImage(VkBuffer src, VkImage dest, uint32_t width, uint32_t height, uint32_t mipLevels, uint32_t arrayLayer) {
    beginCmd(transferBuffer);
    VkDeviceSize offset = 0;
    std::vector<VkBufferImageCopy> regions{mipLevels}; // value-initialized
    for (uint32_t i = 0; i < mipLevels; i++) {  // uint32_t avoids narrowing into mipLevel
        regions[i].imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        regions[i].imageSubresource.mipLevel = i;
        regions[i].imageSubresource.baseArrayLayer = arrayLayer;
        regions[i].imageSubresource.layerCount = 1;
        regions[i].imageOffset = {0, 0, 0};
        // BUGFIX: clamp mip extents to at least 1 texel. `width >> i` reaches 0 on
        // deep mip chains, and Vulkan forbids zero-sized copy extents.
        regions[i].imageExtent = {std::max<uint32_t>(1, width >> i), std::max<uint32_t>(1, height >> i), 1};
        regions[i].bufferOffset = offset;
        // Rows padded to at least 4 texels — presumably to match the source buffer's
        // packing for small mips; TODO confirm against the uploader.
        regions[i].bufferRowLength = std::max<uint32_t>(4, regions[i].imageExtent.width);
        regions[i].bufferImageHeight = std::max<uint32_t>(4, regions[i].imageExtent.height);
        // NOTE(review): offset advances by texel count, not bytes — this assumes one
        // byte per texel (or callers pack accordingly); verify against the data format.
        offset += regions[i].bufferRowLength * regions[i].bufferImageHeight;
    }
    vkCmdCopyBufferToImage(transferBuffer, src, dest, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels, regions.data());
    submitCmd(transferBuffer, transferQueue);
}
/// Finds a memory type index matching `typeFilter` and `requirement` whose heap
/// can still hold `size` more bytes (within the VK_EXT_memory_budget limits when
/// available). Returns nullopt when no heap fits.
std::optional<uint32_t> Allocator::findMemory(uint32_t typeFilter, VkMemoryPropertyFlags requirement, VkDeviceSize size) {
    updateProperties();
#if LOG_TRACE
    LOG_T("Available memory:");
    for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
        LOG_T('\t' << i << ": "
            << getProperties().memoryTypes[i].heapIndex << ' '
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ? "local " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? "visible " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ? "coherent " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ? "cached " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) ? "lazy " : "")
            << ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT) ? "protected " : "")
            << getProperties().memoryHeaps[getProperties().memoryTypes[i].heapIndex].size);
    }
#endif
    for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
        // BUGFIX: `1 << i` is UB for i == 31 (signed overflow); use an unsigned literal.
        if ((typeFilter & (1u << i)) && (getProperties().memoryTypes[i].propertyFlags & requirement) == requirement) {
            // Account for pages we already allocated from this memory type.
            VkDeviceSize usage = size;
            for (const auto& alloc: allocations) {
                if (alloc->memoryType == i)
                    usage += alloc->size;
            }
            const auto heapIndex = getProperties().memoryTypes[i].heapIndex;
            const VkDeviceSize heapSize = getProperties().memoryHeaps[heapIndex].size;
            if (heapSize >= usage && (!hasBudget() || budget.heapBudget[heapIndex] >= budget.heapUsage[heapIndex] + size)) {
                return i;
            } else if (hasBudget()) {
                LOG_T("Out of budget " << usage << '/' << heapSize << " : " << budget.heapUsage[heapIndex] + size << '/' << budget.heapBudget[heapIndex]);
            } else {
                // BUGFIX: the old log read `budget` here even without the budget
                // extension, i.e. an uninitialized struct.
                LOG_T("Out of heap " << usage << '/' << heapSize);
            }
        }
    }
    return {};
}
// Copies `data_size` bytes from `data` into this mapped area at `write_offset`.
// Requires the area to be host-mapped and the write to stay in bounds.
void render::vk::memory::area::write(const void* data, VkDeviceSize data_size, VkDeviceSize write_offset) {
    assert(ptr != nullptr && write_offset + data_size <= size);
    auto* const dst = static_cast<uint8_t*>(ptr) + write_offset;
    memcpy(dst, data, data_size);
}
// Copies `data_size` bytes out of this mapped area at `read_offset` into `data`.
// Requires the area to be host-mapped and the read to stay in bounds.
void render::vk::memory::area::read(void* data, VkDeviceSize data_size, VkDeviceSize read_offset) {
    assert(ptr != nullptr && read_offset + data_size <= size);
    const auto* const src = static_cast<uint8_t*>(ptr) + read_offset;
    memcpy(data, src, data_size);
}
// Releases a sub-allocated area: removes the matching range (by offset) from the
// owning Allocation's bookkeeping list, then frees the handle itself.
void render::vk::memory::Deleter::operator()(render::vk::memory::area* area) {
assert(area != nullptr && "Deleting null area");
if(owner != nullptr) {
for (auto it = owner->areas.begin(); it != owner->areas.end(); ++it) {
if(it->offset == area->offset) {
assert(it->size == area->size);
owner->areas.erase(it);
//MAYBE: remove if empty
delete area;
return;
}
}
}
// Reached when there is no owner (NO_DELETER sentinel) or the offset is untracked.
LOG_E("Allocation area not found");
delete area;
}
// Wraps one vkAllocateMemory page. `ptr` is the persistent host mapping (nullptr
// when not host-visible); `deleter` ties areas handed out from this page back to it.
Allocation::Allocation(VkDevice device, VkDeviceMemory memory, VkDeviceSize size, uint32_t memoryType, bool optimalTiling, void *ptr):
device(device), memory(memory), size(size), memoryType(memoryType), optimalTiling(optimalTiling), ptr(ptr), deleter(this) { }
// Unmaps (when mapped) and frees the device memory page. Any areas still
// registered are reported as leaks, but the memory is reclaimed regardless.
Allocation::~Allocation() {
if(!areas.empty())
LOG_E("Freeing " << areas.size() << " floating buffers");
if(ptr != nullptr)
vkUnmapMemory(device, memory);
vkFreeMemory(device, memory, ALLOC);
}