352 lines
15 KiB
C++
352 lines
15 KiB
C++
#include "Allocator.hpp"
|
|
|
|
#include "PhysicalDeviceInfo.hpp"
|
|
#include <memory.h>
|
|
#include <cassert>
|
|
|
|
using namespace render::vk;
|
|
|
|
Allocator* Allocator::sInstance = nullptr;
|
|
|
|
constexpr VkDeviceSize MIN_ALLOC_SIZE = 1 << 28;
|
|
const auto NO_DELETER = memory::Deleter(nullptr);
|
|
memory::ptr memory::GetNull() { return memory::ptr(nullptr, NO_DELETER); }
|
|
|
|
// Sets up the allocator for `device`:
//  - records sampler capabilities from `info` (the anisotropy limit only when the feature is enabled),
//  - chains the memory budget structure into properties2 when info.hasMemoryBudget() is true,
//  - queries the memory properties once,
//  - creates a resettable command pool plus one primary command buffer for transfers
//    (using the graphics family when no transfer family is reported) and for graphics.
// Every pool creation or command buffer allocation failure is reported through FATAL.
// NOTE: info.queueIndices.graphicsFamily.value() is called unconditionally, so the
// graphics family must be present.
Allocator::Allocator(VkDevice device, const PhysicalDeviceInfo &info): physicalDevice(info.device),
    capabilities({info.features.samplerAnisotropy == VK_TRUE ? std::make_optional(info.properties.limits.maxSamplerAnisotropy) : std::nullopt,
        info.properties.limits.maxSamplerLodBias}), device(device)
{
    if(info.hasMemoryBudget()) {
        properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
        properties2.pNext = &budget;
        budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
    } else {
        LOG_W("No memory budget. Process may go out of memory.");
    }

    updateProperties();

    { // Transfer
        if (!info.queueIndices.transferFamily.has_value()) {
            LOG_W("No transfer queue family. Fallback to graphics one");
        }
        const auto family = info.queueIndices.transferFamily.value_or(info.queueIndices.graphicsFamily.value());

        vkGetDeviceQueue(device, family, 0, &transferQueue);

        VkCommandPoolCreateInfo poolInfo{};
        poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        poolInfo.queueFamilyIndex = family;
        // Buffers from this pool are reset individually after each submission.
        poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

        if (vkCreateCommandPool(device, &poolInfo, ALLOC, &transferPool) != VK_SUCCESS) {
            FATAL("Failed to create transfer pool!");
        }

        VkCommandBufferAllocateInfo allocInfo{};
        allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        allocInfo.commandPool = transferPool;
        allocInfo.commandBufferCount = 1;

        // The result used to be ignored; a failure here would leave transferBuffer unusable.
        if (vkAllocateCommandBuffers(device, &allocInfo, &transferBuffer) != VK_SUCCESS) {
            FATAL("Failed to allocate transfer command buffer!");
        }
    }
    { // Graphics
        const auto graphicsFamily = info.queueIndices.graphicsFamily.value();

        vkGetDeviceQueue(device, graphicsFamily, 0, &graphicsQueue);

        VkCommandPoolCreateInfo poolInfo{};
        poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        poolInfo.queueFamilyIndex = graphicsFamily;
        poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

        if (vkCreateCommandPool(device, &poolInfo, ALLOC, &graphicsPool) != VK_SUCCESS) {
            FATAL("Failed to create graphics pool!");
        }

        VkCommandBufferAllocateInfo allocInfo{};
        allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        allocInfo.commandPool = graphicsPool;
        allocInfo.commandBufferCount = 1;

        // Same check as for the transfer buffer above.
        if (vkAllocateCommandBuffers(device, &allocInfo, &graphicsBuffer) != VK_SUCCESS) {
            FATAL("Failed to allocate graphics command buffer!");
        }
    }
}
|
|
// Releases the command buffers and pools created by the constructor.
Allocator::~Allocator() {
    // Transfer side: give the buffer back, then drop its pool.
    vkFreeCommandBuffers(device, transferPool, 1, &transferBuffer);
    vkDestroyCommandPool(device, transferPool, ALLOC);

    // Graphics side: same sequence.
    vkFreeCommandBuffers(device, graphicsPool, 1, &graphicsBuffer);
    vkDestroyCommandPool(device, graphicsPool, ALLOC);

    //NOTE: all allocations are deleted by ~vector
}
|
|
|
|
void Allocator::updateProperties() {
|
|
if (hasBudget()) {
|
|
vkGetPhysicalDeviceMemoryProperties2(physicalDevice, &properties2);
|
|
#if LOG_TRACE
|
|
LOG_T("Available heaps:")
|
|
for (size_t i = 0; i < getProperties().memoryHeapCount; i++) {
|
|
LOG_T('\t' << i << ": " << budget.heapUsage[i] << '/' << budget.heapBudget[i]);
|
|
}
|
|
#endif
|
|
} else {
|
|
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &properties);
|
|
}
|
|
}
|
|
// Hands out a memory area satisfying `requirements` from a memory type that has all
// of the `properties` flags.
//
// First tries to place the request inside an existing Allocation of a compatible
// memory type and identical tiling mode, scanning the gaps between the areas already
// handed out (first fit, honouring requirements.alignment). If nothing fits, a new
// device memory page of at least MIN_ALLOC_SIZE is allocated (or exactly
// requirements.size when the full page does not fit the budget) and the request is
// placed at offset 0.
//
// Returns memory::GetNull() when no memory type fits or vkAllocateMemory fails.
// The returned area carries a host pointer only when the backing page is mapped.
memory::ptr Allocator::allocate(VkMemoryRequirements requirements, VkMemoryPropertyFlags properties, bool optimalTiling) {
    // Rounds an offset up to the next multiple of the requested alignment.
    const auto aligned = [&](VkDeviceSize offset) {
        const VkDeviceSize rest = offset % requirements.alignment;
        return rest == 0 ? offset : offset + requirements.alignment - rest;
    };

    // Search in existing allocations
    for (auto& alloc: allocations) {
        // 1u: memoryType may be 31, which would overflow a signed shift.
        if ((requirements.memoryTypeBits & (1u << alloc->memoryType)) &&
            (getProperties().memoryTypes[alloc->memoryType].propertyFlags & properties) == properties &&
            alloc->size > requirements.size && alloc->optimalTiling == optimalTiling
        ) {
            VkDeviceSize start = 0;
            auto it = alloc->areas.cbegin();
            // Registers [start, start + size) before `it` and wraps it in a handle.
            auto done = [&] {
                alloc->areas.insert(it, {requirements.size, start});
                return memory::ptr(new memory::area{alloc->memory, requirements.size, start, alloc->ptr != nullptr ? static_cast<uint8_t*>(alloc->ptr) + start : nullptr}, alloc->deleter);
            };
            while (it != alloc->areas.cend()) {
                // `start` may already lie past this area once rounded up to the
                // alignment; without the guard the unsigned subtraction wraps around
                // and an overlapping region would be handed out.
                if (it->offset > start && it->offset - start > requirements.size) {
                    return done();
                }
                start = aligned(it->offset + it->size);
                ++it;
            }
            // Same guard for the tail: the aligned start can exceed the page size.
            if (alloc->size > start && alloc->size - start > requirements.size) {
                return done();
            }
        }
    }
    LOG_T("Need to allocate more");

    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = std::max(MIN_ALLOC_SIZE, requirements.size);
    if (const auto pageIdx = findMemory(requirements.memoryTypeBits, properties, allocInfo.allocationSize)) {
        allocInfo.memoryTypeIndex = pageIdx.value();
    } else if (const auto exactIdx = findMemory(requirements.memoryTypeBits, properties, requirements.size)) {
        LOG_W("Memory heavily limited cannot allocate full page");
        allocInfo.allocationSize = requirements.size;
        allocInfo.memoryTypeIndex = exactIdx.value();
    } else {
        LOG_E("No suitable memory heap within memory budget");
        LOG_D(requirements.memoryTypeBits << ' ' << properties << ' ' << requirements.size);
        return memory::GetNull();
    }

    VkDeviceMemory deviceMemory;
    if (vkAllocateMemory(device, &allocInfo, ALLOC, &deviceMemory) != VK_SUCCESS) {
        LOG_E("Failed to allocate memory!");
        return memory::GetNull();
    }

    void *ptr = nullptr;
    if ((getProperties().memoryTypes[allocInfo.memoryTypeIndex].propertyFlags & memory::HOST_EASILY_WRITABLE) == memory::HOST_EASILY_WRITABLE) {
        // Keep the page unmapped (null host pointer) rather than trusting an
        // output pointer whose content is unspecified after a failed call.
        if (vkMapMemory(device, deviceMemory, 0, VK_WHOLE_SIZE, 0, &ptr) != VK_SUCCESS) {
            LOG_E("Failed to map memory!");
            ptr = nullptr;
        }
    } //TODO: allow flushable memory

    auto allocation = allocations.emplace_back(new Allocation(device, deviceMemory, allocInfo.allocationSize, allocInfo.memoryTypeIndex, optimalTiling, ptr)).get();
    allocation->areas.push_back({requirements.size, 0});

    return memory::ptr(new memory::area{deviceMemory, requirements.size, 0, ptr}, allocation->deleter);
}
|
|
|
|
// Puts `buffer` into the recording state, flagged as a one-time submission.
void beginCmd(VkCommandBuffer buffer) {
    VkCommandBufferBeginInfo info{};
    info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;

    vkBeginCommandBuffer(buffer, &info);
}
|
|
// Finishes recording `buffer`, submits it alone to `queue`, waits for the queue to
// become idle and finally resets the buffer so it can be recorded again.
void submitCmd(VkCommandBuffer buffer, VkQueue queue) {
    vkEndCommandBuffer(buffer);

    VkSubmitInfo submission{};
    submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submission.pCommandBuffers = &buffer;
    submission.commandBufferCount = 1;

    // No fence is attached: completion is detected by waiting on the whole queue.
    vkQueueSubmit(queue, 1, &submission, VK_NULL_HANDLE);
    //TODO: parallelize
    vkQueueWaitIdle(queue); //MAYBE: use fences
    vkResetCommandBuffer(buffer, 0);
}
|
|
void Allocator::transfer(std::function<void(VkCommandBuffer)> call) {
|
|
beginCmd(transferBuffer);
|
|
call(transferBuffer);
|
|
submitCmd(transferBuffer, transferQueue);
|
|
}
|
|
// Copies the first `size` bytes of `src` to the start of `dst` using the transfer
// command buffer; returns after submitCmd has waited for the transfer queue.
void Allocator::copyBuffer(VkBuffer src, VkBuffer dst, VkDeviceSize size) {
    beginCmd(transferBuffer);

    VkBufferCopy copyRegion{};
    copyRegion.srcOffset = 0;
    copyRegion.dstOffset = 0;
    copyRegion.size = size;
    // The region is passed by address (the original text had a mis-encoded '&copy').
    vkCmdCopyBuffer(transferBuffer, src, dst, 1, &copyRegion);

    submitCmd(transferBuffer, transferQueue);
}
|
|
// Records and submits, on the graphics queue, a pipeline barrier moving `image`
// from `oldLayout` to `newLayout` for mip levels [0, mipLevels) and array layers
// [0, arrayLayers).
//
// Supported transitions:
//   UNDEFINED            -> TRANSFER_DST_OPTIMAL
//   UNDEFINED            -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//   UNDEFINED            -> COLOR_ATTACHMENT_OPTIMAL
//   TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL
// Anything else goes through FATAL. `format` is only consulted to decide whether
// the stencil aspect has to be part of a depth transition.
void Allocator::transitionImageLayout(VkImage image, VkFormat format, VkImageLayout oldLayout, VkImageLayout newLayout, uint32_t mipLevels, uint32_t arrayLayers) {
    beginCmd(graphicsBuffer);

    VkImageMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.image = image;
    barrier.oldLayout = oldLayout;
    barrier.newLayout = newLayout;

    // No queue family ownership transfer.
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    auto& range = barrier.subresourceRange;
    range.baseMipLevel = 0;
    range.levelCount = mipLevels;
    range.baseArrayLayer = 0;
    range.layerCount = arrayLayers;

    if (newLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
        range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    } else {
        range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        if (PhysicalDeviceInfo::HasStencilComponent(format)) {
            range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
        }
    }

    VkPipelineStageFlags sourceStage;
    VkPipelineStageFlags destinationStage;

    if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
        // Nothing to wait for: previous contents are discarded.
        barrier.srcAccessMask = 0;
        sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

        switch (newLayout) {
        case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
            barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
            break;
        case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
            barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
            destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
            break;
        case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
            barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
            destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
            break;
        default:
            FATAL("Unsupported layout transition!");
            break;
        }
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
        // Transfer writes must be visible to fragment shader reads.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    } else {
        FATAL("Unsupported layout transition!");
    }

    vkCmdPipelineBarrier(graphicsBuffer, sourceStage, destinationStage, 0, 0, nullptr, 0, nullptr, 1, &barrier);

    submitCmd(graphicsBuffer, graphicsQueue);
}
|
|
// Uploads `mipLevels` consecutive mip levels from `src` into array layer
// `arrayLayer` of `dest` (which must be in TRANSFER_DST_OPTIMAL layout) using the
// transfer command buffer, then waits for completion through submitCmd.
//
// Level i is (width >> i) x (height >> i), clamped to at least 1 texel so that
// non-square images never produce a zero extent. Levels are read back to back from
// the buffer; each one advances the offset by rowLength * imageHeight with both
// values clamped to a minimum of 4.
// NOTE(review): that layout presumably targets 4x4 block formats at one byte per
// texel - confirm against the callers.
void Allocator::copyBufferToImage(VkBuffer src, VkImage dest, uint32_t width, uint32_t height, uint32_t mipLevels, uint32_t arrayLayer) {
    beginCmd(transferBuffer);

    VkDeviceSize offset = 0;
    // Parentheses: mipLevels value-initialised regions (not a one-element list).
    std::vector<VkBufferImageCopy> regions(mipLevels);
    for (uint32_t level = 0; level < mipLevels; level++) {
        auto& region = regions[level];
        region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        region.imageSubresource.mipLevel = level;
        region.imageSubresource.baseArrayLayer = arrayLayer;
        region.imageSubresource.layerCount = 1;

        region.imageOffset = {0, 0, 0};
        region.imageExtent = {std::max<uint32_t>(1, width >> level), std::max<uint32_t>(1, height >> level), 1};

        region.bufferOffset = offset;
        region.bufferRowLength = std::max<uint32_t>(4, region.imageExtent.width);
        region.bufferImageHeight = std::max<uint32_t>(4, region.imageExtent.height);

        // Widen before multiplying so large levels cannot wrap in 32 bits.
        offset += static_cast<VkDeviceSize>(region.bufferRowLength) * region.bufferImageHeight;
    }
    vkCmdCopyBufferToImage(transferBuffer, src, dest, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels, regions.data());

    submitCmd(transferBuffer, transferQueue);
}
|
|
|
|
std::optional<uint32_t> Allocator::findMemory(uint32_t typeFilter, VkMemoryPropertyFlags requirement, VkDeviceSize size) {
|
|
updateProperties();
|
|
#if LOG_TRACE
|
|
LOG_T("Available memory:");
|
|
for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
|
|
LOG_T('\t' << i << ": "
|
|
<< getProperties().memoryTypes[i].heapIndex << ' '
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ? "local " : "")
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? "visible " : "")
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ? "coherent " : "")
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ? "cached " : "")
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) ? "lazy " : "")
|
|
<< ((getProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT) ? "protected " : "")
|
|
<< getProperties().memoryHeaps[getProperties().memoryTypes[i].heapIndex].size);
|
|
}
|
|
#endif
|
|
for (uint32_t i = 0; i < getProperties().memoryTypeCount; i++) {
|
|
if ((typeFilter & (1 << i)) && (getProperties().memoryTypes[i].propertyFlags & requirement) == requirement) {
|
|
VkDeviceSize usage = size;
|
|
for(const auto& alloc: allocations) {
|
|
if(alloc->memoryType == i)
|
|
usage += alloc->size;
|
|
}
|
|
const auto heapIndex = getProperties().memoryTypes[i].heapIndex;
|
|
const VkDeviceSize heapSize = getProperties().memoryHeaps[heapIndex].size;
|
|
if (heapSize >= usage && (!hasBudget() || budget.heapBudget[heapIndex] >= budget.heapUsage[heapIndex] + size)) {
|
|
return i;
|
|
} else {
|
|
LOG_T("Out of budget " << usage << '/' << heapSize << " : " << budget.heapUsage[heapIndex] + size << '/' << budget.heapBudget[heapIndex]);
|
|
}
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
// Copies `data_size` bytes from `data` into the mapped area, starting
// `write_offset` bytes after its beginning.
// Preconditions (checked with assert only): the area has a host pointer and
// [write_offset, write_offset + data_size) lies inside the area.
void memory::area::write(const void* data, size_t data_size, size_t write_offset) {
    assert(ptr != nullptr);
    // Written as a subtraction so a huge offset/size pair cannot wrap around and
    // slip past the check.
    assert(write_offset <= size && data_size <= size - write_offset);
    memcpy(static_cast<uint8_t*>(ptr) + write_offset, data, data_size);
}
|
|
// Copies `data_size` bytes out of the mapped area, starting `read_offset` bytes
// after its beginning, into `data`.
// Preconditions (checked with assert only): the area has a host pointer and
// [read_offset, read_offset + data_size) lies inside the area.
void memory::area::read(void* data, size_t data_size, size_t read_offset) {
    assert(ptr != nullptr);
    // Written as a subtraction so a huge offset/size pair cannot wrap around and
    // slip past the check.
    assert(read_offset <= size && data_size <= size - read_offset);
    memcpy(data, static_cast<uint8_t*>(ptr) + read_offset, data_size);
}
|
|
|
|
// Releases `area`: removes the first entry with the same offset from the owning
// allocation's area list (when there is an owner) and destroys the area object.
// An error is logged when no entry matches or when the deleter has no owner; the
// area object is destroyed in every case.
void memory::Deleter::operator()(memory::area* area) {
    assert(area != nullptr && "Deleting null area");

    bool unregistered = false;
    if (owner != nullptr) {
        // Entries are identified by their offset inside the allocation.
        auto entry = owner->areas.begin();
        while (entry != owner->areas.end() && !(entry->offset == area->offset)) {
            ++entry;
        }
        if (entry != owner->areas.end()) {
            assert(entry->size == area->size);
            owner->areas.erase(entry);
            //MAYBE: remove if empty
            unregistered = true;
        }
    }

    if (!unregistered) {
        LOG_E("Allocation area not found");
    }
    delete area;
}
|
|
// Wraps one block of device memory of `size` bytes taken from memory type
// `memoryType`. `ptr` is the host mapping of the block, or null when it is not
// mapped. The embedded deleter is bound to this allocation.
Allocation::Allocation(VkDevice device, VkDeviceMemory memory, VkDeviceSize size, uint32_t memoryType, bool optimalTiling, void *ptr)
    : device(device),
      memory(memory),
      size(size),
      memoryType(memoryType),
      optimalTiling(optimalTiling),
      ptr(ptr),
      deleter(this)
{
}
|
|
// Unmaps (when mapped) and frees the device memory block. Areas that are still
// registered at this point are reported as an error.
Allocation::~Allocation() {
    if (!areas.empty()) {
        LOG_E("Freeing " << areas.size() << " floating buffers");
    }

    const bool mapped = ptr != nullptr;
    if (mapped) {
        vkUnmapMemory(device, memory);
    }

    vkFreeMemory(device, memory, ALLOC);
}