diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 2583aae867..e1aa3473fc 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include "common/assert.h"
@@ -681,22 +682,17 @@ struct Memory::Impl {
         }
     }

-    [[nodiscard]] u8* GetPointerImpl(u64 vaddr, auto on_unmapped, auto on_rasterizer) const {
+    template <typename F, typename G>
+    [[nodiscard]] u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const {
         // AARCH64 masks the upper 16 bit of all memory accesses
-        vaddr = vaddr & 0xffffffffffffULL;
-        if (!AddressSpaceContains(*current_page_table, vaddr, 1)) [[unlikely]] {
-            on_unmapped();
-            return nullptr;
-        } else {
+        vaddr &= 0xffffffffffffULL;
+        if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] {
             // Avoid adding any extra logic to this fast-path block
             const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw();
-            if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
+            if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) [[likely]] {
                 return reinterpret_cast<u8*>(pointer + vaddr);
             } else {
                 switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
-                case Common::PageType::Unmapped:
-                    on_unmapped();
-                    return nullptr;
                 case Common::PageType::Memory:
                     ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
                     return nullptr;
@@ -707,11 +703,18 @@ struct Memory::Impl {
                     on_rasterizer();
                     return host_ptr;
                 }
+                case Common::PageType::Unmapped: [[unlikely]] {
+                    on_unmapped();
+                    return nullptr;
+                }
                 default:
                     UNREACHABLE();
                 }
                 return nullptr;
             }
+        } else {
+            on_unmapped();
+            return nullptr;
         }
     }

@@ -729,172 +732,38 @@ struct Memory::Impl {
             GetInteger(vaddr), []() {}, []() {});
     }

-    /**
-     * Reads a particular data type out of memory at the given virtual address.
-     *
-     * @param vaddr The virtual address to read the data type from.
-     *
-     * @tparam T The data type to read out of memory. This type *must* be
-     *           trivially copyable, otherwise the behavior of this function
-     *           is undefined.
-     *
-     * @returns The instance of T read from the specified virtual address.
-     */
+    /// @brief Reads a particular data type out of memory at the given virtual address.
+    /// @param vaddr The virtual address to read the data type from.
+    /// @tparam T The data type to read out of memory.
+    /// @returns The instance of T read from the specified virtual address.
     template <typename T>
-    T Read(Common::ProcessAddress vaddr) {
-        // Fast path for aligned reads of common sizes
+    inline T Read(Common::ProcessAddress vaddr) noexcept requires(std::is_trivially_copyable_v<T>) {
         const u64 addr = GetInteger(vaddr);
-        if constexpr (std::is_same_v<T, u8> || std::is_same_v<T, s8>) {
-            // 8-bit reads are always aligned
-            const u8* const ptr = GetPointerImpl(
-                addr,
-                [addr]() {
-                    LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr);
-                },
-                [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
-            if (ptr) {
-                return static_cast<T>(*ptr);
-            }
-            return 0;
-        } else if constexpr (std::is_same_v<T, u16> || std::is_same_v<T, s16>) {
-            // Check alignment for 16-bit reads
-            if ((addr & 1) == 0) {
-                const u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr);
-                    },
-                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
-                if (ptr) {
-                    return static_cast<T>(*reinterpret_cast<const u16*>(ptr));
-                }
-            }
-        } else if constexpr (std::is_same_v<T, u32> || std::is_same_v<T, s32>) {
-            // Check alignment for 32-bit reads
-            if ((addr & 3) == 0) {
-                const u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr);
-                    },
-                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
-                if (ptr) {
-                    return static_cast<T>(*reinterpret_cast<const u32*>(ptr));
-                }
-            }
-        } else if constexpr (std::is_same_v<T, u64> || std::is_same_v<T, s64>) {
-            // Check alignment for 64-bit reads
-            if ((addr & 7) == 0) {
-                const u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr);
-                    },
-                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
-                if (ptr) {
-                    return static_cast<T>(*reinterpret_cast<const u64*>(ptr));
-                }
-            }
-        }
-
-        // Fall back to the general case for other types or unaligned access
-        T result = 0;
-        const u8* const ptr = GetPointerImpl(
-            addr,
-            [addr]() {
-                LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr);
-            },
-            [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
-        if (ptr) {
+        if (auto const ptr = GetPointerImpl(addr, [addr]() {
+                LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr);
+            }, [&]() {
+                HandleRasterizerDownload(addr, sizeof(T));
+            }); ptr) [[likely]] {
+            // It may be tempting to rewrite this particular section to use "reinterpret_cast";
+            // after all, it's trivially copyable so surely it can be copied ov- Alignment.
+            // Remember, alignment. memcpy() will deal with all the alignment extremely fast.
+            T result{};
             std::memcpy(&result, ptr, sizeof(T));
+            return result;
         }
-        return result;
+        return T{};
     }

-    /**
-     * Writes a particular data type to memory at the given virtual address.
-     *
-     * @param vaddr The virtual address to write the data type to.
-     *
-     * @tparam T The data type to write to memory. This type *must* be
-     *           trivially copyable, otherwise the behavior of this function
-     *           is undefined.
-     */
+    /// @brief Writes a particular data type to memory at the given virtual address.
+    /// @param vaddr The virtual address to write the data type to.
+    /// @tparam T The data type to write to memory.
     template <typename T>
-    void Write(Common::ProcessAddress vaddr, const T data) {
-        // Fast path for aligned writes of common sizes
+    inline void Write(Common::ProcessAddress vaddr, const T data) noexcept requires(std::is_trivially_copyable_v<T>) {
         const u64 addr = GetInteger(vaddr);
-        if constexpr (std::is_same_v<T, u8> || std::is_same_v<T, s8>) {
-            // 8-bit writes are always aligned
-            u8* const ptr = GetPointerImpl(
-                addr,
-                [addr, data]() {
-                    LOG_ERROR(HW_Memory, "Unmapped Write8 @ 0x{:016X} = 0x{:02X}", addr,
-                              static_cast<u8>(data));
-                },
-                [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
-            if (ptr) {
-                *ptr = static_cast<u8>(data);
-            }
-            return;
-        } else if constexpr (std::is_same_v<T, u16> || std::is_same_v<T, s16>) {
-            // Check alignment for 16-bit writes
-            if ((addr & 1) == 0) {
-                u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr, data]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Write16 @ 0x{:016X} = 0x{:04X}", addr,
-                                  static_cast<u16>(data));
-                    },
-                    [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
-                if (ptr) {
-                    *reinterpret_cast<u16*>(ptr) = static_cast<u16>(data);
-                    return;
-                }
-            }
-        } else if constexpr (std::is_same_v<T, u32> || std::is_same_v<T, s32>) {
-            // Check alignment for 32-bit writes
-            if ((addr & 3) == 0) {
-                u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr, data]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Write32 @ 0x{:016X} = 0x{:08X}", addr,
-                                  static_cast<u32>(data));
-                    },
-                    [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
-                if (ptr) {
-                    *reinterpret_cast<u32*>(ptr) = static_cast<u32>(data);
-                    return;
-                }
-            }
-        } else if constexpr (std::is_same_v<T, u64> || std::is_same_v<T, s64>) {
-            // Check alignment for 64-bit writes
-            if ((addr & 7) == 0) {
-                u8* const ptr = GetPointerImpl(
-                    addr,
-                    [addr, data]() {
-                        LOG_ERROR(HW_Memory, "Unmapped Write64 @ 0x{:016X} = 0x{:016X}", addr,
-                                  static_cast<u64>(data));
-                    },
-                    [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
-                if (ptr) {
-                    *reinterpret_cast<u64*>(ptr) = static_cast<u64>(data);
-                    return;
-                }
-            }
-        }
-
-        // Fall back to the general case for other types or unaligned access
-        u8* const ptr = GetPointerImpl(
-            addr,
-            [addr, data]() {
-                LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
-                          addr, static_cast<u64>(data));
-            },
-            [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
-        if (ptr) {
+        if (auto const ptr = GetPointerImpl(addr, [addr, data]() {
+                LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data));
+            }, [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); ptr) [[likely]]
             std::memcpy(ptr, &data, sizeof(T));
-        }
     }

     template <typename T>
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 52ca9bbdb6..e2aa6c7e49 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -156,6 +156,8 @@ void MaxwellDMA::Launch() {
 }

 void MaxwellDMA::CopyBlockLinearToPitch() {
+
+    u32 bytes_per_pixel = 1;
     DMA::ImageOperand src_operand;
     src_operand.bytes_per_pixel = bytes_per_pixel;
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 7bfcd6503b..68543bdd48 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -46,6 +46,38 @@ namespace Vulkan {
 using VideoCommon::ImageViewType;

 namespace {
+
+[[nodiscard]] VkImageAspectFlags AspectMaskFromFormat(VideoCore::Surface::PixelFormat format) {
+    using VideoCore::Surface::SurfaceType;
+    switch (VideoCore::Surface::GetFormatType(format)) {
+    case SurfaceType::ColorTexture:
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+    case SurfaceType::Depth:
+        return VK_IMAGE_ASPECT_DEPTH_BIT;
+    case SurfaceType::Stencil:
+        return VK_IMAGE_ASPECT_STENCIL_BIT;
+    case SurfaceType::DepthStencil:
+        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    default:
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+    }
+}
+
+[[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) {
+    auto range = image_view.range;
+    if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) {
+        range.base.layer = 0;
+        range.extent.layers = 1;
+    }
+    return VkImageSubresourceRange{
+        .aspectMask = AspectMaskFromFormat(image_view.format),
+        .baseMipLevel = static_cast<u32>(range.base.level),
+        .levelCount = static_cast<u32>(range.extent.levels),
+        .baseArrayLayer = static_cast<u32>(range.base.layer),
+        .layerCount = static_cast<u32>(range.extent.layers),
+    };
+}
+
 struct PushConstants {
     std::array tex_scale;
     std::array tex_offset;
@@ -417,6 +449,40 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
                            0, barrier);
 }

+void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) {
+    const VkImage image = image_view.ImageHandle();
+    const VkImageSubresourceRange subresource_range = SubresourceRangeFromView(image_view);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([image, subresource_range](vk::CommandBuffer cmdbuf) {
+        const VkImageMemoryBarrier barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                             VK_ACCESS_SHADER_WRITE_BIT |
+                             VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange = subresource_range,
+        };
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_TRANSFER_BIT |
+                VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            0,
+            barrier);
+    });
+}
+
 void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) {
     const VkRenderPass render_pass = framebuffer->RenderPass();
     const VkFramebuffer framebuffer_handle = framebuffer->Handle();
@@ -484,7 +550,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,

 BlitImageHelper::~BlitImageHelper() = default;

-void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
                                 const Region2D& dst_region, const Region2D& src_region,
                                 Tegra::Engines::Fermi2D::Filter filter,
                                 Tegra::Engines::Fermi2D::Operation operation) {
@@ -496,10 +562,12 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
     const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
     const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
+    const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
+
+    RecordShaderReadBarrier(scheduler, src_image_view);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
                       src_view](vk::CommandBuffer cmdbuf) {
-        // TODO: Barriers
         const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
         UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -538,7 +606,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
 }

 void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
-                                       VkImageView src_depth_view, VkImageView src_stencil_view,
+                                       ImageView& src_image_view,
                                        const Region2D& dst_region, const Region2D& src_region,
                                        Tegra::Engines::Fermi2D::Filter filter,
                                        Tegra::Engines::Fermi2D::Operation operation) {
@@ -554,10 +622,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
     const VkPipelineLayout layout = *two_textures_pipeline_layout;
     const VkSampler sampler = *nearest_sampler;
     const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
+    const VkImageView src_depth_view = src_image_view.DepthView();
+    const VkImageView src_stencil_view = src_image_view.StencilView();
+
+    RecordShaderReadBarrier(scheduler, src_image_view);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
                       src_stencil_view, this](vk::CommandBuffer cmdbuf) {
-        // TODO: Barriers
         const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
         UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
                                        src_stencil_view);
@@ -692,6 +763,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
     const VkSampler sampler = *nearest_sampler;
     const VkExtent2D extent = GetConversionExtent(src_image_view);

+    RecordShaderReadBarrier(scheduler, src_image_view);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
         const VkOffset2D offset{
@@ -717,7 +789,6 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
         const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
         UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);

-        // TODO: Barriers
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
                                   nullptr);
@@ -737,6 +808,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer
     const VkSampler sampler = *nearest_sampler;
     const VkExtent2D extent = GetConversionExtent(src_image_view);

+    RecordShaderReadBarrier(scheduler, src_image_view);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent,
                       this](vk::CommandBuffer cmdbuf) {
@@ -763,7 +835,6 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer
         const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
         UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
                                        src_stencil_view);
-        // TODO: Barriers
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
                                   nullptr);
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 3d400be6a9..bdb8cce883 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -1,4 +1,7 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #pragma once
@@ -43,7 +46,7 @@ public:
                     StateTracker& state_tracker, DescriptorPool& descriptor_pool);
     ~BlitImageHelper();

-    void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+    void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
                    const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
                    Tegra::Engines::Fermi2D::Operation operation);
@@ -52,9 +55,9 @@ public:
                    VkImage src_image, VkSampler src_sampler, const Region2D& dst_region,
                    const Region2D& src_region, const Extent3D& src_size);

-    void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
-                          VkImageView src_stencil_view, const Region2D& dst_region,
-                          const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
+    void BlitDepthStencil(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
+                          const Region2D& dst_region, const Region2D& src_region,
+                          Tegra::Engines::Fermi2D::Filter filter,
                           Tegra::Engines::Fermi2D::Operation operation);

     void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8d1d609a35..575651905e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1086,8 +1086,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
         return;
     }
     if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
-        blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
-                                    dst_region, src_region, filter, operation);
+        blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
+                                    operation);
         return;
     }
     ASSERT(src.format == dst.format);
@@ -1106,8 +1106,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
         }();
         if (!can_blit_depth_stencil) {
             UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
-            blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
-                                               dst_region, src_region, filter, operation);
+            blit_image_helper.BlitDepthStencil(dst_framebuffer, src, dst_region, src_region,
+                                               filter, operation);
             return;
         }
     }
@@ -1968,18 +1968,17 @@ bool Image::BlitScaleHelper(bool scale_up) {
             blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent,
                                                              scale_up);
         }
-        const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);

-        runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
+        runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region,
                                              src_region, operation, BLIT_OPERATION);
     } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
         if (!blit_framebuffer) {
             blit_framebuffer =
                 std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up);
         }
-        runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
-                                                    blit_view->StencilView(), dst_region,
-                                                    src_region, operation, BLIT_OPERATION);
+        runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view,
+                                                    dst_region, src_region, operation,
+                                                    BLIT_OPERATION);
     } else {
         // TODO: Use helper blits where applicable
         flags &= ~ImageFlagBits::Rescaled;
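
The reworked Read/Write in memory.cpp above funnel every access through a single std::memcpy into a local T. As a minimal standalone illustration of that pattern (not part of the patch; the helper names load_raw/store_raw and the test buffer are invented for this sketch), the idea is that memcpy is well defined for any alignment and any trivially copyable type, whereas dereferencing a reinterpret_cast'ed pointer is undefined behaviour for misaligned addresses and can violate strict aliasing, and compilers lower a fixed-size memcpy to a plain load or store anyway:

// Illustration only: hypothetical helpers, not present in the codebase.
#include <cstdint>
#include <cstring>
#include <type_traits>

template <typename T>
    requires std::is_trivially_copyable_v<T>
T load_raw(const std::uint8_t* src) {
    // memcpy handles any alignment; the compiler turns this into a single load
    // for small fixed sizes.
    T value{};
    std::memcpy(&value, src, sizeof(T));
    return value;
}

template <typename T>
    requires std::is_trivially_copyable_v<T>
void store_raw(std::uint8_t* dst, const T& value) {
    std::memcpy(dst, &value, sizeof(T));
}

int main() {
    std::uint8_t buffer[16]{};
    store_raw<std::uint32_t>(buffer + 1, 0xDEADBEEFu); // deliberately misaligned offset
    return load_raw<std::uint32_t>(buffer + 1) == 0xDEADBEEFu ? 0 : 1;
}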
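The blit_image.cpp changes above replace the old "// TODO: Barriers" comments with RecordShaderReadBarrier, which is recorded before each blit or conversion samples its source view. A simplified standalone equivalent against the raw Vulkan C API (illustration only; the function name and the caller-supplied command buffer, image and range are assumptions of this sketch, not project API) looks like this:

// Illustration only: a write-to-shader-read hazard barrier in raw Vulkan.
#include <vulkan/vulkan.h>

void RecordWriteToSampleBarrier(VkCommandBuffer cmdbuf, VkImage image,
                                VkImageSubresourceRange range) {
    // Make prior attachment, shader and transfer writes visible to later shader reads.
    const VkImageMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                         VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf,
                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
                             VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                             VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                         0, 0, nullptr, 0, nullptr, 1, &barrier);
}

As in the patch, oldLayout and newLayout stay VK_IMAGE_LAYOUT_GENERAL, so the barrier only orders earlier writes against the upcoming sampled reads rather than performing a layout transition.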