From 268918aeced185684d246a2eaaf8ffce3fb795b1 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 4 Oct 2025 23:58:08 +0200 Subject: [PATCH 1/3] [vk] Implement Shader Read Barrier (#2671) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding the shader read barrier keeps every render/compute/transfer write visible before the image is sampled, so it prevents the “read-before-writes-finish” hazards. Without it you can get random stale frames, flickering post process passes, partially updated HUD textures, and corrupted depth-to-color conversions especially in scenes that render into an offscreen image and immediately feed that image to a shader (reflections, bloom, dynamic resolution, depth visualizers, etc.). This fix makes those R2T chains deterministic again across all Vulkan drivers. Co-authored-by: Ribbit Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2671 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: Ribbit Co-committed-by: Ribbit --- src/video_core/engines/maxwell_dma.cpp | 2 + src/video_core/renderer_vulkan/blit_image.cpp | 83 +++++++++++++++++-- src/video_core/renderer_vulkan/blit_image.h | 13 +-- .../renderer_vulkan/vk_texture_cache.cpp | 17 ++-- 4 files changed, 95 insertions(+), 20 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 52ca9bbdb6..e2aa6c7e49 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -156,6 +156,8 @@ void MaxwellDMA::Launch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + + u32 bytes_per_pixel = 1; DMA::ImageOperand src_operand; src_operand.bytes_per_pixel = bytes_per_pixel; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 7bfcd6503b..68543bdd48 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -46,6 +46,38 @@ namespace Vulkan { using 
VideoCommon::ImageViewType; namespace { + +[[nodiscard]] VkImageAspectFlags AspectMaskFromFormat(VideoCore::Surface::PixelFormat format) { + using VideoCore::Surface::SurfaceType; + switch (VideoCore::Surface::GetFormatType(format)) { + case SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case SurfaceType::Stencil: + return VK_IMAGE_ASPECT_STENCIL_BIT; + case SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } +} + +[[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) { + auto range = image_view.range; + if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) { + range.base.layer = 0; + range.extent.layers = 1; + } + return VkImageSubresourceRange{ + .aspectMask = AspectMaskFromFormat(image_view.format), + .baseMipLevel = static_cast(range.base.level), + .levelCount = static_cast(range.extent.levels), + .baseArrayLayer = static_cast(range.base.layer), + .layerCount = static_cast(range.extent.layers), + }; +} + struct PushConstants { std::array tex_scale; std::array tex_offset; @@ -417,6 +449,40 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo 0, barrier); } +void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) { + const VkImage image = image_view.ImageHandle(); + const VkImageSubresourceRange subresource_range = SubresourceRangeFromView(image_view); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([image, subresource_range](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + 
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = subresource_range, + }; + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + barrier); + }); +} + void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { const VkRenderPass render_pass = framebuffer->RenderPass(); const VkFramebuffer framebuffer_handle = framebuffer->Handle(); @@ -484,7 +550,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, BlitImageHelper::~BlitImageHelper() = default; -void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view, +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -496,10 +562,12 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkSampler sampler = is_linear ? 
*linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); + + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) { - // TODO: Barriers const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -538,7 +606,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView } void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, - VkImageView src_depth_view, VkImageView src_stencil_view, + ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -554,10 +622,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); + const VkImageView src_depth_view = src_image_view.DepthView(); + const VkImageView src_stencil_view = src_image_view.StencilView(); + + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, src_stencil_view, this](vk::CommandBuffer cmdbuf) { - // TODO: Barriers const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); @@ -692,6 +763,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const 
VkSampler sampler = *nearest_sampler; const VkExtent2D extent = GetConversionExtent(src_image_view); + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ @@ -717,7 +789,6 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); - // TODO: Barriers cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); @@ -737,6 +808,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer const VkSampler sampler = *nearest_sampler; const VkExtent2D extent = GetConversionExtent(src_image_view); + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, this](vk::CommandBuffer cmdbuf) { @@ -763,7 +835,6 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); - // TODO: Barriers cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 3d400be6a9..bdb8cce883 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -1,4 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: 
Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -43,7 +46,7 @@ public: StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); - void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); @@ -52,9 +55,9 @@ public: VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region, const Extent3D& src_size); - void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, - VkImageView src_stencil_view, const Region2D& dst_region, - const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, + void BlitDepthStencil(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + const Region2D& dst_region, const Region2D& src_region, + Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8d1d609a35..575651905e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1086,8 +1086,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { - blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D), - dst_region, src_region, filter, operation); + blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, 
filter, + operation); return; } ASSERT(src.format == dst.format); @@ -1106,8 +1106,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }(); if (!can_blit_depth_stencil) { UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); - blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), - dst_region, src_region, filter, operation); + blit_image_helper.BlitDepthStencil(dst_framebuffer, src, dst_region, src_region, + filter, operation); return; } } @@ -1968,18 +1968,17 @@ bool Image::BlitScaleHelper(bool scale_up) { blit_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent, scale_up); } - const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); - runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, + runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region, src_region, operation, BLIT_OPERATION); } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!blit_framebuffer) { blit_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent, scale_up); } - runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), - blit_view->StencilView(), dst_region, - src_region, operation, BLIT_OPERATION); + runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view, + dst_region, src_region, operation, + BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; From d18efb18fa7c3353e2718f08bb1e9e77be000820 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 1 Oct 2025 08:04:34 +0000 Subject: [PATCH 2/3] [core] use memcpy instead of hand rolling aligned cases Hand-rolling memcpy like this is always frowned upon because the compiler has more insight on what's going on (plus the code resolves to a worse version of itself in assembly). This removes some branches that are just straight up redundant.
May save stuff especially for systems without fastmem enabled. Signed-off-by: lizzie --- src/core/memory.cpp | 203 ++++++++------------------------------------ 1 file changed, 36 insertions(+), 167 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2583aae867..f84507d125 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common/assert.h" @@ -681,22 +682,17 @@ struct Memory::Impl { } } - [[nodiscard]] u8* GetPointerImpl(u64 vaddr, auto on_unmapped, auto on_rasterizer) const { + template + [[nodiscard]] u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const { // AARCH64 masks the upper 16 bit of all memory accesses - vaddr = vaddr & 0xffffffffffffULL; - if (!AddressSpaceContains(*current_page_table, vaddr, 1)) [[unlikely]] { - on_unmapped(); - return nullptr; - } else { + vaddr &= 0xffffffffffffULL; + if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] { // Avoid adding any extra logic to this fast-path block const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw(); - if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { + if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) [[likely]] { return reinterpret_cast(pointer + vaddr); } else { switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - on_unmapped(); - return nullptr; case Common::PageType::Memory: ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr); return nullptr; @@ -707,11 +703,18 @@ struct Memory::Impl { on_rasterizer(); return host_ptr; } + case Common::PageType::Unmapped: [[unlikely]] { + on_unmapped(); + return nullptr; + } default: UNREACHABLE(); } return nullptr; } + } else { + on_unmapped(); + return nullptr; } } @@ -729,172 +732,38 @@ struct Memory::Impl { GetInteger(vaddr), []() {}, []() 
{}); } - /** - * Reads a particular data type out of memory at the given virtual address. - * - * @param vaddr The virtual address to read the data type from. - * - * @tparam T The data type to read out of memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - * - * @returns The instance of T read from the specified virtual address. - */ + /// @brief Reads a particular data type out of memory at the given virtual address. + /// @param vaddr The virtual address to read the data type from. + /// @tparam T The data type to read out of memory. + /// @returns The instance of T read from the specified virtual address. template - T Read(Common::ProcessAddress vaddr) { - // Fast path for aligned reads of common sizes + inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit reads are always aligned - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*ptr); - } - return 0; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit reads - if ((addr & 1) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit reads - if ((addr & 3) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if 
constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit reads - if ((addr & 7) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } - - // Fall back to the general case for other types or unaligned access - T result = 0; - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr]() { + LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); + }, [&]() { + HandleRasterizerDownload(addr, sizeof(T)); + }); ptr) [[likely]] { + // It may be tempting to rewrite this particular section to use "reinterpret_cast"; + // afterall, it's trivially copyable so surely it can be copied ov- Alignment. + // Remember, alignment. memcpy() will deal with all the alignment extremely fast. + T result{}; std::memcpy(&result, ptr, sizeof(T)); + return result; } - return result; + return T{}; } - /** - * Writes a particular data type to memory at the given virtual address. - * - * @param vaddr The virtual address to write the data type to. - * - * @tparam T The data type to write to memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - */ + /// @brief Writes a particular data type to memory at the given virtual address. + /// @param vaddr The virtual address to write the data type to. + /// @tparam T The data type to write to memory. 
template - void Write(Common::ProcessAddress vaddr, const T data) { - // Fast path for aligned writes of common sizes + inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit writes are always aligned - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write8 @ 0x{:016X} = 0x{:02X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *ptr = static_cast(data); - } - return; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit writes - if ((addr & 1) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write16 @ 0x{:016X} = 0x{:04X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit writes - if ((addr & 3) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write32 @ 0x{:016X} = 0x{:08X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit writes - if ((addr & 7) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write64 @ 0x{:016X} = 0x{:016X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } - - // Fall back to the general case for other types or unaligned access - u8* const ptr = GetPointerImpl( - addr, - [addr, 
data]() { - LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, - addr, static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr, data]() { + LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data)); + }, [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); ptr) [[likely]] std::memcpy(ptr, &data, sizeof(T)); - } } template From bf4e271cb319ed605769a15d49406ca172baeea9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 2 Oct 2025 05:19:45 +0000 Subject: [PATCH 3/3] fix Signed-off-by: lizzie --- src/core/memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f84507d125..e1aa3473fc 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -737,7 +737,7 @@ struct Memory::Impl { /// @tparam T The data type to read out of memory. /// @returns The instance of T read from the specified virtual address. template - inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { + inline T Read(Common::ProcessAddress vaddr) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); @@ -758,7 +758,7 @@ struct Memory::Impl { /// @param vaddr The virtual address to write the data type to. /// @tparam T The data type to write to memory. template - inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { + inline void Write(Common::ProcessAddress vaddr, const T data) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr, data]() { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data));