From 622725c38383bd56bc4f8c94464dd9d0ac6e024c Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 08:30:12 +0000 Subject: [PATCH 1/4] [dynarmic] backport WAITPKG based spinlocks Signed-off-by: lizzie --- .../dynarmic/backend/x64/block_of_code.cpp | 2 + .../backend/x64/emit_x64_memory.cpp.inc | 3 +- .../dynarmic/backend/x64/emit_x64_memory.h | 5 ++- .../src/dynarmic/backend/x64/host_feature.h | 3 +- .../src/dynarmic/common/spin_lock_x64.cpp | 37 +++++++++++++++++-- .../src/dynarmic/common/spin_lock_x64.h | 5 ++- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..4a8de6475e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -188,6 +188,8 @@ HostFeature GetHostFeatures() { features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; + if (cpu_info.has(Cpu::tWAITPKG)) + features |= HostFeature::WAITPKG; if (cpu_info.has(Cpu::tBMI2)) { // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..36a2d40de6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; - EmitExclusiveLock(code, conf, tmp, eax); + EmitExclusiveLock(code, conf, tmp, tmp2); SharedLabel end = GenSharedLabel(); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..c363ea1b6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p } code.mov(pointer, mcl::bit_cast(GetExclusiveMonitorLockPointer(conf.global_monitor))); - EmitSpinLockLock(code, pointer, tmp); + EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG)); } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h index 7246ed18d4..34dca971cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h +++ b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h @@ -35,9 +35,10 @@ enum class HostFeature : u64 { BMI2 = 1ULL << 19, LZCNT = 1ULL << 20, GFNI = 1ULL << 21, + WAITPKG = 1ULL << 22, // Zen-based BMI2 - FastBMI2 = 1ULL << 22, + FastBMI2 = 1ULL << 23, // Orthographic AVX512 features on 128 and 256 vectors AVX512_Ortho = AVX512F | AVX512VL, diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index da50179de9..5307672bfe 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) { + // TODO: this is because we lack regalloc - so better to be safe :( + if (waitpkg) { + code.push(Xbyak::util::eax); + code.push(Xbyak::util::ebx); + code.push(Xbyak::util::edx); + } Xbyak::Label start, loop; - code.jmp(start, code.T_NEAR); code.L(loop); - code.pause(); + if (waitpkg) { + // TODO: This clobbers EAX and EDX did we tell the regalloc? + // ARM ptr for address-monitoring + code.umonitor(ptr); + // tmp.bit[0] = 0: C0.1 | Slow Wakup | Better Savings + // tmp.bit[0] = 1: C0.2 | Fast Wakup | Lesser Savings + // edx:eax is implicitly used as a 64-bit deadline timestamp + // Use the maximum so that we use the operating system's maximum + // allowed wait time within the IA32_UMWAIT_CONTROL register + // Enter power state designated by tmp and wait for a write to lock_ptr + code.mov(Xbyak::util::eax, 0xFFFFFFFF); + code.mov(Xbyak::util::edx, Xbyak::util::eax); + // TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it... + code.mov(Xbyak::util::ebx, 1); + code.umwait(Xbyak::util::ebx); + // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write + // CF == 0 if we exited the wait for any other reason + } else { + code.pause(); + } code.L(start); code.mov(tmp, 1); /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); code.jnz(loop, code.T_NEAR); + if (waitpkg) { + code.pop(Xbyak::util::edx); + code.pop(Xbyak::util::ebx); + code.pop(Xbyak::util::eax); + } } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() { code.align(); lock = code.getCurr(); - EmitSpinLockLock(code, ABI_PARAM1, code.eax); + EmitSpinLockLock(code, ABI_PARAM1, code.eax, false); code.ret(); code.align(); diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h index df6a3d7407..df6860e2f2 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -9,7 +12,7 @@ namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); } // namespace Dynarmic From 268918aeced185684d246a2eaaf8ffce3fb795b1 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 4 Oct 2025 23:58:08 +0200 Subject: [PATCH 2/4] [vk] Implement Shader Read Barrier (#2671) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding the shader read barrier keeps every render/compute/transfer write visible before the image is sampled, so it prevents the “read-before-writes-finish” hazards. Without it you can get random stale frames, flickering post process passes, partially updated HUD textures, and corrupted depth-to-color conversions especially in scenes that render into an offscreen image and immediately feed that image to a shader (reflections, bloom, dynamic resolution, depth visualizers, etc.). This fix makes those R2T chains deterministic again across all Vulkan drivers. Co-authored-by: Ribbit Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2671 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: Ribbit Co-committed-by: Ribbit --- src/video_core/engines/maxwell_dma.cpp | 2 + src/video_core/renderer_vulkan/blit_image.cpp | 83 +++++++++++++++++-- src/video_core/renderer_vulkan/blit_image.h | 13 +-- .../renderer_vulkan/vk_texture_cache.cpp | 17 ++-- 4 files changed, 95 insertions(+), 20 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 52ca9bbdb6..e2aa6c7e49 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -156,6 +156,8 @@ void MaxwellDMA::Launch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + + u32 bytes_per_pixel = 1; DMA::ImageOperand src_operand; src_operand.bytes_per_pixel = bytes_per_pixel; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 7bfcd6503b..68543bdd48 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -46,6 +46,38 @@ namespace Vulkan { using VideoCommon::ImageViewType; namespace { + +[[nodiscard]] VkImageAspectFlags AspectMaskFromFormat(VideoCore::Surface::PixelFormat format) { + using VideoCore::Surface::SurfaceType; + switch (VideoCore::Surface::GetFormatType(format)) { + case SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case SurfaceType::Stencil: + return VK_IMAGE_ASPECT_STENCIL_BIT; + case SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } +} + +[[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) { + auto range = image_view.range; + if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) { + range.base.layer = 0; + range.extent.layers = 1; + } + return VkImageSubresourceRange{ + .aspectMask = AspectMaskFromFormat(image_view.format), + .baseMipLevel = static_cast(range.base.level), + .levelCount = static_cast(range.extent.levels), + .baseArrayLayer = static_cast(range.base.layer), + .layerCount = static_cast(range.extent.layers), + }; +} + struct PushConstants { std::array tex_scale; std::array tex_offset; @@ -417,6 +449,40 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo 0, barrier); } +void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) { + const VkImage image = image_view.ImageHandle(); + const VkImageSubresourceRange subresource_range = SubresourceRangeFromView(image_view); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([image, subresource_range](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = subresource_range, + }; + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + barrier); + }); +} + void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { const VkRenderPass render_pass = framebuffer->RenderPass(); const VkFramebuffer framebuffer_handle = framebuffer->Handle(); @@ -484,7 +550,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, BlitImageHelper::~BlitImageHelper() = default; -void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view, +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -496,10 +562,12 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); + + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) { - // TODO: Barriers const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -538,7 +606,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView } void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, - VkImageView src_depth_view, VkImageView src_stencil_view, + ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -554,10 +622,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); + const VkImageView src_depth_view = src_image_view.DepthView(); + const VkImageView src_stencil_view = src_image_view.StencilView(); + + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, src_stencil_view, this](vk::CommandBuffer cmdbuf) { - // TODO: Barriers const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); @@ -692,6 +763,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkSampler sampler = *nearest_sampler; const VkExtent2D extent = GetConversionExtent(src_image_view); + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ @@ -717,7 +789,6 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); - // TODO: Barriers cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); @@ -737,6 +808,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer const VkSampler sampler = *nearest_sampler; const VkExtent2D extent = GetConversionExtent(src_image_view); + RecordShaderReadBarrier(scheduler, src_image_view); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, this](vk::CommandBuffer cmdbuf) { @@ -763,7 +835,6 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); - // TODO: Barriers cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 3d400be6a9..bdb8cce883 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -1,4 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -43,7 +46,7 @@ public: StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); - void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); @@ -52,9 +55,9 @@ public: VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region, const Extent3D& src_size); - void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, - VkImageView src_stencil_view, const Region2D& dst_region, - const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, + void BlitDepthStencil(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + const Region2D& dst_region, const Region2D& src_region, + Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8d1d609a35..575651905e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1086,8 +1086,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { - blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D), - dst_region, src_region, filter, operation); + blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, + operation); return; } ASSERT(src.format == dst.format); @@ -1106,8 +1106,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }(); if (!can_blit_depth_stencil) { UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); - blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), - dst_region, src_region, filter, operation); + blit_image_helper.BlitDepthStencil(dst_framebuffer, src, dst_region, src_region, + filter, operation); return; } } @@ -1968,18 +1968,17 @@ bool Image::BlitScaleHelper(bool scale_up) { blit_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent, scale_up); } - const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); - runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, + runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region, src_region, operation, BLIT_OPERATION); } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!blit_framebuffer) { blit_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent, scale_up); } - runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), - blit_view->StencilView(), dst_region, - src_region, operation, BLIT_OPERATION); + runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view, + dst_region, src_region, operation, + BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; From 1a13e79c3d8619ff302d9c77eacf1a35c2210b66 Mon Sep 17 00:00:00 2001 From: crueter Date: Sun, 5 Oct 2025 00:00:52 +0200 Subject: [PATCH 3/4] [cmake] fix video_core and tests comp errors on Windows (#2631) did not link to video_core thus did not properly propagate the GPUOpen target thus failed to find vk_mem_alloc also msvc sucks Signed-off-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2631 Reviewed-by: MaranBr --- src/tests/CMakeLists.txt | 5 ++++- src/video_core/vulkan_common/vma.h | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 1e158f3759..c1fdd374ef 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + # SPDX-FileCopyrightText: 2018 yuzu Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later @@ -21,7 +24,7 @@ add_executable(tests create_target_directory_groups(tests) -target_link_libraries(tests PRIVATE common core input_common) +target_link_libraries(tests PRIVATE common core input_common video_core) target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} Catch2::Catch2WithMain Threads::Threads) add_test(NAME tests COMMAND tests) diff --git a/src/video_core/vulkan_common/vma.h b/src/video_core/vulkan_common/vma.h index 911c1114b2..e022b2bf7d 100644 --- a/src/video_core/vulkan_common/vma.h +++ b/src/video_core/vulkan_common/vma.h @@ -10,4 +10,12 @@ #define VMA_STATIC_VULKAN_FUNCTIONS 0 #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4189 ) +#endif #include "vk_mem_alloc.h" + +#ifdef _MSC_VER +#pragma warning( pop ) +#endif From d7558be54327dba1de9addd2f9d8e0808924267b Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 08:30:12 +0000 Subject: [PATCH 4/4] [dynarmic] backport WAITPKG based spinlocks Signed-off-by: lizzie --- .../dynarmic/backend/x64/block_of_code.cpp | 2 + .../backend/x64/emit_x64_memory.cpp.inc | 3 +- .../dynarmic/backend/x64/emit_x64_memory.h | 5 ++- .../src/dynarmic/backend/x64/host_feature.h | 3 +- .../src/dynarmic/common/spin_lock_x64.cpp | 37 +++++++++++++++++-- .../src/dynarmic/common/spin_lock_x64.h | 5 ++- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..4a8de6475e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -188,6 +188,8 @@ HostFeature GetHostFeatures() { features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; + if (cpu_info.has(Cpu::tWAITPKG)) + features |= HostFeature::WAITPKG; if (cpu_info.has(Cpu::tBMI2)) { // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..36a2d40de6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; - EmitExclusiveLock(code, conf, tmp, eax); + EmitExclusiveLock(code, conf, tmp, tmp2); SharedLabel end = GenSharedLabel(); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..c363ea1b6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p } code.mov(pointer, mcl::bit_cast(GetExclusiveMonitorLockPointer(conf.global_monitor))); - EmitSpinLockLock(code, pointer, tmp); + EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG)); } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h index 7246ed18d4..34dca971cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h +++ b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h @@ -35,9 +35,10 @@ enum class HostFeature : u64 { BMI2 = 1ULL << 19, LZCNT = 1ULL << 20, GFNI = 1ULL << 21, + WAITPKG = 1ULL << 22, // Zen-based BMI2 - FastBMI2 = 1ULL << 22, + FastBMI2 = 1ULL << 23, // Orthographic AVX512 features on 128 and 256 vectors AVX512_Ortho = AVX512F | AVX512VL, diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index da50179de9..5307672bfe 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) { + // TODO: this is because we lack regalloc - so better to be safe :( + if (waitpkg) { + code.push(Xbyak::util::eax); + code.push(Xbyak::util::ebx); + code.push(Xbyak::util::edx); + } Xbyak::Label start, loop; - code.jmp(start, code.T_NEAR); code.L(loop); - code.pause(); + if (waitpkg) { + // TODO: This clobbers EAX and EDX did we tell the regalloc? + // ARM ptr for address-monitoring + code.umonitor(ptr); + // tmp.bit[0] = 0: C0.1 | Slow Wakup | Better Savings + // tmp.bit[0] = 1: C0.2 | Fast Wakup | Lesser Savings + // edx:eax is implicitly used as a 64-bit deadline timestamp + // Use the maximum so that we use the operating system's maximum + // allowed wait time within the IA32_UMWAIT_CONTROL register + // Enter power state designated by tmp and wait for a write to lock_ptr + code.mov(Xbyak::util::eax, 0xFFFFFFFF); + code.mov(Xbyak::util::edx, Xbyak::util::eax); + // TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it... + code.mov(Xbyak::util::ebx, 1); + code.umwait(Xbyak::util::ebx); + // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write + // CF == 0 if we exited the wait for any other reason + } else { + code.pause(); + } code.L(start); code.mov(tmp, 1); /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); code.jnz(loop, code.T_NEAR); + if (waitpkg) { + code.pop(Xbyak::util::edx); + code.pop(Xbyak::util::ebx); + code.pop(Xbyak::util::eax); + } } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() { code.align(); lock = code.getCurr(); - EmitSpinLockLock(code, ABI_PARAM1, code.eax); + EmitSpinLockLock(code, ABI_PARAM1, code.eax, false); code.ret(); code.align(); diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h index df6a3d7407..df6860e2f2 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -9,7 +12,7 @@ namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); } // namespace Dynarmic