From 744b6717ab7e8c78ff4a70be8f2a915b120ef55d Mon Sep 17 00:00:00 2001 From: JPikachu Date: Sat, 30 Aug 2025 01:40:55 +0100 Subject: [PATCH] [VK] [DO NOT MERGE] Force subgroup size to 32 and disable demote --- .../nvnflinger/buffer_queue_producer.cpp | 8 ++--- .../backend/spirv/emit_spirv_control_flow.cpp | 5 +++- .../backend/spirv/emit_spirv_warp.cpp | 30 ++++++++++++++----- .../renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++-- src/video_core/vulkan_common/vulkan_device.h | 1 + 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp index 1bb88a45fa..7ce453fac1 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2014 The Android Open Source Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -938,8 +941,6 @@ void BufferQueueProducer::Transact(u32 code, std::span parcel_data, break; } case TransactionId::GetBufferHistory: { - LOG_WARNING(Service_Nvnflinger, "called, transaction=GetBufferHistory"); - std::scoped_lock lock{core->mutex}; auto buffer_history_count = std::min(parcel_in.Read(), (s32)core->history.size()); @@ -955,9 +956,6 @@ void BufferQueueProducer::Transact(u32 code, std::span parcel_data, auto pos = position; for (int i = 0; i < buffer_history_count; i++) { info[i] = core->history[(pos - i) % core->history.size()]; - LOG_WARNING(Service_Nvnflinger, "frame_number={}, state={}", - core->history[(pos - i) % core->history.size()].frame_number, - (u32)core->history[(pos - i) % core->history.size()].state); pos--; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 7ad0b08ac3..221dc10bd9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -12,7 +15,7 @@ void EmitJoin(EmitContext&) { void EmitDemoteToHelperInvocation(EmitContext& ctx) { if (ctx.profile.support_demote_to_helper_invocation) { - ctx.OpDemoteToHelperInvocationEXT(); + //ctx.OpDemoteToHelperInvocation(); } else { const Id kill_label{ctx.OpLabel()}; const Id impossible_label{ctx.OpLabel()}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 77ff8c5731..121aecbe2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -37,6 +40,7 @@ Id WarpExtract(EmitContext& ctx, Id value) { Id LoadMask(EmitContext& ctx, Id mask) { const Id value{ctx.OpLoad(ctx.U32[4], mask)}; + return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); if (!ctx.profile.warp_size_potentially_larger_than_guest) { return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); } @@ -74,6 +78,7 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { } Id AddPartitionBase(EmitContext& ctx, Id thread_id) { + return thread_id; const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))}; const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))}; return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base); @@ -82,6 +87,7 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) { Id EmitLaneId(EmitContext& ctx) { const Id id{GetThreadId(ctx)}; + return id; if (!ctx.profile.warp_size_potentially_larger_than_guest) { return id; } @@ -166,13 +172,15 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)}; + const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)}; if (ctx.profile.warp_size_potentially_larger_than_guest) { src_thread_id = AddPartitionBase(ctx, src_thread_id); } - SetInBoundsFlag(inst, in_range); - return SelectValue(ctx, in_range, value, src_thread_id); + SetInBoundsFlag(inst, is_value_being_shuffled); + return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id); } Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, @@ -181,13 +189,15 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)}; + const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)}; if (ctx.profile.warp_size_potentially_larger_than_guest) { src_thread_id = AddPartitionBase(ctx, src_thread_id); } - SetInBoundsFlag(inst, in_range); - return SelectValue(ctx, in_range, value, src_thread_id); + SetInBoundsFlag(inst, is_value_being_shuffled); + return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id); } Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, @@ -196,13 +206,15 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)}; + const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)}; if (ctx.profile.warp_size_potentially_larger_than_guest) { src_thread_id = AddPartitionBase(ctx, src_thread_id); } - SetInBoundsFlag(inst, in_range); - return SelectValue(ctx, in_range, value, src_thread_id); + SetInBoundsFlag(inst, is_value_being_shuffled); + return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id); } Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, @@ -211,13 +223,15 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)}; + const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)}; if (ctx.profile.warp_size_potentially_larger_than_guest) { src_thread_id = AddPartitionBase(ctx, src_thread_id); } - SetInBoundsFlag(inst, in_range); - return SelectValue(ctx, in_range, value, src_thread_id); + SetInBoundsFlag(inst, is_value_being_shuffled); + return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id); } Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0226eb2c14..e853a13e05 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -894,11 +894,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pName = "main", .pSpecializationInfo = nullptr, }); - /* - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + if (device.IsGuestWarpSizeSupported(stage_ci.stage) && (stage_infos[stage].uses_subgroup_invocation_id || stage_infos[stage].uses_subgroup_shuffles)) { + LOG_INFO(Render_Vulkan, "Forcing subgroupsize to 32"); stage_ci.pNext = &subgroup_size_ci; } - */ } VkPipelineCreateFlags flags{}; if (device.IsKhrPipelineExecutablePropertiesEnabled() && Settings::values.renderer_debug.GetValue()) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9b78f2e599..95bd5d20e7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -356,6 +356,7 @@ public: /// Returns true if the device can be forced to use the guest warp size. bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { + return properties.subgroup_size_control.minSubgroupSize <= 32; return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; }