[VK] [DO NOT MERGE] Force subgroup size to 32 and disable demote
All checks were successful
eden-license / license-header (pull_request) Successful in 28s
This commit is contained in:
parent 57fbdd516e
commit 744b6717ab
5 changed files with 32 additions and 17 deletions
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-FileCopyrightText: Copyright 2014 The Android Open Source Project
|
// SPDX-FileCopyrightText: Copyright 2014 The Android Open Source Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
@ -938,8 +941,6 @@ void BufferQueueProducer::Transact(u32 code, std::span<const u8> parcel_data,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case TransactionId::GetBufferHistory: {
|
case TransactionId::GetBufferHistory: {
|
||||||
LOG_WARNING(Service_Nvnflinger, "called, transaction=GetBufferHistory");
|
|
||||||
|
|
||||||
std::scoped_lock lock{core->mutex};
|
std::scoped_lock lock{core->mutex};
|
||||||
|
|
||||||
auto buffer_history_count = std::min(parcel_in.Read<s32>(), (s32)core->history.size());
|
auto buffer_history_count = std::min(parcel_in.Read<s32>(), (s32)core->history.size());
|
||||||
|
@ -955,9 +956,6 @@ void BufferQueueProducer::Transact(u32 code, std::span<const u8> parcel_data,
|
||||||
auto pos = position;
|
auto pos = position;
|
||||||
for (int i = 0; i < buffer_history_count; i++) {
|
for (int i = 0; i < buffer_history_count; i++) {
|
||||||
info[i] = core->history[(pos - i) % core->history.size()];
|
info[i] = core->history[(pos - i) % core->history.size()];
|
||||||
LOG_WARNING(Service_Nvnflinger, "frame_number={}, state={}",
|
|
||||||
core->history[(pos - i) % core->history.size()].frame_number,
|
|
||||||
(u32)core->history[(pos - i) % core->history.size()].state);
|
|
||||||
pos--;
|
pos--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
@ -12,7 +15,7 @@ void EmitJoin(EmitContext&) {
|
||||||
|
|
||||||
void EmitDemoteToHelperInvocation(EmitContext& ctx) {
|
void EmitDemoteToHelperInvocation(EmitContext& ctx) {
|
||||||
if (ctx.profile.support_demote_to_helper_invocation) {
|
if (ctx.profile.support_demote_to_helper_invocation) {
|
||||||
ctx.OpDemoteToHelperInvocationEXT();
|
//ctx.OpDemoteToHelperInvocation();
|
||||||
} else {
|
} else {
|
||||||
const Id kill_label{ctx.OpLabel()};
|
const Id kill_label{ctx.OpLabel()};
|
||||||
const Id impossible_label{ctx.OpLabel()};
|
const Id impossible_label{ctx.OpLabel()};
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
@ -37,6 +40,7 @@ Id WarpExtract(EmitContext& ctx, Id value) {
|
||||||
|
|
||||||
Id LoadMask(EmitContext& ctx, Id mask) {
|
Id LoadMask(EmitContext& ctx, Id mask) {
|
||||||
const Id value{ctx.OpLoad(ctx.U32[4], mask)};
|
const Id value{ctx.OpLoad(ctx.U32[4], mask)};
|
||||||
|
return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
|
||||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
|
return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
|
||||||
}
|
}
|
||||||
|
@ -74,6 +78,7 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||||
|
return thread_id;
|
||||||
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
|
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
|
||||||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||||
|
@ -82,6 +87,7 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||||
|
|
||||||
Id EmitLaneId(EmitContext& ctx) {
|
Id EmitLaneId(EmitContext& ctx) {
|
||||||
const Id id{GetThreadId(ctx)};
|
const Id id{GetThreadId(ctx)};
|
||||||
|
return id;
|
||||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
@ -166,13 +172,15 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
|
||||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
|
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
|
||||||
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||||
|
const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)};
|
||||||
|
const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)};
|
||||||
|
|
||||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetInBoundsFlag(inst, in_range);
|
SetInBoundsFlag(inst, is_value_being_shuffled);
|
||||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||||
|
@ -181,13 +189,15 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||||
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||||
|
const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)};
|
||||||
|
const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)};
|
||||||
|
|
||||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetInBoundsFlag(inst, in_range);
|
SetInBoundsFlag(inst, is_value_being_shuffled);
|
||||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||||
|
@ -196,13 +206,15 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
|
||||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||||
|
const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)};
|
||||||
|
const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)};
|
||||||
|
|
||||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetInBoundsFlag(inst, in_range);
|
SetInBoundsFlag(inst, is_value_being_shuffled);
|
||||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||||
|
@ -211,13 +223,15 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id
|
||||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||||
|
const Id same_lane{ctx.OpINotEqual(ctx.U1, src_thread_id, thread_id)};
|
||||||
|
const Id is_value_being_shuffled{ctx.OpLogicalAnd(ctx.U1, in_range, same_lane)};
|
||||||
|
|
||||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
SetInBoundsFlag(inst, in_range);
|
SetInBoundsFlag(inst, is_value_being_shuffled);
|
||||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
return SelectValue(ctx, is_value_being_shuffled, value, src_thread_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
|
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
|
||||||
|
|
|
@ -894,11 +894,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||||
.pName = "main",
|
.pName = "main",
|
||||||
.pSpecializationInfo = nullptr,
|
.pSpecializationInfo = nullptr,
|
||||||
});
|
});
|
||||||
/*
|
if (device.IsGuestWarpSizeSupported(stage_ci.stage) && (stage_infos[stage].uses_subgroup_invocation_id || stage_infos[stage].uses_subgroup_shuffles)) {
|
||||||
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
|
LOG_INFO(Render_Vulkan, "Forcing subgroupsize to 32");
|
||||||
stage_ci.pNext = &subgroup_size_ci;
|
stage_ci.pNext = &subgroup_size_ci;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
VkPipelineCreateFlags flags{};
|
VkPipelineCreateFlags flags{};
|
||||||
if (device.IsKhrPipelineExecutablePropertiesEnabled() && Settings::values.renderer_debug.GetValue()) {
|
if (device.IsKhrPipelineExecutablePropertiesEnabled() && Settings::values.renderer_debug.GetValue()) {
|
||||||
|
|
|
@ -356,6 +356,7 @@ public:
|
||||||
|
|
||||||
/// Returns true if the device can be forced to use the guest warp size.
|
/// Returns true if the device can be forced to use the guest warp size.
|
||||||
bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
|
bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
|
||||||
|
return properties.subgroup_size_control.minSubgroupSize <= 32;
|
||||||
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
|
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue