From 0a46227cb427ce4dc28d1803d58dfdfdebfd0661 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 13:31:53 -0700 Subject: [PATCH 01/14] [vk] Implement D16 to R16 Texture Conversion --- .../renderer_vulkan/vk_texture_cache.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8d1d609a35..7327c90c12 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1229,6 +1229,18 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); + } + break; + + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::A8B8G8R8_UNORM: case PixelFormat::A8B8G8R8_SNORM: case PixelFormat::A8B8G8R8_SINT: @@ -1270,7 +1282,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im case PixelFormat::R32G32_SINT: case PixelFormat::R32_FLOAT: case PixelFormat::R16_FLOAT: - case PixelFormat::R16_UNORM: case PixelFormat::R16_SNORM: case PixelFormat::R16_UINT: case PixelFormat::R16_SINT: @@ -1325,7 +1336,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im case PixelFormat::ASTC_2D_6X5_SRGB: case PixelFormat::E5B9G9R9_FLOAT: case PixelFormat::D32_FLOAT: - case PixelFormat::D16_UNORM: case PixelFormat::X8_D24_UNORM: case PixelFormat::S8_UINT: case PixelFormat::S8_UINT_D24_UNORM: From 1e74f3c015b34250c1676796031d3c60a4cbff73 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 19:12:32 -0700 Subject: [PATCH 02/14] [vk, SPIR-V] Various changes in an attempt to fix MH on Android --- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 145 +++++++++++++++--- .../backend/spirv/spirv_emit_context.cpp | 31 +++- .../backend/spirv/spirv_emit_context.h | 2 + .../ir_opt/collect_shader_info_pass.cpp | 15 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 49 +++++- src/shader_recompiler/shader_info.h | 15 +- .../renderer_vulkan/pipeline_helper.h | 17 +- .../renderer_vulkan/vk_texture_cache.cpp | 38 +++-- .../renderer_vulkan/vk_texture_cache.h | 17 +- 10 files changed, 287 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 313a1deb30..ba23fb8d34 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -409,7 +409,7 @@ void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { } void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { - if (info.uses_sampled_1d) { + if (info.uses_sampled_1d || info.uses_image_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 945cdb42bc..59e96ae06d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include 
"shader_recompiler/backend/spirv/emit_spirv_instructions.h" @@ -185,6 +186,84 @@ private: spv::ImageOperandsMask mask{}; }; + +Id SampledVectorType(EmitContext& ctx, TextureComponentType component_type) { + switch (component_type) { + case TextureComponentType::Float: + return ctx.F32[4]; + case TextureComponentType::Sint: + return ctx.S32[4]; + case TextureComponentType::Uint: + return ctx.U32[4]; + } + throw LogicError("Unhandled texture component type {}", static_cast(component_type)); +} + +bool ExpectsFloatResult(const IR::Inst* inst) { + switch (inst->Type()) { + case IR::Type::F32: + case IR::Type::F32x2: + case IR::Type::F32x3: + case IR::Type::F32x4: + return true; + default: + return false; + } +} + +Id MakeFloatVector(EmitContext& ctx, float value) { + const Id scalar{ctx.Const(value)}; + return ctx.ConstantComposite(ctx.F32[4], scalar, scalar, scalar, scalar); +} + +Id NormalizeUnsignedSample(EmitContext& ctx, u32 component_bits, Id value) { + if (component_bits == 0) { + return value; + } + const double max_value = std::exp2(static_cast(component_bits)) - 1.0; + if (!(max_value > 0.0)) { + return value; + } + const float inv_max = static_cast(1.0 / max_value); + return ctx.OpFMul(ctx.F32[4], value, MakeFloatVector(ctx, inv_max)); +} + +Id NormalizeSignedSample(EmitContext& ctx, u32 component_bits, Id value) { + if (component_bits == 0) { + return value; + } + const double positive_max = component_bits > 0 ? std::exp2(static_cast(component_bits - 1)) - 1.0 : 0.0; + if (!(positive_max > 0.0)) { + return ctx.OpFClamp(ctx.F32[4], value, MakeFloatVector(ctx, -1.0f), MakeFloatVector(ctx, 1.0f)); + } + const float inv_pos = static_cast(1.0 / positive_max); + const Id scaled{ctx.OpFMul(ctx.F32[4], value, MakeFloatVector(ctx, inv_pos))}; + return ctx.OpFClamp(ctx.F32[4], scaled, MakeFloatVector(ctx, -1.0f), MakeFloatVector(ctx, 1.0f)); +} + +Id ConvertSampleToExpectedType(EmitContext& ctx, const IR::Inst* inst, + const TextureDefinition* texture_def, Id value) { + if (!texture_def || texture_def->component_type == TextureComponentType::Float) { + return value; + } + if (!ExpectsFloatResult(inst)) { + return value; + } + switch (texture_def->component_type) { + case TextureComponentType::Sint: { + const Id as_float{ctx.OpConvertSToF(ctx.F32[4], value)}; + return NormalizeSignedSample(ctx, texture_def->component_bit_size, as_float); + } + case TextureComponentType::Uint: { + const Id as_float{ctx.OpConvertUToF(ctx.F32[4], value)}; + return NormalizeUnsignedSample(ctx, texture_def->component_bit_size, as_float); + } + case TextureComponentType::Float: + break; + } + return value; +} + Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; if (def.count > 1) { @@ -449,31 +528,39 @@ Id EmitBoundImageWrite(EmitContext&) { Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? 
SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; + Id sample{}; if (ctx.stage == Stage::Fragment) { const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); - return Emit(&EmitContext::OpImageSparseSampleImplicitLod, - &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + sample = Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } else { - // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as - // if the lod was explicitly zero. This may change on Turing with implicit compute - // derivatives const Id lod{ctx.Const(0.0f)}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(ctx, false, true, false, lod, offset); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -509,13 +596,19 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? 
SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(ctx, offset, offset2); if (ctx.profile.need_gather_subpixel_offset) { coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); } - return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), - operands.MaskOptional(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + result_type, Texture(ctx, info, index), coords, + ctx.Const(info.gather_component), operands.MaskOptional(), + operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -538,12 +631,17 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c lod = Id{}; } if (Sirit::ValidId(ms)) { - // This image is multisampled, lod must be implicit lod = Id{}; } + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(lod, ms); - return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, + result_type, TextureImage(ctx, info, index), coords, + operands.MaskOptional(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, @@ -588,14 +686,19 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivatives, const IR::Value& offset, Id lod_clamp) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const auto operands = info.num_derivatives == 3 ? 
ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, ctx.Def(offset), {}, lod_clamp) : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, lod_clamp); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4c3e101433..3b7094eb6f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -28,9 +28,23 @@ enum class Operation { FPMax, }; +Id ComponentTypeId(EmitContext& ctx, TextureComponentType component_type) { + switch (component_type) { + case TextureComponentType::Float: + return ctx.F32[1]; + case TextureComponentType::Sint: + return ctx.S32[1]; + case TextureComponentType::Uint: + return ctx.U32[1]; + } + throw LogicError("Unhandled texture component type {}", static_cast(component_type)); +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; - const Id type{ctx.F32[1]}; + const TextureComponentType component_type = desc.is_depth ? TextureComponentType::Float + : desc.component_type; + const Id type{ComponentTypeId(ctx, component_type)}; const bool depth{desc.is_depth}; const bool ms{desc.is_multisample}; switch (desc.type) { @@ -1374,6 +1388,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .image_type = image_type, .count = desc.count, .is_multisample = desc.is_multisample, + .component_type = desc.component_type, + .component_bit_size = desc.component_bit_size, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1417,6 +1433,12 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Info& info{program.info}; const VaryingState loads{info.loads.mask | info.passthrough.mask}; + const auto decorate_flat_if_fragment = [this](Id id) { + if (stage == Stage::Fragment) { + Decorate(id, spv::Decoration::Flat); + } + }; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1432,16 +1454,22 @@ void EmitContext::DefineInputs(const IR::Program& program) { } if (info.uses_sample_id) { sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + decorate_flat_if_fragment(sample_id); } if (info.uses_is_helper_invocation) { is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } if (info.uses_subgroup_mask) { subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + decorate_flat_if_fragment(subgroup_mask_eq); subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + decorate_flat_if_fragment(subgroup_mask_lt); subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + decorate_flat_if_fragment(subgroup_mask_le); subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + 
decorate_flat_if_fragment(subgroup_mask_gt); subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + decorate_flat_if_fragment(subgroup_mask_ge); } if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || (profile.warp_size_potentially_larger_than_guest && @@ -1461,6 +1489,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { } if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + decorate_flat_if_fragment(primitive_id); } if (loads[IR::Attribute::Layer]) { AddCapability(spv::Capability::Geometry); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 66cdb1d3db..0606f51f65 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -38,6 +38,8 @@ struct TextureDefinition { Id image_type; u32 count; bool is_multisample; + TextureComponentType component_type{TextureComponentType::Float}; + u32 component_bit_size{}; }; struct TextureBufferDefinition { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 2bfa3227a8..8bb5cc4b33 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { namespace { +constexpr bool IsOneDimensional(TextureType type) { + return type == TextureType::Color1D || type == TextureType::ColorArray1D; +} + void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { if (count != 1) { throw NotImplementedException("Constant buffer descriptor indexing"); @@ -548,7 +552,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryDimensions: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sampled_1d |= IsOneDimensional(type); info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; @@ -560,7 +564,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: { const auto flags{inst.Flags()}; const TextureType type{flags.type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sampled_1d |= IsOneDimensional(type); info.uses_shadow_lod |= flags.is_depth != 0; info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; @@ -569,6 +573,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageRead: { const auto flags{inst.Flags()}; info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_image_1d |= IsOneDimensional(flags.type); info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; @@ -576,6 +581,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageWrite: { const auto flags{inst.Flags()}; info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_1d |= IsOneDimensional(flags.type); info.uses_image_buffers |= flags.type == TextureType::Buffer; break; } @@ -761,9 +767,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageAtomicAnd32: case 
IR::Opcode::ImageAtomicOr32: case IR::Opcode::ImageAtomicXor32: - case IR::Opcode::ImageAtomicExchange32: + case IR::Opcode::ImageAtomicExchange32: { + const auto flags{inst.Flags()}; info.uses_atomic_image_u32 = true; + info.uses_image_1d |= IsOneDimensional(flags.type); break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 7ff1961172..5d0056ccbd 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,7 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" +#include "video_core/surface.h" namespace Shader::Optimization { namespace { @@ -248,11 +249,19 @@ bool IsTextureInstruction(const IR::Inst& inst) { } static inline TexturePixelFormat ReadTexturePixelFormatCached(Environment& env, const ConstBufferAddr& cbuf) { - return env.ReadTexturePixelFormat(GetTextureHandleCached(env, cbuf)); + const u32 handle = GetTextureHandleCached(env, cbuf); + if (handle == 0) { + return TexturePixelFormat::A8B8G8R8_UNORM; + } + return env.ReadTexturePixelFormat(handle); } static inline bool IsTexturePixelFormatIntegerCached(Environment& env, const ConstBufferAddr& cbuf) { - return env.IsTexturePixelFormatInteger(GetTextureHandleCached(env, cbuf)); + const u32 handle = GetTextureHandleCached(env, cbuf); + if (handle == 0) { + return false; + } + return env.IsTexturePixelFormatInteger(handle); } @@ -524,6 +533,8 @@ public: const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.is_depth == existing.is_depth && desc.has_secondary == existing.has_secondary && + desc.component_type == existing.component_type && + desc.component_bit_size == existing.component_bit_size && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.shift_left == existing.shift_left && @@ -598,6 +609,35 @@ bool IsPixelFormatSNorm(TexturePixelFormat pixel_format) { } } +TextureComponentType PixelFormatComponentType(TexturePixelFormat pixel_format, bool is_integer) { + if (!is_integer) { + return TextureComponentType::Float; + } + + switch (pixel_format) { + case TexturePixelFormat::A8B8G8R8_SINT: + case TexturePixelFormat::R8_SINT: + case TexturePixelFormat::R16G16B16A16_SINT: + case TexturePixelFormat::R32G32B32A32_SINT: + case TexturePixelFormat::R32G32_SINT: + case TexturePixelFormat::R16_SINT: + case TexturePixelFormat::R16G16_SINT: + case TexturePixelFormat::R8G8_SINT: + case TexturePixelFormat::R32_SINT: + return TextureComponentType::Sint; + default: + return TextureComponentType::Uint; + } +} + +u8 PixelFormatIntegerComponentBits(TexturePixelFormat pixel_format, bool is_integer) { + if (!is_integer) { + return 0; + } + return static_cast(VideoCore::Surface::PixelComponentSizeBitsInteger( + static_cast(pixel_format))); +} + void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) { const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -698,6 +738,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo default: break; } + const TexturePixelFormat pixel_format{ReadTexturePixelFormatCached(env, cbuf)}; + const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)}; u32 index; switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: @@ -718,7 
+760,6 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; - const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, @@ -764,6 +805,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .is_depth = flags.is_depth != 0, .is_multisample = is_multisample, .has_secondary = cbuf.has_secondary, + .component_type = PixelFormatComponentType(pixel_format, is_integer), + .component_bit_size = PixelFormatIntegerComponentBits(pixel_format, is_integer), .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .shift_left = cbuf.shift_left, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ed13e68209..d1de3be7fb 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -35,6 +38,12 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 9; +enum class TextureComponentType : u32 { + Float, + Sint, + Uint, +}; + enum class TexturePixelFormat { A8B8G8R8_UNORM, A8B8G8R8_SNORM, @@ -174,7 +183,9 @@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { - bool has_secondary; + TextureComponentType component_type{TextureComponentType::Float}; + u8 component_bit_size{}; + bool has_secondary{}; u32 cbuf_index; u32 cbuf_offset; u32 shift_left; @@ -207,6 +218,8 @@ struct TextureDescriptor { bool is_depth; bool is_multisample; bool has_secondary; + TextureComponentType component_type{TextureComponentType::Float}; + u8 component_bit_size{}; u32 cbuf_index; u32 cbuf_offset; u32 shift_left; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 910e07a606..25f848f0cd 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -191,10 +191,19 @@ inline void PushImageDescriptors(TextureCache& texture_cache, ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; - const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && - !image_view.SupportsAnisotropy()}; - const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() - : sampler.Handle()}; + const bool needs_linear_fallback = sampler.RequiresLinearFiltering() && + !image_view.SupportsLinearFiltering(); + const bool needs_aniso_fallback = sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy(); + if (!image_view.SupportsLinearFiltering()) { + ASSERT_MSG(!sampler.RequiresLinearFiltering() || needs_linear_fallback, + "Linear filtering sampler bound to unsupported image view"); + } + // Prefer degrading to nearest sampling when the view lacks linear support. + const VkSampler vk_sampler = needs_linear_fallback + ? sampler.HandleWithoutLinearFiltering() + : (needs_aniso_fallback ? 
sampler.HandleWithDefaultAnisotropy() + : sampler.Handle()); guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 7327c90c12..f1089c0d13 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -2061,6 +2061,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; + supports_linear_filtering = device->IsFormatSupported( + create_info.format, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, FormatType::Optimal); const auto create = [&](TextureType tex_type, std::optional num_layers) { VkImageViewCreateInfo ci{create_info}; ci.viewType = ImageViewType(tex_type); @@ -2111,10 +2113,13 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, - buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} { + supports_linear_filtering = true; +} ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, device{&runtime.device} { + supports_linear_filtering = true; if (device->HasNullDescriptor()) { return; } @@ -2175,6 +2180,7 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type, if (image_format == Shader::ImageFormat::Typeless) { return Handle(texture_type); } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || image_format == Shader::ImageFormat::R16_SINT}; if (!storage_views) { @@ -2244,15 +2250,23 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t } // Some games have samplers with garbage. Sanitize them here. const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const VkFilter mag_filter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); + const VkFilter min_filter = MaxwellToVK::Sampler::Filter(tsc.min_filter); + const VkSamplerMipmapMode mipmap_mode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); + const f32 min_lod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(); + const f32 max_lod = tsc.mipmap_filter == TextureMipmapFilter::None ? 
0.25f : tsc.MaxLod(); + requires_linear_filtering = mag_filter == VK_FILTER_LINEAR || min_filter == VK_FILTER_LINEAR || + mipmap_mode == VK_SAMPLER_MIPMAP_MODE_LINEAR || max_anisotropy > 1.0f; - const auto create_sampler = [&](const f32 anisotropy) { + const auto create_sampler = [&](VkFilter mag, VkFilter min, VkSamplerMipmapMode mip, + f32 anisotropy) { return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .magFilter = mag, + .minFilter = min, + .mipmapMode = mip, .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), @@ -2261,19 +2275,25 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t .maxAnisotropy = anisotropy, .compareEnable = tsc.depth_compare_enabled, .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .minLod = min_lod, + .maxLod = max_lod, .borderColor = arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), .unnormalizedCoordinates = VK_FALSE, }); }; - sampler = create_sampler(max_anisotropy); + sampler = create_sampler(mag_filter, min_filter, mipmap_mode, max_anisotropy); const f32 max_anisotropy_default = static_cast(1U << tsc.max_anisotropy); if (max_anisotropy > max_anisotropy_default) { - sampler_default_anisotropy = create_sampler(max_anisotropy_default); + sampler_default_anisotropy = create_sampler(mag_filter, min_filter, mipmap_mode, + max_anisotropy_default); + } + + if (requires_linear_filtering) { + sampler_no_linear = create_sampler(VK_FILTER_NEAREST, VK_FILTER_NEAREST, + VK_SAMPLER_MIPMAP_MODE_NEAREST, 1.0f); } } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index cd11cc8fc7..4a13531096 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -238,6 +238,10 @@ public: [[nodiscard]] bool IsRescaled() const noexcept; + [[nodiscard]] bool SupportsLinearFiltering() const noexcept { + return supports_linear_filtering; + } + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { return *image_views[static_cast(texture_type)]; } @@ -278,6 +282,7 @@ private: vk::ImageView depth_view; vk::ImageView stencil_view; vk::ImageView color_view; + bool supports_linear_filtering{}; vk::Image null_image; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; @@ -296,16 +301,26 @@ public: } [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { - return *sampler_default_anisotropy; + return sampler_default_anisotropy ? *sampler_default_anisotropy : *sampler; + } + + [[nodiscard]] VkSampler HandleWithoutLinearFiltering() const noexcept { + return sampler_no_linear ? 
*sampler_no_linear : *sampler; } [[nodiscard]] bool HasAddedAnisotropy() const noexcept { return static_cast(sampler_default_anisotropy); } + [[nodiscard]] bool RequiresLinearFiltering() const noexcept { + return requires_linear_filtering; + } + private: vk::Sampler sampler; vk::Sampler sampler_default_anisotropy; + vk::Sampler sampler_no_linear; + bool requires_linear_filtering{}; }; class Framebuffer { From f9c9a79e965352be53631982ede45c6b5d8c5c07 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 22:45:10 -0700 Subject: [PATCH 03/14] [vk, spir-v] Remove problematic features on Qualcomm/Turnip drivers --- .../vulkan_common/vulkan_device.cpp | 33 +++++++++++++++++-- src/video_core/vulkan_common/vulkan_device.h | 9 +++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0e0bec2ce3..fe1924f763 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -502,6 +502,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_qualcomm) { + if (extensions.shader_float_controls) { + LOG_WARNING(Render_Vulkan, + "Qualcomm drivers have broken VK_KHR_shader_float_controls"); + RemoveExtension(extensions.shader_float_controls, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + } LOG_WARNING(Render_Vulkan, "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); @@ -985,6 +991,17 @@ bool Device::GetSuitability(bool requires_swapchain) { // Set instance version. instance_version = properties.properties.apiVersion; + VkPhysicalDeviceDriverProperties driver_probe_props{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + }; + VkPhysicalDeviceProperties2 driver_probe{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &driver_probe_props, + }; + physical.GetProperties2(driver_probe); + const bool disable_shader_int64 = driver_probe_props.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_probe_props.driverID == VK_DRIVER_ID_MESA_TURNIP; + // Minimum of API version 1.1 is required. (This is well-supported.) ASSERT(instance_version >= VK_API_VERSION_1_1); @@ -1095,8 +1112,18 @@ bool Device::GetSuitability(bool requires_swapchain) { // Perform the feature test. physical.GetFeatures2(features2); + if (disable_shader_int64) { + features2.features.shaderInt64 = VK_FALSE; + } + // Base Vulkan 1.0 features are always valid regardless of instance version. features.features = features2.features; + if (disable_shader_int64) { + features.features.shaderInt64 = VK_FALSE; + features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE; + features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE; + LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Qualcomm/Turnip drivers"); + } // Some features are mandatory. Check those. #define CHECK_FEATURE(feature, name) \ @@ -1137,8 +1164,10 @@ bool Device::GetSuitability(bool requires_swapchain) { properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; SetNext(next, properties.subgroup_properties); - // Retrieve relevant extension properties. - if (extensions.shader_float_controls) { + // Retrieve relevant extension/core properties. 
+ // Float controls properties are core in Vulkan 1.2; if running on 1.2+ or if the + // KHR extension is present, chain the properties struct to query capabilities. + if (instance_version >= VK_API_VERSION_1_2 || extensions.shader_float_controls) { properties.float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; SetNext(next, properties.float_controls); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index bd54144480..373d36fe6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -376,6 +376,10 @@ public: /// Returns true if shader int64 is supported. bool IsShaderInt64Supported() const { + const auto driver = GetDriverID(); + if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver == VK_DRIVER_ID_MESA_TURNIP) { + return false; + } return features.features.shaderInt64; } @@ -585,6 +589,11 @@ public: /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { + const auto driver = GetDriverID(); + if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver == VK_DRIVER_ID_MESA_TURNIP) { + return false; + } return extensions.shader_atomic_int64; } From 67f3e12c48b044582ed0e0990e101eae5e5830af Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sun, 28 Sep 2025 13:41:24 -0700 Subject: [PATCH 04/14] [SPIR-V] Enable INT64 emulation for Qualcomm drivers --- .../backend/spirv/emit_spirv_atomic.cpp | 249 +++++++++++++++++- .../frontend/maxwell/translate_program.cpp | 2 +- src/shader_recompiler/host_translate_info.h | 1 + src/shader_recompiler/profile.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 6 +- .../vulkan_common/vulkan_device.cpp | 17 +- src/video_core/vulkan_common/vulkan_device.h | 12 +- 7 files changed, 277 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 0ce73f289b..26021f5f59 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -2,14 +2,245 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" namespace Shader::Backend::SPIRV { namespace { -Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0); +std::pair AtomicArgs(EmitContext& ctx); + +enum class PairAtomicOp { + Add, + SMin, + UMin, + SMax, + UMax, + And, + Or, + Xor, + Exchange, +}; + +struct PairComponents { + Id lo; + Id hi; +}; + +PairComponents ComputePairComponents(EmitContext& ctx, PairAtomicOp op, Id current_lo, Id current_hi, + Id value_lo, Id value_hi) { + switch (op) { + case PairAtomicOp::Add: { + const Id sum_lo{ctx.OpIAdd(ctx.U32[1], current_lo, value_lo)}; + const Id carry_pred{ctx.OpULessThan(ctx.U1, sum_lo, current_lo)}; + const Id carry{ctx.OpSelect(ctx.U32[1], carry_pred, ctx.Const(1u), ctx.u32_zero_value)}; + const Id sum_hi_base{ctx.OpIAdd(ctx.U32[1], current_hi, value_hi)}; + const Id sum_hi{ctx.OpIAdd(ctx.U32[1], sum_hi_base, carry)}; + return {sum_lo, sum_hi}; + } + case PairAtomicOp::SMin: { + const Id current_hi_signed{ctx.OpBitcast(ctx.S32[1], current_hi)}; + const Id 
value_hi_signed{ctx.OpBitcast(ctx.S32[1], value_hi)}; + const Id hi_less{ctx.OpSLessThan(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id lo_less{ctx.OpULessThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_less, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_less, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::UMin: { + const Id hi_less{ctx.OpULessThan(ctx.U1, current_hi, value_hi)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi, value_hi)}; + const Id lo_less{ctx.OpULessThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_less, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_less, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::SMax: { + const Id current_hi_signed{ctx.OpBitcast(ctx.S32[1], current_hi)}; + const Id value_hi_signed{ctx.OpBitcast(ctx.S32[1], value_hi)}; + const Id hi_greater{ctx.OpSGreaterThan(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id lo_greater{ctx.OpUGreaterThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_greater, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_greater, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::UMax: { + const Id hi_greater{ctx.OpUGreaterThan(ctx.U1, current_hi, value_hi)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi, value_hi)}; + const Id lo_greater{ctx.OpUGreaterThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_greater, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_greater, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::And: { + const Id new_lo{ctx.OpBitwiseAnd(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseAnd(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Or: { + const Id new_lo{ctx.OpBitwiseOr(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseOr(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Xor: { + const Id new_lo{ctx.OpBitwiseXor(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseXor(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Exchange: + return {value_lo, value_hi}; + } + ASSERT_MSG(false, "Unhandled pair atomic operation"); + return {current_lo, current_hi}; +} + 
+PairAtomicOp GetPairAtomicOp(Id (Sirit::Module::*func)(Id, Id, Id)) { + if (func == &Sirit::Module::OpIAdd) { + return PairAtomicOp::Add; + } + if (func == &Sirit::Module::OpSMin) { + return PairAtomicOp::SMin; + } + if (func == &Sirit::Module::OpUMin) { + return PairAtomicOp::UMin; + } + if (func == &Sirit::Module::OpSMax) { + return PairAtomicOp::SMax; + } + if (func == &Sirit::Module::OpUMax) { + return PairAtomicOp::UMax; + } + if (func == &Sirit::Module::OpBitwiseAnd) { + return PairAtomicOp::And; + } + if (func == &Sirit::Module::OpBitwiseOr) { + return PairAtomicOp::Or; + } + if (func == &Sirit::Module::OpBitwiseXor) { + return PairAtomicOp::Xor; + } + ASSERT_MSG(false, "Unsupported pair atomic opcode"); + return PairAtomicOp::Exchange; +} + +Id EmulateStorageAtomicPair(EmitContext& ctx, PairAtomicOp op, Id pointer, Id value_pair) { + const auto [scope, semantics]{AtomicArgs(ctx)}; + const Id zero{ctx.u32_zero_value}; + const Id one{ctx.Const(1u)}; + const Id low_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, pointer, zero)}; + const Id high_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, pointer, one)}; + const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)}; + const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)}; + const Id loop_header{ctx.OpLabel()}; + const Id loop_body{ctx.OpLabel()}; + const Id loop_continue{ctx.OpLabel()}; + const Id loop_merge{ctx.OpLabel()}; + const Id high_block{ctx.OpLabel()}; + const Id revert_block{ctx.OpLabel()}; + + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone); + ctx.OpBranch(loop_body); + + ctx.AddLabel(loop_body); + const Id current_pair{ctx.OpLoad(ctx.U32[2], pointer)}; + const Id expected_lo{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 0U)}; + const Id expected_hi{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 1U)}; + const PairComponents new_pair{ComputePairComponents(ctx, op, expected_lo, expected_hi, value_lo, value_hi)}; + const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, + new_pair.lo, expected_lo)}; + const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)}; + ctx.OpSelectionMerge(loop_continue, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(low_success, high_block, loop_continue); + + ctx.AddLabel(high_block); + const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics, semantics, + new_pair.hi, expected_hi)}; + const Id high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)}; + ctx.OpBranchConditional(high_success, loop_merge, revert_block); + + ctx.AddLabel(revert_block); + ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo, + new_pair.lo); + ctx.OpBranch(loop_continue); + + ctx.AddLabel(loop_continue); + ctx.OpBranch(loop_header); + + ctx.AddLabel(loop_merge); + return current_pair; +} + +Id EmulateSharedAtomicExchange(EmitContext& ctx, Id offset, Id value_pair) { + const Id scope{ctx.Const(static_cast(spv::Scope::Workgroup))}; + const Id semantics{ctx.u32_zero_value}; + const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)}; + const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)}; + const Id low_pointer{SharedPointer(ctx, offset, 0)}; + const Id high_pointer{SharedPointer(ctx, offset, 1)}; + const Id loop_header{ctx.OpLabel()}; + const Id loop_body{ctx.OpLabel()}; + const Id 
loop_continue{ctx.OpLabel()}; + const Id loop_merge{ctx.OpLabel()}; + const Id high_block{ctx.OpLabel()}; + const Id revert_block{ctx.OpLabel()}; + + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone); + ctx.OpBranch(loop_body); + + ctx.AddLabel(loop_body); + const Id expected_lo{ctx.OpLoad(ctx.U32[1], low_pointer)}; + const Id expected_hi{ctx.OpLoad(ctx.U32[1], high_pointer)}; + const Id current_pair{ctx.OpCompositeConstruct(ctx.U32[2], expected_lo, expected_hi)}; + const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, + value_lo, expected_lo)}; + const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)}; + ctx.OpSelectionMerge(loop_continue, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(low_success, high_block, loop_continue); + + ctx.AddLabel(high_block); + const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics, semantics, + value_hi, expected_hi)}; + const Id high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)}; + ctx.OpBranchConditional(high_success, loop_merge, revert_block); + + ctx.AddLabel(revert_block); + ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo, value_lo); + ctx.OpBranch(loop_continue); + + ctx.AddLabel(loop_continue); + ctx.OpBranch(loop_header); + + ctx.AddLabel(loop_merge); + return current_pair; +} + +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset) { const Id shift_id{ctx.Const(2U)}; Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; if (index_offset > 0) { @@ -96,6 +327,12 @@ Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Valu return ctx.ConstantNull(ctx.U32[2]); } + if (ctx.profile.emulate_int64_with_uint2) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + return EmulateStorageAtomicPair(ctx, GetPairAtomicOp(non_atomic_func), pointer, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; @@ -175,6 +412,10 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { } Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.emulate_int64_with_uint2) { + return EmulateSharedAtomicExchange(ctx, offset, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer_1{SharedPointer(ctx, offset, 0)}; const Id pointer_2{SharedPointer(ctx, offset, 1)}; @@ -351,6 +592,12 @@ Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { + if (ctx.profile.emulate_int64_with_uint2) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + return EmulateStorageAtomicPair(ctx, PairAtomicOp::Exchange, pointer, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp 
b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 97b9b0cf07..1c57fc089a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -289,7 +289,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool= VK_API_VERSION_1_1); @@ -1122,7 +1128,12 @@ bool Device::GetSuitability(bool requires_swapchain) { features.features.shaderInt64 = VK_FALSE; features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE; features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE; - LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Qualcomm/Turnip drivers"); + if (shader_int64_emulation) { + LOG_WARNING(Render_Vulkan, + "Using shaderInt64 emulation on Qualcomm proprietary drivers"); + } else { + LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Turnip drivers"); + } } // Some features are mandatory. Check those. diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 373d36fe6b..25a74437b7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -374,15 +374,18 @@ public: return features.features.shaderStorageImageReadWithoutFormat; } - /// Returns true if shader int64 is supported. + /// Returns true if shader int64 is supported (natively or via emulation). bool IsShaderInt64Supported() const { - const auto driver = GetDriverID(); - if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver == VK_DRIVER_ID_MESA_TURNIP) { - return false; + if (shader_int64_emulation) { + return true; } return features.features.shaderInt64; } + /// Returns true when shader int64 operations must be emulated with 32-bit pairs. + bool UsesShaderInt64Emulation() const { + return shader_int64_emulation; + } /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { return features.features.shaderInt16; @@ -849,6 +852,7 @@ private: bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool shader_int64_emulation{}; ///< Emulates shader Int64 using 32-bit pairs. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. From bf302d7917f2bd4b2638ac61a751fedd5727c7a6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:40:29 +0200 Subject: [PATCH 05/14] [common] No need to specify min/max for settings; fix crash when OOB value is given for some settings (#2609) This fixes issues when migrating settings that refer to invalid filters/scales. For example if we had 5 filters, but we set filter=6, the program would crash. This also makes so specifying min/max manually isn't needed (but can still be set for cases like NCE). 
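As a rough illustration of the idea (the names below are illustrative, not the exact code added by this patch), deriving the valid range from the enum declaration itself lets a stale config value be clamped back into range instead of indexing past the end of the table:

    #include <algorithm>
    #include <cstdint>

    enum class ScalingFilter : std::uint32_t { NearestNeighbor, Bilinear, Bicubic, Fsr, Area };

    // Hypothetical stand-ins for what EnumMetadata<T>::GetFirst()/GetLast()
    // would report for this enum.
    constexpr auto kFirstFilter = ScalingFilter::NearestNeighbor;
    constexpr auto kLastFilter = ScalingFilter::Area;

    // A migrated setting of filter=6 is clamped to the last valid entry
    // instead of producing an out-of-bounds index.
    ScalingFilter LoadFilter(std::uint32_t raw) {
        return static_cast<ScalingFilter>(
            std::clamp(raw, static_cast<std::uint32_t>(kFirstFilter),
                            static_cast<std::uint32_t>(kLastFilter)));
    }
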
Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2609 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/common/settings.h | 41 ++----- src/common/settings_enums.h | 120 ++++++++------------- src/common/settings_setting.h | 67 ++++++------ src/yuzu/configuration/configure_audio.cpp | 6 +- 4 files changed, 84 insertions(+), 150 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 8605445837..891bde608c 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -178,7 +178,7 @@ struct Values { SwitchableSetting audio_input_device_id{ linkage, "auto", "input_device", Category::Audio, Specialization::RuntimeList}; SwitchableSetting sound_index{ - linkage, AudioMode::Stereo, AudioMode::Mono, AudioMode::Surround, + linkage, AudioMode::Stereo, "sound_index", Category::SystemAudio, Specialization::Default, true, true}; SwitchableSetting volume{linkage, @@ -199,8 +199,6 @@ struct Values { SwitchableSetting use_multi_core{linkage, true, "use_multi_core", Category::Core}; SwitchableSetting memory_layout_mode{linkage, MemoryLayout::Memory_4Gb, - MemoryLayout::Memory_4Gb, - MemoryLayout::Memory_12Gb, "memory_layout_mode", Category::Core, Specialization::Default, @@ -240,9 +238,8 @@ struct Values { #endif "cpu_backend", Category::Cpu}; - SwitchableSetting cpu_accuracy{linkage, CpuAccuracy::Auto, - CpuAccuracy::Auto, CpuAccuracy::Paranoid, - "cpu_accuracy", Category::Cpu}; + SwitchableSetting cpu_accuracy{linkage, CpuAccuracy::Auto, + "cpu_accuracy", Category::Cpu}; SwitchableSetting use_fast_cpu_time{linkage, false, @@ -324,10 +321,10 @@ struct Values { // Renderer SwitchableSetting renderer_backend{ - linkage, RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, + linkage, RendererBackend::Vulkan, "backend", Category::Renderer}; SwitchableSetting shader_backend{ - linkage, ShaderBackend::SpirV, ShaderBackend::Glsl, ShaderBackend::SpirV, + linkage, ShaderBackend::SpirV, "shader_backend", Category::Renderer, Specialization::RuntimeList}; SwitchableSetting vulkan_device{linkage, 0, "vulkan_device", Category::Renderer, Specialization::RuntimeList}; @@ -342,8 +339,6 @@ struct Values { Category::Renderer}; SwitchableSetting optimize_spirv_output{linkage, SpirvOptimizeMode::Never, - SpirvOptimizeMode::Never, - SpirvOptimizeMode::Always, "optimize_spirv_output", Category::Renderer}; SwitchableSetting use_asynchronous_gpu_emulation{ @@ -354,12 +349,10 @@ struct Values { #else AstcDecodeMode::Gpu, #endif - AstcDecodeMode::Cpu, - AstcDecodeMode::CpuAsynchronous, "accelerate_astc", Category::Renderer}; SwitchableSetting vsync_mode{ - linkage, VSyncMode::Fifo, VSyncMode::Immediate, VSyncMode::FifoRelaxed, + linkage, VSyncMode::Fifo, "use_vsync", Category::Renderer, Specialization::RuntimeList, true, true}; SwitchableSetting nvdec_emulation{linkage, NvdecEmulation::Gpu, @@ -372,8 +365,6 @@ struct Values { #else FullscreenMode::Exclusive, #endif - FullscreenMode::Borderless, - FullscreenMode::Exclusive, "fullscreen_mode", Category::Renderer, Specialization::Default, @@ -381,8 +372,6 @@ struct Values { true}; SwitchableSetting aspect_ratio{linkage, AspectRatio::R16_9, - AspectRatio::R16_9, - AspectRatio::Stretch, "aspect_ratio", Category::Renderer, Specialization::Default, @@ -430,8 +419,6 @@ struct Values { #else GpuAccuracy::High, #endif - GpuAccuracy::Normal, - GpuAccuracy::Extreme, "gpu_accuracy", Category::RendererAdvanced, Specialization::Default, @@ -442,8 +429,6 @@ struct Values { 
SwitchableSetting dma_accuracy{linkage, DmaAccuracy::Default, - DmaAccuracy::Default, - DmaAccuracy::Safe, "dma_accuracy", Category::RendererAdvanced, Specialization::Default, @@ -456,20 +441,14 @@ struct Values { #else AnisotropyMode::Automatic, #endif - AnisotropyMode::Automatic, - AnisotropyMode::X16, "max_anisotropy", Category::RendererAdvanced}; SwitchableSetting astc_recompression{linkage, AstcRecompression::Uncompressed, - AstcRecompression::Uncompressed, - AstcRecompression::Bc3, "astc_recompression", Category::RendererAdvanced}; SwitchableSetting vram_usage_mode{linkage, VramUsageMode::Conservative, - VramUsageMode::Conservative, - VramUsageMode::Aggressive, "vram_usage_mode", Category::RendererAdvanced}; SwitchableSetting skip_cpu_inner_invalidation{linkage, @@ -595,14 +574,10 @@ struct Values { // System SwitchableSetting language_index{linkage, Language::EnglishAmerican, - Language::Japanese, - Language::Serbian, "language_index", Category::System}; - SwitchableSetting region_index{linkage, Region::Usa, Region::Japan, - Region::Taiwan, "region_index", Category::System}; - SwitchableSetting time_zone_index{linkage, TimeZone::Auto, - TimeZone::Auto, TimeZone::Zulu, + SwitchableSetting region_index{linkage, Region::Usa, "region_index", Category::System}; + SwitchableSetting time_zone_index{linkage, TimeZone::Auto, "time_zone_index", Category::System}; // Measured in seconds since epoch SwitchableSetting custom_rtc_enabled{ diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index ebfa4ceb9e..0e5a08d845 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -10,6 +10,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -18,8 +19,10 @@ namespace Settings { template struct EnumMetadata { - static std::vector> Canonicalizations(); + static std::vector> Canonicalizations(); static u32 Index(); + static constexpr T GetFirst(); + static constexpr T GetLast(); }; #define PAIR_45(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_46(N, __VA_ARGS__)) @@ -69,138 +72,101 @@ struct EnumMetadata { #define PAIR_1(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_2(N, __VA_ARGS__)) #define PAIR(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_1(N, __VA_ARGS__)) -#define ENUM(NAME, ...) \ - enum class NAME : u32 { __VA_ARGS__ }; \ - template <> \ - inline std::vector> EnumMetadata::Canonicalizations() { \ - return {PAIR(NAME, __VA_ARGS__)}; \ - } \ - template <> \ - inline u32 EnumMetadata::Index() { \ - return __COUNTER__; \ +#define PP_HEAD(A, ...) A + +#define ENUM(NAME, ...) 
\ + enum class NAME : u32 { __VA_ARGS__ }; \ + template<> inline std::vector> EnumMetadata::Canonicalizations() { \ + return {PAIR(NAME, __VA_ARGS__)}; \ + } \ + template<> inline u32 EnumMetadata::Index() { \ + return __COUNTER__; \ + } \ + template<> inline constexpr NAME EnumMetadata::GetFirst() { \ + return NAME::PP_HEAD(__VA_ARGS__); \ + } \ + template<> inline constexpr NAME EnumMetadata::GetLast() { \ + return (std::vector>{PAIR(NAME, __VA_ARGS__)}).back().second; \ } // AudioEngine must be specified discretely due to having existing but slightly different // canonicalizations // TODO (lat9nq): Remove explicit definition of AudioEngine/sink_id -enum class AudioEngine : u32 { - Auto, - Cubeb, - Sdl2, - Null, - Oboe, -}; - -template <> -inline std::vector> -EnumMetadata::Canonicalizations() { +enum class AudioEngine : u32 { Auto, Cubeb, Sdl2, Null, Oboe, }; +template<> +inline std::vector> EnumMetadata::Canonicalizations() { return { {"auto", AudioEngine::Auto}, {"cubeb", AudioEngine::Cubeb}, {"sdl2", AudioEngine::Sdl2}, {"null", AudioEngine::Null}, {"oboe", AudioEngine::Oboe}, }; } - -template <> +/// @brief This is just a sufficiently large number that is more than the number of other enums declared here +template<> inline u32 EnumMetadata::Index() { - // This is just a sufficiently large number that is more than the number of other enums declared - // here return 100; } +template<> +inline constexpr AudioEngine EnumMetadata::GetFirst() { + return AudioEngine::Auto; +} +template<> +inline constexpr AudioEngine EnumMetadata::GetLast() { + return AudioEngine::Oboe; +} ENUM(AudioMode, Mono, Stereo, Surround); +static_assert(EnumMetadata::GetFirst() == AudioMode::Mono); +static_assert(EnumMetadata::GetLast() == AudioMode::Surround); ENUM(Language, Japanese, EnglishAmerican, French, German, Italian, Spanish, Chinese, Korean, Dutch, Portuguese, Russian, Taiwanese, EnglishBritish, FrenchCanadian, SpanishLatin, ChineseSimplified, ChineseTraditional, PortugueseBrazilian, Serbian); - ENUM(Region, Japan, Usa, Europe, Australia, China, Korea, Taiwan); - ENUM(TimeZone, Auto, Default, Cet, Cst6Cdt, Cuba, Eet, Egypt, Eire, Est, Est5Edt, Gb, GbEire, Gmt, - GmtPlusZero, GmtMinusZero, GmtZero, Greenwich, Hongkong, Hst, Iceland, Iran, Israel, Jamaica, - Japan, Kwajalein, Libya, Met, Mst, Mst7Mdt, Navajo, Nz, NzChat, Poland, Portugal, Prc, Pst8Pdt, - Roc, Rok, Singapore, Turkey, Uct, Universal, Utc, WSu, Wet, Zulu); - + GmtPlusZero, GmtMinusZero, GmtZero, Greenwich, Hongkong, Hst, Iceland, Iran, Israel, Jamaica, + Japan, Kwajalein, Libya, Met, Mst, Mst7Mdt, Navajo, Nz, NzChat, Poland, Portugal, Prc, Pst8Pdt, + Roc, Rok, Singapore, Turkey, Uct, Universal, Utc, WSu, Wet, Zulu); ENUM(AnisotropyMode, Automatic, Default, X2, X4, X8, X16); - ENUM(AstcDecodeMode, Cpu, Gpu, CpuAsynchronous); - ENUM(AstcRecompression, Uncompressed, Bc1, Bc3); - ENUM(VSyncMode, Immediate, Mailbox, Fifo, FifoRelaxed); - ENUM(VramUsageMode, Conservative, Aggressive); - ENUM(RendererBackend, OpenGL, Vulkan, Null); - ENUM(ShaderBackend, Glsl, Glasm, SpirV); - ENUM(GpuAccuracy, Normal, High, Extreme); - ENUM(DmaAccuracy, Default, Unsafe, Safe); - ENUM(CpuBackend, Dynarmic, Nce); - ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid); - ENUM(CpuClock, Boost, Fast) - ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb, Memory_10Gb, Memory_12Gb); - ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never); - ENUM(FullscreenMode, Borderless, Exclusive); - ENUM(NvdecEmulation, Off, Cpu, Gpu); - -ENUM(ResolutionSetup, - Res1_4X, - 
Res1_2X, - Res3_4X, - Res1X, - Res3_2X, - Res2X, - Res3X, - Res4X, - Res5X, - Res6X, - Res7X, - Res8X); - +ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X); ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum); - ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum); - ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch); - ENUM(ConsoleMode, Handheld, Docked); - ENUM(AppletMode, HLE, LLE); - ENUM(SpirvOptimizeMode, Never, OnLoad, Always); - ENUM(GpuOverclock, Low, Medium, High) - ENUM(TemperatureUnits, Celsius, Fahrenheit) template -inline std::string CanonicalizeEnum(Type id) { +inline std::string_view CanonicalizeEnum(Type id) { const auto group = EnumMetadata::Canonicalizations(); - for (auto& [name, value] : group) { - if (value == id) { + for (auto& [name, value] : group) + if (value == id) return name; - } - } return "unknown"; } template inline Type ToEnum(const std::string& canonicalization) { const auto group = EnumMetadata::Canonicalizations(); - for (auto& [name, value] : group) { - if (name == canonicalization) { + for (auto& [name, value] : group) + if (name == canonicalization) return value; - } - } return {}; } } // namespace Settings diff --git a/src/common/settings_setting.h b/src/common/settings_setting.h index 0aba2e11c9..a7e6bb6168 100644 --- a/src/common/settings_setting.h +++ b/src/common/settings_setting.h @@ -72,10 +72,17 @@ public: u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, BasicSetting* other_setting_ = nullptr) requires(ranged) - : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, - other_setting_), + : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, other_setting_), value{default_val}, default_value{default_val}, maximum{max_val}, minimum{min_val} {} + explicit Setting(Linkage& linkage, const Type& default_val, + const std::string& name, Category category_, + u32 specialization_ = Specialization::Default, bool save_ = true, + bool runtime_modifiable_ = false, BasicSetting* other_setting_ = nullptr) + requires(ranged && std::is_enum_v) + : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, other_setting_), + value{default_val}, default_value{default_val}, maximum{EnumMetadata::GetLast()}, minimum{EnumMetadata::GetFirst()} {} + /** * Returns a reference to the setting's value. * @@ -119,9 +126,6 @@ protected: return value_.has_value() ? std::to_string(*value_) : "none"; } else if constexpr (std::is_same_v) { return value_ ? 
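// NOTE (illustrative sketch, not taken from the change itself): with the new
// `requires(ranged && std::is_enum_v<Type>)` overloads in this file, enum-backed settings
// derive their bounds from EnumMetadata<Type>::GetFirst()/GetLast(), so call sites can drop
// the explicit min/max arguments. For example, a declaration along the lines of
//     SwitchableSetting<AudioMode, true> sound_index{linkage, AudioMode::Stereo,
//                                                    "sound_index", Category::SystemAudio};
// now behaves as if AudioMode::Mono / AudioMode::Surround had been passed explicitly.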
"true" : "false"; - } else if constexpr (std::is_same_v) { - // Compatibility with old AudioEngine setting being a string - return CanonicalizeEnum(value_); } else if constexpr (std::is_floating_point_v) { return fmt::format("{:f}", value_); } else if constexpr (std::is_enum_v) { @@ -207,7 +211,7 @@ public: [[nodiscard]] std::string Canonicalize() const override final { if constexpr (std::is_enum_v) { - return CanonicalizeEnum(this->GetValue()); + return std::string{CanonicalizeEnum(this->GetValue())}; } else { return ToString(this->GetValue()); } @@ -288,41 +292,32 @@ public: * @param other_setting_ A second Setting to associate to this one in metadata */ template - explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, - Category category_, u32 specialization_ = Specialization::Default, - bool save_ = true, bool runtime_modifiable_ = false, - typename std::enable_if::type other_setting_ = nullptr) - : Setting{ - linkage, default_val, name, category_, specialization_, - save_, runtime_modifiable_, other_setting_} { + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(!ranged) + : Setting{ linkage, default_val, name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); } virtual ~SwitchableSetting() = default; - /** - * Sets a default value, minimum value, maximum value, and label. - * - * @param linkage Setting registry - * @param default_val Initial value of the setting, and default value of the setting - * @param min_val Sets the minimum allowed value of the setting - * @param max_val Sets the maximum allowed value of the setting - * @param name Label for the setting - * @param category_ Category of the setting AKA INI group - * @param specialization_ Suggestion for how frontend implementations represent this in a config - * @param save_ Suggests that this should or should not be saved to a frontend config file - * @param runtime_modifiable_ Suggests whether this is modifiable while a guest is loaded - * @param other_setting_ A second Setting to associate to this one in metadata - */ + /// @brief Sets a default value, minimum value, maximum value, and label. 
+ /// @param linkage Setting registry + /// @param default_val Initial value of the setting, and default value of the setting + /// @param min_val Sets the minimum allowed value of the setting + /// @param max_val Sets the maximum allowed value of the setting + /// @param name Label for the setting + /// @param category_ Category of the setting AKA INI group + /// @param specialization_ Suggestion for how frontend implementations represent this in a config + /// @param save_ Suggests that this should or should not be saved to a frontend config file + /// @param runtime_modifiable_ Suggests whether this is modifiable while a guest is loaded + /// @param other_setting_ A second Setting to associate to this one in metadata template - explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const Type& min_val, - const Type& max_val, const std::string& name, Category category_, - u32 specialization_ = Specialization::Default, bool save_ = true, - bool runtime_modifiable_ = false, - typename std::enable_if::type other_setting_ = nullptr) - : Setting{linkage, default_val, min_val, - max_val, name, category_, - specialization_, save_, runtime_modifiable_, - other_setting_} { + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const Type& min_val, const Type& max_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(ranged) + : Setting{linkage, default_val, min_val, max_val, name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { + linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); + } + + template + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(ranged) + : Setting{linkage, default_val, EnumMetadata::GetFirst(), EnumMetadata::GetLast(), name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); } diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp index a7ebae91f8..af81ef552e 100644 --- a/src/yuzu/configuration/configure_audio.cpp +++ b/src/yuzu/configuration/configure_audio.cpp @@ -270,10 +270,8 @@ void ConfigureAudio::UpdateAudioDevices(int sink_index) { void ConfigureAudio::InitializeAudioSinkComboBox() { sink_combo_box->clear(); sink_combo_box->addItem(QString::fromUtf8(AudioCore::Sink::auto_device_name)); - - for (const auto& id : AudioCore::Sink::GetSinkIDs()) { - sink_combo_box->addItem(QString::fromStdString(Settings::CanonicalizeEnum(id))); - } + for (const auto& id : AudioCore::Sink::GetSinkIDs()) + sink_combo_box->addItem(QString::fromStdString(std::string{Settings::CanonicalizeEnum(id)})); } void ConfigureAudio::RetranslateUI() { From ecb811ad04e75b221e0eee31f6bbd582620bd7c7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:41:28 +0200 Subject: [PATCH 06/14] [qt] move addons row to rightmost side (#2610) This is because the rightmost row is "extended" to the rest of the table, and add-ons have long names, play time doesn't need that much space. 
Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2610 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/game_list.h | 4 ++-- src/yuzu/game_list_worker.cpp | 36 ++++++++++++----------------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 94e7b2dc42..cd71fb2139 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -58,11 +58,11 @@ class GameList : public QWidget { public: enum { COLUMN_NAME, - COLUMN_COMPATIBILITY, - COLUMN_ADD_ONS, COLUMN_FILE_TYPE, COLUMN_SIZE, COLUMN_PLAY_TIME, + COLUMN_ADD_ONS, + COLUMN_COMPATIBILITY, COLUMN_COUNT, // Number of columns }; diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 538c7ab822..2914c275a8 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -204,36 +204,24 @@ QList MakeGameListEntry(const std::string& path, const PlayTime::PlayTimeManager& play_time_manager, const FileSys::PatchManager& patch) { - const auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); + auto const it = FindMatchingCompatibilityEntry(compatibility_list, program_id); + // The game list uses 99 as compatibility number for untested games + QString compatibility = it != compatibility_list.end() ? it->second.first : QStringLiteral("99"); - // The game list uses this as compatibility number for untested games - QString compatibility{QStringLiteral("99")}; - if (it != compatibility_list.end()) { - compatibility = it->second.first; - } + auto const file_type = loader.GetFileType(); + auto const file_type_string = QString::fromStdString(Loader::GetFileTypeString(file_type)); - const auto file_type = loader.GetFileType(); - const auto file_type_string = QString::fromStdString(Loader::GetFileTypeString(file_type)); - - QList list{ - new GameListItemPath(FormatGameName(path), icon, QString::fromStdString(name), - file_type_string, program_id), - new GameListItemCompat(compatibility), + QString patch_versions = GetGameListCachedObject(fmt::format("{:016X}", patch.GetTitleID()), "pv.txt", [&patch, &loader] { + return FormatPatchNameVersions(patch, loader, loader.IsRomFSUpdatable()); + }); + return QList{ + new GameListItemPath(FormatGameName(path), icon, QString::fromStdString(name), file_type_string, program_id), new GameListItem(file_type_string), new GameListItemSize(size), new GameListItemPlayTime(play_time_manager.GetPlayTime(program_id)), + new GameListItem(patch_versions), + new GameListItemCompat(compatibility), }; - - QString patch_versions; - - patch_versions = GetGameListCachedObject( - fmt::format("{:016X}", patch.GetTitleID()), "pv.txt", [&patch, &loader] { - return FormatPatchNameVersions(patch, loader, loader.IsRomFSUpdatable()); - }); - - list.insert(2, new GameListItem(patch_versions)); - - return list; } } // Anonymous namespace From 50ceb9a43a6dfe417a7760042e4710ce59553d94 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Mon, 29 Sep 2025 18:42:04 +0200 Subject: [PATCH 07/14] [.ci] install-msvc: fix installation on MSVC (#2611) * changed from Build Tools to Community (congrats Microsoft very cool) * add spining to show it didnt stopped installing Signed-off-by: Caio Oliveira Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2611 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: Caio Oliveira Co-committed-by: Caio Oliveira --- .ci/windows/install-msvc.ps1 | 40 ++++++++++++++++++++++++++---------- docs/Deps.md | 6 
+++--- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/.ci/windows/install-msvc.ps1 b/.ci/windows/install-msvc.ps1 index b88f727ed8..788b2848ad 100755 --- a/.ci/windows/install-msvc.ps1 +++ b/.ci/windows/install-msvc.ps1 @@ -10,7 +10,7 @@ if (-not ([bool](net session 2>$null))) { } $VSVer = "17" -$ExeFile = "vs_BuildTools.exe" +$ExeFile = "vs_community.exe" $Uri = "https://aka.ms/vs/$VSVer/release/$ExeFile" $Destination = "./$ExeFile" @@ -19,21 +19,39 @@ $WebClient = New-Object System.Net.WebClient $WebClient.DownloadFile($Uri, $Destination) Write-Host "Finished downloading $ExeFile" -$VSROOT = "C:/VSBuildTools/$VSVer" $Arguments = @( - "--installPath `"$VSROOT`"", # set custom installation path - "--quiet", # suppress UI - "--wait", # wait for installation to complete - "--norestart", # prevent automatic restart - "--add Microsoft.VisualStudio.Workload.VCTools", # add C++ build tools workload - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", # add core x86/x64 C++ tools - "--add Microsoft.VisualStudio.Component.Windows10SDK.19041" # add specific Windows SDK + "--quiet", # Suppress installer UI + "--wait", # Wait for installation to complete + "--norestart", # Prevent automatic restart + "--force", # Force installation even if components are already installed + "--add Microsoft.VisualStudio.Workload.NativeDesktop", # Desktop development with C++ + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", # Core C++ compiler/tools for x86/x64 + "--add Microsoft.VisualStudio.Component.Windows11SDK.26100",# Windows 11 SDK (26100) + "--add Microsoft.VisualStudio.Component.Windows10SDK.19041",# Windows 10 SDK (19041) + "--add Microsoft.VisualStudio.Component.VC.Llvm.Clang", # LLVM Clang compiler + "--add Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset", # LLVM Clang integration toolset + "--add Microsoft.VisualStudio.Component.Windows11SDK.22621",# Windows 11 SDK (22621) + "--add Microsoft.VisualStudio.Component.VC.CMake.Project", # CMake project support + "--add Microsoft.VisualStudio.ComponentGroup.VC.Tools.142.x86.x64", # VC++ 14.2 toolset + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Llvm.Clang" # LLVM Clang for native desktop ) Write-Host "Installing Visual Studio Build Tools" -$InstallProcess = Start-Process -FilePath $Destination -NoNewWindow -PassThru -Wait -ArgumentList $Arguments -$ExitCode = $InstallProcess.ExitCode +$InstallProcess = Start-Process -FilePath $Destination -NoNewWindow -PassThru -ArgumentList $Arguments +# Spinner while installing +$Spinner = "|/-\" +$i = 0 +while (-not $InstallProcess.HasExited) { + Write-Host -NoNewline ("`rInstalling... " + $Spinner[$i % $Spinner.Length]) + Start-Sleep -Milliseconds 250 + $i++ +} + +# Clear spinner line +Write-Host "`rSetup completed! " + +$ExitCode = $InstallProcess.ExitCode if ($ExitCode -ne 0) { Write-Host "Error installing Visual Studio Build Tools (Error: $ExitCode)" Exit $ExitCode diff --git a/docs/Deps.md b/docs/Deps.md index cfc6f0365b..0e7b7cff62 100644 --- a/docs/Deps.md +++ b/docs/Deps.md @@ -4,8 +4,8 @@ To build Eden, you MUST have a C++ compiler. 
* On Linux, this is usually [GCC](https://gcc.gnu.org/) 11+ or [Clang](https://clang.llvm.org/) v14+ - GCC 12 also requires Clang 14+ * On Windows, this is either: - - **[MSVC](https://visualstudio.microsoft.com/downloads/)**, - * *A convenience script to install the **minimal** version (Visual Build Tools) is provided in `.ci/windows/install-msvc.ps1`* + - **[MSVC](https://visualstudio.microsoft.com/downloads/)** (you should select *Community* option), + * *A convenience script to install the Visual Community Studio 2022 with necessary tools is provided in `.ci/windows/install-msvc.ps1`* - clang-cl - can be downloaded from the MSVC installer, - or **[MSYS2](https://www.msys2.org)** * On macOS, this is Apple Clang @@ -211,4 +211,4 @@ Then install the libraries: `sudo pkg install qt6 boost glslang libzip library/l ## All Done -You may now return to the **[root build guide](Build.md)**. \ No newline at end of file +You may now return to the **[root build guide](Build.md)**. From 9f423a24b82e1b3223d8d4204455e637f86271fa Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:42:28 +0200 Subject: [PATCH 08/14] [linux] fix aarch64 builds (again) + fix with slightly outdated qt (#2612) Fixes issues building on aarch64 linux with a slightly outdated system qt; also fixes linker selection process Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2612 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- CMakeLists.txt | 5 ++--- src/dynarmic/tests/CMakeLists.txt | 4 +++- src/yuzu/configuration/shared_widget.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ef3c0bef6e..f5d7126f92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -895,13 +895,13 @@ if (MSVC AND CXX_CLANG) endif() if (YUZU_USE_FASTER_LD) + # fallback if everything fails (bfd) + set(LINKER bfd) # clang should always use lld find_program(LLD lld) - if (LLD) set(LINKER lld) endif() - # GNU appears to work better with mold # TODO: mold has been slow lately, see if better options exist (search for gold?) if (CXX_GCC) @@ -910,7 +910,6 @@ if (YUZU_USE_FASTER_LD) set(LINKER mold) endif() endif() - message(NOTICE "Selecting ${LINKER} as linker") add_link_options("-fuse-ld=${LINKER}") endif() diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 4ace6c2afd..df90168a52 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -135,6 +135,8 @@ target_include_directories(dynarmic_tests PRIVATE . 
../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -target_compile_options(dynarmic_tests PRIVATE -mavx2) +if ("x86_64" IN_LIST ARCHITECTURE) + target_compile_options(dynarmic_tests PRIVATE -mavx2) +endif() add_test(dynarmic_tests dynarmic_tests --durations yes) diff --git a/src/yuzu/configuration/shared_widget.h b/src/yuzu/configuration/shared_widget.h index 9e718098a3..dd5d5b7257 100644 --- a/src/yuzu/configuration/shared_widget.h +++ b/src/yuzu/configuration/shared_widget.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "qt_common/shared_translation.h" From 33f93ad003a47419d545a6bae018d3bb1c5ee3fb Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:42:51 +0200 Subject: [PATCH 09/14] [macos, qt] workaround upstream rendering bug (#2616) See https://bugreports.qt.io/browse/QTBUG-138942 Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2616 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/Info.plist | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 96096c84d1..0c43c834d4 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -49,5 +49,7 @@ SPDX-License-Identifier: GPL-2.0-or-later NSApplication NSHighResolutionCapable True + UIDesignRequiresCompatibility + From 324ace3cd635bac4230a04c4a769bed236ed1e8d Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:43:13 +0200 Subject: [PATCH 10/14] [macos] associate .XCI/NSP file extensions (#2617) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2617 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/Info.plist | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 0c43c834d4..773c4ee302 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -45,6 +45,26 @@ SPDX-License-Identifier: GPL-2.0-or-later NSHumanReadableCopyright + + LSApplicationCategoryType + public.app-category.games + CFBundleDocumentTypes + + + CFBundleTypeExtensions + + nsp + xci + nro + + CFBundleTypeName + Switch File + CFBundleTypeRole + Viewer + LSHandlerRank + Default + + NSPrincipalClass NSApplication NSHighResolutionCapable From e16881216b9eb339a9243b8226262508c5f671a3 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 13:31:53 -0700 Subject: [PATCH 11/14] [vk] Implement D16 to R16 Texture Conversion --- .../renderer_vulkan/vk_texture_cache.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8d1d609a35..7327c90c12 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1229,6 +1229,18 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); + } + break; + + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::A8B8G8R8_UNORM: case PixelFormat::A8B8G8R8_SNORM: case PixelFormat::A8B8G8R8_SINT: @@ -1270,7 +1282,6 @@ void 
TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im case PixelFormat::R32G32_SINT: case PixelFormat::R32_FLOAT: case PixelFormat::R16_FLOAT: - case PixelFormat::R16_UNORM: case PixelFormat::R16_SNORM: case PixelFormat::R16_UINT: case PixelFormat::R16_SINT: @@ -1325,7 +1336,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im case PixelFormat::ASTC_2D_6X5_SRGB: case PixelFormat::E5B9G9R9_FLOAT: case PixelFormat::D32_FLOAT: - case PixelFormat::D16_UNORM: case PixelFormat::X8_D24_UNORM: case PixelFormat::S8_UINT: case PixelFormat::S8_UINT_D24_UNORM: From 3eef99dabe263b9139aa719c9adeb1a0852153e6 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 19:12:32 -0700 Subject: [PATCH 12/14] [vk, SPIR-V] Various changes in an attempt to fix MH on Android --- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 145 +++++++++++++++--- .../backend/spirv/spirv_emit_context.cpp | 31 +++- .../backend/spirv/spirv_emit_context.h | 2 + .../ir_opt/collect_shader_info_pass.cpp | 15 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 49 +++++- src/shader_recompiler/shader_info.h | 15 +- .../renderer_vulkan/pipeline_helper.h | 17 +- .../renderer_vulkan/vk_texture_cache.cpp | 38 +++-- .../renderer_vulkan/vk_texture_cache.h | 17 +- 10 files changed, 287 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 313a1deb30..ba23fb8d34 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -409,7 +409,7 @@ void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { } void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { - if (info.uses_sampled_1d) { + if (info.uses_sampled_1d || info.uses_image_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 945cdb42bc..59e96ae06d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" @@ -185,6 +186,84 @@ private: spv::ImageOperandsMask mask{}; }; + +Id SampledVectorType(EmitContext& ctx, TextureComponentType component_type) { + switch (component_type) { + case TextureComponentType::Float: + return ctx.F32[4]; + case TextureComponentType::Sint: + return ctx.S32[4]; + case TextureComponentType::Uint: + return ctx.U32[4]; + } + throw LogicError("Unhandled texture component type {}", static_cast(component_type)); +} + +bool ExpectsFloatResult(const IR::Inst* inst) { + switch (inst->Type()) { + case IR::Type::F32: + case IR::Type::F32x2: + case IR::Type::F32x3: + case IR::Type::F32x4: + return true; + default: + return false; + } +} + +Id MakeFloatVector(EmitContext& ctx, float value) { + const Id scalar{ctx.Const(value)}; + return ctx.ConstantComposite(ctx.F32[4], scalar, scalar, scalar, scalar); +} + +Id NormalizeUnsignedSample(EmitContext& ctx, u32 component_bits, Id value) { + if (component_bits == 0) { + return value; + } + const double max_value = std::exp2(static_cast(component_bits)) - 1.0; + if (!(max_value > 0.0)) { + return value; 
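// NOTE (illustrative, not taken from the change itself): the normalization applied when an
// integer-formatted texture is sampled by a shader that expects float data follows the usual
// UNORM/SNORM scaling. For an N-bit unsigned component:
//     x_norm = x / (2^N - 1)                      e.g. N = 8  ->  x / 255
// and for an N-bit signed component:
//     x_norm = clamp(x / (2^(N-1) - 1), -1, 1)    e.g. N = 8  ->  clamp(x / 127, -1, 1)
// which is what NormalizeUnsignedSample/NormalizeSignedSample compute via
// inv_max = 1 / (2^bits - 1) and inv_pos = 1 / (2^(bits-1) - 1).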
+ } + const float inv_max = static_cast(1.0 / max_value); + return ctx.OpFMul(ctx.F32[4], value, MakeFloatVector(ctx, inv_max)); +} + +Id NormalizeSignedSample(EmitContext& ctx, u32 component_bits, Id value) { + if (component_bits == 0) { + return value; + } + const double positive_max = component_bits > 0 ? std::exp2(static_cast(component_bits - 1)) - 1.0 : 0.0; + if (!(positive_max > 0.0)) { + return ctx.OpFClamp(ctx.F32[4], value, MakeFloatVector(ctx, -1.0f), MakeFloatVector(ctx, 1.0f)); + } + const float inv_pos = static_cast(1.0 / positive_max); + const Id scaled{ctx.OpFMul(ctx.F32[4], value, MakeFloatVector(ctx, inv_pos))}; + return ctx.OpFClamp(ctx.F32[4], scaled, MakeFloatVector(ctx, -1.0f), MakeFloatVector(ctx, 1.0f)); +} + +Id ConvertSampleToExpectedType(EmitContext& ctx, const IR::Inst* inst, + const TextureDefinition* texture_def, Id value) { + if (!texture_def || texture_def->component_type == TextureComponentType::Float) { + return value; + } + if (!ExpectsFloatResult(inst)) { + return value; + } + switch (texture_def->component_type) { + case TextureComponentType::Sint: { + const Id as_float{ctx.OpConvertSToF(ctx.F32[4], value)}; + return NormalizeSignedSample(ctx, texture_def->component_bit_size, as_float); + } + case TextureComponentType::Uint: { + const Id as_float{ctx.OpConvertUToF(ctx.F32[4], value)}; + return NormalizeUnsignedSample(ctx, texture_def->component_bit_size, as_float); + } + case TextureComponentType::Float: + break; + } + return value; +} + Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; if (def.count > 1) { @@ -449,31 +528,39 @@ Id EmitBoundImageWrite(EmitContext&) { Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; + Id sample{}; if (ctx.stage == Stage::Fragment) { const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); - return Emit(&EmitContext::OpImageSparseSampleImplicitLod, - &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + sample = Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } else { - // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as - // if the lod was explicitly zero. 
This may change on Turing with implicit compute - // derivatives const Id lod{ctx.Const(0.0f)}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(ctx, false, true, false, lod, offset); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -509,13 +596,19 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(ctx, offset, offset2); if (ctx.profile.need_gather_subpixel_offset) { coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); } - return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), - operands.MaskOptional(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + result_type, Texture(ctx, info, index), coords, + ctx.Const(info.gather_component), operands.MaskOptional(), + operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -538,12 +631,17 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c lod = Id{}; } if (Sirit::ValidId(ms)) { - // This image is multisampled, lod must be implicit lod = Id{}; } + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? 
SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const ImageOperands operands(lod, ms); - return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, + result_type, TextureImage(ctx, info, index), coords, + operands.MaskOptional(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, @@ -588,14 +686,19 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivatives, const IR::Value& offset, Id lod_clamp) { const auto info{inst->Flags()}; + const TextureDefinition* texture_def = + info.type == TextureType::Buffer ? nullptr : &ctx.textures.at(info.descriptor_index); + const Id result_type = + texture_def ? SampledVectorType(ctx, texture_def->component_type) : ctx.F32[4]; const auto operands = info.num_derivatives == 3 ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, ctx.Def(offset), {}, lod_clamp) : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, lod_clamp); - return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + const Id sample = Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + return ConvertSampleToExpectedType(ctx, inst, texture_def, sample); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4c3e101433..3b7094eb6f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -28,9 +28,23 @@ enum class Operation { FPMax, }; +Id ComponentTypeId(EmitContext& ctx, TextureComponentType component_type) { + switch (component_type) { + case TextureComponentType::Float: + return ctx.F32[1]; + case TextureComponentType::Sint: + return ctx.S32[1]; + case TextureComponentType::Uint: + return ctx.U32[1]; + } + throw LogicError("Unhandled texture component type {}", static_cast(component_type)); +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; - const Id type{ctx.F32[1]}; + const TextureComponentType component_type = desc.is_depth ? 
TextureComponentType::Float + : desc.component_type; + const Id type{ComponentTypeId(ctx, component_type)}; const bool depth{desc.is_depth}; const bool ms{desc.is_multisample}; switch (desc.type) { @@ -1374,6 +1388,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .image_type = image_type, .count = desc.count, .is_multisample = desc.is_multisample, + .component_type = desc.component_type, + .component_bit_size = desc.component_bit_size, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1417,6 +1433,12 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Info& info{program.info}; const VaryingState loads{info.loads.mask | info.passthrough.mask}; + const auto decorate_flat_if_fragment = [this](Id id) { + if (stage == Stage::Fragment) { + Decorate(id, spv::Decoration::Flat); + } + }; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1432,16 +1454,22 @@ void EmitContext::DefineInputs(const IR::Program& program) { } if (info.uses_sample_id) { sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + decorate_flat_if_fragment(sample_id); } if (info.uses_is_helper_invocation) { is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } if (info.uses_subgroup_mask) { subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + decorate_flat_if_fragment(subgroup_mask_eq); subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + decorate_flat_if_fragment(subgroup_mask_lt); subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + decorate_flat_if_fragment(subgroup_mask_le); subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + decorate_flat_if_fragment(subgroup_mask_gt); subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + decorate_flat_if_fragment(subgroup_mask_ge); } if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || (profile.warp_size_potentially_larger_than_guest && @@ -1461,6 +1489,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { } if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + decorate_flat_if_fragment(primitive_id); } if (loads[IR::Attribute::Layer]) { AddCapability(spv::Capability::Geometry); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 66cdb1d3db..0606f51f65 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -38,6 +38,8 @@ struct TextureDefinition { Id image_type; u32 count; bool is_multisample; + TextureComponentType component_type{TextureComponentType::Float}; + u32 component_bit_size{}; }; struct TextureBufferDefinition { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 2bfa3227a8..8bb5cc4b33 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { namespace { +constexpr bool IsOneDimensional(TextureType type) { + return type == TextureType::Color1D || type == TextureType::ColorArray1D; +} + void 
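// NOTE (editorial comment, not taken from the change itself): decorate_flat_if_fragment above
// exists because SPIR-V requires fragment-stage inputs of integer type (SampleId, PrimitiveId,
// the subgroup mask built-ins, ...) to carry the Flat decoration; leaving it off fails
// validation and is the kind of thing mobile drivers tend to handle poorly, which is what this
// Android-focused change appears to be working around.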
AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { if (count != 1) { throw NotImplementedException("Constant buffer descriptor indexing"); @@ -548,7 +552,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryDimensions: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sampled_1d |= IsOneDimensional(type); info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; @@ -560,7 +564,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: { const auto flags{inst.Flags()}; const TextureType type{flags.type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sampled_1d |= IsOneDimensional(type); info.uses_shadow_lod |= flags.is_depth != 0; info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; @@ -569,6 +573,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageRead: { const auto flags{inst.Flags()}; info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_image_1d |= IsOneDimensional(flags.type); info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; @@ -576,6 +581,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageWrite: { const auto flags{inst.Flags()}; info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_1d |= IsOneDimensional(flags.type); info.uses_image_buffers |= flags.type == TextureType::Buffer; break; } @@ -761,9 +767,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageAtomicAnd32: case IR::Opcode::ImageAtomicOr32: case IR::Opcode::ImageAtomicXor32: - case IR::Opcode::ImageAtomicExchange32: + case IR::Opcode::ImageAtomicExchange32: { + const auto flags{inst.Flags()}; info.uses_atomic_image_u32 = true; + info.uses_image_1d |= IsOneDimensional(flags.type); break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 7ff1961172..5d0056ccbd 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,7 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" +#include "video_core/surface.h" namespace Shader::Optimization { namespace { @@ -248,11 +249,19 @@ bool IsTextureInstruction(const IR::Inst& inst) { } static inline TexturePixelFormat ReadTexturePixelFormatCached(Environment& env, const ConstBufferAddr& cbuf) { - return env.ReadTexturePixelFormat(GetTextureHandleCached(env, cbuf)); + const u32 handle = GetTextureHandleCached(env, cbuf); + if (handle == 0) { + return TexturePixelFormat::A8B8G8R8_UNORM; + } + return env.ReadTexturePixelFormat(handle); } static inline bool IsTexturePixelFormatIntegerCached(Environment& env, const ConstBufferAddr& cbuf) { - return env.IsTexturePixelFormatInteger(GetTextureHandleCached(env, cbuf)); + const u32 handle = GetTextureHandleCached(env, cbuf); + if (handle == 0) { + return false; + } + return env.IsTexturePixelFormatInteger(handle); } @@ -524,6 +533,8 @@ public: const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && 
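// NOTE (editorial comment, not taken from the change itself): the deduplication key for
// texture descriptors now also compares component_type and component_bit_size (just below),
// so descriptors that differ only in the sampled component format stay distinct and the
// SPIR-V backend can emit the image type and normalization that match each use.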
desc.is_depth == existing.is_depth && desc.has_secondary == existing.has_secondary && + desc.component_type == existing.component_type && + desc.component_bit_size == existing.component_bit_size && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.shift_left == existing.shift_left && @@ -598,6 +609,35 @@ bool IsPixelFormatSNorm(TexturePixelFormat pixel_format) { } } +TextureComponentType PixelFormatComponentType(TexturePixelFormat pixel_format, bool is_integer) { + if (!is_integer) { + return TextureComponentType::Float; + } + + switch (pixel_format) { + case TexturePixelFormat::A8B8G8R8_SINT: + case TexturePixelFormat::R8_SINT: + case TexturePixelFormat::R16G16B16A16_SINT: + case TexturePixelFormat::R32G32B32A32_SINT: + case TexturePixelFormat::R32G32_SINT: + case TexturePixelFormat::R16_SINT: + case TexturePixelFormat::R16G16_SINT: + case TexturePixelFormat::R8G8_SINT: + case TexturePixelFormat::R32_SINT: + return TextureComponentType::Sint; + default: + return TextureComponentType::Uint; + } +} + +u8 PixelFormatIntegerComponentBits(TexturePixelFormat pixel_format, bool is_integer) { + if (!is_integer) { + return 0; + } + return static_cast(VideoCore::Surface::PixelComponentSizeBitsInteger( + static_cast(pixel_format))); +} + void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) { const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -698,6 +738,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo default: break; } + const TexturePixelFormat pixel_format{ReadTexturePixelFormatCached(env, cbuf)}; + const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)}; u32 index; switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: @@ -718,7 +760,6 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; - const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, @@ -764,6 +805,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .is_depth = flags.is_depth != 0, .is_multisample = is_multisample, .has_secondary = cbuf.has_secondary, + .component_type = PixelFormatComponentType(pixel_format, is_integer), + .component_bit_size = PixelFormatIntegerComponentBits(pixel_format, is_integer), .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .shift_left = cbuf.shift_left, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ed13e68209..d1de3be7fb 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -35,6 +38,12 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 9; +enum class TextureComponentType : u32 { + Float, + Sint, + Uint, +}; + enum class TexturePixelFormat { A8B8G8R8_UNORM, A8B8G8R8_SNORM, @@ -174,7 +183,9 @@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { - bool has_secondary; + TextureComponentType 
component_type{TextureComponentType::Float}; + u8 component_bit_size{}; + bool has_secondary{}; u32 cbuf_index; u32 cbuf_offset; u32 shift_left; @@ -207,6 +218,8 @@ struct TextureDescriptor { bool is_depth; bool is_multisample; bool has_secondary; + TextureComponentType component_type{TextureComponentType::Float}; + u8 component_bit_size{}; u32 cbuf_index; u32 cbuf_offset; u32 shift_left; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 910e07a606..25f848f0cd 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -191,10 +191,19 @@ inline void PushImageDescriptors(TextureCache& texture_cache, ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; - const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && - !image_view.SupportsAnisotropy()}; - const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() - : sampler.Handle()}; + const bool needs_linear_fallback = sampler.RequiresLinearFiltering() && + !image_view.SupportsLinearFiltering(); + const bool needs_aniso_fallback = sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy(); + if (!image_view.SupportsLinearFiltering()) { + ASSERT_MSG(!sampler.RequiresLinearFiltering() || needs_linear_fallback, + "Linear filtering sampler bound to unsupported image view"); + } + // Prefer degrading to nearest sampling when the view lacks linear support. + const VkSampler vk_sampler = needs_linear_fallback + ? sampler.HandleWithoutLinearFiltering() + : (needs_aniso_fallback ? sampler.HandleWithDefaultAnisotropy() + : sampler.Handle()); guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 7327c90c12..f1089c0d13 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -2061,6 +2061,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; + supports_linear_filtering = device->IsFormatSupported( + create_info.format, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, FormatType::Optimal); const auto create = [&](TextureType tex_type, std::optional num_layers) { VkImageViewCreateInfo ci{create_info}; ci.viewType = ImageViewType(tex_type); @@ -2111,10 +2113,13 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, - buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} { + supports_linear_filtering = true; +} ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, device{&runtime.device} { + supports_linear_filtering = true; if (device->HasNullDescriptor()) { return; } @@ -2175,6 +2180,7 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type, if (image_format == 
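// NOTE (editorial comment, not taken from the change itself): in the pipeline_helper.h hunk
// above, the linear-filtering fallback is deliberately checked before the anisotropy fallback:
// a view whose format cannot be filtered linearly has to drop to the nearest-filtering sampler,
// and because anisotropic sampling itself requires linear-filter support, that path also
// discards the added anisotropy. The anisotropy-only fallback is used when linear filtering
// is fine.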
Shader::ImageFormat::Typeless) { return Handle(texture_type); } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || image_format == Shader::ImageFormat::R16_SINT}; if (!storage_views) { @@ -2244,15 +2250,23 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t } // Some games have samplers with garbage. Sanitize them here. const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const VkFilter mag_filter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); + const VkFilter min_filter = MaxwellToVK::Sampler::Filter(tsc.min_filter); + const VkSamplerMipmapMode mipmap_mode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); + const f32 min_lod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(); + const f32 max_lod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(); + requires_linear_filtering = mag_filter == VK_FILTER_LINEAR || min_filter == VK_FILTER_LINEAR || + mipmap_mode == VK_SAMPLER_MIPMAP_MODE_LINEAR || max_anisotropy > 1.0f; - const auto create_sampler = [&](const f32 anisotropy) { + const auto create_sampler = [&](VkFilter mag, VkFilter min, VkSamplerMipmapMode mip, + f32 anisotropy) { return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .magFilter = mag, + .minFilter = min, + .mipmapMode = mip, .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), @@ -2261,19 +2275,25 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t .maxAnisotropy = anisotropy, .compareEnable = tsc.depth_compare_enabled, .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .minLod = min_lod, + .maxLod = max_lod, .borderColor = arbitrary_borders ? 
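// NOTE (editorial comment, not taken from the change itself): requires_linear_filtering is
// derived from the guest TSC entry (a linear mag/min filter, a linear mipmap mode, or an
// anisotropy above 1.0), and when it is set a second nearest-only sampler is created further
// down so descriptor binding can degrade gracefully on views without linear-filter support.
// The pre-existing minLod/maxLod pair of 0.0/0.25 when the mipmap filter is None is the usual
// Vulkan idiom for clamping sampling to the base level.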
VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), .unnormalizedCoordinates = VK_FALSE, }); }; - sampler = create_sampler(max_anisotropy); + sampler = create_sampler(mag_filter, min_filter, mipmap_mode, max_anisotropy); const f32 max_anisotropy_default = static_cast(1U << tsc.max_anisotropy); if (max_anisotropy > max_anisotropy_default) { - sampler_default_anisotropy = create_sampler(max_anisotropy_default); + sampler_default_anisotropy = create_sampler(mag_filter, min_filter, mipmap_mode, + max_anisotropy_default); + } + + if (requires_linear_filtering) { + sampler_no_linear = create_sampler(VK_FILTER_NEAREST, VK_FILTER_NEAREST, + VK_SAMPLER_MIPMAP_MODE_NEAREST, 1.0f); } } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index cd11cc8fc7..4a13531096 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -238,6 +238,10 @@ public: [[nodiscard]] bool IsRescaled() const noexcept; + [[nodiscard]] bool SupportsLinearFiltering() const noexcept { + return supports_linear_filtering; + } + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { return *image_views[static_cast(texture_type)]; } @@ -278,6 +282,7 @@ private: vk::ImageView depth_view; vk::ImageView stencil_view; vk::ImageView color_view; + bool supports_linear_filtering{}; vk::Image null_image; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; @@ -296,16 +301,26 @@ public: } [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { - return *sampler_default_anisotropy; + return sampler_default_anisotropy ? *sampler_default_anisotropy : *sampler; + } + + [[nodiscard]] VkSampler HandleWithoutLinearFiltering() const noexcept { + return sampler_no_linear ? *sampler_no_linear : *sampler; } [[nodiscard]] bool HasAddedAnisotropy() const noexcept { return static_cast(sampler_default_anisotropy); } + [[nodiscard]] bool RequiresLinearFiltering() const noexcept { + return requires_linear_filtering; + } + private: vk::Sampler sampler; vk::Sampler sampler_default_anisotropy; + vk::Sampler sampler_no_linear; + bool requires_linear_filtering{}; }; class Framebuffer { From 4a6e2ad350e92653c9c9d6bcb28c807769bf0886 Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 27 Sep 2025 22:45:10 -0700 Subject: [PATCH 13/14] [vk, spir-v] Remove problematic features on Qualcomm/Turnip drivers --- .../vulkan_common/vulkan_device.cpp | 33 +++++++++++++++++-- src/video_core/vulkan_common/vulkan_device.h | 9 +++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0e0bec2ce3..fe1924f763 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -502,6 +502,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_qualcomm) { + if (extensions.shader_float_controls) { + LOG_WARNING(Render_Vulkan, + "Qualcomm drivers have broken VK_KHR_shader_float_controls"); + RemoveExtension(extensions.shader_float_controls, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + } LOG_WARNING(Render_Vulkan, "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); @@ -985,6 +991,17 @@ bool Device::GetSuitability(bool requires_swapchain) { // Set instance version. 
instance_version = properties.properties.apiVersion; + VkPhysicalDeviceDriverProperties driver_probe_props{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + }; + VkPhysicalDeviceProperties2 driver_probe{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &driver_probe_props, + }; + physical.GetProperties2(driver_probe); + const bool disable_shader_int64 = driver_probe_props.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_probe_props.driverID == VK_DRIVER_ID_MESA_TURNIP; + // Minimum of API version 1.1 is required. (This is well-supported.) ASSERT(instance_version >= VK_API_VERSION_1_1); @@ -1095,8 +1112,18 @@ bool Device::GetSuitability(bool requires_swapchain) { // Perform the feature test. physical.GetFeatures2(features2); + if (disable_shader_int64) { + features2.features.shaderInt64 = VK_FALSE; + } + // Base Vulkan 1.0 features are always valid regardless of instance version. features.features = features2.features; + if (disable_shader_int64) { + features.features.shaderInt64 = VK_FALSE; + features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE; + features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE; + LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Qualcomm/Turnip drivers"); + } // Some features are mandatory. Check those. #define CHECK_FEATURE(feature, name) \ @@ -1137,8 +1164,10 @@ bool Device::GetSuitability(bool requires_swapchain) { properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; SetNext(next, properties.subgroup_properties); - // Retrieve relevant extension properties. - if (extensions.shader_float_controls) { + // Retrieve relevant extension/core properties. + // Float controls properties are core in Vulkan 1.2; if running on 1.2+ or if the + // KHR extension is present, chain the properties struct to query capabilities. + if (instance_version >= VK_API_VERSION_1_2 || extensions.shader_float_controls) { properties.float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; SetNext(next, properties.float_controls); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index bd54144480..373d36fe6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -376,6 +376,10 @@ public: /// Returns true if shader int64 is supported. bool IsShaderInt64Supported() const { + const auto driver = GetDriverID(); + if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver == VK_DRIVER_ID_MESA_TURNIP) { + return false; + } return features.features.shaderInt64; } @@ -585,6 +589,11 @@ public: /// Returns true if the device supports VK_KHR_shader_atomic_int64. 
bool IsExtShaderAtomicInt64Supported() const { + const auto driver = GetDriverID(); + if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver == VK_DRIVER_ID_MESA_TURNIP) { + return false; + } return extensions.shader_atomic_int64; } From 854d6375e74daeaf9468ec20db9b85568072c3fd Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sun, 28 Sep 2025 13:41:24 -0700 Subject: [PATCH 14/14] [SPIR-V] Enable INT64 emulation for Qualcomm drivers --- .../backend/spirv/emit_spirv_atomic.cpp | 249 +++++++++++++++++- .../frontend/maxwell/translate_program.cpp | 2 +- src/shader_recompiler/host_translate_info.h | 1 + src/shader_recompiler/profile.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 6 +- .../vulkan_common/vulkan_device.cpp | 17 +- src/video_core/vulkan_common/vulkan_device.h | 12 +- 7 files changed, 277 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 0ce73f289b..26021f5f59 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -2,14 +2,245 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" namespace Shader::Backend::SPIRV { namespace { -Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0); +std::pair AtomicArgs(EmitContext& ctx); + +enum class PairAtomicOp { + Add, + SMin, + UMin, + SMax, + UMax, + And, + Or, + Xor, + Exchange, +}; + +struct PairComponents { + Id lo; + Id hi; +}; + +PairComponents ComputePairComponents(EmitContext& ctx, PairAtomicOp op, Id current_lo, Id current_hi, + Id value_lo, Id value_hi) { + switch (op) { + case PairAtomicOp::Add: { + const Id sum_lo{ctx.OpIAdd(ctx.U32[1], current_lo, value_lo)}; + const Id carry_pred{ctx.OpULessThan(ctx.U1, sum_lo, current_lo)}; + const Id carry{ctx.OpSelect(ctx.U32[1], carry_pred, ctx.Const(1u), ctx.u32_zero_value)}; + const Id sum_hi_base{ctx.OpIAdd(ctx.U32[1], current_hi, value_hi)}; + const Id sum_hi{ctx.OpIAdd(ctx.U32[1], sum_hi_base, carry)}; + return {sum_lo, sum_hi}; + } + case PairAtomicOp::SMin: { + const Id current_hi_signed{ctx.OpBitcast(ctx.S32[1], current_hi)}; + const Id value_hi_signed{ctx.OpBitcast(ctx.S32[1], value_hi)}; + const Id hi_less{ctx.OpSLessThan(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id lo_less{ctx.OpULessThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_less, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_less, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::UMin: { + const Id hi_less{ctx.OpULessThan(ctx.U1, current_hi, value_hi)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi, value_hi)}; + const Id lo_less{ctx.OpULessThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_less, + 
ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_less, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::SMax: { + const Id current_hi_signed{ctx.OpBitcast(ctx.S32[1], current_hi)}; + const Id value_hi_signed{ctx.OpBitcast(ctx.S32[1], value_hi)}; + const Id hi_greater{ctx.OpSGreaterThan(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi_signed, value_hi_signed)}; + const Id lo_greater{ctx.OpUGreaterThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_greater, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_greater, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::UMax: { + const Id hi_greater{ctx.OpUGreaterThan(ctx.U1, current_hi, value_hi)}; + const Id hi_equal{ctx.OpIEqual(ctx.U1, current_hi, value_hi)}; + const Id lo_greater{ctx.OpUGreaterThan(ctx.U1, current_lo, value_lo)}; + const Id lo_equal{ctx.OpIEqual(ctx.U1, current_lo, value_lo)}; + const Id select_current{ctx.OpLogicalOr(ctx.U1, hi_greater, + ctx.OpLogicalAnd(ctx.U1, hi_equal, + ctx.OpLogicalOr(ctx.U1, lo_greater, lo_equal)))}; + const Id new_lo{ctx.OpSelect(ctx.U32[1], select_current, current_lo, value_lo)}; + const Id new_hi{ctx.OpSelect(ctx.U32[1], select_current, current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::And: { + const Id new_lo{ctx.OpBitwiseAnd(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseAnd(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Or: { + const Id new_lo{ctx.OpBitwiseOr(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseOr(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Xor: { + const Id new_lo{ctx.OpBitwiseXor(ctx.U32[1], current_lo, value_lo)}; + const Id new_hi{ctx.OpBitwiseXor(ctx.U32[1], current_hi, value_hi)}; + return {new_lo, new_hi}; + } + case PairAtomicOp::Exchange: + return {value_lo, value_hi}; + } + ASSERT_MSG(false, "Unhandled pair atomic operation"); + return {current_lo, current_hi}; +} + +PairAtomicOp GetPairAtomicOp(Id (Sirit::Module::*func)(Id, Id, Id)) { + if (func == &Sirit::Module::OpIAdd) { + return PairAtomicOp::Add; + } + if (func == &Sirit::Module::OpSMin) { + return PairAtomicOp::SMin; + } + if (func == &Sirit::Module::OpUMin) { + return PairAtomicOp::UMin; + } + if (func == &Sirit::Module::OpSMax) { + return PairAtomicOp::SMax; + } + if (func == &Sirit::Module::OpUMax) { + return PairAtomicOp::UMax; + } + if (func == &Sirit::Module::OpBitwiseAnd) { + return PairAtomicOp::And; + } + if (func == &Sirit::Module::OpBitwiseOr) { + return PairAtomicOp::Or; + } + if (func == &Sirit::Module::OpBitwiseXor) { + return PairAtomicOp::Xor; + } + ASSERT_MSG(false, "Unsupported pair atomic opcode"); + return PairAtomicOp::Exchange; +} + +Id EmulateStorageAtomicPair(EmitContext& ctx, PairAtomicOp op, Id pointer, Id value_pair) { + const auto [scope, semantics]{AtomicArgs(ctx)}; + const Id zero{ctx.u32_zero_value}; + const Id one{ctx.Const(1u)}; + const Id low_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, 
pointer, zero)}; + const Id high_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, pointer, one)}; + const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)}; + const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)}; + const Id loop_header{ctx.OpLabel()}; + const Id loop_body{ctx.OpLabel()}; + const Id loop_continue{ctx.OpLabel()}; + const Id loop_merge{ctx.OpLabel()}; + const Id high_block{ctx.OpLabel()}; + const Id revert_block{ctx.OpLabel()}; + + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone); + ctx.OpBranch(loop_body); + + ctx.AddLabel(loop_body); + const Id current_pair{ctx.OpLoad(ctx.U32[2], pointer)}; + const Id expected_lo{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 0U)}; + const Id expected_hi{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 1U)}; + const PairComponents new_pair{ComputePairComponents(ctx, op, expected_lo, expected_hi, value_lo, value_hi)}; + const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, + new_pair.lo, expected_lo)}; + const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)}; + ctx.OpSelectionMerge(loop_continue, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(low_success, high_block, loop_continue); + + ctx.AddLabel(high_block); + const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics, semantics, + new_pair.hi, expected_hi)}; + const Id high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)}; + ctx.OpBranchConditional(high_success, loop_merge, revert_block); + + ctx.AddLabel(revert_block); + ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo, + new_pair.lo); + ctx.OpBranch(loop_continue); + + ctx.AddLabel(loop_continue); + ctx.OpBranch(loop_header); + + ctx.AddLabel(loop_merge); + return current_pair; +} + +Id EmulateSharedAtomicExchange(EmitContext& ctx, Id offset, Id value_pair) { + const Id scope{ctx.Const(static_cast(spv::Scope::Workgroup))}; + const Id semantics{ctx.u32_zero_value}; + const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)}; + const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)}; + const Id low_pointer{SharedPointer(ctx, offset, 0)}; + const Id high_pointer{SharedPointer(ctx, offset, 1)}; + const Id loop_header{ctx.OpLabel()}; + const Id loop_body{ctx.OpLabel()}; + const Id loop_continue{ctx.OpLabel()}; + const Id loop_merge{ctx.OpLabel()}; + const Id high_block{ctx.OpLabel()}; + const Id revert_block{ctx.OpLabel()}; + + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone); + ctx.OpBranch(loop_body); + + ctx.AddLabel(loop_body); + const Id expected_lo{ctx.OpLoad(ctx.U32[1], low_pointer)}; + const Id expected_hi{ctx.OpLoad(ctx.U32[1], high_pointer)}; + const Id current_pair{ctx.OpCompositeConstruct(ctx.U32[2], expected_lo, expected_hi)}; + const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, + value_lo, expected_lo)}; + const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)}; + ctx.OpSelectionMerge(loop_continue, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(low_success, high_block, loop_continue); + + ctx.AddLabel(high_block); + const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics, semantics, + value_hi, expected_hi)}; + const Id 
high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)}; + ctx.OpBranchConditional(high_success, loop_merge, revert_block); + + ctx.AddLabel(revert_block); + ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo, value_lo); + ctx.OpBranch(loop_continue); + + ctx.AddLabel(loop_continue); + ctx.OpBranch(loop_header); + + ctx.AddLabel(loop_merge); + return current_pair; +} + +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset) { const Id shift_id{ctx.Const(2U)}; Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; if (index_offset > 0) { @@ -96,6 +327,12 @@ Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Valu return ctx.ConstantNull(ctx.U32[2]); } + if (ctx.profile.emulate_int64_with_uint2) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + return EmulateStorageAtomicPair(ctx, GetPairAtomicOp(non_atomic_func), pointer, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; @@ -175,6 +412,10 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { } Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.emulate_int64_with_uint2) { + return EmulateSharedAtomicExchange(ctx, offset, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer_1{SharedPointer(ctx, offset, 0)}; const Id pointer_2{SharedPointer(ctx, offset, 1)}; @@ -351,6 +592,12 @@ Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { + if (ctx.profile.emulate_int64_with_uint2) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + return EmulateStorageAtomicPair(ctx, PairAtomicOp::Exchange, pointer, value); + } + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 97b9b0cf07..1c57fc089a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -289,7 +289,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool= VK_API_VERSION_1_1); @@ -1122,7 +1128,12 @@ bool Device::GetSuitability(bool requires_swapchain) { features.features.shaderInt64 = VK_FALSE; features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE; features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE; - LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Qualcomm/Turnip drivers"); + if (shader_int64_emulation) { + LOG_WARNING(Render_Vulkan, + "Using shaderInt64 emulation on Qualcomm proprietary drivers"); + } else { + LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Turnip drivers"); + } } // Some features are mandatory. Check those. 
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 373d36fe6b..25a74437b7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -374,15 +374,18 @@ public: return features.features.shaderStorageImageReadWithoutFormat; } - /// Returns true if shader int64 is supported. + /// Returns true if shader int64 is supported (natively or via emulation). bool IsShaderInt64Supported() const { - const auto driver = GetDriverID(); - if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver == VK_DRIVER_ID_MESA_TURNIP) { - return false; + if (shader_int64_emulation) { + return true; } return features.features.shaderInt64; } + /// Returns true when shader int64 operations must be emulated with 32-bit pairs. + bool UsesShaderInt64Emulation() const { + return shader_int64_emulation; + } /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { return features.features.shaderInt16; @@ -849,6 +852,7 @@ private: bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool shader_int64_emulation{}; ///< Emulates shader Int64 using 32-bit pairs. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
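
Editor's note on the INT64 emulation in patch 14: the pair arithmetic that ComputePairComponents emits for PairAtomicOp::Add is easiest to follow as plain C++. The sketch below is explanatory only and is not part of the patch; names are chosen for illustration.

    #include <cstdint>
    #include <utility>

    // Explanatory sketch: 64-bit addition over a (lo, hi) pair of 32-bit words,
    // mirroring the SPIR-V sequence emitted for PairAtomicOp::Add. A carry out of
    // the low word is detected by unsigned wraparound (sum_lo < current_lo) and
    // folded into the high word.
    static std::pair<uint32_t, uint32_t> AddPair(uint32_t current_lo, uint32_t current_hi,
                                                 uint32_t value_lo, uint32_t value_hi) {
        const uint32_t sum_lo = current_lo + value_lo;        // wraps modulo 2^32
        const uint32_t carry = sum_lo < current_lo ? 1u : 0u; // wraparound implies a carry
        const uint32_t sum_hi = current_hi + value_hi + carry;
        return {sum_lo, sum_hi};
    }

In the generated shader code, EmulateStorageAtomicPair wraps this computation in a retry loop of 32-bit OpAtomicCompareExchange operations on the low and high words, reverting the low word if the high-word exchange fails.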