[SPIR-V] Enable INT64 emulation for Qualcomm drivers
Some checks failed
eden-license / license-header (pull_request) Failing after 22s
Some checks failed
eden-license / license-header (pull_request) Failing after 22s
This commit is contained in:
parent
4a6e2ad350
commit
854d6375e7
7 changed files with 277 additions and 11 deletions
|
@ -2,14 +2,245 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <bit>
|
#include <bit>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
|
Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0);
|
||||||
|
std::pair<Id, Id> AtomicArgs(EmitContext& ctx);
|
||||||
|
|
||||||
|
/// Read-modify-write operations that can be emulated on a 64-bit value stored as two 32-bit words.
enum class PairAtomicOp {
    Add,      ///< Integer addition with carry from the low word into the high word
    SMin,     ///< Signed minimum
    UMin,     ///< Unsigned minimum
    SMax,     ///< Signed maximum
    UMax,     ///< Unsigned maximum
    And,      ///< Bitwise AND
    Or,       ///< Bitwise OR
    Xor,      ///< Bitwise XOR
    Exchange, ///< Unconditional replacement with the operand
};
|
||||||
|
|
||||||
|
struct PairComponents {
|
||||||
|
Id lo;
|
||||||
|
Id hi;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Emits the instructions that apply `op` to a 64-bit quantity represented as two 32-bit words.
///
/// @param ctx        Emit context the instructions are appended to.
/// @param op         Operation to apply.
/// @param current_lo Low word of the value currently held in memory.
/// @param current_hi High word of the value currently held in memory.
/// @param value_lo   Low word of the operand.
/// @param value_hi   High word of the operand.
/// @return Ids of the low and high words of the result.
PairComponents ComputePairComponents(EmitContext& ctx, PairAtomicOp op, Id current_lo, Id current_hi,
                                     Id value_lo, Id value_hi) {
    const Id word_type{ctx.U32[1]};
    const Id bool_type{ctx.U1};

    // Shared tail of the min/max cases. The caller has already emitted the strict comparison of
    // the high words (`hi_wins`), their equality (`hi_ties`) and the strict unsigned comparison of
    // the low words (`lo_wins`). The current pair is kept when the high word decides in its favour,
    // or when the high words tie and the low word wins or ties as well.
    const auto keep_or_replace = [&](Id hi_wins, Id hi_ties, Id lo_wins) -> PairComponents {
        const Id lo_ties{ctx.OpIEqual(bool_type, current_lo, value_lo)};
        const Id lo_wins_or_ties{ctx.OpLogicalOr(bool_type, lo_wins, lo_ties)};
        const Id tie_break{ctx.OpLogicalAnd(bool_type, hi_ties, lo_wins_or_ties)};
        const Id keep_current{ctx.OpLogicalOr(bool_type, hi_wins, tie_break)};
        const Id result_lo{ctx.OpSelect(word_type, keep_current, current_lo, value_lo)};
        const Id result_hi{ctx.OpSelect(word_type, keep_current, current_hi, value_hi)};
        return {result_lo, result_hi};
    };

    switch (op) {
    case PairAtomicOp::Add: {
        // The low-word addition wrapped around iff the sum is smaller than an addend.
        const Id low_sum{ctx.OpIAdd(word_type, current_lo, value_lo)};
        const Id wrapped{ctx.OpULessThan(bool_type, low_sum, current_lo)};
        const Id one{ctx.Const(1u)};
        const Id carry_in{ctx.OpSelect(word_type, wrapped, one, ctx.u32_zero_value)};
        const Id high_partial{ctx.OpIAdd(word_type, current_hi, value_hi)};
        const Id high_sum{ctx.OpIAdd(word_type, high_partial, carry_in)};
        return {low_sum, high_sum};
    }
    case PairAtomicOp::SMin: {
        // Only the high word carries the sign; the low word always compares unsigned.
        const Id signed_current{ctx.OpBitcast(ctx.S32[1], current_hi)};
        const Id signed_value{ctx.OpBitcast(ctx.S32[1], value_hi)};
        const Id hi_wins{ctx.OpSLessThan(bool_type, signed_current, signed_value)};
        const Id hi_ties{ctx.OpIEqual(bool_type, signed_current, signed_value)};
        const Id lo_wins{ctx.OpULessThan(bool_type, current_lo, value_lo)};
        return keep_or_replace(hi_wins, hi_ties, lo_wins);
    }
    case PairAtomicOp::UMin: {
        const Id hi_wins{ctx.OpULessThan(bool_type, current_hi, value_hi)};
        const Id hi_ties{ctx.OpIEqual(bool_type, current_hi, value_hi)};
        const Id lo_wins{ctx.OpULessThan(bool_type, current_lo, value_lo)};
        return keep_or_replace(hi_wins, hi_ties, lo_wins);
    }
    case PairAtomicOp::SMax: {
        // Only the high word carries the sign; the low word always compares unsigned.
        const Id signed_current{ctx.OpBitcast(ctx.S32[1], current_hi)};
        const Id signed_value{ctx.OpBitcast(ctx.S32[1], value_hi)};
        const Id hi_wins{ctx.OpSGreaterThan(bool_type, signed_current, signed_value)};
        const Id hi_ties{ctx.OpIEqual(bool_type, signed_current, signed_value)};
        const Id lo_wins{ctx.OpUGreaterThan(bool_type, current_lo, value_lo)};
        return keep_or_replace(hi_wins, hi_ties, lo_wins);
    }
    case PairAtomicOp::UMax: {
        const Id hi_wins{ctx.OpUGreaterThan(bool_type, current_hi, value_hi)};
        const Id hi_ties{ctx.OpIEqual(bool_type, current_hi, value_hi)};
        const Id lo_wins{ctx.OpUGreaterThan(bool_type, current_lo, value_lo)};
        return keep_or_replace(hi_wins, hi_ties, lo_wins);
    }
    case PairAtomicOp::And: {
        const Id result_lo{ctx.OpBitwiseAnd(word_type, current_lo, value_lo)};
        const Id result_hi{ctx.OpBitwiseAnd(word_type, current_hi, value_hi)};
        return {result_lo, result_hi};
    }
    case PairAtomicOp::Or: {
        const Id result_lo{ctx.OpBitwiseOr(word_type, current_lo, value_lo)};
        const Id result_hi{ctx.OpBitwiseOr(word_type, current_hi, value_hi)};
        return {result_lo, result_hi};
    }
    case PairAtomicOp::Xor: {
        const Id result_lo{ctx.OpBitwiseXor(word_type, current_lo, value_lo)};
        const Id result_hi{ctx.OpBitwiseXor(word_type, current_hi, value_hi)};
        return {result_lo, result_hi};
    }
    case PairAtomicOp::Exchange:
        // The operand simply replaces whatever was stored.
        return {value_lo, value_hi};
    }
    // Only reachable with an enumerator value outside the switch; leave the value untouched.
    ASSERT_MSG(false, "Unhandled pair atomic operation");
    return {current_lo, current_hi};
}
|
||||||
|
|
||||||
|
/// Translates the Sirit emitter used by the non-atomic fallback into the matching pair operation.
///
/// @param func Pointer to the Sirit::Module member that emits the non-atomic binary operation.
/// @return The corresponding PairAtomicOp. An unrecognised emitter trips an assertion and yields
///         PairAtomicOp::Exchange.
PairAtomicOp GetPairAtomicOp(Id (Sirit::Module::*func)(Id, Id, Id)) {
    using BinaryEmitter = Id (Sirit::Module::*)(Id, Id, Id);
    struct Mapping {
        BinaryEmitter emitter;
        PairAtomicOp op;
    };
    const Mapping mappings[]{
        {&Sirit::Module::OpIAdd, PairAtomicOp::Add},
        {&Sirit::Module::OpSMin, PairAtomicOp::SMin},
        {&Sirit::Module::OpUMin, PairAtomicOp::UMin},
        {&Sirit::Module::OpSMax, PairAtomicOp::SMax},
        {&Sirit::Module::OpUMax, PairAtomicOp::UMax},
        {&Sirit::Module::OpBitwiseAnd, PairAtomicOp::And},
        {&Sirit::Module::OpBitwiseOr, PairAtomicOp::Or},
        {&Sirit::Module::OpBitwiseXor, PairAtomicOp::Xor},
    };
    for (const Mapping& mapping : mappings) {
        if (func == mapping.emitter) {
            return mapping.op;
        }
    }
    ASSERT_MSG(false, "Unsupported pair atomic opcode");
    return PairAtomicOp::Exchange;
}
|
||||||
|
|
||||||
|
/// Emulates a 64-bit storage atomic on a pair of 32-bit words with a compare-exchange retry loop.
///
/// Each iteration loads the pair, computes the new pair, then compare-exchanges the low word and,
/// if that succeeded, the high word. When the high word exchange fails the low word is
/// compare-exchanged back to its previous value and the iteration is retried.
///
/// NOTE: the two words are updated by two independent 32-bit compare-exchanges, so this is not a
/// true 64-bit atomic: another invocation can observe the low word updated before the high word,
/// and the result of the revert exchange is not checked.
///
/// @param ctx        Emit context the instructions are appended to.
/// @param op         Read-modify-write operation applied to the pair.
/// @param pointer    Pointer to the uvec2 element that is loaded and updated.
/// @param value_pair uvec2 operand; component 0 is the low word, component 1 the high word.
/// @return The uvec2 loaded at the start of the iteration whose exchanges both succeeded.
Id EmulateStorageAtomicPair(EmitContext& ctx, PairAtomicOp op, Id pointer, Id value_pair) {
    const auto [scope, semantics]{AtomicArgs(ctx)};
    const Id zero{ctx.u32_zero_value};
    const Id one{ctx.Const(1u)};
    const Id low_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, pointer, zero)};
    const Id high_pointer{ctx.OpAccessChain(ctx.storage_types.U32.element, pointer, one)};
    const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)};
    const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)};
    const Id loop_header{ctx.OpLabel()};
    const Id loop_body{ctx.OpLabel()};
    const Id loop_continue{ctx.OpLabel()};
    const Id loop_merge{ctx.OpLabel()};
    const Id high_block{ctx.OpLabel()};
    const Id revert_block{ctx.OpLabel()};
    // Dedicated merge block for the selection below. The loop's continue target must not double
    // as a selection merge block: the selection header lives in the loop construct while the
    // continue target belongs to the continue construct, which structured control flow forbids.
    const Id retry_block{ctx.OpLabel()};

    ctx.OpBranch(loop_header);
    ctx.AddLabel(loop_header);
    ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone);
    ctx.OpBranch(loop_body);

    ctx.AddLabel(loop_body);
    const Id current_pair{ctx.OpLoad(ctx.U32[2], pointer)};
    const Id expected_lo{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 0U)};
    const Id expected_hi{ctx.OpCompositeExtract(ctx.U32[1], current_pair, 1U)};
    const PairComponents new_pair{
        ComputePairComponents(ctx, op, expected_lo, expected_hi, value_lo, value_hi)};
    const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics,
                                                    semantics, new_pair.lo, expected_lo)};
    const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)};
    ctx.OpSelectionMerge(retry_block, spv::SelectionControlMask::MaskNone);
    ctx.OpBranchConditional(low_success, high_block, retry_block);

    ctx.AddLabel(high_block);
    const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics,
                                                     semantics, new_pair.hi, expected_hi)};
    const Id high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)};
    // Both halves written: break out of the loop. Otherwise undo the low word first.
    ctx.OpBranchConditional(high_success, loop_merge, revert_block);

    ctx.AddLabel(revert_block);
    // Restore the low word, but only if it still holds the value written above.
    ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo,
                                new_pair.lo);
    ctx.OpBranch(retry_block);

    ctx.AddLabel(retry_block);
    ctx.OpBranch(loop_continue);

    ctx.AddLabel(loop_continue);
    ctx.OpBranch(loop_header);

    ctx.AddLabel(loop_merge);
    return current_pair;
}
|
||||||
|
|
||||||
|
/// Emulates a 64-bit shared-memory atomic exchange on a pair of 32-bit words with a
/// compare-exchange retry loop.
///
/// Each iteration loads both words, compare-exchanges the low word and, if that succeeded, the
/// high word. When the high word exchange fails the low word is compare-exchanged back to its
/// previous value and the iteration is retried.
///
/// NOTE: the two words are updated by two independent 32-bit compare-exchanges, so this is not a
/// true 64-bit atomic: another invocation can observe the low word updated before the high word,
/// and the result of the revert exchange is not checked.
///
/// @param ctx        Emit context the instructions are appended to.
/// @param offset     Offset handed to SharedPointer to locate the low word; the high word is the
///                   next 32-bit element.
/// @param value_pair uvec2 to store; component 0 is the low word, component 1 the high word.
/// @return A uvec2 built from the words loaded in the iteration whose exchanges both succeeded.
Id EmulateSharedAtomicExchange(EmitContext& ctx, Id offset, Id value_pair) {
    const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Workgroup))};
    const Id semantics{ctx.u32_zero_value};
    const Id value_lo{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 0U)};
    const Id value_hi{ctx.OpCompositeExtract(ctx.U32[1], value_pair, 1U)};
    const Id low_pointer{SharedPointer(ctx, offset, 0)};
    const Id high_pointer{SharedPointer(ctx, offset, 1)};
    const Id loop_header{ctx.OpLabel()};
    const Id loop_body{ctx.OpLabel()};
    const Id loop_continue{ctx.OpLabel()};
    const Id loop_merge{ctx.OpLabel()};
    const Id high_block{ctx.OpLabel()};
    const Id revert_block{ctx.OpLabel()};
    // Dedicated merge block for the selection below. The loop's continue target must not double
    // as a selection merge block: the selection header lives in the loop construct while the
    // continue target belongs to the continue construct, which structured control flow forbids.
    const Id retry_block{ctx.OpLabel()};

    ctx.OpBranch(loop_header);
    ctx.AddLabel(loop_header);
    ctx.OpLoopMerge(loop_merge, loop_continue, spv::LoopControlMask::MaskNone);
    ctx.OpBranch(loop_body);

    ctx.AddLabel(loop_body);
    const Id expected_lo{ctx.OpLoad(ctx.U32[1], low_pointer)};
    const Id expected_hi{ctx.OpLoad(ctx.U32[1], high_pointer)};
    const Id current_pair{ctx.OpCompositeConstruct(ctx.U32[2], expected_lo, expected_hi)};
    const Id low_result{ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics,
                                                    semantics, value_lo, expected_lo)};
    const Id low_success{ctx.OpIEqual(ctx.U1, low_result, expected_lo)};
    ctx.OpSelectionMerge(retry_block, spv::SelectionControlMask::MaskNone);
    ctx.OpBranchConditional(low_success, high_block, retry_block);

    ctx.AddLabel(high_block);
    const Id high_result{ctx.OpAtomicCompareExchange(ctx.U32[1], high_pointer, scope, semantics,
                                                     semantics, value_hi, expected_hi)};
    const Id high_success{ctx.OpIEqual(ctx.U1, high_result, expected_hi)};
    // Both halves written: break out of the loop. Otherwise undo the low word first.
    ctx.OpBranchConditional(high_success, loop_merge, revert_block);

    ctx.AddLabel(revert_block);
    // Restore the low word, but only if it still holds the value written above.
    ctx.OpAtomicCompareExchange(ctx.U32[1], low_pointer, scope, semantics, semantics, expected_lo,
                                value_lo);
    ctx.OpBranch(retry_block);

    ctx.AddLabel(retry_block);
    ctx.OpBranch(loop_continue);

    ctx.AddLabel(loop_continue);
    ctx.OpBranch(loop_header);

    ctx.AddLabel(loop_merge);
    return current_pair;
}
|
||||||
|
|
||||||
|
Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset) {
|
||||||
const Id shift_id{ctx.Const(2U)};
|
const Id shift_id{ctx.Const(2U)};
|
||||||
Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||||
if (index_offset > 0) {
|
if (index_offset > 0) {
|
||||||
|
@ -96,6 +327,12 @@ Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Valu
|
||||||
return ctx.ConstantNull(ctx.U32[2]);
|
return ctx.ConstantNull(ctx.U32[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctx.profile.emulate_int64_with_uint2) {
|
||||||
|
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
||||||
|
binding, offset, sizeof(u32[2]))};
|
||||||
|
return EmulateStorageAtomicPair(ctx, GetPairAtomicOp(non_atomic_func), pointer, value);
|
||||||
|
}
|
||||||
|
|
||||||
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
||||||
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
||||||
binding, offset, sizeof(u32[2]))};
|
binding, offset, sizeof(u32[2]))};
|
||||||
|
@ -175,6 +412,10 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
|
Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
|
||||||
|
if (ctx.profile.emulate_int64_with_uint2) {
|
||||||
|
return EmulateSharedAtomicExchange(ctx, offset, value);
|
||||||
|
}
|
||||||
|
|
||||||
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
||||||
const Id pointer_1{SharedPointer(ctx, offset, 0)};
|
const Id pointer_1{SharedPointer(ctx, offset, 0)};
|
||||||
const Id pointer_2{SharedPointer(ctx, offset, 1)};
|
const Id pointer_2{SharedPointer(ctx, offset, 1)};
|
||||||
|
@ -351,6 +592,12 @@ Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR
|
||||||
|
|
||||||
Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
|
Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
|
||||||
const IR::Value& offset, Id value) {
|
const IR::Value& offset, Id value) {
|
||||||
|
if (ctx.profile.emulate_int64_with_uint2) {
|
||||||
|
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
||||||
|
binding, offset, sizeof(u32[2]))};
|
||||||
|
return EmulateStorageAtomicPair(ctx, PairAtomicOp::Exchange, pointer, value);
|
||||||
|
}
|
||||||
|
|
||||||
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
|
||||||
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
|
||||||
binding, offset, sizeof(u32[2]))};
|
binding, offset, sizeof(u32[2]))};
|
||||||
|
|
|
@ -289,7 +289,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
if (!host_info.support_float16) {
|
if (!host_info.support_float16) {
|
||||||
Optimization::LowerFp16ToFp32(program);
|
Optimization::LowerFp16ToFp32(program);
|
||||||
}
|
}
|
||||||
if (!host_info.support_int64) {
|
if (!host_info.support_int64 || host_info.emulate_int64_with_uint2) {
|
||||||
Optimization::LowerInt64ToInt32(program);
|
Optimization::LowerInt64ToInt32(program);
|
||||||
}
|
}
|
||||||
if (!host_info.support_conditional_barrier) {
|
if (!host_info.support_conditional_barrier) {
|
||||||
|
|
|
@ -13,6 +13,7 @@ struct HostTranslateInfo {
|
||||||
bool support_float64{}; ///< True when the device supports 64-bit floats
|
bool support_float64{}; ///< True when the device supports 64-bit floats
|
||||||
bool support_float16{}; ///< True when the device supports 16-bit floats
|
bool support_float16{}; ///< True when the device supports 16-bit floats
|
||||||
bool support_int64{}; ///< True when the device supports 64-bit integers
|
bool support_int64{}; ///< True when the device supports 64-bit integers
|
||||||
|
bool emulate_int64_with_uint2{}; ///< True when 64-bit ops must be lowered to 32-bit pairs
|
||||||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||||
|
|
|
@ -14,6 +14,7 @@ struct Profile {
|
||||||
bool support_int8{};
|
bool support_int8{};
|
||||||
bool support_int16{};
|
bool support_int16{};
|
||||||
bool support_int64{};
|
bool support_int64{};
|
||||||
|
bool emulate_int64_with_uint2{};
|
||||||
bool support_vertex_instance_id{};
|
bool support_vertex_instance_id{};
|
||||||
bool support_float_controls{};
|
bool support_float_controls{};
|
||||||
bool support_separate_denorm_behavior{};
|
bool support_separate_denorm_behavior{};
|
||||||
|
|
|
@ -324,7 +324,8 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
.support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
|
.support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
|
||||||
.support_int8 = device.IsInt8Supported(),
|
.support_int8 = device.IsInt8Supported(),
|
||||||
.support_int16 = device.IsShaderInt16Supported(),
|
.support_int16 = device.IsShaderInt16Supported(),
|
||||||
.support_int64 = device.IsShaderInt64Supported(),
|
.support_int64 = device.IsShaderInt64Supported() && !device.UsesShaderInt64Emulation(),
|
||||||
|
.emulate_int64_with_uint2 = device.UsesShaderInt64Emulation(),
|
||||||
.support_vertex_instance_id = false,
|
.support_vertex_instance_id = false,
|
||||||
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
|
.support_float_controls = device.IsKhrShaderFloatControlsSupported(),
|
||||||
.support_separate_denorm_behavior =
|
.support_separate_denorm_behavior =
|
||||||
|
@ -384,7 +385,8 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
host_info = Shader::HostTranslateInfo{
|
host_info = Shader::HostTranslateInfo{
|
||||||
.support_float64 = device.IsFloat64Supported(),
|
.support_float64 = device.IsFloat64Supported(),
|
||||||
.support_float16 = device.IsFloat16Supported(),
|
.support_float16 = device.IsFloat16Supported(),
|
||||||
.support_int64 = device.IsShaderInt64Supported(),
|
.support_int64 = device.IsShaderInt64Supported() && !device.UsesShaderInt64Emulation(),
|
||||||
|
.emulate_int64_with_uint2 = device.UsesShaderInt64Emulation(),
|
||||||
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
|
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
|
||||||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|
||||||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
||||||
|
|
|
@ -980,6 +980,7 @@ bool Device::HasTimelineSemaphore() const {
|
||||||
bool Device::GetSuitability(bool requires_swapchain) {
|
bool Device::GetSuitability(bool requires_swapchain) {
|
||||||
// Assume we will be suitable.
|
// Assume we will be suitable.
|
||||||
bool suitable = true;
|
bool suitable = true;
|
||||||
|
shader_int64_emulation = false;
|
||||||
|
|
||||||
// Configure properties.
|
// Configure properties.
|
||||||
VkPhysicalDeviceVulkan12Features features_1_2{};
|
VkPhysicalDeviceVulkan12Features features_1_2{};
|
||||||
|
@ -999,8 +1000,13 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||||
.pNext = &driver_probe_props,
|
.pNext = &driver_probe_props,
|
||||||
};
|
};
|
||||||
physical.GetProperties2(driver_probe);
|
physical.GetProperties2(driver_probe);
|
||||||
const bool disable_shader_int64 = driver_probe_props.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
const VkDriverId driver_id = driver_probe_props.driverID;
|
||||||
driver_probe_props.driverID == VK_DRIVER_ID_MESA_TURNIP;
|
const bool is_qualcomm_proprietary = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||||
|
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
|
||||||
|
|
||||||
|
shader_int64_emulation = is_qualcomm_proprietary;
|
||||||
|
|
||||||
|
const bool disable_shader_int64 = shader_int64_emulation || is_turnip;
|
||||||
|
|
||||||
// Minimum of API version 1.1 is required. (This is well-supported.)
|
// Minimum of API version 1.1 is required. (This is well-supported.)
|
||||||
ASSERT(instance_version >= VK_API_VERSION_1_1);
|
ASSERT(instance_version >= VK_API_VERSION_1_1);
|
||||||
|
@ -1122,7 +1128,12 @@ bool Device::GetSuitability(bool requires_swapchain) {
|
||||||
features.features.shaderInt64 = VK_FALSE;
|
features.features.shaderInt64 = VK_FALSE;
|
||||||
features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE;
|
features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE;
|
||||||
features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE;
|
features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE;
|
||||||
LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Qualcomm/Turnip drivers");
|
if (shader_int64_emulation) {
|
||||||
|
LOG_WARNING(Render_Vulkan,
|
||||||
|
"Using shaderInt64 emulation on Qualcomm proprietary drivers");
|
||||||
|
} else {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Disabling shaderInt64 support on Turnip drivers");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some features are mandatory. Check those.
|
// Some features are mandatory. Check those.
|
||||||
|
|
|
@ -374,15 +374,18 @@ public:
|
||||||
return features.features.shaderStorageImageReadWithoutFormat;
|
return features.features.shaderStorageImageReadWithoutFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if shader int64 is supported.
|
/// Returns true if shader int64 is supported (natively or via emulation).
|
||||||
bool IsShaderInt64Supported() const {
|
bool IsShaderInt64Supported() const {
|
||||||
const auto driver = GetDriverID();
|
if (shader_int64_emulation) {
|
||||||
if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver == VK_DRIVER_ID_MESA_TURNIP) {
|
return true;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return features.features.shaderInt64;
|
return features.features.shaderInt64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when shader int64 operations must be emulated with 32-bit pairs.
|
||||||
|
bool UsesShaderInt64Emulation() const {
|
||||||
|
return shader_int64_emulation;
|
||||||
|
}
|
||||||
/// Returns true if shader int16 is supported.
|
/// Returns true if shader int16 is supported.
|
||||||
bool IsShaderInt16Supported() const {
|
bool IsShaderInt16Supported() const {
|
||||||
return features.features.shaderInt16;
|
return features.features.shaderInt16;
|
||||||
|
@ -849,6 +852,7 @@ private:
|
||||||
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
|
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
|
||||||
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
|
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
|
||||||
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
|
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
|
||||||
|
bool shader_int64_emulation{}; ///< Emulates shader Int64 using 32-bit pairs.
|
||||||
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
|
||||||
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
|
||||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue