Merge pull request #12412 from ameerj/gl-query-prims

OpenGL: Add GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries
This commit is contained in:
liamwhite 2023-12-22 11:42:05 -05:00 committed by GitHub
commit 91290b9be4
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 91 additions and 45 deletions

View file

@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() {
} }
void Maxwell3D::ProcessCounterReset() { void Maxwell3D::ProcessCounterReset() {
switch (regs.clear_report_value) { const auto query_type = [clear_report = regs.clear_report_value]() {
case Regs::ClearReport::ZPassPixelCount: switch (clear_report) {
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount:
break; return VideoCommon::QueryType::ZPassPixelCount64;
case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded:
return VideoCommon::QueryType::StreamingPrimitivesSucceeded;
case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated:
return VideoCommon::QueryType::PrimitivesGenerated;
case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut:
return VideoCommon::QueryType::VtgPrimitivesOut;
default: default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report);
break; return VideoCommon::QueryType::Payload;
} }
}();
rasterizer->ResetCounter(query_type);
} }
void Maxwell3D::ProcessSyncPoint() { void Maxwell3D::ProcessSyncPoint() {

View file

@ -28,8 +28,11 @@
namespace VideoCore { namespace VideoCore {
enum class QueryType { enum class QueryType {
SamplesPassed, SamplesPassed,
PrimitivesGenerated,
TfbPrimitivesWritten,
Count,
}; };
constexpr std::size_t NumQueryTypes = 1; constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
} // namespace VideoCore } // namespace VideoCore
namespace VideoCommon { namespace VideoCommon {
@ -44,15 +47,6 @@ public:
explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
: cache{cache_}, type{type_} {} : cache{cache_}, type{type_} {}
/// Updates the state of the stream, enabling or disabling as needed.
void Update(bool enabled) {
if (enabled) {
Enable();
} else {
Disable();
}
}
/// Resets the stream to zero. It doesn't disable the query after resetting. /// Resets the stream to zero. It doesn't disable the query after resetting.
void Reset() { void Reset() {
if (current) { if (current) {
@ -80,7 +74,6 @@ public:
return current != nullptr; return current != nullptr;
} }
private:
/// Enables the stream. /// Enables the stream.
void Enable() { void Enable() {
if (current) { if (current) {
@ -97,6 +90,7 @@ private:
last = std::exchange(current, nullptr); last = std::exchange(current, nullptr);
} }
private:
QueryCache& cache; QueryCache& cache;
const VideoCore::QueryType type; const VideoCore::QueryType type;
@ -112,8 +106,14 @@ public:
: rasterizer{rasterizer_}, : rasterizer{rasterizer_},
// Use reinterpret_cast instead of static_cast as workaround for // Use reinterpret_cast instead of static_cast as workaround for
// UBSan bug (https://github.com/llvm/llvm-project/issues/59060) // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this), cpu_memory{cpu_memory_}, streams{{
VideoCore::QueryType::SamplesPassed}}} { {CounterStream{reinterpret_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}},
{CounterStream{reinterpret_cast<QueryCache&>(*this),
VideoCore::QueryType::PrimitivesGenerated}},
{CounterStream{reinterpret_cast<QueryCache&>(*this),
VideoCore::QueryType::TfbPrimitivesWritten}},
}} {
(void)slot_async_jobs.insert(); // Null value (void)slot_async_jobs.insert(); // Null value
} }
@ -157,12 +157,11 @@ public:
AsyncFlushQuery(query, timestamp, lock); AsyncFlushQuery(query, timestamp, lock);
} }
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. /// Enables all available GPU counters
void UpdateCounters() { void EnableCounters() {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
if (maxwell3d) { for (auto& stream : streams) {
const auto& regs = maxwell3d->regs; stream.Enable();
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable);
} }
} }
@ -176,7 +175,7 @@ public:
void DisableStreams() { void DisableStreams() {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
for (auto& stream : streams) { for (auto& stream : streams) {
stream.Update(false); stream.Disable();
} }
} }
@ -353,7 +352,7 @@ private:
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
}; }; // namespace VideoCommon
template <class QueryCache, class HostCounter> template <class QueryCache, class HostCounter>
class HostCounterBase { class HostCounterBase {

View file

@ -18,16 +18,27 @@ namespace OpenGL {
namespace { namespace {
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
constexpr GLenum GetTarget(VideoCore::QueryType type) { constexpr GLenum GetTarget(VideoCore::QueryType type) {
return QueryTargets[static_cast<std::size_t>(type)]; switch (type) {
case VideoCore::QueryType::SamplesPassed:
return GL_SAMPLES_PASSED;
case VideoCore::QueryType::PrimitivesGenerated:
return GL_PRIMITIVES_GENERATED;
case VideoCore::QueryType::TfbPrimitivesWritten:
return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
default:
break;
}
UNIMPLEMENTED_MSG("Query type {}", type);
return 0;
} }
} // Anonymous namespace } // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
: QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
EnableCounters();
}
QueryCache::~QueryCache() = default; QueryCache::~QueryCache() = default;
@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
auto& stream = cache->Stream(type); auto& stream = cache->Stream(type);
const bool slice_counter = WaitPending() && stream.IsEnabled(); const bool slice_counter = WaitPending() && stream.IsEnabled();
if (slice_counter) { if (slice_counter) {
stream.Update(false); stream.Disable();
} }
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) { if (slice_counter) {
stream.Update(true); stream.Enable();
} }
return result; return result;

View file

@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
void oglEnable(GLenum cap, bool state) { void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap); (state ? glEnable : glDisable)(cap);
} }
std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
switch (type) {
case VideoCommon::QueryType::PrimitivesGenerated:
case VideoCommon::QueryType::VtgPrimitivesOut:
return VideoCore::QueryType::PrimitivesGenerated;
case VideoCommon::QueryType::ZPassPixelCount64:
return VideoCore::QueryType::SamplesPassed;
case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
// case VideoCommon::QueryType::StreamingByteCount:
// TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
return VideoCore::QueryType::TfbPrimitivesWritten;
default:
return std::nullopt;
}
}
} // Anonymous namespace } // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
SCOPE_EXIT({ gpu.TickWork(); }); SCOPE_EXIT({ gpu.TickWork(); });
gpu_memory->FlushCaching(); gpu_memory->FlushCaching();
query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) { if (!pipeline) {
@ -334,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() {
MICROPROFILE_SCOPE(OpenGL_Drawing); MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); }); SCOPE_EXIT({ gpu.TickWork(); });
query_cache.UpdateCounters();
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
@ -401,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() {
} }
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
if (type == VideoCommon::QueryType::ZPassPixelCount64) { const auto query_cache_type = MaxwellToVideoCoreQuery(type);
query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); if (!query_cache_type.has_value()) {
UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type);
return;
} }
query_cache.ResetCounter(*query_cache_type);
} }
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
if (type == VideoCommon::QueryType::ZPassPixelCount64) { const auto query_cache_type = MaxwellToVideoCoreQuery(type);
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { if (!query_cache_type.has_value()) {
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); return QueryFallback(gpu_addr, type, flags, payload, subreport);
} else {
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
}
return;
} }
const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
query_cache.Query(gpu_addr, *query_cache_type, timestamp);
}
void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload,
u32 subreport) {
if (type != VideoCommon::QueryType::Payload) { if (type != VideoCommon::QueryType::Payload) {
payload = 1u; payload = 1u;
} }

View file

@ -225,6 +225,9 @@ private:
/// End a transform feedback /// End a transform feedback
void EndTransformFeedback(); void EndTransformFeedback();
void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
Tegra::GPU& gpu; Tegra::GPU& gpu;
const Device& device; const Device& device;

View file

@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() {
} }
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
if (type != VideoCommon::QueryType::ZPassPixelCount64) {
LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
return;
}
query_cache.CounterReset(type); query_cache.CounterReset(type);
} }