From e7c251a00b5ffcb45333ba89318af3cba46aec9e Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 24 Sep 2025 17:43:23 +0000 Subject: [PATCH] [vk, ogl/IR, dynarmic/IR] friendlier IR identity pointer chasing, inline AA passes Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 34 +++++------ .../ir_opt/identity_removal_pass.cpp | 29 +++++---- .../renderer_opengl/present/layer.cpp | 19 +++--- .../renderer_opengl/present/layer.h | 14 ++--- .../renderer_vulkan/present/anti_alias_pass.h | 6 -- .../renderer_vulkan/present/filters.cpp | 39 +++++------- .../renderer_vulkan/present/fsr.cpp | 59 +++++++------------ .../renderer_vulkan/present/layer.cpp | 28 +++++---- .../renderer_vulkan/present/layer.h | 13 ++-- 9 files changed, 106 insertions(+), 135 deletions(-) diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 844e29023c..18323936d9 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -1226,32 +1226,28 @@ static void DeadCodeElimination(IR::Block& block) { } static void IdentityRemovalPass(IR::Block& block) { - boost::container::small_vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); + boost::container::small_vector to_invalidate; + for (auto it = block.begin(); it != block.end();) { + const size_t num_args = it->NumArgs(); + for (size_t i = 0; i < num_args; ++i) { + IR::Value arg = it->GetArg(i); + if (arg.IsIdentity()) { + do { + arg = arg.GetInst()->GetArg(0); + } while (arg.IsIdentity()); + it->SetArg(i, arg); } } - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); + if (it->GetOpcode() == IR::Opcode::Identity || it->GetOpcode() == IR::Opcode::Void) { + to_invalidate.push_back(&*it); + it = block.Instructions().erase(it); } else { - ++iter; + ++it; } } - for (IR::Inst* inst : to_invalidate) { + for (IR::Inst* const inst : to_invalidate) inst->Invalidate(); - } } static void NamingPass(IR::Block& block) { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 951534bbf2..18e3553010 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" @@ -10,28 +11,30 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Program& program) { - std::vector to_invalidate; + boost::container::small_vector to_invalidate; for (IR::Block* const block : program.blocks) { - for (auto inst = block->begin(); inst != block->end();) { - const size_t num_args{inst->NumArgs()}; + for (auto it = block->begin(); it != block->end();) { + const size_t num_args{it->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { - IR::Value arg; - while ((arg = inst->Arg(i)).IsIdentity()) { - inst->SetArg(i, arg.Inst()->Arg(0)); + IR::Value arg = it->Arg(i); + if (arg.IsIdentity()) { + do { // Pointer chasing (3-derefs) + arg = arg.Inst()->Arg(0); + } while (arg.IsIdentity()); + it->SetArg(i, arg); } } - if (inst->GetOpcode() == IR::Opcode::Identity || - inst->GetOpcode() == IR::Opcode::Void) { - to_invalidate.push_back(&*inst); - inst = block->Instructions().erase(inst); + + if (it->GetOpcode() == IR::Opcode::Identity || it->GetOpcode() == IR::Opcode::Void) { + to_invalidate.push_back(&*it); + it = block->Instructions().erase(it); } else { - ++inst; + ++it; } } } - for (IR::Inst* const inst : to_invalidate) { + for (IR::Inst* const inst : to_invalidate) inst->Invalidate(); - } } } // namespace Shader::Optimization diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp index 6c7092d229..6ce4ffa1ff 100644 --- a/src/video_core/renderer_opengl/present/layer.cpp +++ b/src/video_core/renderer_opengl/present/layer.cpp @@ -54,12 +54,12 @@ GLuint Layer::ConfigureDraw(std::array& out_matrix, switch (anti_aliasing) { case Settings::AntiAliasing::Fxaa: CreateFXAA(); - texture = fxaa->Draw(program_manager, info.display_texture); + texture = std::get(anti_alias).Draw(program_manager, info.display_texture); break; case Settings::AntiAliasing::Smaa: default: CreateSMAA(); - texture = smaa->Draw(program_manager, info.display_texture); + texture = std::get(anti_alias).Draw(program_manager, info.display_texture); break; } } @@ -68,7 +68,7 @@ GLuint Layer::ConfigureDraw(std::array& out_matrix, if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) { if (!fsr || fsr->NeedsRecreation(layout.screen)) { - fsr = std::make_unique(layout.screen.GetWidth(), layout.screen.GetHeight()); + fsr.emplace(layout.screen.GetWidth(), layout.screen.GetHeight()); } texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop); @@ -199,23 +199,20 @@ void Layer::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuf glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, framebuffer_texture.width, framebuffer_texture.height); - fxaa.reset(); - smaa.reset(); + anti_alias.emplace(); } void Layer::CreateFXAA() { - smaa.reset(); - if (!fxaa) { - fxaa = std::make_unique( + if (!std::holds_alternative(anti_alias)) { + anti_alias.emplace( Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); } } void Layer::CreateSMAA() { - fxaa.reset(); - if (!smaa) { - smaa = std::make_unique( + if (!std::holds_alternative(anti_alias)) { + anti_alias.emplace( Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); } diff --git a/src/video_core/renderer_opengl/present/layer.h b/src/video_core/renderer_opengl/present/layer.h index 5b15b730fc..a4ebed963c 100644 --- a/src/video_core/renderer_opengl/present/layer.h +++ b/src/video_core/renderer_opengl/present/layer.h @@ -3,11 +3,15 @@ #pragma once -#include +#include +#include #include #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/renderer_opengl/present/fxaa.h" +#include "video_core/renderer_opengl/present/fsr.h" namespace Layout { struct FramebufferLayout; @@ -26,11 +30,8 @@ struct FramebufferConfig; namespace OpenGL { struct FramebufferTextureInfo; -class FSR; -class FXAA; class ProgramManager; class RasterizerOpenGL; -class SMAA; /// Structure used for storing information about the textures for the Switch screen struct TextureInfo { @@ -76,9 +77,8 @@ private: /// Display information for Switch screen TextureInfo framebuffer_texture; - std::unique_ptr fsr; - std::unique_ptr fxaa; - std::unique_ptr smaa; + std::optional fsr; + std::variant anti_alias; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/present/anti_alias_pass.h b/src/video_core/renderer_vulkan/present/anti_alias_pass.h index 1f20fbd7f0..db7bdacd1d 100644 --- a/src/video_core/renderer_vulkan/present/anti_alias_pass.h +++ b/src/video_core/renderer_vulkan/present/anti_alias_pass.h @@ -16,10 +16,4 @@ public: VkImageView* inout_image_view) = 0; }; -class NoAA final : public AntiAliasPass { -public: - void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, - VkImageView* inout_image_view) override {} -}; - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp index e0f2b26f84..381031fe6e 100644 --- a/src/video_core/renderer_vulkan/present/filters.cpp +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -24,61 +24,52 @@ namespace Vulkan { -namespace { - -vk::ShaderModule SelectScaleForceShader(const Device& device) { - if (device.IsFloat16Supported()) { - return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); - } else { - return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); - } -} - -} // Anonymous namespace - std::unique_ptr MakeNearestNeighbor(const Device& device, VkFormat frame_format) { - return std::make_unique(device, frame_format, - CreateNearestNeighborSampler(device), - BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); + return std::make_unique(device, frame_format, CreateNearestNeighborSampler(device), + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); } std::unique_ptr MakeBilinear(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); } std::unique_ptr MakeSpline1(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_SPLINE1_FRAG_SPV)); + BuildShader(device, PRESENT_SPLINE1_FRAG_SPV)); } std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format) { // No need for handrolled shader -- if the VK impl can do it for us ;) if (device.IsExtFilterCubicSupported()) return std::make_unique(device, frame_format, CreateCubicSampler(device), - BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); + BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); } std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); + BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); } std::unique_ptr MakeLanczos(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_LANCZOS_FRAG_SPV)); + BuildShader(device, PRESENT_LANCZOS_FRAG_SPV)); } std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format) { - return std::make_unique(device, frame_format, CreateBilinearSampler(device), - SelectScaleForceShader(device)); + auto const select_fn = [&]() { + return device.IsFloat16Supported() + ? BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV) + : BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + }; + return std::make_unique(device, frame_format, CreateBilinearSampler(device), select_fn()); } std::unique_ptr MakeArea(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_AREA_FRAG_SPV)); + BuildShader(device, PRESENT_AREA_FRAG_SPV)); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 3f708be704..b2d35cbe97 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -41,25 +41,18 @@ FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_c void FSR::CreateImages() { m_dynamic_images.resize(m_image_count); for (auto& images : m_dynamic_images) { - images.images[Easu] = - CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - images.images[Rcas] = - CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - images.image_views[Easu] = - CreateWrappedImageView(m_device, images.images[Easu], VK_FORMAT_R16G16B16A16_SFLOAT); - images.image_views[Rcas] = - CreateWrappedImageView(m_device, images.images[Rcas], VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Easu] = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Rcas] = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Easu] = CreateWrappedImageView(m_device, images.images[Easu], VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Rcas] = CreateWrappedImageView(m_device, images.images[Rcas], VK_FORMAT_R16G16B16A16_SFLOAT); } } void FSR::CreateRenderPasses() { m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); - for (auto& images : m_dynamic_images) { - images.framebuffers[Easu] = - CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Easu], m_extent); - images.framebuffers[Rcas] = - CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Rcas], m_extent); + images.framebuffers[Easu] = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Easu], m_extent); + images.framebuffers[Rcas] = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Rcas], m_extent); } } @@ -87,16 +80,13 @@ void FSR::CreateDescriptorPool() { } void FSR::CreateDescriptorSetLayout() { - m_descriptor_set_layout = - CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); + m_descriptor_set_layout = CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); } void FSR::CreateDescriptorSets() { std::vector layouts(MaxFsrStage, *m_descriptor_set_layout); - - for (auto& images : m_dynamic_images) { + for (auto& images : m_dynamic_images) images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); - } } void FSR::CreatePipelineLayouts() { @@ -128,31 +118,24 @@ void FSR::CreatePipelines() { void FSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { Images& images = m_dynamic_images[image_index]; std::vector image_infos; - std::vector updates; - image_infos.reserve(2); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, - images.descriptor_sets[Easu], 0)); - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Easu], - images.descriptor_sets[Rcas], 0)); - + std::vector updates{ + CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, images.descriptor_sets[Easu], 0), + CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Easu], images.descriptor_sets[Rcas], 0) + }; m_device.GetLogical().UpdateDescriptorSets(updates, {}); } void FSR::UploadImages(Scheduler& scheduler) { - if (m_images_ready) { - return; + if (!m_images_ready) { + m_images_ready = true; + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& image : m_dynamic_images) { + ClearColorImage(cmdbuf, *image.images[Easu]); + ClearColorImage(cmdbuf, *image.images[Rcas]); + } + }); + scheduler.Finish(); } - - scheduler.Record([&](vk::CommandBuffer cmdbuf) { - for (auto& image : m_dynamic_images) { - ClearColorImage(cmdbuf, *image.images[Easu]); - ClearColorImage(cmdbuf, *image.images[Rcas]); - } - }); - scheduler.Finish(); - - m_images_ready = true; } VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index fa7c457573..9946335fa3 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -4,7 +4,12 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "video_core/present.h" +#include "video_core/renderer_vulkan/present/anti_alias_pass.h" +/* X11 defines */ +#undef Success +#undef BadValue #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "common/settings.h" @@ -58,7 +63,7 @@ Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Schedule CreateDescriptorPool(); CreateDescriptorSets(layout); if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) { - CreateFSR(output_size); + fsr.emplace(device, memory_allocator, image_count, output_size); } } @@ -97,7 +102,11 @@ void Layer::ConfigureDraw(PresentPushConstants* out_push_constants, VkImageView source_image_view = texture_info ? texture_info->image_view : *raw_image_views[image_index]; - anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view); + if (std::holds_alternative(anti_alias)) { + std::get(anti_alias).Draw(scheduler, image_index, &source_image, &source_image_view); + } else if (std::holds_alternative(anti_alias)) { + std::get(anti_alias).Draw(scheduler, image_index, &source_image, &source_image_view); + } auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); const VkExtent2D render_extent{ @@ -156,10 +165,6 @@ void Layer::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { } } -void Layer::CreateFSR(VkExtent2D output_size) { - fsr = std::make_unique(device, memory_allocator, image_count, output_size); -} - void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { if (framebuffer.width == raw_width && framebuffer.height == raw_height && framebuffer.pixel_format == pixel_format && !raw_images.empty()) { @@ -169,7 +174,7 @@ void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { raw_width = framebuffer.width; raw_height = framebuffer.height; pixel_format = framebuffer.pixel_format; - anti_alias.reset(); + anti_alias.emplace(); ReleaseRawImages(); CreateStagingBuffer(framebuffer); @@ -177,9 +182,8 @@ void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { } void Layer::SetAntiAliasPass() { - if (anti_alias && anti_alias_setting == filters.get_anti_aliasing()) { + if (!std::holds_alternative(anti_alias) && anti_alias_setting == filters.get_anti_aliasing()) return; - } anti_alias_setting = filters.get_anti_aliasing(); @@ -190,13 +194,13 @@ void Layer::SetAntiAliasPass() { switch (anti_alias_setting) { case Settings::AntiAliasing::Fxaa: - anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + anti_alias.emplace(device, memory_allocator, image_count, render_area); break; case Settings::AntiAliasing::Smaa: - anti_alias = std::make_unique(device, memory_allocator, image_count, render_area); + anti_alias.emplace(device, memory_allocator, image_count, render_area); break; default: - anti_alias = std::make_unique(); + anti_alias.emplace(); break; } } diff --git a/src/video_core/renderer_vulkan/present/layer.h b/src/video_core/renderer_vulkan/present/layer.h index f5effdcd7f..6b8fed60ec 100644 --- a/src/video_core/renderer_vulkan/present/layer.h +++ b/src/video_core/renderer_vulkan/present/layer.h @@ -3,9 +3,15 @@ #pragma once +#include +#include + #include "common/math_util.h" #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "video_core/renderer_vulkan/present/fsr.h" +#include "video_core/renderer_vulkan/present/fxaa.h" +#include "video_core/renderer_vulkan/present/smaa.h" namespace Layout { struct FramebufferLayout; @@ -29,7 +35,6 @@ namespace Vulkan { class AntiAliasPass; class Device; -class FSR; class MemoryAllocator; struct PresentPushConstants; class RasterizerVulkan; @@ -54,7 +59,6 @@ private: void CreateDescriptorSets(VkDescriptorSetLayout layout); void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void CreateFSR(VkExtent2D output_size); void RefreshResources(const Tegra::FramebufferConfig& framebuffer); void SetAntiAliasPass(); @@ -87,9 +91,8 @@ private: Service::android::PixelFormat pixel_format{}; Settings::AntiAliasing anti_alias_setting{}; - std::unique_ptr anti_alias{}; - - std::unique_ptr fsr{}; + std::variant anti_alias{}; + std::optional fsr{}; std::vector resource_ticks{}; };