forked from eden-emu/eden
		
	Reduce amount of size calculations.
This commit is contained in:
		
							parent
							
								
									73538e80a6
								
							
						
					
					
						commit
						20e97604f1
					
				
					 8 changed files with 97 additions and 88 deletions
				
			
		|  | @ -4,6 +4,7 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <string> | ||||
| 
 | ||||
| #if !defined(ARCHITECTURE_x86_64) | ||||
|  | @ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { | |||
|     return a | b << 8 | c << 16 | d << 24; | ||||
| } | ||||
| 
 | ||||
| template <class ForwardIt, class T, class Compare = std::less<>> | ||||
| ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { | ||||
|     // Note: BOTH type T and the type after ForwardIt is dereferenced
 | ||||
|     // must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
 | ||||
|     // This is stricter than the std::lower_bound requirement.
 | ||||
| 
 | ||||
|     first = std::lower_bound(first, last, value, comp); | ||||
|     return first != last && !comp(value, *first) ? first : last; | ||||
| } | ||||
| 
 | ||||
| } // namespace Common
 | ||||
|  |  | |||
|  | @ -240,7 +240,6 @@ CachedSurface::~CachedSurface() { | |||
| } | ||||
| 
 | ||||
| void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | ||||
|     LOG_CRITICAL(Render_OpenGL, "Flushing"); | ||||
|     MICROPROFILE_SCOPE(OpenGL_Texture_Download); | ||||
| 
 | ||||
|     // TODO(Rodrigo): Optimize alignment
 | ||||
|  |  | |||
|  | @ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost; | |||
| using VideoCore::MortonSwizzleMode; | ||||
| 
 | ||||
| SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | ||||
|     : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, | ||||
|       guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ | ||||
|     : params{params}, mipmap_sizes(params.num_levels), | ||||
|       mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | ||||
|                                                                  params.GetHostSizeInBytes()} { | ||||
|     mipmap_offsets.reserve(params.num_levels); | ||||
|     mipmap_sizes.reserve(params.num_levels); | ||||
| 
 | ||||
|     std::size_t offset = 0; | ||||
|     for (u32 level = 0; level < params.num_levels; ++level) { | ||||
|         const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | ||||
|         mipmap_sizes.push_back(mipmap_size); | ||||
|         mipmap_offsets.push_back(offset); | ||||
|         mipmap_sizes[level] = mipmap_size; | ||||
|         mipmap_offsets[level] = offset; | ||||
|         offset += mipmap_size; | ||||
|     } | ||||
|     layer_size = offset; | ||||
|     if (params.is_layered) { | ||||
|         if (params.is_tiled) { | ||||
|             layer_size = | ||||
|                 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); | ||||
|         } | ||||
|         guest_memory_size = layer_size * params.depth; | ||||
|     } else { | ||||
|         guest_memory_size = layer_size; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ | |||
| #include <vector> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/morton.h" | ||||
|  | @ -16,16 +17,6 @@ | |||
| #include "video_core/texture_cache/surface_params.h" | ||||
| #include "video_core/texture_cache/surface_view.h" | ||||
| 
 | ||||
| template <class ForwardIt, class T, class Compare = std::less<>> | ||||
| ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { | ||||
|     // Note: BOTH type T and the type after ForwardIt is dereferenced
 | ||||
|     // must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
 | ||||
|     // This is stricter than the std::lower_bound requirement.
 | ||||
| 
 | ||||
|     first = std::lower_bound(first, last, value, comp); | ||||
|     return first != last && !comp(value, *first) ? first : last; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
|  | @ -153,7 +144,7 @@ public: | |||
|         const auto layer{static_cast<u32>(relative_address / layer_size)}; | ||||
|         const GPUVAddr mipmap_address = relative_address - layer_size * layer; | ||||
|         const auto mipmap_it = | ||||
|             binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||||
|             Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||||
|         if (mipmap_it == mipmap_offsets.end()) { | ||||
|             return {}; | ||||
|         } | ||||
|  | @ -172,8 +163,8 @@ protected: | |||
|     virtual void DecorateSurfaceName() = 0; | ||||
| 
 | ||||
|     const SurfaceParams params; | ||||
|     const std::size_t layer_size; | ||||
|     const std::size_t guest_memory_size; | ||||
|     std::size_t layer_size; | ||||
|     std::size_t guest_memory_size; | ||||
|     const std::size_t host_memory_size; | ||||
|     GPUVAddr gpu_addr{}; | ||||
|     CacheAddr cache_addr{}; | ||||
|  | @ -268,9 +259,11 @@ public: | |||
|         return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | ||||
|     } | ||||
| 
 | ||||
|     std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { | ||||
|         if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || | ||||
|             params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { | ||||
|     std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | ||||
|                                      const std::size_t candidate_size) { | ||||
|         if (params.target == SurfaceTarget::Texture3D || | ||||
|             (params.num_levels == 1 && !params.is_layered) || | ||||
|             view_params.target == SurfaceTarget::Texture3D) { | ||||
|             return {}; | ||||
|         } | ||||
|         const auto layer_mipmap{GetLayerMipmap(view_addr)}; | ||||
|  | @ -279,8 +272,7 @@ public: | |||
|         } | ||||
|         const u32 layer{layer_mipmap->first}; | ||||
|         const u32 mipmap{layer_mipmap->second}; | ||||
|         const std::size_t size{view_params.GetGuestSizeInBytes()}; | ||||
|         if (GetMipmapSize(mipmap) != size) { | ||||
|         if (GetMipmapSize(mipmap) != candidate_size) { | ||||
|             // TODO: The view may cover many mipmaps, this case can still go on.
 | ||||
|             // This edge-case can safely be ignored since it will just result in worse
 | ||||
|             // performance.
 | ||||
|  |  | |||
|  | @ -4,13 +4,12 @@ | |||
| 
 | ||||
| #include <map> | ||||
| 
 | ||||
| #include "common/cityhash.h" | ||||
| #include "common/alignment.h" | ||||
| #include "common/cityhash.h" | ||||
| #include "core/core.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/surface_params.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
|  | @ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
|     return params; | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetMipWidth(u32 level) const { | ||||
|     return std::max(1U, width >> level); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetMipHeight(u32 level) const { | ||||
|     return std::max(1U, height >> level); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetMipDepth(u32 level) const { | ||||
|     return is_layered ? depth : std::max(1U, depth >> level); | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::IsLayered() const { | ||||
|     switch (target) { | ||||
|     case SurfaceTarget::Texture1DArray: | ||||
|  | @ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { | |||
|     return GetInnerMipmapMemorySize(level, true, false); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetDefaultBlockWidth() const { | ||||
|     return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetDefaultBlockHeight() const { | ||||
|     return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetBitsPerPixel() const { | ||||
|     return VideoCore::Surface::GetFormatBpp(pixel_format); | ||||
| } | ||||
| 
 | ||||
| u32 SurfaceParams::GetBytesPerPixel() const { | ||||
|     return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::IsPixelFormatZeta() const { | ||||
|     return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||||
|            pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||||
|  |  | |||
|  | @ -10,8 +10,9 @@ | |||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/fermi_2d.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
|  | @ -50,10 +51,17 @@ public: | |||
|     std::size_t GetHostSizeInBytes() const { | ||||
|         std::size_t host_size_in_bytes; | ||||
|         if (IsPixelFormatASTC(pixel_format)) { | ||||
|             constexpr std::size_t rgb8_bpp = 4ULL; | ||||
|             // ASTC is decompressed in software and emulated as RGBA8
 | ||||
|             host_size_in_bytes = static_cast<std::size_t>(Common::AlignUp(width, GetDefaultBlockWidth())) * | ||||
|                                  static_cast<std::size_t>(Common::AlignUp(height, GetDefaultBlockHeight())) * | ||||
|                                  static_cast<std::size_t>(depth) * 4ULL; | ||||
|             host_size_in_bytes = 0; | ||||
|             for (std::size_t level = 0; level < num_levels; level++) { | ||||
|                 const std::size_t width = | ||||
|                     Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); | ||||
|                 const std::size_t height = | ||||
|                     Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); | ||||
|                 const std::size_t depth = is_layered ? depth : GetMipDepth(level); | ||||
|                 host_size_in_bytes += width * height * depth * rgb8_bpp; | ||||
|             } | ||||
|         } else { | ||||
|             host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||||
|         } | ||||
|  | @ -65,13 +73,19 @@ public: | |||
|     } | ||||
| 
 | ||||
|     /// Returns the width of a given mipmap level.
 | ||||
|     u32 GetMipWidth(u32 level) const; | ||||
|     u32 GetMipWidth(u32 level) const { | ||||
|         return std::max(1U, width >> level); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the height of a given mipmap level.
 | ||||
|     u32 GetMipHeight(u32 level) const; | ||||
|     u32 GetMipHeight(u32 level) const { | ||||
|         return std::max(1U, height >> level); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the depth of a given mipmap level.
 | ||||
|     u32 GetMipDepth(u32 level) const; | ||||
|     u32 GetMipDepth(u32 level) const { | ||||
|         return is_layered ? depth : std::max(1U, depth >> level); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the block height of a given mipmap level.
 | ||||
|     u32 GetMipBlockHeight(u32 level) const; | ||||
|  | @ -79,6 +93,12 @@ public: | |||
|     /// Returns the block depth of a given mipmap level.
 | ||||
|     u32 GetMipBlockDepth(u32 level) const; | ||||
| 
 | ||||
|     // Helper used for out of class size calculations
 | ||||
|     static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||||
|                                     const u32 block_depth) { | ||||
|         return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the offset in bytes in guest memory of a given mipmap level.
 | ||||
|     std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||||
| 
 | ||||
|  | @ -98,16 +118,24 @@ public: | |||
|     std::size_t GetHostLayerSize(u32 level) const; | ||||
| 
 | ||||
|     /// Returns the default block width.
 | ||||
|     u32 GetDefaultBlockWidth() const; | ||||
|     u32 GetDefaultBlockWidth() const { | ||||
|         return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the default block height.
 | ||||
|     u32 GetDefaultBlockHeight() const; | ||||
|     u32 GetDefaultBlockHeight() const { | ||||
|         return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the bits per pixel.
 | ||||
|     u32 GetBitsPerPixel() const; | ||||
|     u32 GetBitsPerPixel() const { | ||||
|         return VideoCore::Surface::GetFormatBpp(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the bytes per pixel.
 | ||||
|     u32 GetBytesPerPixel() const; | ||||
|     u32 GetBytesPerPixel() const { | ||||
|         return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true if the pixel format is a depth and/or stencil format.
 | ||||
|     bool IsPixelFormatZeta() const; | ||||
|  |  | |||
|  | @ -120,10 +120,6 @@ public: | |||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         if (regs.color_mask[index].raw == 0) { | ||||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||||
|                                        preserve_contents); | ||||
|         if (render_targets[index].target) | ||||
|  | @ -165,7 +161,9 @@ public: | |||
|                      const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||||
|                      const Common::Rectangle<u32>& src_rect, | ||||
|                      const Common::Rectangle<u32>& dst_rect) { | ||||
|         ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); | ||||
|         TSurface dst_surface = GetFermiSurface(dst_config); | ||||
|         ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); | ||||
|         dst_surface->MarkAsModified(true, Tick()); | ||||
|     } | ||||
| 
 | ||||
|     TSurface TryFindFramebufferSurface(const u8* host_ptr) { | ||||
|  | @ -270,10 +268,6 @@ private: | |||
| 
 | ||||
|     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | ||||
|                                  const GPUVAddr gpu_addr, const bool untopological) { | ||||
|         // Untopological decision
 | ||||
|         if (untopological) { | ||||
|             return RecycleStrategy::Ignore; | ||||
|         } | ||||
|         // 3D Textures decision
 | ||||
|         if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | ||||
|             return RecycleStrategy::Flush; | ||||
|  | @ -284,12 +278,16 @@ private: | |||
|                 return RecycleStrategy::Flush; | ||||
|             } | ||||
|         } | ||||
|         // Untopological decision
 | ||||
|         if (untopological) { | ||||
|             return RecycleStrategy::Ignore; | ||||
|         } | ||||
|         return RecycleStrategy::Ignore; | ||||
|     } | ||||
| 
 | ||||
|     std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | ||||
|                                               const SurfaceParams& params, const GPUVAddr gpu_addr, | ||||
|                                               const u8* host_ptr, const bool preserve_contents, | ||||
|                                               const bool preserve_contents, | ||||
|                                               const bool untopological) { | ||||
|         for (auto surface : overlaps) { | ||||
|             Unregister(surface); | ||||
|  | @ -328,6 +326,7 @@ private: | |||
|         } | ||||
|         Unregister(current_surface); | ||||
|         Register(new_surface); | ||||
|         new_surface->MarkAsModified(current_surface->IsModified(), Tick()); | ||||
|         return {new_surface, new_surface->GetMainView()}; | ||||
|     } | ||||
| 
 | ||||
|  | @ -351,6 +350,7 @@ private: | |||
|         if (params.target == SurfaceTarget::Texture3D) { | ||||
|             return {}; | ||||
|         } | ||||
|         bool modified = false; | ||||
|         TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||||
|         for (auto surface : overlaps) { | ||||
|             const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||||
|  | @ -358,7 +358,7 @@ private: | |||
|                 // We send these cases to recycle as they are more complex to handle
 | ||||
|                 return {}; | ||||
|             } | ||||
|             const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); | ||||
|             const std::size_t candidate_size = surface->GetSizeInBytes(); | ||||
|             auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | ||||
|             if (!mipmap_layer) { | ||||
|                 return {}; | ||||
|  | @ -368,6 +368,7 @@ private: | |||
|             if (new_surface->GetMipmapSize(mipmap) != candidate_size) { | ||||
|                 return {}; | ||||
|             } | ||||
|             modified |= surface->IsModified(); | ||||
|             // Now we got all the data set up
 | ||||
|             const u32 dst_width{params.GetMipWidth(mipmap)}; | ||||
|             const u32 dst_height{params.GetMipHeight(mipmap)}; | ||||
|  | @ -381,6 +382,7 @@ private: | |||
|             force_reconfiguration |= surface->IsProtected(); | ||||
|             Unregister(surface, true); | ||||
|         } | ||||
|         new_surface->MarkAsModified(modified, Tick()); | ||||
|         Register(new_surface); | ||||
|         return {{new_surface, new_surface->GetMainView()}}; | ||||
|     } | ||||
|  | @ -399,8 +401,7 @@ private: | |||
| 
 | ||||
|         for (auto surface : overlaps) { | ||||
|             if (!surface->MatchesTopology(params)) { | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, | ||||
|                                       true); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|  | @ -418,27 +419,26 @@ private: | |||
|                 } | ||||
|             } | ||||
|             if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, | ||||
|                                       false); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); | ||||
|             } | ||||
|             std::optional<TView> view = current_surface->EmplaceView(params, gpu_addr); | ||||
|             std::optional<TView> view = | ||||
|                 current_surface->EmplaceView(params, gpu_addr, candidate_size); | ||||
|             if (view.has_value()) { | ||||
|                 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||||
|                 if (is_mirage) { | ||||
|                     LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); | ||||
|                     return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, | ||||
|                                           false); | ||||
|                     return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); | ||||
|                 } | ||||
|                 return {current_surface, *view}; | ||||
|             } | ||||
|             return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); | ||||
|             return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); | ||||
|         } else { | ||||
|             std::optional<std::pair<TSurface, TView>> view = | ||||
|                 ReconstructSurface(overlaps, params, gpu_addr, host_ptr); | ||||
|             if (view.has_value()) { | ||||
|                 return *view; | ||||
|             } | ||||
|             return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); | ||||
|             return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow