forked from eden-emu/eden
		
	video_core: Refactor to use MemoryManager interface for all memory access.
# Conflicts:
#	src/video_core/engines/kepler_memory.cpp
#	src/video_core/engines/maxwell_3d.cpp
#	src/video_core/morton.cpp
#	src/video_core/morton.h
#	src/video_core/renderer_opengl/gl_global_cache.cpp
#	src/video_core/renderer_opengl/gl_global_cache.h
#	src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
This commit is contained in:
		
							parent
							
								
									47b622825c
								
							
						
					
					
						commit
						574e89d924
					
				
					 20 changed files with 196 additions and 189 deletions
				
			
		|  | @ -55,12 +55,9 @@ bool DmaPusher::Step() { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Push buffer non-empty, read a word
 |     // Push buffer non-empty, read a word
 | ||||||
|     const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); |  | ||||||
|     ASSERT_MSG(address, "Invalid GPU address"); |  | ||||||
| 
 |  | ||||||
|     command_headers.resize(command_list_header.size); |     command_headers.resize(command_list_header.size); | ||||||
| 
 |     gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), | ||||||
|     Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); |                                   command_list_header.size * sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|     for (const CommandHeader& command_header : command_headers) { |     for (const CommandHeader& command_header : command_headers) { | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) { | ||||||
|     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); |     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | ||||||
|     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); |     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | ||||||
| 
 | 
 | ||||||
|     const GPUVAddr address = regs.dest.Address(); |  | ||||||
|     const auto dest_address = |  | ||||||
|         memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32)); |  | ||||||
|     ASSERT_MSG(dest_address, "Invalid GPU address"); |  | ||||||
| 
 |  | ||||||
|     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
 |     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
 | ||||||
|     // We do this before actually writing the new data because the destination address might contain
 |     // We do this before actually writing the new data because the destination address might
 | ||||||
|     // a dirty surface that will have to be written back to memory.
 |     // contain a dirty surface that will have to be written back to memory.
 | ||||||
|     system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), |     const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||||||
|                                                     sizeof(u32)); |     rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||||||
|  |     memory_manager.Write32(address, data); | ||||||
| 
 | 
 | ||||||
|     Memory::Write32(*dest_address, data); |  | ||||||
|     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||||
| 
 | 
 | ||||||
|     state.write_offset++; |     state.write_offset++; | ||||||
|  |  | ||||||
|  | @ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::ProcessQueryGet() { | void Maxwell3D::ProcessQueryGet() { | ||||||
|     GPUVAddr sequence_address = regs.query.QueryAddress(); |     const GPUVAddr sequence_address{regs.query.QueryAddress()}; | ||||||
|     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
 |     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
 | ||||||
|     // VAddr before writing.
 |     // VAddr before writing.
 | ||||||
|     const auto address = memory_manager.GpuToCpuAddress(sequence_address); |  | ||||||
|     ASSERT_MSG(address, "Invalid GPU address"); |  | ||||||
| 
 | 
 | ||||||
|     // TODO(Subv): Support the other query units.
 |     // TODO(Subv): Support the other query units.
 | ||||||
|     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | ||||||
|  | @ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() { | ||||||
|             // Write the current query sequence to the sequence address.
 |             // Write the current query sequence to the sequence address.
 | ||||||
|             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
 |             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
 | ||||||
|             // query.
 |             // query.
 | ||||||
|             Memory::Write32(*address, sequence); |             memory_manager.Write32(sequence_address, sequence); | ||||||
|         } else { |         } else { | ||||||
|             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
 |             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
 | ||||||
|             // GPU, this command may actually take a while to complete in real hardware due to GPU
 |             // GPU, this command may actually take a while to complete in real hardware due to GPU
 | ||||||
|  | @ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() { | ||||||
|             query_result.value = result; |             query_result.value = result; | ||||||
|             // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
 |             // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
 | ||||||
|             query_result.timestamp = system.CoreTiming().GetTicks(); |             query_result.timestamp = system.CoreTiming().GetTicks(); | ||||||
|             Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |             memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||||||
|         } |         } | ||||||
|         dirty_flags.OnMemoryWrite(); |         dirty_flags.OnMemoryWrite(); | ||||||
|         break; |         break; | ||||||
|  | @ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) { | ||||||
|     // Don't allow writing past the end of the buffer.
 |     // Don't allow writing past the end of the buffer.
 | ||||||
|     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); |     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | ||||||
| 
 | 
 | ||||||
|     const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |     const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | ||||||
|     ASSERT_MSG(address, "Invalid GPU address"); |  | ||||||
| 
 | 
 | ||||||
|     u8* ptr{Memory::GetPointer(*address)}; |     u8* ptr{memory_manager.GetPointer(address)}; | ||||||
|     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); |     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | ||||||
|     std::memcpy(ptr, &value, sizeof(u32)); |     memory_manager.Write32(address, value); | ||||||
| 
 | 
 | ||||||
|     dirty_flags.OnMemoryWrite(); |     dirty_flags.OnMemoryWrite(); | ||||||
| 
 | 
 | ||||||
|  | @ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | ||||||
|     const GPUVAddr tic_base_address = regs.tic.TICAddress(); |     const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; | ||||||
| 
 |  | ||||||
|     const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); |  | ||||||
|     const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); |  | ||||||
|     ASSERT_MSG(tic_address_cpu, "Invalid GPU address"); |  | ||||||
| 
 | 
 | ||||||
|     Texture::TICEntry tic_entry; |     Texture::TICEntry tic_entry; | ||||||
|     Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); |     memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | ||||||
| 
 | 
 | ||||||
|     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || |     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | ||||||
|                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch, |                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | ||||||
|  | @ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | ||||||
|     const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); |     const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; | ||||||
| 
 |  | ||||||
|     const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); |  | ||||||
|     const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); |  | ||||||
|     ASSERT_MSG(tsc_address_cpu, "Invalid GPU address"); |  | ||||||
| 
 | 
 | ||||||
|     Texture::TSCEntry tsc_entry; |     Texture::TSCEntry tsc_entry; | ||||||
|     Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); |     memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | ||||||
|     return tsc_entry; |     return tsc_entry; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | ||||||
|     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; |     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | ||||||
|          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { |          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | ||||||
| 
 | 
 | ||||||
|         const auto address = memory_manager.GpuToCpuAddress(current_texture); |         const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)}; | ||||||
|         ASSERT_MSG(address, "Invalid GPU address"); |  | ||||||
| 
 |  | ||||||
|         const Texture::TextureHandle tex_handle{Memory::Read32(*address)}; |  | ||||||
| 
 | 
 | ||||||
|         Texture::FullTextureInfo tex_info{}; |         Texture::FullTextureInfo tex_info{}; | ||||||
|         // TODO(Subv): Use the shader to determine which textures are actually accessed.
 |         // TODO(Subv): Use the shader to determine which textures are actually accessed.
 | ||||||
|  | @ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | ||||||
| 
 | 
 | ||||||
|     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); |     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | ||||||
| 
 | 
 | ||||||
|     const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); |     const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)}; | ||||||
|     ASSERT_MSG(tex_address_cpu, "Invalid GPU address"); |  | ||||||
| 
 |  | ||||||
|     const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; |  | ||||||
| 
 | 
 | ||||||
|     Texture::FullTextureInfo tex_info{}; |     Texture::FullTextureInfo tex_info{}; | ||||||
|     tex_info.index = static_cast<u32>(offset); |     tex_info.index = static_cast<u32>(offset); | ||||||
|  |  | ||||||
|  | @ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() { | ||||||
|     const GPUVAddr source = regs.src_address.Address(); |     const GPUVAddr source = regs.src_address.Address(); | ||||||
|     const GPUVAddr dest = regs.dst_address.Address(); |     const GPUVAddr dest = regs.dst_address.Address(); | ||||||
| 
 | 
 | ||||||
|     const auto source_cpu = memory_manager.GpuToCpuAddress(source); |  | ||||||
|     const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); |  | ||||||
|     ASSERT_MSG(source_cpu, "Invalid source GPU address"); |  | ||||||
|     ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); |  | ||||||
| 
 |  | ||||||
|     // TODO(Subv): Perform more research and implement all features of this engine.
 |     // TODO(Subv): Perform more research and implement all features of this engine.
 | ||||||
|     ASSERT(regs.exec.enable_swizzle == 0); |     ASSERT(regs.exec.enable_swizzle == 0); | ||||||
|     ASSERT(regs.exec.query_mode == Regs::QueryMode::None); |     ASSERT(regs.exec.query_mode == Regs::QueryMode::None); | ||||||
|  | @ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() { | ||||||
|         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
 |         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
 | ||||||
|         // y_count).
 |         // y_count).
 | ||||||
|         if (!regs.exec.enable_2d) { |         if (!regs.exec.enable_2d) { | ||||||
|             Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); |             memory_manager.CopyBlock(dest, source, regs.x_count); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() { | ||||||
|         // rectangle. There is no need to manually flush/invalidate the regions because
 |         // rectangle. There is no need to manually flush/invalidate the regions because
 | ||||||
|         // CopyBlock does that for us.
 |         // CopyBlock does that for us.
 | ||||||
|         for (u32 line = 0; line < regs.y_count; ++line) { |         for (u32 line = 0; line < regs.y_count; ++line) { | ||||||
|             const VAddr source_line = *source_cpu + line * regs.src_pitch; |             const GPUVAddr source_line = source + line * regs.src_pitch; | ||||||
|             const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; |             const GPUVAddr dest_line = dest + line * regs.dst_pitch; | ||||||
|             Memory::CopyBlock(dest_line, source_line, regs.x_count); |             memory_manager.CopyBlock(dest_line, source_line, regs.x_count); | ||||||
|         } |         } | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() { | ||||||
| 
 | 
 | ||||||
|     const std::size_t copy_size = regs.x_count * regs.y_count; |     const std::size_t copy_size = regs.x_count * regs.y_count; | ||||||
| 
 | 
 | ||||||
|  |     auto source_ptr{memory_manager.GetPointer(source)}; | ||||||
|  |     auto dst_ptr{memory_manager.GetPointer(dest)}; | ||||||
|  | 
 | ||||||
|     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | ||||||
|         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
 |         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
 | ||||||
|         // copying.
 |         // copying.
 | ||||||
|         Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( |         rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); | ||||||
|             ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size); |  | ||||||
| 
 | 
 | ||||||
|         // We have to invalidate the destination region to evict any outdated surfaces from the
 |         // We have to invalidate the destination region to evict any outdated surfaces from the
 | ||||||
|         // cache. We do this before actually writing the new data because the destination address
 |         // cache. We do this before actually writing the new data because the destination address
 | ||||||
|         // might contain a dirty surface that will have to be written back to memory.
 |         // might contain a dirty surface that will have to be written back to memory.
 | ||||||
|         Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( |         rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); | ||||||
|             ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size); |  | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | ||||||
|  | @ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() { | ||||||
|                            copy_size * src_bytes_per_pixel); |                            copy_size * src_bytes_per_pixel); | ||||||
| 
 | 
 | ||||||
|         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, |         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||||||
|                                   regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, |                                   regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, | ||||||
|                                   *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, |                                   regs.src_params.BlockHeight(), regs.src_params.pos_x, | ||||||
|                                   regs.src_params.pos_y); |                                   regs.src_params.pos_y); | ||||||
|     } else { |     } else { | ||||||
|         ASSERT(regs.dst_params.size_z == 1); |         ASSERT(regs.dst_params.size_z == 1); | ||||||
|  | @ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() { | ||||||
| 
 | 
 | ||||||
|         // If the input is linear and the output is tiled, swizzle the input and copy it over.
 |         // If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||||
|         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, |         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | ||||||
|                                 src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); |                                 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() { | ||||||
|     const auto op = |     const auto op = | ||||||
|         static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); |         static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||||||
|     if (op == GpuSemaphoreOperation::WriteLong) { |     if (op == GpuSemaphoreOperation::WriteLong) { | ||||||
|         auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); |  | ||||||
|         struct Block { |         struct Block { | ||||||
|             u32 sequence; |             u32 sequence; | ||||||
|             u32 zeros = 0; |             u32 zeros = 0; | ||||||
|  | @ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() { | ||||||
|         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
 |         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
 | ||||||
|         // CoreTiming
 |         // CoreTiming
 | ||||||
|         block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); |         block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); | ||||||
|         Memory::WriteBlock(*address, &block, sizeof(block)); |         memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block)); | ||||||
|     } else { |     } else { | ||||||
|         const auto address = |         const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())}; | ||||||
|             memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); |  | ||||||
|         const u32 word = Memory::Read32(*address); |  | ||||||
|         if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || |         if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||||||
|             (op == GpuSemaphoreOperation::AcquireGequal && |             (op == GpuSemaphoreOperation::AcquireGequal && | ||||||
|              static_cast<s32>(word - regs.semaphore_sequence) > 0) || |              static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||||||
|  | @ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::ProcessSemaphoreRelease() { | void GPU::ProcessSemaphoreRelease() { | ||||||
|     const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); |     memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release); | ||||||
|     Memory::Write32(*address, regs.semaphore_release); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::ProcessSemaphoreAcquire() { | void GPU::ProcessSemaphoreAcquire() { | ||||||
|     const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); |     const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress()); | ||||||
|     const u32 word = Memory::Read32(*address); |  | ||||||
|     const auto value = regs.semaphore_acquire; |     const auto value = regs.semaphore_acquire; | ||||||
|     if (word != value) { |     if (word != value) { | ||||||
|         regs.acquire_active = true; |         regs.acquire_active = true; | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { | ||||||
|     return base_addr + (gpu_addr & PAGE_MASK); |     return base_addr + (gpu_addr & PAGE_MASK); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { | u8 MemoryManager::Read8(GPUVAddr addr) { | ||||||
|     std::vector<GPUVAddr> results; |     return Memory::Read8(*GpuToCpuAddress(addr)); | ||||||
|     for (const auto& region : mapped_regions) { | } | ||||||
|         if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { | 
 | ||||||
|             const u64 offset{cpu_addr - region.cpu_addr}; | u16 MemoryManager::Read16(GPUVAddr addr) { | ||||||
|             results.push_back(region.gpu_addr + offset); |     return Memory::Read16(*GpuToCpuAddress(addr)); | ||||||
|         } | } | ||||||
|     } | 
 | ||||||
|     return results; | u32 MemoryManager::Read32(GPUVAddr addr) { | ||||||
|  |     return Memory::Read32(*GpuToCpuAddress(addr)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u64 MemoryManager::Read64(GPUVAddr addr) { | ||||||
|  |     return Memory::Read64(*GpuToCpuAddress(addr)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::Write8(GPUVAddr addr, u8 data) { | ||||||
|  |     Memory::Write8(*GpuToCpuAddress(addr), data); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::Write16(GPUVAddr addr, u16 data) { | ||||||
|  |     Memory::Write16(*GpuToCpuAddress(addr), data); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::Write32(GPUVAddr addr, u32 data) { | ||||||
|  |     Memory::Write32(*GpuToCpuAddress(addr), data); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::Write64(GPUVAddr addr, u64 data) { | ||||||
|  |     Memory::Write64(*GpuToCpuAddress(addr), data); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u8* MemoryManager::GetPointer(GPUVAddr addr) { | ||||||
|  |     return Memory::GetPointer(*GpuToCpuAddress(addr)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) { | ||||||
|  |     std::memcpy(dest_buffer, GetPointer(src_addr), size); | ||||||
|  | } | ||||||
|  | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { | ||||||
|  |     std::memcpy(GetPointer(dest_addr), src_buffer, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { | ||||||
|  |     std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { | VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { | ||||||
|  |  | ||||||
|  | @ -27,12 +27,27 @@ public: | ||||||
|     GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); |     GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); | ||||||
|     GPUVAddr GetRegionEnd(GPUVAddr region_start) const; |     GPUVAddr GetRegionEnd(GPUVAddr region_start) const; | ||||||
|     std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); |     std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); | ||||||
|     std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; |  | ||||||
| 
 | 
 | ||||||
|     static constexpr u64 PAGE_BITS = 16; |     static constexpr u64 PAGE_BITS = 16; | ||||||
|     static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; |     static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; | ||||||
|     static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; |     static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; | ||||||
| 
 | 
 | ||||||
|  |     u8 Read8(GPUVAddr addr); | ||||||
|  |     u16 Read16(GPUVAddr addr); | ||||||
|  |     u32 Read32(GPUVAddr addr); | ||||||
|  |     u64 Read64(GPUVAddr addr); | ||||||
|  | 
 | ||||||
|  |     void Write8(GPUVAddr addr, u8 data); | ||||||
|  |     void Write16(GPUVAddr addr, u16 data); | ||||||
|  |     void Write32(GPUVAddr addr, u32 data); | ||||||
|  |     void Write64(GPUVAddr addr, u64 data); | ||||||
|  | 
 | ||||||
|  |     u8* GetPointer(GPUVAddr vaddr); | ||||||
|  | 
 | ||||||
|  |     void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size); | ||||||
|  |     void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | ||||||
|  |     void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     enum class PageStatus : u64 { |     enum class PageStatus : u64 { | ||||||
|         Unmapped = 0xFFFFFFFFFFFFFFFFULL, |         Unmapped = 0xFFFFFFFFFFFFFFFFULL, | ||||||
|  |  | ||||||
|  | @ -6,7 +6,6 @@ | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/morton.h" | #include "video_core/morton.h" | ||||||
| #include "video_core/surface.h" | #include "video_core/surface.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  | @ -16,12 +15,12 @@ namespace VideoCore { | ||||||
| using Surface::GetBytesPerPixel; | using Surface::GetBytesPerPixel; | ||||||
| using Surface::PixelFormat; | using Surface::PixelFormat; | ||||||
| 
 | 
 | ||||||
| using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr); | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); | ||||||
| using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | ||||||
| 
 | 
 | ||||||
| template <bool morton_to_linear, PixelFormat format> | template <bool morton_to_linear, PixelFormat format> | ||||||
| static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | ||||||
|                        u32 tile_width_spacing, u8* buffer, VAddr addr) { |                        u32 tile_width_spacing, u8* buffer, u8* addr) { | ||||||
|     constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); |     constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||||||
| 
 | 
 | ||||||
|     // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
 |     // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
 | ||||||
|  | @ -34,10 +33,10 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth | ||||||
|                                          stride, height, depth, block_height, block_depth, |                                          stride, height, depth, block_height, block_depth, | ||||||
|                                          tile_width_spacing); |                                          tile_width_spacing); | ||||||
|     } else { |     } else { | ||||||
|         Tegra::Texture::CopySwizzledData( |         Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||||||
|             (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, |                                          (height + tile_size_y - 1) / tile_size_y, depth, | ||||||
|             depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, |                                          bytes_per_pixel, bytes_per_pixel, addr, buffer, false, | ||||||
|             block_height, block_depth, tile_width_spacing); |                                          block_height, block_depth, tile_width_spacing); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||||||
| 
 | 
 | ||||||
| void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | ||||||
|                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||||||
|                    u8* buffer, VAddr addr) { |                    u8* buffer, u8* addr) { | ||||||
|     GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, |     GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, | ||||||
|                                      tile_width_spacing, buffer, addr); |                                      tile_width_spacing, buffer, addr); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; | ||||||
| 
 | 
 | ||||||
| void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | ||||||
|                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | ||||||
|                    u8* buffer, VAddr addr); |                    u8* buffer, u8* addr); | ||||||
| 
 | 
 | ||||||
| void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | ||||||
|                          u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); |                          u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); | ||||||
|  |  | ||||||
|  | @ -24,14 +24,12 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||||||
| GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, | GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, | ||||||
|                                       std::size_t alignment, bool cache) { |                                       std::size_t alignment, bool cache) { | ||||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; |  | ||||||
|     ASSERT_MSG(cpu_addr, "Invalid GPU address"); |  | ||||||
| 
 | 
 | ||||||
|     // Cache management is a big overhead, so only cache entries with a given size.
 |     // Cache management is a big overhead, so only cache entries with a given size.
 | ||||||
|     // TODO: Figure out which size is the best for given games.
 |     // TODO: Figure out which size is the best for given games.
 | ||||||
|     cache &= size >= 2048; |     cache &= size >= 2048; | ||||||
| 
 | 
 | ||||||
|     const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; |     const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         auto entry = TryGet(host_ptr); |         auto entry = TryGet(host_ptr); | ||||||
|         if (entry) { |         if (entry) { | ||||||
|  | @ -54,8 +52,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | ||||||
|     buffer_offset += size; |     buffer_offset += size; | ||||||
| 
 | 
 | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, |         auto entry = std::make_shared<CachedBufferEntry>( | ||||||
|                                                          alignment, host_ptr); |             *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||||||
|         Register(entry); |         Register(entry); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -7,7 +7,6 @@ | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_global_cache.h" | #include "video_core/renderer_opengl/gl_global_cache.h" | ||||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
|  | @ -39,7 +38,7 @@ void CachedGlobalRegion::Reload(u32 size_) { | ||||||
|     glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); |     glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||||||
|     const auto search{reserve.find(addr)}; |     const auto search{reserve.find(addr)}; | ||||||
|     if (search == reserve.end()) { |     if (search == reserve.end()) { | ||||||
|         return {}; |         return {}; | ||||||
|  | @ -47,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 | ||||||
|     return search->second; |     return search->second; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, | ||||||
|     GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; |                                                               u8* host_ptr) { | ||||||
|  |     GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||||||
|     if (!region) { |     if (!region) { | ||||||
|         // No reserved surface available, create a new one and reserve it
 |         // No reserved surface available, create a new one and reserve it
 | ||||||
|         region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); |         auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||||||
|  |         const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); | ||||||
|  |         region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); | ||||||
|         ReserveGlobalRegion(region); |         ReserveGlobalRegion(region); | ||||||
|     } |     } | ||||||
|     region->Reload(size); |     region->Reload(size); | ||||||
|  | @ -59,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||||||
|     reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); |     reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||||||
|  | @ -70,23 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||||||
|     Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { |     Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||||||
| 
 | 
 | ||||||
|     auto& gpu{Core::System::GetInstance().GPU()}; |     auto& gpu{Core::System::GetInstance().GPU()}; | ||||||
|     const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; |     auto& memory_manager{gpu.MemoryManager()}; | ||||||
|     const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( |     const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; | ||||||
|         cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); |     const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||||||
|     ASSERT(cbuf_addr); |                     global_region.GetCbufOffset()}; | ||||||
| 
 |     const auto actual_addr{memory_manager.Read64(addr)}; | ||||||
|     const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); |     const auto size{memory_manager.Read32(addr + 8)}; | ||||||
|     const auto size = Memory::Read32(*cbuf_addr + 8); |  | ||||||
|     const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); |  | ||||||
|     ASSERT(actual_addr); |  | ||||||
| 
 | 
 | ||||||
|     // Look up global region in the cache based on address
 |     // Look up global region in the cache based on address
 | ||||||
|     const auto& host_ptr{Memory::GetPointer(*actual_addr)}; |     const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||||||
|     GlobalRegion region{TryGet(host_ptr)}; |     GlobalRegion region{TryGet(host_ptr)}; | ||||||
| 
 | 
 | ||||||
|     if (!region) { |     if (!region) { | ||||||
|         // No global region found - create a new one
 |         // No global region found - create a new one
 | ||||||
|         region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); |         region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); | ||||||
|         Register(region); |         Register(region); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -65,11 +65,11 @@ public: | ||||||
|                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); |                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; |     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||||||
|     GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); |     GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr); | ||||||
|     void ReserveGlobalRegion(GlobalRegion region); |     void ReserveGlobalRegion(GlobalRegion region); | ||||||
| 
 | 
 | ||||||
|     std::unordered_map<VAddr, GlobalRegion> reserve; |     std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size | ||||||
|     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); |     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); | ||||||
| 
 | 
 | ||||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; |     const u8* source{memory_manager.GetPointer(gpu_addr)}; | ||||||
|     ASSERT_MSG(cpu_addr, "Invalid GPU address"); |  | ||||||
| 
 |  | ||||||
|     const u8* source{Memory::GetPointer(*cpu_addr)}; |  | ||||||
| 
 | 
 | ||||||
|     for (u32 primitive = 0; primitive < count / 4; ++primitive) { |     for (u32 primitive = 0; primitive < count / 4; ++primitive) { | ||||||
|         for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { |         for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { | ||||||
|  | @ -64,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size | ||||||
|     return index_offset; |     return index_offset; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -57,11 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { | ||||||
| 
 | 
 | ||||||
| void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { | void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { | ||||||
|     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)}; |  | ||||||
| 
 | 
 | ||||||
|     addr = cpu_addr ? *cpu_addr : 0; |  | ||||||
|     gpu_addr = gpu_addr_; |     gpu_addr = gpu_addr_; | ||||||
|     host_ptr = Memory::GetPointer(addr); |     host_ptr = memory_manager.GetPointer(gpu_addr_); | ||||||
|     size_in_bytes = SizeInBytesRaw(); |     size_in_bytes = SizeInBytesRaw(); | ||||||
| 
 | 
 | ||||||
|     if (IsPixelFormatASTC(pixel_format)) { |     if (IsPixelFormatASTC(pixel_format)) { | ||||||
|  | @ -447,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, | ||||||
|             MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |             MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | ||||||
|                           params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |                           params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | ||||||
|                           params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, |                           params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, | ||||||
|                           gl_buffer.data() + offset_gl, params.addr + offset); |                           gl_buffer.data() + offset_gl, params.host_ptr + offset); | ||||||
|             offset += layer_size; |             offset += layer_size; | ||||||
|             offset_gl += gl_size; |             offset_gl += gl_size; | ||||||
|         } |         } | ||||||
|  | @ -456,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, | ||||||
|         MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |         MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | ||||||
|                       params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |                       params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | ||||||
|                       params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, |                       params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, | ||||||
|                       gl_buffer.data(), params.addr + offset); |                       gl_buffer.data(), params.host_ptr + offset); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -514,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac | ||||||
|                               "reinterpretation but the texture is tiled."); |                               "reinterpretation but the texture is tiled."); | ||||||
|         } |         } | ||||||
|         const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; |         const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; | ||||||
| 
 |         auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||||||
|         glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, |         glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, | ||||||
|                         Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); |                         memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); |     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||||||
|  | @ -604,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | ||||||
| 
 | 
 | ||||||
|     ApplyTextureDefaults(texture.handle, params.max_mip_level); |     ApplyTextureDefaults(texture.handle, params.max_mip_level); | ||||||
| 
 | 
 | ||||||
|     OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); |     OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); | ||||||
| 
 | 
 | ||||||
|     // Clamp size to mapped GPU memory region
 |     // Clamp size to mapped GPU memory region
 | ||||||
|     // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
 |     // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
 | ||||||
|  | @ -617,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | ||||||
|         LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); |         LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); | ||||||
|         cached_size_in_bytes = max_size; |         cached_size_in_bytes = max_size; | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | ||||||
|  | @ -925,7 +925,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { | ||||||
|     if (params.addr == 0 || params.height * params.width == 0) { |     if (params.gpu_addr == 0 || params.height * params.width == 0) { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -979,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | ||||||
|                                                    const Surface& dst_surface) { |                                                    const Surface& dst_surface) { | ||||||
|     const auto& init_params{src_surface->GetSurfaceParams()}; |     const auto& init_params{src_surface->GetSurfaceParams()}; | ||||||
|     const auto& dst_params{dst_surface->GetSurfaceParams()}; |     const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||||||
|     VAddr address = init_params.addr; |     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||||||
|     const std::size_t layer_size = dst_params.LayerMemorySize(); |     Tegra::GPUVAddr address{init_params.gpu_addr}; | ||||||
|  |     const std::size_t layer_size{dst_params.LayerMemorySize()}; | ||||||
|     for (u32 layer = 0; layer < dst_params.depth; layer++) { |     for (u32 layer = 0; layer < dst_params.depth; layer++) { | ||||||
|         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { |         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { | ||||||
|             const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); |             const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; | ||||||
|             const Surface& copy = TryGet(Memory::GetPointer(sub_address)); |             const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; | ||||||
|             if (!copy) |             if (!copy) { | ||||||
|                 continue; |                 continue; | ||||||
|  |             } | ||||||
|             const auto& src_params{copy->GetSurfaceParams()}; |             const auto& src_params{copy->GetSurfaceParams()}; | ||||||
|             const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; |             const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; | ||||||
|             const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; |             const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; | ||||||
|  | @ -1242,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { | static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params, | ||||||
|     const std::size_t size = params.LayerMemorySize(); |                                            u32 mipmap) { | ||||||
|     VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); |     const std::size_t size{params.LayerMemorySize()}; | ||||||
|  |     Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; | ||||||
|     for (u32 i = 0; i < params.depth; i++) { |     for (u32 i = 0; i < params.depth; i++) { | ||||||
|         if (start == addr) { |         if (start == addr) { | ||||||
|             return {i}; |             return {i}; | ||||||
|  | @ -1266,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | ||||||
|             src_params.height == dst_params.MipHeight(*level) && |             src_params.height == dst_params.MipHeight(*level) && | ||||||
|             src_params.block_height >= dst_params.MipBlockHeight(*level)) { |             src_params.block_height >= dst_params.MipBlockHeight(*level)) { | ||||||
|             const std::optional<u32> slot = |             const std::optional<u32> slot = | ||||||
|                 TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); |                 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); | ||||||
|             if (slot.has_value()) { |             if (slot.has_value()) { | ||||||
|                 glCopyImageSubData(render_surface->Texture().handle, |                 glCopyImageSubData(render_surface->Texture().handle, | ||||||
|                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, |                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | ||||||
|  |  | ||||||
|  | @ -296,7 +296,6 @@ struct SurfaceParams { | ||||||
|     bool is_array; |     bool is_array; | ||||||
|     bool srgb_conversion; |     bool srgb_conversion; | ||||||
|     // Parameters used for caching
 |     // Parameters used for caching
 | ||||||
|     VAddr addr; |  | ||||||
|     u8* host_ptr; |     u8* host_ptr; | ||||||
|     Tegra::GPUVAddr gpu_addr; |     Tegra::GPUVAddr gpu_addr; | ||||||
|     std::size_t size_in_bytes; |     std::size_t size_in_bytes; | ||||||
|  | @ -349,7 +348,7 @@ public: | ||||||
|     explicit CachedSurface(const SurfaceParams& params); |     explicit CachedSurface(const SurfaceParams& params); | ||||||
| 
 | 
 | ||||||
|     VAddr GetCpuAddr() const override { |     VAddr GetCpuAddr() const override { | ||||||
|         return params.addr; |         return cpu_addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|  | @ -433,6 +432,7 @@ private: | ||||||
|     std::size_t memory_size; |     std::size_t memory_size; | ||||||
|     bool reinterpreted = false; |     bool reinterpreted = false; | ||||||
|     bool must_reload = false; |     bool must_reload = false; | ||||||
|  |     VAddr cpu_addr{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | ||||||
|  |  | ||||||
|  | @ -32,13 +32,10 @@ struct UnspecializedShader { | ||||||
| namespace { | namespace { | ||||||
| 
 | 
 | ||||||
| /// Gets the address for the specified shader stage program
 | /// Gets the address for the specified shader stage program
 | ||||||
| VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { | ||||||
|     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |     const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; | ||||||
|     const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; |     const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; | ||||||
|     const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + |     return gpu.regs.code_address.CodeAddress() + shader_config.offset; | ||||||
|                                                             shader_config.offset); |  | ||||||
|     ASSERT_MSG(address, "Invalid GPU address"); |  | ||||||
|     return *address; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /// Gets the shader program code from memory for the specified address
 | /// Gets the shader program code from memory for the specified address
 | ||||||
|  | @ -214,11 +211,11 @@ std::set<GLenum> GetSupportedFormats() { | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
| 
 | 
 | ||||||
| CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, | ||||||
|                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                            const PrecompiledPrograms& precompiled_programs, |                            const PrecompiledPrograms& precompiled_programs, | ||||||
|                            ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) |                            ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) | ||||||
|     : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, |     : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, | ||||||
|       program_type{program_type}, disk_cache{disk_cache}, |       program_type{program_type}, disk_cache{disk_cache}, | ||||||
|       precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { |       precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { | ||||||
| 
 | 
 | ||||||
|  | @ -244,11 +241,11 @@ CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | ||||||
|     disk_cache.SaveRaw(raw); |     disk_cache.SaveRaw(raw); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, | ||||||
|                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                            const PrecompiledPrograms& precompiled_programs, |                            const PrecompiledPrograms& precompiled_programs, | ||||||
|                            GLShader::ProgramResult result, u8* host_ptr) |                            GLShader::ProgramResult result, u8* host_ptr) | ||||||
|     : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, |     : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type}, | ||||||
|       disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ |       disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ | ||||||
|                                                                               host_ptr} { |                                                                               host_ptr} { | ||||||
| 
 | 
 | ||||||
|  | @ -273,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | ||||||
|                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             LabelGLObject(GL_PROGRAM, program->handle, guest_addr); |             LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         handle = program->handle; |         handle = program->handle; | ||||||
|  | @ -325,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind | ||||||
|         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); |     LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); | ||||||
| 
 | 
 | ||||||
|     return target_program->handle; |     return target_program->handle; | ||||||
| }; | }; | ||||||
|  | @ -488,31 +485,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||||
|         return last_shaders[static_cast<u32>(program)]; |         return last_shaders[static_cast<u32>(program)]; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const VAddr program_addr{GetShaderAddress(program)}; |     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||||||
|  |     const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; | ||||||
| 
 | 
 | ||||||
|     // Look up shader in the cache based on address
 |     // Look up shader in the cache based on address
 | ||||||
|     const auto& host_ptr{Memory::GetPointer(program_addr)}; |     const auto& host_ptr{memory_manager.GetPointer(program_addr)}; | ||||||
|     Shader shader{TryGet(host_ptr)}; |     Shader shader{TryGet(host_ptr)}; | ||||||
| 
 | 
 | ||||||
|     if (!shader) { |     if (!shader) { | ||||||
|         // No shader found - create a new one
 |         // No shader found - create a new one
 | ||||||
|         const auto& host_ptr{Memory::GetPointer(program_addr)}; |  | ||||||
|         ProgramCode program_code{GetShaderCode(host_ptr)}; |         ProgramCode program_code{GetShaderCode(host_ptr)}; | ||||||
|         ProgramCode program_code_b; |         ProgramCode program_code_b; | ||||||
|         if (program == Maxwell::ShaderProgram::VertexA) { |         if (program == Maxwell::ShaderProgram::VertexA) { | ||||||
|             program_code_b = GetShaderCode( |             program_code_b = GetShaderCode( | ||||||
|                 Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); |                 memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | ||||||
|         } |         } | ||||||
|         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); |         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | ||||||
| 
 |         const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | ||||||
|         const auto found = precompiled_shaders.find(unique_identifier); |         const auto found = precompiled_shaders.find(unique_identifier); | ||||||
|         if (found != precompiled_shaders.end()) { |         if (found != precompiled_shaders.end()) { | ||||||
|             shader = |             shader = | ||||||
|                 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, |                 std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, | ||||||
|                                                precompiled_programs, found->second, host_ptr); |                                                precompiled_programs, found->second, host_ptr); | ||||||
|         } else { |         } else { | ||||||
|             shader = std::make_shared<CachedShader>( |             shader = std::make_shared<CachedShader>( | ||||||
|                 program_addr, unique_identifier, program, disk_cache, precompiled_programs, |                 cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, | ||||||
|                 std::move(program_code), std::move(program_code_b), host_ptr); |                 std::move(program_code), std::move(program_code_b), host_ptr); | ||||||
|         } |         } | ||||||
|         Register(shader); |         Register(shader); | ||||||
|  |  | ||||||
|  | @ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | ||||||
| 
 | 
 | ||||||
| class CachedShader final : public RasterizerCacheObject { | class CachedShader final : public RasterizerCacheObject { | ||||||
| public: | public: | ||||||
|     explicit CachedShader(VAddr guest_addr, u64 unique_identifier, |     explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, | ||||||
|                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                           const PrecompiledPrograms& precompiled_programs, |                           const PrecompiledPrograms& precompiled_programs, | ||||||
|                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); |                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); | ||||||
| 
 | 
 | ||||||
|     explicit CachedShader(VAddr guest_addr, u64 unique_identifier, |     explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, | ||||||
|                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                           const PrecompiledPrograms& precompiled_programs, |                           const PrecompiledPrograms& precompiled_programs, | ||||||
|                           GLShader::ProgramResult result, u8* host_ptr); |                           GLShader::ProgramResult result, u8* host_ptr); | ||||||
| 
 | 
 | ||||||
|     VAddr GetCpuAddr() const override { |     VAddr GetCpuAddr() const override { | ||||||
|         return guest_addr; |         return cpu_addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|  | @ -92,7 +92,7 @@ private: | ||||||
|     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; |     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | ||||||
| 
 | 
 | ||||||
|     u8* host_ptr{}; |     u8* host_ptr{}; | ||||||
|     VAddr guest_addr{}; |     VAddr cpu_addr{}; | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     Maxwell::ShaderProgram program_type{}; |     Maxwell::ShaderProgram program_type{}; | ||||||
|     ShaderDiskCacheOpenGL& disk_cache; |     ShaderDiskCacheOpenGL& disk_cache; | ||||||
|  |  | ||||||
|  | @ -6,7 +6,6 @@ | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
|  | @ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | ||||||
|                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, |                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, | ||||||
|                       u32 block_depth, u32 width_spacing) { |                       u32 block_depth, u32 width_spacing) { | ||||||
|     CopySwizzledData((width + tile_size_x - 1) / tile_size_x, |     CopySwizzledData((width + tile_size_x - 1) / tile_size_x, | ||||||
|                      (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, |                      (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, | ||||||
|                      bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, |                      bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, | ||||||
|                      block_height, block_depth, width_spacing); |                      width_spacing); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | ||||||
|                                  u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |                                  u32 width, u32 height, u32 depth, u32 block_height, | ||||||
|                                  u32 block_height, u32 block_depth, u32 width_spacing) { |                                  u32 block_depth, u32 width_spacing) { | ||||||
|     std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); |     std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); | ||||||
|     UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, |     UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, | ||||||
|                      width, height, depth, block_height, block_depth, width_spacing); |                      width, height, depth, block_height, block_depth, width_spacing); | ||||||
|  | @ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | ||||||
|                     u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |                     u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | ||||||
|                     u32 block_height) { |  | ||||||
|     const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |     const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | ||||||
|                                   gob_size_x}; |                                   gob_size_x}; | ||||||
|     for (u32 line = 0; line < subrect_height; ++line) { |     for (u32 line = 0; line < subrect_height; ++line) { | ||||||
|  | @ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | ||||||
|             const u32 gob_address = |             const u32 gob_address = | ||||||
|                 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |                 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | ||||||
|             const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; |             const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | ||||||
|             const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |             u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | ||||||
|             const VAddr dest_addr = swizzled_data + swizzled_offset; |             u8* dest_addr = swizzled_data + swizzled_offset; | ||||||
| 
 | 
 | ||||||
|             Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); |             std::memcpy(dest_addr, source_line, bytes_per_pixel); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | ||||||
|                       u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | ||||||
|                       u32 block_height, u32 offset_x, u32 offset_y) { |                       u32 offset_x, u32 offset_y) { | ||||||
|     for (u32 line = 0; line < subrect_height; ++line) { |     for (u32 line = 0; line < subrect_height; ++line) { | ||||||
|         const u32 y2 = line + offset_y; |         const u32 y2 = line + offset_y; | ||||||
|         const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |         const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | ||||||
|  | @ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | ||||||
|             const u32 x2 = (x + offset_x) * bytes_per_pixel; |             const u32 x2 = (x + offset_x) * bytes_per_pixel; | ||||||
|             const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; |             const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | ||||||
|             const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; |             const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | ||||||
|             const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; |             u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | ||||||
|             const VAddr source_addr = swizzled_data + swizzled_offset; |             u8* source_addr = swizzled_data + swizzled_offset; | ||||||
| 
 | 
 | ||||||
|             Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); |             std::memcpy(dest_line, source_addr, bytes_per_pixel); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -17,14 +17,14 @@ inline std::size_t GetGOBSize() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /// Unswizzles a swizzled texture without changing its format.
 | /// Unswizzles a swizzled texture without changing its format.
 | ||||||
| void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | ||||||
|                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | ||||||
|                       u32 block_height = TICEntry::DefaultBlockHeight, |                       u32 block_height = TICEntry::DefaultBlockHeight, | ||||||
|                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); |                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | ||||||
| 
 | 
 | ||||||
| /// Unswizzles a swizzled texture without changing its format.
 | /// Unswizzles a swizzled texture without changing its format.
 | ||||||
| std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | ||||||
|                                  u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |                                  u32 width, u32 height, u32 depth, | ||||||
|                                  u32 block_height = TICEntry::DefaultBlockHeight, |                                  u32 block_height = TICEntry::DefaultBlockHeight, | ||||||
|                                  u32 block_depth = TICEntry::DefaultBlockHeight, |                                  u32 block_depth = TICEntry::DefaultBlockHeight, | ||||||
|                                  u32 width_spacing = 0); |                                  u32 width_spacing = 0); | ||||||
|  | @ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | ||||||
| 
 | 
 | ||||||
| /// Copies an untiled subrectangle into a tiled surface.
 | /// Copies an untiled subrectangle into a tiled surface.
 | ||||||
| void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | ||||||
|                     u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |                     u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); | ||||||
|                     u32 block_height); |  | ||||||
| 
 | 
 | ||||||
| /// Copies a tiled subrectangle into a linear surface.
 | /// Copies a tiled subrectangle into a linear surface.
 | ||||||
| void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | ||||||
|                       u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | ||||||
|                       u32 block_height, u32 offset_x, u32 offset_y); |                       u32 offset_x, u32 offset_y); | ||||||
| 
 | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  |  | ||||||
|  | @ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() { | ||||||
|     // TODO: Implement a good way to visualize alpha components!
 |     // TODO: Implement a good way to visualize alpha components!
 | ||||||
| 
 | 
 | ||||||
|     QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); |     QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); | ||||||
|     std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address); |  | ||||||
| 
 | 
 | ||||||
|     // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
 |     // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
 | ||||||
|     // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
 |     // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
 | ||||||
|     auto unswizzled_data = Tegra::Texture::UnswizzleTexture( |     auto unswizzled_data = Tegra::Texture::UnswizzleTexture( | ||||||
|         *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, |         gpu.MemoryManager().GetPointer(surface_address), 1, 1, | ||||||
|         surface_height, 1U); |         Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U); | ||||||
| 
 | 
 | ||||||
|     auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, |     auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, | ||||||
|                                                       surface_width, surface_height); |                                                       surface_width, surface_height); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei