forked from eden-emu/eden
		
	Use spans over guest memory where possible instead of copying data.
This commit is contained in:
		
parent 95ceae40e6
commit 6f7cb69c94
					
				
					 22 changed files with 462 additions and 233 deletions
				
			
		|  | @ -92,9 +92,9 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) { | ||||||
|         if (type == Sink::StreamType::In) { |         if (type == Sink::StreamType::In) { | ||||||
|             stream->AppendBuffer(new_buffer, tmp_samples); |             stream->AppendBuffer(new_buffer, tmp_samples); | ||||||
|         } else { |         } else { | ||||||
|             system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(), |             Core::Memory::CpuGuestMemory<s16, Core::Memory::GuestMemoryFlags::UnsafeRead> samples( | ||||||
|                                                        buffer.size); |                 system.ApplicationMemory(), buffer.samples, buffer.size / sizeof(s16)); | ||||||
|             stream->AppendBuffer(new_buffer, tmp_samples); |             stream->AppendBuffer(new_buffer, samples); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -28,7 +28,6 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4}; | ||||||
| template <typename T> | template <typename T> | ||||||
| static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|                      const DecodeArg& req) { |                      const DecodeArg& req) { | ||||||
|     std::array<T, TempBufferSize> tmp_samples{}; |  | ||||||
|     constexpr s32 min{std::numeric_limits<s16>::min()}; |     constexpr s32 min{std::numeric_limits<s16>::min()}; | ||||||
|     constexpr s32 max{std::numeric_limits<s16>::max()}; |     constexpr s32 max{std::numeric_limits<s16>::max()}; | ||||||
| 
 | 
 | ||||||
|  | @ -49,19 +48,18 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|         const VAddr source{req.buffer + |         const VAddr source{req.buffer + | ||||||
|                            (((req.start_offset + req.offset) * channel_count) * sizeof(T))}; |                            (((req.start_offset + req.offset) * channel_count) * sizeof(T))}; | ||||||
|         const u64 size{channel_count * samples_to_decode}; |         const u64 size{channel_count * samples_to_decode}; | ||||||
|         const u64 size_bytes{size * sizeof(T)}; |  | ||||||
| 
 |  | ||||||
|         memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); |  | ||||||
| 
 | 
 | ||||||
|  |         Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples( | ||||||
|  |             memory, source, size); | ||||||
|         if constexpr (std::is_floating_point_v<T>) { |         if constexpr (std::is_floating_point_v<T>) { | ||||||
|             for (u32 i = 0; i < samples_to_decode; i++) { |             for (u32 i = 0; i < samples_to_decode; i++) { | ||||||
|                 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * |                 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * | ||||||
|                                              std::numeric_limits<s16>::max())}; |                                              std::numeric_limits<s16>::max())}; | ||||||
|                 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); |                 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             for (u32 i = 0; i < samples_to_decode; i++) { |             for (u32 i = 0; i < samples_to_decode; i++) { | ||||||
|                 out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; |                 out_buffer[i] = samples[i * channel_count + req.target_channel]; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } break; |     } break; | ||||||
|  | @ -74,16 +72,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; |         const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; | ||||||
|         memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); |         Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples( | ||||||
|  |             memory, source, samples_to_decode); | ||||||
| 
 | 
 | ||||||
|         if constexpr (std::is_floating_point_v<T>) { |         if constexpr (std::is_floating_point_v<T>) { | ||||||
|             for (u32 i = 0; i < samples_to_decode; i++) { |             for (u32 i = 0; i < samples_to_decode; i++) { | ||||||
|                 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * |                 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * | ||||||
|                                              std::numeric_limits<s16>::max())}; |                                              std::numeric_limits<s16>::max())}; | ||||||
|                 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); |                 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); |             std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | @ -101,7 +100,6 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|  */ |  */ | ||||||
| static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|                        const DecodeArg& req) { |                        const DecodeArg& req) { | ||||||
|     std::array<u8, TempBufferSize> wavebuffer{}; |  | ||||||
|     constexpr u32 SamplesPerFrame{14}; |     constexpr u32 SamplesPerFrame{14}; | ||||||
|     constexpr u32 NibblesPerFrame{16}; |     constexpr u32 NibblesPerFrame{16}; | ||||||
| 
 | 
 | ||||||
|  | @ -139,7 +137,8 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; |     const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; | ||||||
|     memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); |     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> wavebuffer( | ||||||
|  |         memory, req.buffer + position_in_frame / 2, size); | ||||||
| 
 | 
 | ||||||
|     auto context{req.adpcm_context}; |     auto context{req.adpcm_context}; | ||||||
|     auto header{context->header}; |     auto header{context->header}; | ||||||
|  |  | ||||||
|  | @ -21,23 +21,13 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     AuxInfo::AuxInfoDsp info{}; |     AuxInfo::AuxInfoDsp info{}; | ||||||
|     auto info_ptr{&info}; |     memory.ReadBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <= |  | ||||||
|                    (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))}; |  | ||||||
| 
 | 
 | ||||||
|     if (host_safe) [[likely]] { |     info.read_offset = 0; | ||||||
|         info_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(aux_info); |     info.write_offset = 0; | ||||||
|     } else { |     info.total_sample_count = 0; | ||||||
|         memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     info_ptr->read_offset = 0; |     memory.WriteBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     info_ptr->write_offset = 0; |  | ||||||
|     info_ptr->total_sample_count = 0; |  | ||||||
| 
 |  | ||||||
|     if (!host_safe) [[unlikely]] { |  | ||||||
|         memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -86,17 +76,9 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     AuxInfo::AuxInfoDsp send_info{}; |     AuxInfo::AuxInfoDsp send_info{}; | ||||||
|     auto send_ptr = &send_info; |     memory.ReadBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <= |  | ||||||
|                      (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
| 
 | 
 | ||||||
|     if (host_safe) [[likely]] { |     u32 target_write_offset{send_info.write_offset + write_offset}; | ||||||
|         send_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(send_info_); |  | ||||||
|     } else { |  | ||||||
|         memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 target_write_offset{send_ptr->write_offset + write_offset}; |  | ||||||
|     if (target_write_offset > count_max) { |     if (target_write_offset > count_max) { | ||||||
|         return 0; |         return 0; | ||||||
|     } |     } | ||||||
|  | @ -105,15 +87,9 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, | ||||||
|     u32 read_pos{0}; |     u32 read_pos{0}; | ||||||
|     while (write_count > 0) { |     while (write_count > 0) { | ||||||
|         u32 to_write{std::min(count_max - target_write_offset, write_count)}; |         u32 to_write{std::min(count_max - target_write_offset, write_count)}; | ||||||
|         const auto write_addr = send_buffer + target_write_offset * sizeof(s32); |         if (to_write > 0) { | ||||||
|         bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) <= |             const auto write_addr = send_buffer + target_write_offset * sizeof(s32); | ||||||
|                         (Core::Memory::YUZU_PAGESIZE - (write_addr + to_write * sizeof(s32)))}; |             memory.WriteBlockUnsafe(write_addr, &input[read_pos], to_write * sizeof(s32)); | ||||||
|         if (write_safe) [[likely]] { |  | ||||||
|             auto ptr = memory.GetPointer(write_addr); |  | ||||||
|             std::memcpy(ptr, &input[read_pos], to_write * sizeof(s32)); |  | ||||||
|         } else { |  | ||||||
|             memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32), |  | ||||||
|                                     &input[read_pos], to_write * sizeof(s32)); |  | ||||||
|         } |         } | ||||||
|         target_write_offset = (target_write_offset + to_write) % count_max; |         target_write_offset = (target_write_offset + to_write) % count_max; | ||||||
|         write_count -= to_write; |         write_count -= to_write; | ||||||
|  | @ -121,13 +97,10 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (update_count) { |     if (update_count) { | ||||||
|         send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max; |         send_info.write_offset = (send_info.write_offset + update_count) % count_max; | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!host_safe) [[unlikely]] { |  | ||||||
|         memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     memory.WriteBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     return write_count_; |     return write_count_; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -174,17 +147,9 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     AuxInfo::AuxInfoDsp return_info{}; |     AuxInfo::AuxInfoDsp return_info{}; | ||||||
|     auto return_ptr = &return_info; |     memory.ReadBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <= |  | ||||||
|                      (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
| 
 | 
 | ||||||
|     if (host_safe) [[likely]] { |     u32 target_read_offset{return_info.read_offset + read_offset}; | ||||||
|         return_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(return_info_); |  | ||||||
|     } else { |  | ||||||
|         memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 target_read_offset{return_ptr->read_offset + read_offset}; |  | ||||||
|     if (target_read_offset > count_max) { |     if (target_read_offset > count_max) { | ||||||
|         return 0; |         return 0; | ||||||
|     } |     } | ||||||
|  | @ -193,15 +158,9 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, | ||||||
|     u32 write_pos{0}; |     u32 write_pos{0}; | ||||||
|     while (read_count > 0) { |     while (read_count > 0) { | ||||||
|         u32 to_read{std::min(count_max - target_read_offset, read_count)}; |         u32 to_read{std::min(count_max - target_read_offset, read_count)}; | ||||||
|         const auto read_addr = return_buffer + target_read_offset * sizeof(s32); |         if (to_read > 0) { | ||||||
|         bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) <= |             const auto read_addr = return_buffer + target_read_offset * sizeof(s32); | ||||||
|                        (Core::Memory::YUZU_PAGESIZE - (read_addr + to_read * sizeof(s32)))}; |             memory.ReadBlockUnsafe(read_addr, &output[write_pos], to_read * sizeof(s32)); | ||||||
|         if (read_safe) [[likely]] { |  | ||||||
|             auto ptr = memory.GetPointer(read_addr); |  | ||||||
|             std::memcpy(&output[write_pos], ptr, to_read * sizeof(s32)); |  | ||||||
|         } else { |  | ||||||
|             memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32), |  | ||||||
|                                    &output[write_pos], to_read * sizeof(s32)); |  | ||||||
|         } |         } | ||||||
|         target_read_offset = (target_read_offset + to_read) % count_max; |         target_read_offset = (target_read_offset + to_read) % count_max; | ||||||
|         read_count -= to_read; |         read_count -= to_read; | ||||||
|  | @ -209,13 +168,10 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (update_count) { |     if (update_count) { | ||||||
|         return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max; |         return_info.read_offset = (return_info.read_offset + update_count) % count_max; | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!host_safe) [[unlikely]] { |  | ||||||
|         memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     memory.WriteBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp)); | ||||||
|     return read_count_; |     return read_count_; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -66,6 +66,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page | ||||||
|                                              << (address_space_width_in_bits - page_size_in_bits)}; |                                              << (address_space_width_in_bits - page_size_in_bits)}; | ||||||
|     pointers.resize(num_page_table_entries); |     pointers.resize(num_page_table_entries); | ||||||
|     backing_addr.resize(num_page_table_entries); |     backing_addr.resize(num_page_table_entries); | ||||||
|  |     blocks.resize(num_page_table_entries); | ||||||
|     current_address_space_width_in_bits = address_space_width_in_bits; |     current_address_space_width_in_bits = address_space_width_in_bits; | ||||||
|     page_size = 1ULL << page_size_in_bits; |     page_size = 1ULL << page_size_in_bits; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -122,6 +122,7 @@ struct PageTable { | ||||||
|      * corresponding attribute element is of type `Memory`. |      * corresponding attribute element is of type `Memory`. | ||||||
|      */ |      */ | ||||||
|     VirtualBuffer<PageInfo> pointers; |     VirtualBuffer<PageInfo> pointers; | ||||||
|  |     VirtualBuffer<u64> blocks; | ||||||
| 
 | 
 | ||||||
|     VirtualBuffer<u64> backing_addr; |     VirtualBuffer<u64> backing_addr; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) { | ||||||
|         -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; |         -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; | ||||||
|     ev_lost = CreateEvent("_lost_event", empty_timed_callback); |     ev_lost = CreateEvent("_lost_event", empty_timed_callback); | ||||||
|     if (is_multicore) { |     if (is_multicore) { | ||||||
|         timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); |         timer_thread = std::make_unique<std::jthread>(ThreadEntry, std::ref(*this)); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -255,7 +255,6 @@ void CoreTiming::ThreadLoop() { | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
|                     while (!paused && !event.IsSet() && wait_time > 0) { |                     while (!paused && !event.IsSet() && wait_time > 0) { | ||||||
|                         wait_time = *next_time - GetGlobalTimeNs().count(); |                         wait_time = *next_time - GetGlobalTimeNs().count(); | ||||||
| 
 |  | ||||||
|                         if (wait_time >= timer_resolution_ns) { |                         if (wait_time >= timer_resolution_ns) { | ||||||
|                             Common::Windows::SleepForOneTick(); |                             Common::Windows::SleepForOneTick(); | ||||||
|                         } else { |                         } else { | ||||||
|  |  | ||||||
|  | @ -163,7 +163,7 @@ private: | ||||||
|     Common::Event pause_event{}; |     Common::Event pause_event{}; | ||||||
|     std::mutex basic_lock; |     std::mutex basic_lock; | ||||||
|     std::mutex advance_lock; |     std::mutex advance_lock; | ||||||
|     std::unique_ptr<std::thread> timer_thread; |     std::unique_ptr<std::jthread> timer_thread; | ||||||
|     std::atomic<bool> paused{}; |     std::atomic<bool> paused{}; | ||||||
|     std::atomic<bool> paused_set{}; |     std::atomic<bool> paused_set{}; | ||||||
|     std::atomic<bool> wait_set{}; |     std::atomic<bool> wait_set{}; | ||||||
|  |  | ||||||
|  | @ -329,8 +329,22 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | ||||||
|     static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_a; |     static thread_local std::array read_buffer_a{ | ||||||
|     static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_x; |         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||||
|  |         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||||
|  |     }; | ||||||
|  |     static thread_local std::array read_buffer_data_a{ | ||||||
|  |         Common::ScratchBuffer<u8>(), | ||||||
|  |         Common::ScratchBuffer<u8>(), | ||||||
|  |     }; | ||||||
|  |     static thread_local std::array read_buffer_x{ | ||||||
|  |         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||||
|  |         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||||
|  |     }; | ||||||
|  |     static thread_local std::array read_buffer_data_x{ | ||||||
|  |         Common::ScratchBuffer<u8>(), | ||||||
|  |         Common::ScratchBuffer<u8>(), | ||||||
|  |     }; | ||||||
| 
 | 
 | ||||||
|     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && |     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && | ||||||
|                            BufferDescriptorA()[buffer_index].Size()}; |                            BufferDescriptorA()[buffer_index].Size()}; | ||||||
|  | @ -339,19 +353,17 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons | ||||||
|             BufferDescriptorA().size() > buffer_index, { return {}; }, |             BufferDescriptorA().size() > buffer_index, { return {}; }, | ||||||
|             "BufferDescriptorA invalid buffer_index {}", buffer_index); |             "BufferDescriptorA invalid buffer_index {}", buffer_index); | ||||||
|         auto& read_buffer = read_buffer_a[buffer_index]; |         auto& read_buffer = read_buffer_a[buffer_index]; | ||||||
|         read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size()); |         return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | ||||||
|         memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(), |                                 BufferDescriptorA()[buffer_index].Size(), | ||||||
|                          read_buffer.size()); |                                 &read_buffer_data_a[buffer_index]); | ||||||
|         return read_buffer; |  | ||||||
|     } else { |     } else { | ||||||
|         ASSERT_OR_EXECUTE_MSG( |         ASSERT_OR_EXECUTE_MSG( | ||||||
|             BufferDescriptorX().size() > buffer_index, { return {}; }, |             BufferDescriptorX().size() > buffer_index, { return {}; }, | ||||||
|             "BufferDescriptorX invalid buffer_index {}", buffer_index); |             "BufferDescriptorX invalid buffer_index {}", buffer_index); | ||||||
|         auto& read_buffer = read_buffer_x[buffer_index]; |         auto& read_buffer = read_buffer_x[buffer_index]; | ||||||
|         read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size()); |         return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | ||||||
|         memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(), |                                 BufferDescriptorX()[buffer_index].Size(), | ||||||
|                          read_buffer.size()); |                                 &read_buffer_data_x[buffer_index]); | ||||||
|         return read_buffer; |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -266,6 +266,22 @@ struct Memory::Impl { | ||||||
|         ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size); |         ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     const u8* GetSpan(const VAddr src_addr, const std::size_t size) const { | ||||||
|  |         if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == | ||||||
|  |             current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { | ||||||
|  |             return GetPointerSilent(src_addr); | ||||||
|  |         } | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u8* GetSpan(const VAddr src_addr, const std::size_t size) { | ||||||
|  |         if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == | ||||||
|  |             current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { | ||||||
|  |             return GetPointerSilent(src_addr); | ||||||
|  |         } | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     template <bool UNSAFE> |     template <bool UNSAFE> | ||||||
|     void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, |     void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, | ||||||
|                         const void* src_buffer, const std::size_t size) { |                         const void* src_buffer, const std::size_t size) { | ||||||
|  | @ -559,7 +575,7 @@ struct Memory::Impl { | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         const Common::ProcessAddress end = base + size; |         const auto end = base + size; | ||||||
|         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | ||||||
|                    base + page_table.pointers.size()); |                    base + page_table.pointers.size()); | ||||||
| 
 | 
 | ||||||
|  | @ -570,14 +586,18 @@ struct Memory::Impl { | ||||||
|             while (base != end) { |             while (base != end) { | ||||||
|                 page_table.pointers[base].Store(nullptr, type); |                 page_table.pointers[base].Store(nullptr, type); | ||||||
|                 page_table.backing_addr[base] = 0; |                 page_table.backing_addr[base] = 0; | ||||||
| 
 |                 page_table.blocks[base] = 0; | ||||||
|                 base += 1; |                 base += 1; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|  |             auto orig_base = base; | ||||||
|             while (base != end) { |             while (base != end) { | ||||||
|                 page_table.pointers[base].Store( |                 auto host_ptr = | ||||||
|                     system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS), type); |                     system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS); | ||||||
|                 page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS); |                 auto backing = GetInteger(target) - (base << YUZU_PAGEBITS); | ||||||
|  |                 page_table.pointers[base].Store(host_ptr, type); | ||||||
|  |                 page_table.backing_addr[base] = backing; | ||||||
|  |                 page_table.blocks[base] = orig_base << YUZU_PAGEBITS; | ||||||
| 
 | 
 | ||||||
|                 ASSERT_MSG(page_table.pointers[base].Pointer(), |                 ASSERT_MSG(page_table.pointers[base].Pointer(), | ||||||
|                            "memory mapping base yield a nullptr within the table"); |                            "memory mapping base yield a nullptr within the table"); | ||||||
|  | @ -747,6 +767,14 @@ struct Memory::Impl { | ||||||
|         VAddr last_address; |         VAddr last_address; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|  |     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||||
|  |         system.GPU().InvalidateRegion(GetInteger(dest_addr), size); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||||
|  |         system.GPU().FlushRegion(GetInteger(dest_addr), size); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     Common::PageTable* current_page_table = nullptr; |     Common::PageTable* current_page_table = nullptr; | ||||||
|     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> |     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||||||
|  | @ -881,6 +909,14 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b | ||||||
|     impl->ReadBlockUnsafe(src_addr, dest_buffer, size); |     impl->ReadBlockUnsafe(src_addr, dest_buffer, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const { | ||||||
|  |     return impl->GetSpan(src_addr, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) { | ||||||
|  |     return impl->GetSpan(src_addr, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, | void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, | ||||||
|                         const std::size_t size) { |                         const std::size_t size) { | ||||||
|     impl->WriteBlock(dest_addr, src_buffer, size); |     impl->WriteBlock(dest_addr, src_buffer, size); | ||||||
|  | @ -924,4 +960,12 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) | ||||||
|     impl->MarkRegionDebug(GetInteger(vaddr), size, debug); |     impl->MarkRegionDebug(GetInteger(vaddr), size, debug); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||||
|  |     impl->InvalidateRegion(dest_addr, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||||
|  |     impl->FlushRegion(dest_addr, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Core::Memory
 | } // namespace Core::Memory
 | ||||||
|  |  | ||||||
|  | @ -5,8 +5,12 @@ | ||||||
| 
 | 
 | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <optional> | ||||||
| #include <span> | #include <span> | ||||||
| #include <string> | #include <string> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "common/scratch_buffer.h" | ||||||
| #include "common/typed_address.h" | #include "common/typed_address.h" | ||||||
| #include "core/hle/result.h" | #include "core/hle/result.h" | ||||||
| 
 | 
 | ||||||
|  | @ -24,6 +28,10 @@ class PhysicalMemory; | ||||||
| class KProcess; | class KProcess; | ||||||
| } // namespace Kernel
 | } // namespace Kernel
 | ||||||
| 
 | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | class MemoryManager; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| namespace Core::Memory { | namespace Core::Memory { | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -343,6 +351,9 @@ public: | ||||||
|      */ |      */ | ||||||
|     void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); |     void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); | ||||||
| 
 | 
 | ||||||
|  |     const u8* GetSpan(const VAddr src_addr, const std::size_t size) const; | ||||||
|  |     u8* GetSpan(const VAddr src_addr, const std::size_t size); | ||||||
|  | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Writes a range of bytes into the current process' address space at the specified |      * Writes a range of bytes into the current process' address space at the specified | ||||||
|      * virtual address. |      * virtual address. | ||||||
|  | @ -461,6 +472,8 @@ public: | ||||||
|     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | ||||||
| 
 | 
 | ||||||
|     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); |     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||||||
|  |     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); | ||||||
|  |     void FlushRegion(Common::ProcessAddress dest_addr, size_t size); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|  | @ -469,4 +482,203 @@ private: | ||||||
|     std::unique_ptr<Impl> impl; |     std::unique_ptr<Impl> impl; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | enum GuestMemoryFlags : u32 { | ||||||
|  |     Read = 1 << 0, | ||||||
|  |     Write = 1 << 1, | ||||||
|  |     Safe = 1 << 2, | ||||||
|  |     Cached = 1 << 3, | ||||||
|  | 
 | ||||||
|  |     SafeRead = Read | Safe, | ||||||
|  |     SafeWrite = Write | Safe, | ||||||
|  |     SafeReadWrite = SafeRead | SafeWrite, | ||||||
|  |     SafeReadCachedWrite = SafeReadWrite | Cached, | ||||||
|  | 
 | ||||||
|  |     UnsafeRead = Read, | ||||||
|  |     UnsafeWrite = Write, | ||||||
|  |     UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||||||
|  |     UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | namespace { | ||||||
|  | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||||
|  | class GuestMemory { | ||||||
|  |     using iterator = T*; | ||||||
|  |     using const_iterator = const T*; | ||||||
|  |     using value_type = T; | ||||||
|  |     using element_type = T; | ||||||
|  |     using iterator_category = std::contiguous_iterator_tag; | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  |     GuestMemory() = delete; | ||||||
|  |     explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_, | ||||||
|  |                          Common::ScratchBuffer<T>* backup = nullptr) | ||||||
|  |         : memory{memory_}, addr{addr_}, size{size_} { | ||||||
|  |         static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||||||
|  |             Read(addr, size, backup); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ~GuestMemory() = default; | ||||||
|  | 
 | ||||||
|  |     T* data() noexcept { | ||||||
|  |         return data_span.data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const T* data() const noexcept { | ||||||
|  |         return data_span.data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] T* begin() noexcept { | ||||||
|  |         return data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] const T* begin() const noexcept { | ||||||
|  |         return data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] T* end() noexcept { | ||||||
|  |         return data() + size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] const T* end() const noexcept { | ||||||
|  |         return data() + size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     T& operator[](size_t index) noexcept { | ||||||
|  |         return data_span[index]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const T& operator[](size_t index) const noexcept { | ||||||
|  |         return data_span[index]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept { | ||||||
|  |         addr = addr_; | ||||||
|  |         size = size_; | ||||||
|  |         addr_changed = true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::span<T> Read(u64 addr_, std::size_t size_, | ||||||
|  |                       Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||||||
|  |         addr = addr_; | ||||||
|  |         size = size_; | ||||||
|  |         if (size == 0) { | ||||||
|  |             is_data_copy = true; | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (TrySetSpan()) { | ||||||
|  |             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 memory.FlushRegion(addr, size * sizeof(T)); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             if (backup) { | ||||||
|  |                 backup->resize_destructive(size); | ||||||
|  |                 data_span = *backup; | ||||||
|  |             } else { | ||||||
|  |                 data_copy.resize(size); | ||||||
|  |                 data_span = std::span(data_copy); | ||||||
|  |             } | ||||||
|  |             is_data_copy = true; | ||||||
|  |             span_valid = true; | ||||||
|  |             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 memory.ReadBlock(addr, data_span.data(), size * sizeof(T)); | ||||||
|  |             } else { | ||||||
|  |                 memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T)); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         return data_span; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Write(std::span<T> write_data) noexcept { | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||||
|  |             memory.WriteBlockCached(addr, write_data.data(), size * sizeof(T)); | ||||||
|  |         } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |             memory.WriteBlock(addr, write_data.data(), size * sizeof(T)); | ||||||
|  |         } else { | ||||||
|  |             memory.WriteBlockUnsafe(addr, write_data.data(), size * sizeof(T)); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool TrySetSpan() noexcept { | ||||||
|  |         if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) { | ||||||
|  |             data_span = {reinterpret_cast<T*>(ptr), size}; | ||||||
|  |             span_valid = true; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | protected: | ||||||
|  |     bool IsDataCopy() const noexcept { | ||||||
|  |         return is_data_copy; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool AddressChanged() const noexcept { | ||||||
|  |         return addr_changed; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     M& memory; | ||||||
|  |     u64 addr; | ||||||
|  |     size_t size; | ||||||
|  |     std::span<T> data_span{}; | ||||||
|  |     std::vector<T> data_copy; | ||||||
|  |     bool span_valid{false}; | ||||||
|  |     bool is_data_copy{false}; | ||||||
|  |     bool addr_changed{false}; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||||
|  | class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||||||
|  | public: | ||||||
|  |     GuestMemoryScoped() = delete; | ||||||
|  |     explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_, | ||||||
|  |                                Common::ScratchBuffer<T>* backup = nullptr) | ||||||
|  |         : GuestMemory<M, T, FLAGS>(memory_, addr_, size_, backup) { | ||||||
|  |         if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||||||
|  |             if (!this->TrySetSpan()) { | ||||||
|  |                 if (backup) { | ||||||
|  |                     this->data_span = *backup; | ||||||
|  |                     this->span_valid = true; | ||||||
|  |                     this->is_data_copy = true; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ~GuestMemoryScoped() { | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||||||
|  |             if (this->size == 0) [[unlikely]] { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (this->AddressChanged() || this->IsDataCopy()) { | ||||||
|  |                 ASSERT(this->span_valid); | ||||||
|  |                 if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||||
|  |                     this->memory.WriteBlockCached(this->addr, this->data_span.data(), | ||||||
|  |                                                   this->size * sizeof(T)); | ||||||
|  |                 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                     this->memory.WriteBlock(this->addr, this->data_span.data(), | ||||||
|  |                                             this->size * sizeof(T)); | ||||||
|  |                 } else { | ||||||
|  |                     this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(), | ||||||
|  |                                                   this->size * sizeof(T)); | ||||||
|  |                 } | ||||||
|  |             } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 this->memory.InvalidateRegion(this->addr, this->size * sizeof(T)); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
// Convenience aliases that fix the memory-object type of the accessors:
// the Cpu* aliases use Core::Memory::Memory, the Gpu* aliases use Tegra::MemoryManager.
// The *Scoped aliases select GuestMemoryScoped instead of GuestMemory.
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||||||
| } // namespace Core::Memory
 | } // namespace Core::Memory
 | ||||||
|  |  | ||||||
|  | @ -234,9 +234,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | ||||||
|     if (has_new_downloads) { |     if (has_new_downloads) { | ||||||
|         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | ||||||
|     } |     } | ||||||
|     tmp_buffer.resize_destructive(amount); | 
 | ||||||
|     cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |     Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( | ||||||
|     cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); |         cpu_memory, *cpu_src_address, amount, &tmp_buffer); | ||||||
|  |     tmp.SetAddressAndSize(*cpu_dest_address, amount); | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/dma_pusher.h" | #include "video_core/dma_pusher.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | @ -12,6 +13,8 @@ | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
|  | constexpr u32 MacroRegistersStart = 0xE00; | ||||||
|  | 
 | ||||||
| DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, | ||||||
|                      Control::ChannelState& channel_state_) |                      Control::ChannelState& channel_state_) | ||||||
|     : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, |     : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, | ||||||
|  | @ -74,25 +77,16 @@ bool DmaPusher::Step() { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Push buffer non-empty, read a word
 |         // Push buffer non-empty, read a word
 | ||||||
|         command_headers.resize_destructive(command_list_header.size); |         if (dma_state.method >= MacroRegistersStart) { | ||||||
|         constexpr u32 MacroRegistersStart = 0xE00; |  | ||||||
|         if (dma_state.method < MacroRegistersStart) { |  | ||||||
|             if (Settings::IsGPULevelHigh()) { |  | ||||||
|                 memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), |  | ||||||
|                                          command_list_header.size * sizeof(u32)); |  | ||||||
|             } else { |  | ||||||
|                 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), |  | ||||||
|                                                command_list_header.size * sizeof(u32)); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             const size_t copy_size = command_list_header.size * sizeof(u32); |  | ||||||
|             if (subchannels[dma_state.subchannel]) { |             if (subchannels[dma_state.subchannel]) { | ||||||
|                 subchannels[dma_state.subchannel]->current_dirty = |                 subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty( | ||||||
|                     memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); |                     dma_state.dma_get, command_list_header.size * sizeof(u32)); | ||||||
|             } |             } | ||||||
|             memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); |  | ||||||
|         } |         } | ||||||
|         ProcessCommands(command_headers); |         Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||||
|  |                                      Core::Memory::GuestMemoryFlags::UnsafeRead> | ||||||
|  |             headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); | ||||||
|  |         ProcessCommands(headers); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return true; |     return true; | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/algorithm.h" | #include "common/algorithm.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
|  | @ -46,15 +47,11 @@ void State::ProcessData(const u32* data, size_t num_data) { | ||||||
| void State::ProcessData(std::span<const u8> read_buffer) { | void State::ProcessData(std::span<const u8> read_buffer) { | ||||||
|     const GPUVAddr address{regs.dest.Address()}; |     const GPUVAddr address{regs.dest.Address()}; | ||||||
|     if (is_linear) { |     if (is_linear) { | ||||||
|         if (regs.line_count == 1) { |         for (size_t line = 0; line < regs.line_count; ++line) { | ||||||
|             rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); |             const GPUVAddr dest_line = address + line * regs.dest.pitch; | ||||||
|         } else { |             std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, | ||||||
|             for (size_t line = 0; line < regs.line_count; ++line) { |                                        regs.line_length_in); | ||||||
|                 const GPUVAddr dest_line = address + line * regs.dest.pitch; |             rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); | ||||||
|                 std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, |  | ||||||
|                                            regs.line_length_in); |  | ||||||
|                 rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         u32 width = regs.dest.width; |         u32 width = regs.dest.width; | ||||||
|  | @ -70,13 +67,14 @@ void State::ProcessData(std::span<const u8> read_buffer) { | ||||||
|         const std::size_t dst_size = Tegra::Texture::CalculateSize( |         const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||||||
|             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | ||||||
|             regs.dest.BlockHeight(), regs.dest.BlockDepth()); |             regs.dest.BlockHeight(), regs.dest.BlockDepth()); | ||||||
|         tmp_buffer.resize_destructive(dst_size); | 
 | ||||||
|         memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |         Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|         Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, |             tmp(memory_manager, address, dst_size, &tmp_buffer); | ||||||
|                                        regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | 
 | ||||||
|                                        x_elements, regs.line_count, regs.dest.BlockHeight(), |         Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, | ||||||
|  |                                        regs.dest.depth, x_offset, regs.dest.y, x_elements, | ||||||
|  |                                        regs.line_count, regs.dest.BlockHeight(), | ||||||
|                                        regs.dest.BlockDepth(), regs.line_length_in); |                                        regs.dest.BlockDepth(), regs.line_length_in); | ||||||
|         memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -84,7 +84,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | ||||||
| 
 | 
 | ||||||
|     Texture::TICEntry tic_entry; |     Texture::TICEntry tic_entry; | ||||||
|     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | ||||||
| 
 |  | ||||||
|     return tic_entry; |     return tic_entry; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/core_timing.h" | #include "core/core_timing.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/dirty_flags.h" | #include "video_core/dirty_flags.h" | ||||||
| #include "video_core/engines/draw_manager.h" | #include "video_core/engines/draw_manager.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | @ -679,17 +680,14 @@ void Maxwell3D::ProcessCBData(u32 value) { | ||||||
| Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | ||||||
|     const GPUVAddr tic_address_gpu{regs.tex_header.Address() + |     const GPUVAddr tic_address_gpu{regs.tex_header.Address() + | ||||||
|                                    tic_index * sizeof(Texture::TICEntry)}; |                                    tic_index * sizeof(Texture::TICEntry)}; | ||||||
| 
 |  | ||||||
|     Texture::TICEntry tic_entry; |     Texture::TICEntry tic_entry; | ||||||
|     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | ||||||
| 
 |  | ||||||
|     return tic_entry; |     return tic_entry; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | ||||||
|     const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + |     const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + | ||||||
|                                    tsc_index * sizeof(Texture::TSCEntry)}; |                                    tsc_index * sizeof(Texture::TSCEntry)}; | ||||||
| 
 |  | ||||||
|     Texture::TSCEntry tsc_entry; |     Texture::TSCEntry tsc_entry; | ||||||
|     memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |     memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | ||||||
|     return tsc_entry; |     return tsc_entry; | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
|  | @ -130,11 +131,12 @@ void MaxwellDMA::Launch() { | ||||||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||||
|                 read_buffer.resize_destructive(16); |                 read_buffer.resize_destructive(16); | ||||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||||
|                     memory_manager.ReadBlock( |                     Core::Memory::GpuGuestMemoryScoped< | ||||||
|                         convert_linear_2_blocklinear_addr(regs.offset_in + offset), |                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                         read_buffer.data(), read_buffer.size()); |                         tmp_write_buffer(memory_manager, | ||||||
|                     memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), |                                          convert_linear_2_blocklinear_addr(regs.offset_in + offset), | ||||||
|                                                     read_buffer.size()); |                                          16, &read_buffer); | ||||||
|  |                     tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16); | ||||||
|                 } |                 } | ||||||
|             } else if (is_src_pitch && !is_dst_pitch) { |             } else if (is_src_pitch && !is_dst_pitch) { | ||||||
|                 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |                 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | ||||||
|  | @ -142,20 +144,19 @@ void MaxwellDMA::Launch() { | ||||||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||||
|                 read_buffer.resize_destructive(16); |                 read_buffer.resize_destructive(16); | ||||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||||
|                     memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), |                     Core::Memory::GpuGuestMemoryScoped< | ||||||
|                                              read_buffer.size()); |                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                     memory_manager.WriteBlockCached( |                         tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); | ||||||
|                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), |                     tmp_write_buffer.SetAddressAndSize( | ||||||
|                         read_buffer.data(), read_buffer.size()); |                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |                 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||||||
|                     read_buffer.resize_destructive(regs.line_length_in); |                     Core::Memory::GpuGuestMemoryScoped< | ||||||
|                     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), |                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                                              regs.line_length_in, |                         tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, | ||||||
|                                              VideoCommon::CacheType::NoBufferCache); |                                          &read_buffer); | ||||||
|                     memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), |                     tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); | ||||||
|                                                     regs.line_length_in); |  | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  | @ -222,17 +223,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | ||||||
| 
 | 
 | ||||||
|     const size_t dst_size = dst_operand.pitch * regs.line_count; |     const size_t dst_size = dst_operand.pitch * regs.line_count; | ||||||
|     read_buffer.resize_destructive(src_size); |  | ||||||
|     write_buffer.resize_destructive(dst_size); |  | ||||||
| 
 | 
 | ||||||
|     memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|     memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size); |         memory_manager, src_operand.address, src_size, &read_buffer); | ||||||
|  |     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|  |         tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |     UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | ||||||
|                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |                      x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, | ||||||
|                      dst_operand.pitch); |                      block_depth, dst_operand.pitch); | ||||||
| 
 |  | ||||||
|     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyPitchToBlockLinear() { | void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|  | @ -287,18 +286,17 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | ||||||
|     const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; |     const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | ||||||
| 
 | 
 | ||||||
|     read_buffer.resize_destructive(src_size); |     GPUVAddr src_addr = regs.offset_in; | ||||||
|     write_buffer.resize_destructive(dst_size); |     GPUVAddr dst_addr = regs.offset_out; | ||||||
|  |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|  |         memory_manager, src_addr, src_size, &read_buffer); | ||||||
|  |     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|  |         tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |     //  If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||||
|     memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |     SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | ||||||
| 
 |                    x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height, | ||||||
|     // If the input is linear and the output is tiled, swizzle the input and copy it over.
 |                    block_depth, regs.pitch_in); | ||||||
|     SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |  | ||||||
|                    dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |  | ||||||
|                    regs.pitch_in); |  | ||||||
| 
 |  | ||||||
|     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyBlockLinearToBlockLinear() { | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
|  | @ -342,23 +340,20 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
|     const u32 pitch = x_elements * bytes_per_pixel; |     const u32 pitch = x_elements * bytes_per_pixel; | ||||||
|     const size_t mid_buffer_size = pitch * regs.line_count; |     const size_t mid_buffer_size = pitch * regs.line_count; | ||||||
| 
 | 
 | ||||||
|     read_buffer.resize_destructive(src_size); |  | ||||||
|     write_buffer.resize_destructive(dst_size); |  | ||||||
| 
 |  | ||||||
|     intermediate_buffer.resize_destructive(mid_buffer_size); |     intermediate_buffer.resize_destructive(mid_buffer_size); | ||||||
| 
 | 
 | ||||||
|     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|     memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |         memory_manager, regs.offset_in, src_size, &read_buffer); | ||||||
|  |     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|  |         tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, |     UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, | ||||||
|                      src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, |                      src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, | ||||||
|                      src.block_size.height, src.block_size.depth, pitch); |                      src.block_size.height, src.block_size.depth, pitch); | ||||||
| 
 | 
 | ||||||
|     SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, |     SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, | ||||||
|                    dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, |                    dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | ||||||
|                    dst.block_size.height, dst.block_size.depth, pitch); |                    dst.block_size.height, dst.block_size.depth, pitch); | ||||||
| 
 |  | ||||||
|     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::ReleaseSemaphore() { | void MaxwellDMA::ReleaseSemaphore() { | ||||||
|  |  | ||||||
|  | @ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||||||
|     const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); |     const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | ||||||
|     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); |     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | ||||||
|     const size_t src_size = get_surface_size(src, src_bytes_per_pixel); |     const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | ||||||
|     impl->tmp_buffer.resize_destructive(src_size); | 
 | ||||||
|     memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | ||||||
|  |         memory_manager, src.Address(), src_size, &impl->tmp_buffer); | ||||||
| 
 | 
 | ||||||
|     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; |     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||||||
| 
 |  | ||||||
|     const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; |     const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     impl->src_buffer.resize_destructive(src_copy_size); |     impl->src_buffer.resize_destructive(src_copy_size); | ||||||
|  | @ -200,12 +200,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||||||
| 
 | 
 | ||||||
|     impl->dst_buffer.resize_destructive(dst_copy_size); |     impl->dst_buffer.resize_destructive(dst_copy_size); | ||||||
|     if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { |     if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||||||
|         UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, |         UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height, | ||||||
|                          src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, |                          src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y, | ||||||
|                          src_extent_y, src.block_height, src.block_depth, |                          src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); | ||||||
|                          src_extent_x * src_bytes_per_pixel); |  | ||||||
|     } else { |     } else { | ||||||
|         process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, |         process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, | ||||||
|                              src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); |                              src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -221,20 +220,18 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); |     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | ||||||
|     impl->tmp_buffer.resize_destructive(dst_size); |     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> | ||||||
|     memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); |         tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); | ||||||
| 
 | 
 | ||||||
|     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { |     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||||||
|         SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, |         SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, | ||||||
|                        dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, |                        dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, | ||||||
|                        dst_extent_y, dst.block_height, dst.block_depth, |                        dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); | ||||||
|                        dst_extent_x * dst_bytes_per_pixel); |  | ||||||
|     } else { |     } else { | ||||||
|         process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, |         process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y, | ||||||
|                              dst.pitch, config.dst_x0, config.dst_y0, |                              dst.pitch, config.dst_x0, config.dst_y0, | ||||||
|                              static_cast<size_t>(dst_bytes_per_pixel)); |                              static_cast<size_t>(dst_bytes_per_pixel)); | ||||||
|     } |     } | ||||||
|     memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); |  | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,13 +10,13 @@ | ||||||
| #include "core/device_memory.h" | #include "core/device_memory.h" | ||||||
| #include "core/hle/kernel/k_page_table.h" | #include "core/hle/kernel/k_page_table.h" | ||||||
| #include "core/hle/kernel/k_process.h" | #include "core/hle/kernel/k_process.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/invalidation_accumulator.h" | #include "video_core/invalidation_accumulator.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | using Core::Memory::GuestMemoryFlags; | ||||||
| 
 | 
 | ||||||
| std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | ||||||
| 
 | 
 | ||||||
|  | @ -587,13 +587,10 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | ||||||
| 
 | 
 | ||||||
// MemoryManager::CopyBlock, rendered as side-by-side diff rows (left = before, right = after).
// Copies `size` bytes of GPU virtual memory from gpu_src_addr to gpu_dest_addr; `which` is
// forwarded to the cache-aware calls (ReadBlock/WriteBlock on the left, FlushRegion on both sides).
//
// Left column: stages the source bytes in tmp_buffer via ReadBlock, flushes the destination
// region, then writes the staged bytes to the destination with WriteBlock.
// Right column: constructs a GpuGuestMemoryScoped<u8, SafeReadWrite> over the source range,
// retargets it to the destination with SetAddressAndSize, then flushes the destination region.
//
// NOTE(review): presumably the scoped object writes its contents back to the retargeted address
// when it goes out of scope -- its definition is not visible here; confirm, and confirm that the
// write-back still happens after FlushRegion as the left column's ordering did.
// NOTE(review): the right column no longer carries the two comments explaining why the
// destination must be flushed before it is overwritten.
| void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | ||||||
|                               VideoCommon::CacheType which) { |                               VideoCommon::CacheType which) { | ||||||
|     tmp_buffer.resize_destructive(size); |     Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | ||||||
|     ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); |         *this, gpu_src_addr, size); | ||||||
| 
 |     data.SetAddressAndSize(gpu_dest_addr, size); | ||||||
|     // The output block must be flushed in case it has data modified from the GPU.
 |  | ||||||
|     // Fixes NPC geometry in Zombie Panic in Wonderland DX
 |  | ||||||
|     FlushRegion(gpu_dest_addr, size, which); |     FlushRegion(gpu_dest_addr, size, which); | ||||||
|     WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | ||||||
|  | @ -758,4 +755,23 @@ void MemoryManager::FlushCaching() { | ||||||
|     accumulator->Clear(); |     accumulator->Clear(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Const overload of MemoryManager::GetSpan (added by this commit; right column only).
// Translates src_addr with GpuToCpuAddress; when the translation yields a value, returns
// memory.GetSpan(*cpu_addr, size), otherwise returns nullptr.
// NOTE(review): unlike the non-const overload, this one does not call IsContinuousRange before
// translating the address -- confirm that skipping the check is intended for the const path.
|  | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | ||||||
|  |     auto cpu_addr = GpuToCpuAddress(src_addr); | ||||||
|  |     if (cpu_addr) { | ||||||
|  |         return memory.GetSpan(*cpu_addr, size); | ||||||
|  |     } | ||||||
|  |     return nullptr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
// Non-const overload of MemoryManager::GetSpan (added by this commit; right column only).
// Returns nullptr when IsContinuousRange(src_addr, size) is false or when GpuToCpuAddress yields
// no value; otherwise returns memory.GetSpan(*cpu_addr, size) for the translated address.
// Callers must therefore handle a null result.
|  | u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | ||||||
|  |     if (!IsContinuousRange(src_addr, size)) { | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  |     auto cpu_addr = GpuToCpuAddress(src_addr); | ||||||
|  |     if (cpu_addr) { | ||||||
|  |         return memory.GetSpan(*cpu_addr, size); | ||||||
|  |     } | ||||||
|  |     return nullptr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -15,6 +15,7 @@ | ||||||
| #include "common/range_map.h" | #include "common/range_map.h" | ||||||
| #include "common/scratch_buffer.h" | #include "common/scratch_buffer.h" | ||||||
| #include "common/virtual_buffer.h" | #include "common/virtual_buffer.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/cache_types.h" | #include "video_core/cache_types.h" | ||||||
| #include "video_core/pte_kind.h" | #include "video_core/pte_kind.h" | ||||||
| 
 | 
 | ||||||
|  | @ -62,6 +63,20 @@ public: | ||||||
|     [[nodiscard]] u8* GetPointer(GPUVAddr addr); |     [[nodiscard]] u8* GetPointer(GPUVAddr addr); | ||||||
|     [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; |     [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; | ||||||
| 
 | 
 | ||||||
// Typed GetPointer<T> member template (added by this commit; right column only).
// Translates the GPU virtual address with GpuToCpuAddress; returns a value-initialised (null)
// pointer when the translation yields nothing, otherwise forwards to memory.GetPointer.
// NOTE(review): memory.GetPointer is called without a template argument while this function
// returns T* -- confirm the call produces a T* for every T this template is instantiated with.
|  |     template <typename T> | ||||||
|  |     [[nodiscard]] T* GetPointer(GPUVAddr addr) { | ||||||
|  |         const auto address{GpuToCpuAddress(addr)}; | ||||||
|  |         if (!address) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         return memory.GetPointer(*address); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
// Const counterpart of the typed GetPointer<T> member template (added by this commit).
// Forwards to GetPointer with the same address.
// NOTE(review): the forwarded template argument is T* rather than T, while the declared return
// type is const T*; inside a const member this call also looks like it would select this same
// const overload again. Confirm whether GetPointer<T> (via the non-const overload) was intended.
|  |     template <typename T> | ||||||
|  |     [[nodiscard]] const T* GetPointer(GPUVAddr addr) const { | ||||||
|  |         return GetPointer<T*>(addr); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * ReadBlock and WriteBlock are full read and write operations over virtual |      * ReadBlock and WriteBlock are full read and write operations over virtual | ||||||
|      * GPU Memory. It's important to use these when GPU memory may not be continuous |      * GPU Memory. It's important to use these when GPU memory may not be continuous | ||||||
|  | @ -139,6 +154,9 @@ public: | ||||||
| 
 | 
 | ||||||
|     void FlushCaching(); |     void FlushCaching(); | ||||||
| 
 | 
 | ||||||
|  |     const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const; | ||||||
|  |     u8* GetSpan(const GPUVAddr src_addr, const std::size_t size); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | ||||||
|     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | ||||||
|  |  | ||||||
|  | @ -8,6 +8,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/control/channel_state.h" | #include "video_core/control/channel_state.h" | ||||||
| #include "video_core/dirty_flags.h" | #include "video_core/dirty_flags.h" | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
|  | @ -1022,19 +1023,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | ||||||
|         runtime.AccelerateImageUpload(image, staging, uploads); |         runtime.AccelerateImageUpload(image, staging, uploads); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     const size_t guest_size_bytes = image.guest_size_bytes; | 
 | ||||||
|     swizzle_data_buffer.resize_destructive(guest_size_bytes); |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||||
|     gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); |         *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||||
| 
 | 
 | ||||||
|     if (True(image.flags & ImageFlagBits::Converted)) { |     if (True(image.flags & ImageFlagBits::Converted)) { | ||||||
|         unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |         unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | ||||||
|         auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, |         auto copies = | ||||||
|                                      unswizzle_data_buffer); |             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); | ||||||
|         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); |         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); | ||||||
|         image.UploadMemory(staging, copies); |         image.UploadMemory(staging, copies); | ||||||
|     } else { |     } else { | ||||||
|         const auto copies = |         const auto copies = | ||||||
|             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); |             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); | ||||||
|         image.UploadMemory(staging, copies); |         image.UploadMemory(staging, copies); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | @ -1227,11 +1228,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | ||||||
|     decode->image_id = image_id; |     decode->image_id = image_id; | ||||||
|     async_decodes.push_back(std::move(decode)); |     async_decodes.push_back(std::move(decode)); | ||||||
| 
 | 
 | ||||||
|     Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); |     static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; | ||||||
|     const size_t guest_size_bytes = image.guest_size_bytes; |     local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | ||||||
|     swizzle_data_buffer.resize_destructive(guest_size_bytes); |     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||||
|     gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); |         *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||||
|     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, | 
 | ||||||
|  |     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | ||||||
|                                  local_unswizzle_data_buffer); |                                  local_unswizzle_data_buffer); | ||||||
|     const size_t out_size = MapSizeBytes(image); |     const size_t out_size = MapSizeBytes(image); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include "common/div_ceil.h" | #include "common/div_ceil.h" | ||||||
| #include "common/scratch_buffer.h" | #include "common/scratch_buffer.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/compatible_formats.h" | #include "video_core/compatible_formats.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
|  | @ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | ||||||
|                        tile_size.height, info.tile_width_spacing); |                        tile_size.height, info.tile_width_spacing); | ||||||
|     const size_t subresource_size = sizes[level]; |     const size_t subresource_size = sizes[level]; | ||||||
| 
 | 
 | ||||||
|     tmp_buffer.resize_destructive(subresource_size); |  | ||||||
|     const std::span<u8> dst(tmp_buffer); |  | ||||||
| 
 |  | ||||||
|     for (s32 layer = 0; layer < info.resources.layers; ++layer) { |     for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||||||
|         const std::span<const u8> src = input.subspan(host_offset); |         const std::span<const u8> src = input.subspan(host_offset); | ||||||
|         gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); |         { | ||||||
|  |             Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||||||
|  |                 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); | ||||||
| 
 | 
 | ||||||
|         SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |             SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | ||||||
|                        num_tiles.depth, block.height, block.depth); |                            num_tiles.depth, block.height, block.depth); | ||||||
| 
 |         } | ||||||
|         gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); |  | ||||||
| 
 | 
 | ||||||
|         host_offset += host_bytes_per_layer; |         host_offset += host_bytes_per_layer; | ||||||
|         guest_offset += layer_stride; |         guest_offset += layer_stride; | ||||||
|  | @ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | ||||||
|     const Extent3D size = info.size; |     const Extent3D size = info.size; | ||||||
| 
 | 
 | ||||||
|     if (info.type == ImageType::Linear) { |     if (info.type == ImageType::Linear) { | ||||||
|  |         ASSERT(output.size_bytes() >= guest_size_bytes); | ||||||
|         gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); |         gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); | ||||||
| 
 | 
 | ||||||
|         ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); |         ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); | ||||||
|  | @ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | ||||||
|     return copies; |     return copies; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// UploadBufferCopy as it existed before this commit (left column only; the right column is
// empty, i.e. the function is removed).
// Reads image.guest_size_bytes bytes from gpu_addr into output.data() with ReadBlockUnsafe and
// returns a BufferCopy describing that range: both offsets 0, size image.guest_size_bytes.
// NOTE(review): no check is visible here that output holds at least image.guest_size_bytes
// bytes -- the caller was presumably responsible for sizing it.
| BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |  | ||||||
|                             const ImageBase& image, std::span<u8> output) { |  | ||||||
|     gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); |  | ||||||
|     return BufferCopy{ |  | ||||||
|         .src_offset = 0, |  | ||||||
|         .dst_offset = 0, |  | ||||||
|         .size = image.guest_size_bytes, |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||||||
|                   std::span<BufferImageCopy> copies) { |                   std::span<BufferImageCopy> copies) { | ||||||
|     u32 output_offset = 0; |     u32 output_offset = 0; | ||||||
|  |  | ||||||
|  | @ -66,9 +66,6 @@ struct OverlapResult { | ||||||
|     Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |     Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||||||
|     std::span<const u8> input, std::span<u8> output); |     std::span<const u8> input, std::span<u8> output); | ||||||
| 
 | 
 | ||||||
| [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |  | ||||||
|                                           const ImageBase& image, std::span<u8> output); |  | ||||||
| 
 |  | ||||||
| void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | ||||||
|                   std::span<BufferImageCopy> copies); |                   std::span<BufferImageCopy> copies); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Kelebek1
						Kelebek1