forked from eden-emu/eden
		
	Merge pull request #6799 from ameerj/vp9-fixes
nvdec: Fix VP9 reference frame refreshes
This commit is contained in:
		
						commit
						cb6d198101
					
				
					 5 changed files with 51 additions and 204 deletions
				
			
		|  | @ -166,8 +166,6 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto | |||
|             LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); | ||||
|         } else { | ||||
|             cmd_buffer.map_address = object->dma_map_addr; | ||||
|             AddBufferMap(object->dma_map_addr, object->size, object->addr, | ||||
|                          object->status == nvmap::Object::Status::Allocated); | ||||
|         } | ||||
|     } | ||||
|     std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); | ||||
|  | @ -178,30 +176,11 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto | |||
| } | ||||
| 
 | ||||
| NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { | ||||
|     IoctlMapBuffer params{}; | ||||
|     std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); | ||||
|     std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); | ||||
|     SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); | ||||
| 
 | ||||
|     auto& gpu = system.GPU(); | ||||
| 
 | ||||
|     for (auto& cmd_buffer : cmd_buffer_handles) { | ||||
|         const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)}; | ||||
|         if (!object) { | ||||
|             LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle); | ||||
|             std::memcpy(output.data(), ¶ms, output.size()); | ||||
|             return NvResult::InvalidState; | ||||
|         } | ||||
|         if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) { | ||||
|             gpu.MemoryManager().Unmap(object->dma_map_addr, *size); | ||||
|         } else { | ||||
|             // This occurs quite frequently, however does not seem to impact functionality
 | ||||
|             LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr, | ||||
|                       object->dma_map_addr); | ||||
|         } | ||||
|         object->dma_map_addr = 0; | ||||
|     } | ||||
|     // This is intntionally stubbed.
 | ||||
|     // Skip unmapping buffers here, as to not break the continuity of the VP9 reference frame
 | ||||
|     // addresses, and risk invalidating data before the async GPU thread is done with it
 | ||||
|     std::memset(output.data(), 0, output.size()); | ||||
|     LOG_DEBUG(Service_NVDRV, "(STUBBED) called"); | ||||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
|  | @ -212,33 +191,4 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, | |||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
| std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap( | ||||
|     GPUVAddr gpu_addr) const { | ||||
|     const auto it = std::find_if( | ||||
|         buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) { | ||||
|             return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr()); | ||||
|         }); | ||||
| 
 | ||||
|     ASSERT(it != buffer_mappings.end()); | ||||
|     return it->second; | ||||
| } | ||||
| 
 | ||||
| void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, | ||||
|                                        bool is_allocated) { | ||||
|     buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated}); | ||||
| } | ||||
| 
 | ||||
| std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) { | ||||
|     const auto iter{buffer_mappings.find(gpu_addr)}; | ||||
|     if (iter == buffer_mappings.end()) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     std::size_t size = 0; | ||||
|     if (iter->second.IsAllocated()) { | ||||
|         size = iter->second.Size(); | ||||
|     } | ||||
|     buffer_mappings.erase(iter); | ||||
|     return size; | ||||
| } | ||||
| 
 | ||||
| } // namespace Service::Nvidia::Devices
 | ||||
|  |  | |||
|  | @ -23,45 +23,6 @@ public: | |||
|     ~nvhost_nvdec_common() override; | ||||
| 
 | ||||
| protected: | ||||
|     class BufferMap final { | ||||
|     public: | ||||
|         constexpr BufferMap() = default; | ||||
| 
 | ||||
|         constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_) | ||||
|             : start_addr{start_addr_}, end_addr{start_addr_ + size_} {} | ||||
| 
 | ||||
|         constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_, | ||||
|                             bool is_allocated_) | ||||
|             : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_}, | ||||
|               is_allocated{is_allocated_} {} | ||||
| 
 | ||||
|         constexpr VAddr StartAddr() const { | ||||
|             return start_addr; | ||||
|         } | ||||
| 
 | ||||
|         constexpr VAddr EndAddr() const { | ||||
|             return end_addr; | ||||
|         } | ||||
| 
 | ||||
|         constexpr std::size_t Size() const { | ||||
|             return end_addr - start_addr; | ||||
|         } | ||||
| 
 | ||||
|         constexpr VAddr CpuAddr() const { | ||||
|             return cpu_addr; | ||||
|         } | ||||
| 
 | ||||
|         constexpr bool IsAllocated() const { | ||||
|             return is_allocated; | ||||
|         } | ||||
| 
 | ||||
|     private: | ||||
|         GPUVAddr start_addr{}; | ||||
|         GPUVAddr end_addr{}; | ||||
|         VAddr cpu_addr{}; | ||||
|         bool is_allocated{}; | ||||
|     }; | ||||
| 
 | ||||
|     struct IoctlSetNvmapFD { | ||||
|         s32_le nvmap_fd{}; | ||||
|     }; | ||||
|  | @ -154,17 +115,11 @@ protected: | |||
|     NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output); | ||||
|     NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output); | ||||
| 
 | ||||
|     std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; | ||||
|     void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); | ||||
|     std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); | ||||
| 
 | ||||
|     s32_le nvmap_fd{}; | ||||
|     u32_le submit_timeout{}; | ||||
|     std::shared_ptr<nvmap> nvmap_dev; | ||||
|     SyncpointManager& syncpoint_manager; | ||||
|     std::array<u32, MaxSyncPoints> device_syncpoints{}; | ||||
|     // This is expected to be ordered, therefore we must use a map, not unordered_map
 | ||||
|     std::map<GPUVAddr, BufferMap> buffer_mappings; | ||||
| }; | ||||
| }; // namespace Devices
 | ||||
| } // namespace Service::Nvidia
 | ||||
|  |  | |||
|  | @ -11,6 +11,9 @@ | |||
| 
 | ||||
| namespace Tegra::Decoder { | ||||
| namespace { | ||||
| constexpr u32 diff_update_probability = 252; | ||||
| constexpr u32 frame_sync_code = 0x498342; | ||||
| 
 | ||||
| // Default compressed header probabilities once frame context resets
 | ||||
| constexpr Vp9EntropyProbs default_probs{ | ||||
|     .y_mode_prob{ | ||||
|  | @ -361,8 +364,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) | |||
|     InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); | ||||
| 
 | ||||
|     // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
 | ||||
|     // order: last, golden, altref, current. It may be worthwhile to track the updates done here
 | ||||
|     // to avoid buffering frame data needed for reference frame updating in the header composition.
 | ||||
|     // order: last, golden, altref, current.
 | ||||
|     std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4, | ||||
|               vp9_info.frame_offsets.begin()); | ||||
| 
 | ||||
|  | @ -384,33 +386,18 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) | |||
|         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), | ||||
|                                       current_frame.info.bitstream_size); | ||||
|     } | ||||
|     // Buffer two frames, saving the last show frame info
 | ||||
|     if (!next_next_frame.bit_stream.empty()) { | ||||
|     if (!next_frame.bit_stream.empty()) { | ||||
|         Vp9FrameContainer temp{ | ||||
|             .info = current_frame.info, | ||||
|             .bit_stream = std::move(current_frame.bit_stream), | ||||
|         }; | ||||
|         next_next_frame.info.show_frame = current_frame.info.last_frame_shown; | ||||
|         current_frame.info = next_next_frame.info; | ||||
|         current_frame.bit_stream = std::move(next_next_frame.bit_stream); | ||||
|         next_next_frame = std::move(temp); | ||||
| 
 | ||||
|         if (!next_frame.bit_stream.empty()) { | ||||
|             Vp9FrameContainer temp2{ | ||||
|                 .info = current_frame.info, | ||||
|                 .bit_stream = std::move(current_frame.bit_stream), | ||||
|             }; | ||||
|             next_frame.info.show_frame = current_frame.info.last_frame_shown; | ||||
|             current_frame.info = next_frame.info; | ||||
|             current_frame.bit_stream = std::move(next_frame.bit_stream); | ||||
|             next_frame = std::move(temp2); | ||||
|         } else { | ||||
|             next_frame.info = current_frame.info; | ||||
|             next_frame.bit_stream = std::move(current_frame.bit_stream); | ||||
|         } | ||||
|         next_frame.info.show_frame = current_frame.info.last_frame_shown; | ||||
|         current_frame.info = next_frame.info; | ||||
|         current_frame.bit_stream = std::move(next_frame.bit_stream); | ||||
|         next_frame = std::move(temp); | ||||
|     } else { | ||||
|         next_next_frame.info = current_frame.info; | ||||
|         next_next_frame.bit_stream = std::move(current_frame.bit_stream); | ||||
|         next_frame.info = current_frame.info; | ||||
|         next_frame.bit_stream = std::move(current_frame.bit_stream); | ||||
|     } | ||||
|     return current_frame; | ||||
| } | ||||
|  | @ -613,86 +600,64 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | |||
| 
 | ||||
|         // Reset context
 | ||||
|         prev_frame_probs = default_probs; | ||||
|         swap_next_golden = false; | ||||
|         swap_ref_indices = false; | ||||
|         loop_filter_ref_deltas.fill(0); | ||||
|         loop_filter_mode_deltas.fill(0); | ||||
| 
 | ||||
|         // allow frames offsets to stabilize before checking for golden frames
 | ||||
|         grace_period = 4; | ||||
| 
 | ||||
|         // On key frames, all frame slots are set to the current frame,
 | ||||
|         // so the value of the selected slot doesn't really matter.
 | ||||
|         frame_ctxs.fill({current_frame_number, false, default_probs}); | ||||
|         frame_ctxs.fill(default_probs); | ||||
| 
 | ||||
|         // intra only, meaning the frame can be recreated with no other references
 | ||||
|         current_frame_info.intra_only = true; | ||||
| 
 | ||||
|     } else { | ||||
| 
 | ||||
|         if (!current_frame_info.show_frame) { | ||||
|             uncomp_writer.WriteBit(current_frame_info.intra_only); | ||||
|             if (!current_frame_info.last_frame_was_key) { | ||||
|                 swap_next_golden = !swap_next_golden; | ||||
|             } | ||||
|         } else { | ||||
|             current_frame_info.intra_only = false; | ||||
|         } | ||||
|         if (!current_frame_info.error_resilient_mode) { | ||||
|             uncomp_writer.WriteU(0, 2); // Reset frame context.
 | ||||
|         } | ||||
| 
 | ||||
|         // Last, Golden, Altref frames
 | ||||
|         std::array<s32, 3> ref_frame_index{0, 1, 2}; | ||||
| 
 | ||||
|         // Set when next frame is hidden
 | ||||
|         // altref and golden references are swapped
 | ||||
|         if (swap_next_golden) { | ||||
|             ref_frame_index = std::array<s32, 3>{0, 2, 1}; | ||||
|         const auto& curr_offsets = current_frame_info.frame_offsets; | ||||
|         const auto& next_offsets = next_frame.info.frame_offsets; | ||||
|         const bool ref_frames_different = curr_offsets[1] != curr_offsets[2]; | ||||
|         const bool next_references_swap = | ||||
|             (next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]); | ||||
|         const bool needs_ref_swap = ref_frames_different && next_references_swap; | ||||
|         if (needs_ref_swap) { | ||||
|             swap_ref_indices = !swap_ref_indices; | ||||
|         } | ||||
|         union { | ||||
|             u32 raw; | ||||
|             BitField<0, 1, u32> refresh_last; | ||||
|             BitField<1, 2, u32> refresh_golden; | ||||
|             BitField<2, 1, u32> refresh_alt; | ||||
|         } refresh_frame_flags; | ||||
| 
 | ||||
|         // update Last Frame
 | ||||
|         u64 refresh_frame_flags = 1; | ||||
| 
 | ||||
|         // golden frame may refresh, determined if the next golden frame offset is changed
 | ||||
|         bool golden_refresh = false; | ||||
|         if (grace_period <= 0) { | ||||
|             for (s32 index = 1; index < 3; ++index) { | ||||
|                 if (current_frame_info.frame_offsets[index] != | ||||
|                     next_frame.info.frame_offsets[index]) { | ||||
|                     current_frame_info.refresh_frame[index] = true; | ||||
|                     golden_refresh = true; | ||||
|                     grace_period = 3; | ||||
|                 } | ||||
|         refresh_frame_flags.raw = 0; | ||||
|         for (u32 index = 0; index < 3; ++index) { | ||||
|             // Refresh indices that use the current frame as an index
 | ||||
|             if (curr_offsets[3] == next_offsets[index]) { | ||||
|                 refresh_frame_flags.raw |= 1u << index; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (current_frame_info.show_frame && | ||||
|             (!next_frame.info.show_frame || next_frame.info.is_key_frame)) { | ||||
|             // Update golden frame
 | ||||
|             refresh_frame_flags = swap_next_golden ? 2 : 4; | ||||
|         if (swap_ref_indices) { | ||||
|             const u32 temp = refresh_frame_flags.refresh_golden; | ||||
|             refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value()); | ||||
|             refresh_frame_flags.refresh_alt.Assign(temp); | ||||
|         } | ||||
| 
 | ||||
|         if (!current_frame_info.show_frame) { | ||||
|             // Update altref
 | ||||
|             refresh_frame_flags = swap_next_golden ? 2 : 4; | ||||
|         } else if (golden_refresh) { | ||||
|             refresh_frame_flags = 3; | ||||
|         } | ||||
| 
 | ||||
|         if (current_frame_info.intra_only) { | ||||
|             uncomp_writer.WriteU(frame_sync_code, 24); | ||||
|             uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); | ||||
|             uncomp_writer.WriteU(refresh_frame_flags.raw, 8); | ||||
|             uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); | ||||
|             uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); | ||||
|             uncomp_writer.WriteBit(false); // Render and frame size different.
 | ||||
|         } else { | ||||
|             uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); | ||||
| 
 | ||||
|             for (s32 index = 1; index < 4; index++) { | ||||
|             const bool swap_indices = needs_ref_swap ^ swap_ref_indices; | ||||
|             const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2}; | ||||
|             uncomp_writer.WriteU(refresh_frame_flags.raw, 8); | ||||
|             for (size_t index = 1; index < 4; index++) { | ||||
|                 uncomp_writer.WriteU(ref_frame_index[index - 1], 3); | ||||
|                 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1); | ||||
|             } | ||||
| 
 | ||||
|             uncomp_writer.WriteBit(true);  // Frame size with refs.
 | ||||
|             uncomp_writer.WriteBit(false); // Render and frame size different.
 | ||||
|             uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv); | ||||
|  | @ -714,10 +679,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | |||
|         frame_ctx_idx = 1; | ||||
|     } | ||||
| 
 | ||||
|     uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
 | ||||
|     prev_frame_probs = | ||||
|         frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
 | ||||
|     frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy}; | ||||
|     uncomp_writer.WriteU(frame_ctx_idx, 2);       // Frame context index.
 | ||||
|     prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
 | ||||
|     frame_ctxs[frame_ctx_idx] = current_frame_info.entropy; | ||||
| 
 | ||||
|     uncomp_writer.WriteU(current_frame_info.first_level, 6); | ||||
|     uncomp_writer.WriteU(current_frame_info.sharpness_level, 3); | ||||
|  | @ -812,7 +776,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters | |||
|         current_frame_info = curr_frame.info; | ||||
|         bitstream = std::move(curr_frame.bit_stream); | ||||
|     } | ||||
| 
 | ||||
|     // The uncompressed header routine sets PrevProb parameters needed for the compressed header
 | ||||
|     auto uncomp_writer = ComposeUncompressedHeader(); | ||||
|     std::vector<u8> compressed_header = ComposeCompressedHeader(); | ||||
|  | @ -828,13 +791,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters | |||
|               frame.begin() + uncompressed_header.size()); | ||||
|     std::copy(bitstream.begin(), bitstream.end(), | ||||
|               frame.begin() + uncompressed_header.size() + compressed_header.size()); | ||||
| 
 | ||||
|     // keep track of frame number
 | ||||
|     current_frame_number++; | ||||
|     grace_period--; | ||||
| 
 | ||||
|     // don't display hidden frames
 | ||||
|     hidden = !current_frame_info.show_frame; | ||||
|     return frame; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -14,7 +14,6 @@ | |||
| 
 | ||||
| namespace Tegra { | ||||
| class GPU; | ||||
| enum class FrameType { KeyFrame = 0, InterFrame = 1 }; | ||||
| namespace Decoder { | ||||
| 
 | ||||
| /// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
 | ||||
|  | @ -124,7 +123,7 @@ public: | |||
| 
 | ||||
|     /// Returns true if the most recent frame was a hidden frame.
 | ||||
|     [[nodiscard]] bool WasFrameHidden() const { | ||||
|         return hidden; | ||||
|         return !current_frame_info.show_frame; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|  | @ -178,19 +177,12 @@ private: | |||
|     std::array<s8, 4> loop_filter_ref_deltas{}; | ||||
|     std::array<s8, 2> loop_filter_mode_deltas{}; | ||||
| 
 | ||||
|     bool hidden = false; | ||||
|     s64 current_frame_number = -2; // since we buffer 2 frames
 | ||||
|     s32 grace_period = 6;          // frame offsets need to stabilize
 | ||||
|     std::array<FrameContexts, 4> frame_ctxs{}; | ||||
|     Vp9FrameContainer next_frame{}; | ||||
|     Vp9FrameContainer next_next_frame{}; | ||||
|     bool swap_next_golden{}; | ||||
|     std::array<Vp9EntropyProbs, 4> frame_ctxs{}; | ||||
|     bool swap_ref_indices{}; | ||||
| 
 | ||||
|     Vp9PictureInfo current_frame_info{}; | ||||
|     Vp9EntropyProbs prev_frame_probs{}; | ||||
| 
 | ||||
|     s32 diff_update_probability = 252; | ||||
|     s32 frame_sync_code = 0x498342; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Decoder
 | ||||
|  |  | |||
|  | @ -296,12 +296,6 @@ struct RefPoolElement { | |||
|     bool refresh{}; | ||||
| }; | ||||
| 
 | ||||
| struct FrameContexts { | ||||
|     s64 from; | ||||
|     bool adapted; | ||||
|     Vp9EntropyProbs probs; | ||||
| }; | ||||
| 
 | ||||
| #define ASSERT_POSITION(field_name, position)                                                      \ | ||||
|     static_assert(offsetof(Vp9EntropyProbs, field_name) == position,                               \ | ||||
|                   "Field " #field_name " has invalid position") | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei