forked from eden-emu/eden
		
	Merge pull request #6540 from Kelebek1/nvdec
Slightly refactor NVDEC and codecs for readability and safety
This commit is contained in:
		
						commit
						c770fa9823
					
				
					 10 changed files with 544 additions and 378 deletions
				
			
		|  | @ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { | ||||||
|         case ThiMethod::SetMethod1: |         case ThiMethod::SetMethod1: | ||||||
|             LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", |             LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", | ||||||
|                       static_cast<u32>(nvdec_thi_state.method_0)); |                       static_cast<u32>(nvdec_thi_state.method_0)); | ||||||
|             nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), |             nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); | ||||||
|                                            data); |  | ||||||
|             break; |             break; | ||||||
|         default: |         default: | ||||||
|             break; |             break; | ||||||
|  |  | ||||||
|  | @ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { | ||||||
|     av_free(ptr); |     av_free(ptr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Codec::Codec(GPU& gpu_) | Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) | ||||||
|     : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), |     : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), | ||||||
|       vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} |       vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} | ||||||
| 
 | 
 | ||||||
| Codec::~Codec() { | Codec::~Codec() { | ||||||
|  | @ -43,46 +43,48 @@ Codec::~Codec() { | ||||||
|     avcodec_close(av_codec_ctx); |     avcodec_close(av_codec_ctx); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | void Codec::Initialize() { | ||||||
|     if (current_codec != codec) { |     AVCodecID codec{AV_CODEC_ID_NONE}; | ||||||
|         LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); |     switch (current_codec) { | ||||||
|         current_codec = codec; |     case NvdecCommon::VideoCodec::H264: | ||||||
|  |         codec = AV_CODEC_ID_H264; | ||||||
|  |         break; | ||||||
|  |     case NvdecCommon::VideoCodec::Vp9: | ||||||
|  |         codec = AV_CODEC_ID_VP9; | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         return; | ||||||
|     } |     } | ||||||
|  |     av_codec = avcodec_find_decoder(codec); | ||||||
|  |     av_codec_ctx = avcodec_alloc_context3(av_codec); | ||||||
|  |     av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||||||
|  | 
 | ||||||
|  |     // TODO(ameerj): libavcodec gpu hw acceleration
 | ||||||
|  | 
 | ||||||
|  |     const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); | ||||||
|  |     if (av_error < 0) { | ||||||
|  |         LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); | ||||||
|  |         avcodec_close(av_codec_ctx); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     initialized = true; | ||||||
|  |     return; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Codec::StateWrite(u32 offset, u64 arguments) { | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | ||||||
|     u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); |     if (current_codec != codec) { | ||||||
|     std::memcpy(state_offset, &arguments, sizeof(u64)); |         current_codec = codec; | ||||||
|  |         LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Codec::Decode() { | void Codec::Decode() { | ||||||
|     bool is_first_frame = false; |     const bool is_first_frame = !initialized; | ||||||
|     if (!initialized) { |     if (!initialized) { | ||||||
|         if (current_codec == NvdecCommon::VideoCodec::H264) { |         Initialize(); | ||||||
|             av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); |  | ||||||
|         } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { |  | ||||||
|             av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); |  | ||||||
|         } else { |  | ||||||
|             LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         av_codec_ctx = avcodec_alloc_context3(av_codec); |  | ||||||
|         av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); |  | ||||||
| 
 |  | ||||||
|         // TODO(ameerj): libavcodec gpu hw acceleration
 |  | ||||||
| 
 |  | ||||||
|         const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); |  | ||||||
|         if (av_error < 0) { |  | ||||||
|             LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); |  | ||||||
|             avcodec_close(av_codec_ctx); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         initialized = true; |  | ||||||
|         is_first_frame = true; |  | ||||||
|     } |     } | ||||||
|     bool vp9_hidden_frame = false; |  | ||||||
| 
 | 
 | ||||||
|  |     bool vp9_hidden_frame = false; | ||||||
|     AVPacket packet{}; |     AVPacket packet{}; | ||||||
|     av_init_packet(&packet); |     av_init_packet(&packet); | ||||||
|     std::vector<u8> frame_data; |     std::vector<u8> frame_data; | ||||||
|  | @ -95,7 +97,7 @@ void Codec::Decode() { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     packet.data = frame_data.data(); |     packet.data = frame_data.data(); | ||||||
|     packet.size = static_cast<int>(frame_data.size()); |     packet.size = static_cast<s32>(frame_data.size()); | ||||||
| 
 | 
 | ||||||
|     avcodec_send_packet(av_codec_ctx, &packet); |     avcodec_send_packet(av_codec_ctx, &packet); | ||||||
| 
 | 
 | ||||||
|  | @ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { | ||||||
|     return current_codec; |     return current_codec; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::string_view Codec::GetCurrentCodecName() const { | ||||||
|  |     switch (current_codec) { | ||||||
|  |     case NvdecCommon::VideoCodec::None: | ||||||
|  |         return "None"; | ||||||
|  |     case NvdecCommon::VideoCodec::H264: | ||||||
|  |         return "H264"; | ||||||
|  |     case NvdecCommon::VideoCodec::Vp8: | ||||||
|  |         return "VP8"; | ||||||
|  |     case NvdecCommon::VideoCodec::H265: | ||||||
|  |         return "H265"; | ||||||
|  |     case NvdecCommon::VideoCodec::Vp9: | ||||||
|  |         return "VP9"; | ||||||
|  |     default: | ||||||
|  |         return "Unknown"; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -42,15 +42,15 @@ class VP9; | ||||||
| 
 | 
 | ||||||
| class Codec { | class Codec { | ||||||
| public: | public: | ||||||
|     explicit Codec(GPU& gpu); |     explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); | ||||||
|     ~Codec(); |     ~Codec(); | ||||||
| 
 | 
 | ||||||
|  |     /// Initialize the codec; returns nothing, sets the initialized flag on success
 | ||||||
|  |     void Initialize(); | ||||||
|  | 
 | ||||||
|     /// Sets NVDEC video stream codec
 |     /// Sets NVDEC video stream codec
 | ||||||
|     void SetTargetCodec(NvdecCommon::VideoCodec codec); |     void SetTargetCodec(NvdecCommon::VideoCodec codec); | ||||||
| 
 | 
 | ||||||
|     /// Populate NvdecRegisters state with argument value at the provided offset
 |  | ||||||
|     void StateWrite(u32 offset, u64 arguments); |  | ||||||
| 
 |  | ||||||
|     /// Call decoders to construct headers, decode AVFrame with ffmpeg
 |     /// Call decoders to construct headers, decode AVFrame with ffmpeg
 | ||||||
|     void Decode(); |     void Decode(); | ||||||
| 
 | 
 | ||||||
|  | @ -59,6 +59,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     /// Returns the value of current_codec
 |     /// Returns the value of current_codec
 | ||||||
|     [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; |     [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; | ||||||
|  |     /// Return name of the current codec
 | ||||||
|  |     [[nodiscard]] std::string_view GetCurrentCodecName() const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     bool initialized{}; |     bool initialized{}; | ||||||
|  | @ -68,10 +70,10 @@ private: | ||||||
|     AVCodecContext* av_codec_ctx{nullptr}; |     AVCodecContext* av_codec_ctx{nullptr}; | ||||||
| 
 | 
 | ||||||
|     GPU& gpu; |     GPU& gpu; | ||||||
|  |     const NvdecCommon::NvdecRegisters& state; | ||||||
|     std::unique_ptr<Decoder::H264> h264_decoder; |     std::unique_ptr<Decoder::H264> h264_decoder; | ||||||
|     std::unique_ptr<Decoder::VP9> vp9_decoder; |     std::unique_ptr<Decoder::VP9> vp9_decoder; | ||||||
| 
 | 
 | ||||||
|     NvdecCommon::NvdecRegisters state{}; |  | ||||||
|     std::queue<AVFramePtr> av_frames{}; |     std::queue<AVFramePtr> av_frames{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -45,135 +45,130 @@ H264::~H264() = default; | ||||||
| 
 | 
 | ||||||
| const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, | const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, | ||||||
|                                                 bool is_first_frame) { |                                                 bool is_first_frame) { | ||||||
|     H264DecoderContext context{}; |     H264DecoderContext context; | ||||||
|     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); |     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | ||||||
| 
 | 
 | ||||||
|     const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); |     const s64 frame_number = context.h264_parameter_set.frame_number.Value(); | ||||||
|     if (!is_first_frame && frame_number != 0) { |     if (!is_first_frame && frame_number != 0) { | ||||||
|         frame.resize(context.frame_data_size); |         frame.resize(context.stream_len); | ||||||
| 
 |  | ||||||
|         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); |         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | ||||||
|     } else { |         return frame; | ||||||
|         /// Encode header
 |  | ||||||
|         H264BitWriter writer{}; |  | ||||||
|         writer.WriteU(1, 24); |  | ||||||
|         writer.WriteU(0, 1); |  | ||||||
|         writer.WriteU(3, 2); |  | ||||||
|         writer.WriteU(7, 5); |  | ||||||
|         writer.WriteU(100, 8); |  | ||||||
|         writer.WriteU(0, 8); |  | ||||||
|         writer.WriteU(31, 8); |  | ||||||
|         writer.WriteUe(0); |  | ||||||
|         const auto chroma_format_idc = |  | ||||||
|             static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); |  | ||||||
|         writer.WriteUe(chroma_format_idc); |  | ||||||
|         if (chroma_format_idc == 3) { |  | ||||||
|             writer.WriteBit(false); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         writer.WriteUe(0); |  | ||||||
|         writer.WriteUe(0); |  | ||||||
|         writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
 |  | ||||||
|         writer.WriteBit(false); // Scaling matrix present flag
 |  | ||||||
| 
 |  | ||||||
|         const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); |  | ||||||
|         writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); |  | ||||||
|         writer.WriteUe(order_cnt_type); |  | ||||||
|         if (order_cnt_type == 0) { |  | ||||||
|             writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); |  | ||||||
|         } else if (order_cnt_type == 1) { |  | ||||||
|             writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); |  | ||||||
| 
 |  | ||||||
|             writer.WriteSe(0); |  | ||||||
|             writer.WriteSe(0); |  | ||||||
|             writer.WriteUe(0); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / |  | ||||||
|                                (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); |  | ||||||
| 
 |  | ||||||
|         writer.WriteUe(16); |  | ||||||
|         writer.WriteBit(false); |  | ||||||
|         writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); |  | ||||||
|         writer.WriteUe(pic_height - 1); |  | ||||||
|         writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); |  | ||||||
| 
 |  | ||||||
|         if (!context.h264_parameter_set.frame_mbs_only_flag) { |  | ||||||
|             writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); |  | ||||||
|         writer.WriteBit(false); // Frame cropping flag
 |  | ||||||
|         writer.WriteBit(false); // VUI parameter present flag
 |  | ||||||
| 
 |  | ||||||
|         writer.End(); |  | ||||||
| 
 |  | ||||||
|         // H264 PPS
 |  | ||||||
|         writer.WriteU(1, 24); |  | ||||||
|         writer.WriteU(0, 1); |  | ||||||
|         writer.WriteU(3, 2); |  | ||||||
|         writer.WriteU(8, 5); |  | ||||||
| 
 |  | ||||||
|         writer.WriteUe(0); |  | ||||||
|         writer.WriteUe(0); |  | ||||||
| 
 |  | ||||||
|         writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); |  | ||||||
|         writer.WriteBit(false); |  | ||||||
|         writer.WriteUe(0); |  | ||||||
|         writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); |  | ||||||
|         writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); |  | ||||||
|         writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); |  | ||||||
|         writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); |  | ||||||
|         s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); |  | ||||||
|         pic_init_qp = (pic_init_qp << 26) >> 26; |  | ||||||
|         writer.WriteSe(pic_init_qp); |  | ||||||
|         writer.WriteSe(0); |  | ||||||
|         s32 chroma_qp_index_offset = |  | ||||||
|             static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); |  | ||||||
|         chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; |  | ||||||
| 
 |  | ||||||
|         writer.WriteSe(chroma_qp_index_offset); |  | ||||||
|         writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); |  | ||||||
|         writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); |  | ||||||
|         writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); |  | ||||||
|         writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); |  | ||||||
| 
 |  | ||||||
|         writer.WriteBit(true); |  | ||||||
| 
 |  | ||||||
|         for (s32 index = 0; index < 6; index++) { |  | ||||||
|             writer.WriteBit(true); |  | ||||||
|             const auto matrix_x4 = |  | ||||||
|                 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); |  | ||||||
|             writer.WriteScalingList(matrix_x4, index * 16, 16); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (context.h264_parameter_set.transform_8x8_mode_flag) { |  | ||||||
|             for (s32 index = 0; index < 2; index++) { |  | ||||||
|                 writer.WriteBit(true); |  | ||||||
|                 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), |  | ||||||
|                                                        context.scaling_matrix_8.end()); |  | ||||||
| 
 |  | ||||||
|                 writer.WriteScalingList(matrix_x8, index * 64, 64); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         s32 chroma_qp_index_offset2 = |  | ||||||
|             static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); |  | ||||||
|         chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; |  | ||||||
| 
 |  | ||||||
|         writer.WriteSe(chroma_qp_index_offset2); |  | ||||||
| 
 |  | ||||||
|         writer.End(); |  | ||||||
| 
 |  | ||||||
|         const auto& encoded_header = writer.GetByteArray(); |  | ||||||
|         frame.resize(encoded_header.size() + context.frame_data_size); |  | ||||||
|         std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); |  | ||||||
| 
 |  | ||||||
|         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, |  | ||||||
|                                       frame.data() + encoded_header.size(), |  | ||||||
|                                       context.frame_data_size); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     // Encode header
 | ||||||
|  |     H264BitWriter writer{}; | ||||||
|  |     writer.WriteU(1, 24); | ||||||
|  |     writer.WriteU(0, 1); | ||||||
|  |     writer.WriteU(3, 2); | ||||||
|  |     writer.WriteU(7, 5); | ||||||
|  |     writer.WriteU(100, 8); | ||||||
|  |     writer.WriteU(0, 8); | ||||||
|  |     writer.WriteU(31, 8); | ||||||
|  |     writer.WriteUe(0); | ||||||
|  |     const u32 chroma_format_idc = | ||||||
|  |         static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); | ||||||
|  |     writer.WriteUe(chroma_format_idc); | ||||||
|  |     if (chroma_format_idc == 3) { | ||||||
|  |         writer.WriteBit(false); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     writer.WriteUe(0); | ||||||
|  |     writer.WriteUe(0); | ||||||
|  |     writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
 | ||||||
|  |     writer.WriteBit(false); // Scaling matrix present flag
 | ||||||
|  | 
 | ||||||
|  |     writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); | ||||||
|  | 
 | ||||||
|  |     const auto order_cnt_type = | ||||||
|  |         static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); | ||||||
|  |     writer.WriteUe(order_cnt_type); | ||||||
|  |     if (order_cnt_type == 0) { | ||||||
|  |         writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); | ||||||
|  |     } else if (order_cnt_type == 1) { | ||||||
|  |         writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); | ||||||
|  | 
 | ||||||
|  |         writer.WriteSe(0); | ||||||
|  |         writer.WriteSe(0); | ||||||
|  |         writer.WriteUe(0); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / | ||||||
|  |                            (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); | ||||||
|  | 
 | ||||||
|  |     writer.WriteUe(16); | ||||||
|  |     writer.WriteBit(false); | ||||||
|  |     writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); | ||||||
|  |     writer.WriteUe(pic_height - 1); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); | ||||||
|  | 
 | ||||||
|  |     if (!context.h264_parameter_set.frame_mbs_only_flag) { | ||||||
|  |         writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); | ||||||
|  |     writer.WriteBit(false); // Frame cropping flag
 | ||||||
|  |     writer.WriteBit(false); // VUI parameter present flag
 | ||||||
|  | 
 | ||||||
|  |     writer.End(); | ||||||
|  | 
 | ||||||
|  |     // H264 PPS
 | ||||||
|  |     writer.WriteU(1, 24); | ||||||
|  |     writer.WriteU(0, 1); | ||||||
|  |     writer.WriteU(3, 2); | ||||||
|  |     writer.WriteU(8, 5); | ||||||
|  | 
 | ||||||
|  |     writer.WriteUe(0); | ||||||
|  |     writer.WriteUe(0); | ||||||
|  | 
 | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); | ||||||
|  |     writer.WriteBit(false); | ||||||
|  |     writer.WriteUe(0); | ||||||
|  |     writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); | ||||||
|  |     writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); | ||||||
|  |     writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); | ||||||
|  |     s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); | ||||||
|  |     writer.WriteSe(pic_init_qp); | ||||||
|  |     writer.WriteSe(0); | ||||||
|  |     s32 chroma_qp_index_offset = | ||||||
|  |         static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); | ||||||
|  | 
 | ||||||
|  |     writer.WriteSe(chroma_qp_index_offset); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); | ||||||
|  |     writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); | ||||||
|  | 
 | ||||||
|  |     writer.WriteBit(true); | ||||||
|  | 
 | ||||||
|  |     for (s32 index = 0; index < 6; index++) { | ||||||
|  |         writer.WriteBit(true); | ||||||
|  |         std::span<const u8> matrix{context.weight_scale}; | ||||||
|  |         writer.WriteScalingList(matrix, index * 16, 16); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (context.h264_parameter_set.transform_8x8_mode_flag) { | ||||||
|  |         for (s32 index = 0; index < 2; index++) { | ||||||
|  |             writer.WriteBit(true); | ||||||
|  |             std::span<const u8> matrix{context.weight_scale_8x8}; | ||||||
|  |             writer.WriteScalingList(matrix, index * 64, 64); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     s32 chroma_qp_index_offset2 = | ||||||
|  |         static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); | ||||||
|  | 
 | ||||||
|  |     writer.WriteSe(chroma_qp_index_offset2); | ||||||
|  | 
 | ||||||
|  |     writer.End(); | ||||||
|  | 
 | ||||||
|  |     const auto& encoded_header = writer.GetByteArray(); | ||||||
|  |     frame.resize(encoded_header.size() + context.stream_len); | ||||||
|  |     std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | ||||||
|  | 
 | ||||||
|  |     gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, | ||||||
|  |                                   frame.data() + encoded_header.size(), context.stream_len); | ||||||
|  | 
 | ||||||
|     return frame; |     return frame; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { | ||||||
|     WriteBits(state ? 1 : 0, 1); |     WriteBits(state ? 1 : 0, 1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { | ||||||
|     std::vector<u8> scan(count); |     std::vector<u8> scan(count); | ||||||
|     if (count == 16) { |     if (count == 16) { | ||||||
|         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); |         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | ||||||
|  |  | ||||||
|  | @ -20,7 +20,9 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <span> | ||||||
| #include <vector> | #include <vector> | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/command_classes/nvdec_common.h" | #include "video_core/command_classes/nvdec_common.h" | ||||||
|  | @ -48,7 +50,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
 |     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
 | ||||||
|     /// Writes the scaling matrices of the stream
 |     /// Writes the scaling matrices of the stream
 | ||||||
|     void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); |     void WriteScalingList(std::span<const u8> list, s32 start, s32 count); | ||||||
| 
 | 
 | ||||||
|     /// Return the bitstream as a vector.
 |     /// Return the bitstream as a vector.
 | ||||||
|     [[nodiscard]] std::vector<u8>& GetByteArray(); |     [[nodiscard]] std::vector<u8>& GetByteArray(); | ||||||
|  | @ -78,40 +80,110 @@ public: | ||||||
|         const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); |         const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     struct H264ParameterSet { |  | ||||||
|         u32 log2_max_pic_order_cnt{}; |  | ||||||
|         u32 delta_pic_order_always_zero_flag{}; |  | ||||||
|         u32 frame_mbs_only_flag{}; |  | ||||||
|         u32 pic_width_in_mbs{}; |  | ||||||
|         u32 pic_height_in_map_units{}; |  | ||||||
|         INSERT_PADDING_WORDS(1); |  | ||||||
|         u32 entropy_coding_mode_flag{}; |  | ||||||
|         u32 bottom_field_pic_order_flag{}; |  | ||||||
|         u32 num_refidx_l0_default_active{}; |  | ||||||
|         u32 num_refidx_l1_default_active{}; |  | ||||||
|         u32 deblocking_filter_control_flag{}; |  | ||||||
|         u32 redundant_pic_count_flag{}; |  | ||||||
|         u32 transform_8x8_mode_flag{}; |  | ||||||
|         INSERT_PADDING_WORDS(9); |  | ||||||
|         u64 flags{}; |  | ||||||
|         u32 frame_number{}; |  | ||||||
|         u32 frame_number2{}; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); |  | ||||||
| 
 |  | ||||||
|     struct H264DecoderContext { |  | ||||||
|         INSERT_PADDING_BYTES(0x48); |  | ||||||
|         u32 frame_data_size{}; |  | ||||||
|         INSERT_PADDING_BYTES(0xc); |  | ||||||
|         H264ParameterSet h264_parameter_set{}; |  | ||||||
|         INSERT_PADDING_BYTES(0x100); |  | ||||||
|         std::array<u8, 0x60> scaling_matrix_4; |  | ||||||
|         std::array<u8, 0x80> scaling_matrix_8; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); |  | ||||||
| 
 |  | ||||||
|     std::vector<u8> frame; |     std::vector<u8> frame; | ||||||
|     GPU& gpu; |     GPU& gpu; | ||||||
|  | 
 | ||||||
|  |     struct H264ParameterSet { | ||||||
|  |         s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
 | ||||||
|  |         s32 delta_pic_order_always_zero_flag;  ///< 0x04
 | ||||||
|  |         s32 frame_mbs_only_flag;               ///< 0x08
 | ||||||
|  |         u32 pic_width_in_mbs;                  ///< 0x0C
 | ||||||
|  |         u32 frame_height_in_map_units;         ///< 0x10
 | ||||||
|  |         union {                                ///< 0x14
 | ||||||
|  |             BitField<0, 2, u32> tile_format; | ||||||
|  |             BitField<2, 3, u32> gob_height; | ||||||
|  |         }; | ||||||
|  |         u32 entropy_coding_mode_flag;               ///< 0x18
 | ||||||
|  |         s32 pic_order_present_flag;                 ///< 0x1C
 | ||||||
|  |         s32 num_refidx_l0_default_active;           ///< 0x20
 | ||||||
|  |         s32 num_refidx_l1_default_active;           ///< 0x24
 | ||||||
|  |         s32 deblocking_filter_control_present_flag; ///< 0x28
 | ||||||
|  |         s32 redundant_pic_cnt_present_flag;         ///< 0x2C
 | ||||||
|  |         u32 transform_8x8_mode_flag;                ///< 0x30
 | ||||||
|  |         u32 pitch_luma;                             ///< 0x34
 | ||||||
|  |         u32 pitch_chroma;                           ///< 0x38
 | ||||||
|  |         u32 luma_top_offset;                        ///< 0x3C
 | ||||||
|  |         u32 luma_bot_offset;                        ///< 0x40
 | ||||||
|  |         u32 luma_frame_offset;                      ///< 0x44
 | ||||||
|  |         u32 chroma_top_offset;                      ///< 0x48
 | ||||||
|  |         u32 chroma_bot_offset;                      ///< 0x4C
 | ||||||
|  |         u32 chroma_frame_offset;                    ///< 0x50
 | ||||||
|  |         u32 hist_buffer_size;                       ///< 0x54
 | ||||||
|  |         union {                                     ///< 0x58
 | ||||||
|  |             union { | ||||||
|  |                 BitField<0, 1, u64> mbaff_frame; | ||||||
|  |                 BitField<1, 1, u64> direct_8x8_inference; | ||||||
|  |                 BitField<2, 1, u64> weighted_pred; | ||||||
|  |                 BitField<3, 1, u64> constrained_intra_pred; | ||||||
|  |                 BitField<4, 1, u64> ref_pic; | ||||||
|  |                 BitField<5, 1, u64> field_pic; | ||||||
|  |                 BitField<6, 1, u64> bottom_field; | ||||||
|  |                 BitField<7, 1, u64> second_field; | ||||||
|  |             } flags; | ||||||
|  |             BitField<8, 4, u64> log2_max_frame_num_minus4; | ||||||
|  |             BitField<12, 2, u64> chroma_format_idc; | ||||||
|  |             BitField<14, 2, u64> pic_order_cnt_type; | ||||||
|  |             BitField<16, 6, s64> pic_init_qp_minus26; | ||||||
|  |             BitField<22, 5, s64> chroma_qp_index_offset; | ||||||
|  |             BitField<27, 5, s64> second_chroma_qp_index_offset; | ||||||
|  |             BitField<32, 2, u64> weighted_bipred_idc; | ||||||
|  |             BitField<34, 7, u64> curr_pic_idx; | ||||||
|  |             BitField<41, 5, u64> curr_col_idx; | ||||||
|  |             BitField<46, 16, u64> frame_number; | ||||||
|  |             BitField<62, 1, u64> frame_surfaces; | ||||||
|  |             BitField<63, 1, u64> output_memory_layout; | ||||||
|  |         }; | ||||||
|  |     }; | ||||||
|  |     static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); | ||||||
|  | 
 | ||||||
|  |     struct H264DecoderContext { | ||||||
|  |         INSERT_PADDING_WORDS_NOINIT(18);       ///< 0x0000
 | ||||||
|  |         u32 stream_len;                        ///< 0x0048
 | ||||||
|  |         INSERT_PADDING_WORDS_NOINIT(3);        ///< 0x004C
 | ||||||
|  |         H264ParameterSet h264_parameter_set;   ///< 0x0058
 | ||||||
|  |         INSERT_PADDING_WORDS_NOINIT(66);       ///< 0x00B8
 | ||||||
|  |         std::array<u8, 0x60> weight_scale;     ///< 0x01C0
 | ||||||
|  |         std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
 | ||||||
|  |     }; | ||||||
|  |     static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); | ||||||
|  | 
 | ||||||
|  | #define ASSERT_POSITION(field_name, position)                                                      \ | ||||||
|  |     static_assert(offsetof(H264ParameterSet, field_name) == position,                              \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  |     ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); | ||||||
|  |     ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); | ||||||
|  |     ASSERT_POSITION(frame_mbs_only_flag, 0x08); | ||||||
|  |     ASSERT_POSITION(pic_width_in_mbs, 0x0C); | ||||||
|  |     ASSERT_POSITION(frame_height_in_map_units, 0x10); | ||||||
|  |     ASSERT_POSITION(tile_format, 0x14); | ||||||
|  |     ASSERT_POSITION(entropy_coding_mode_flag, 0x18); | ||||||
|  |     ASSERT_POSITION(pic_order_present_flag, 0x1C); | ||||||
|  |     ASSERT_POSITION(num_refidx_l0_default_active, 0x20); | ||||||
|  |     ASSERT_POSITION(num_refidx_l1_default_active, 0x24); | ||||||
|  |     ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); | ||||||
|  |     ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); | ||||||
|  |     ASSERT_POSITION(transform_8x8_mode_flag, 0x30); | ||||||
|  |     ASSERT_POSITION(pitch_luma, 0x34); | ||||||
|  |     ASSERT_POSITION(pitch_chroma, 0x38); | ||||||
|  |     ASSERT_POSITION(luma_top_offset, 0x3C); | ||||||
|  |     ASSERT_POSITION(luma_bot_offset, 0x40); | ||||||
|  |     ASSERT_POSITION(luma_frame_offset, 0x44); | ||||||
|  |     ASSERT_POSITION(chroma_top_offset, 0x48); | ||||||
|  |     ASSERT_POSITION(chroma_bot_offset, 0x4C); | ||||||
|  |     ASSERT_POSITION(chroma_frame_offset, 0x50); | ||||||
|  |     ASSERT_POSITION(hist_buffer_size, 0x54); | ||||||
|  |     ASSERT_POSITION(flags, 0x58); | ||||||
|  | #undef ASSERT_POSITION | ||||||
|  | 
 | ||||||
|  | #define ASSERT_POSITION(field_name, position)                                                      \ | ||||||
|  |     static_assert(offsetof(H264DecoderContext, field_name) == position,                            \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  |     ASSERT_POSITION(stream_len, 0x48); | ||||||
|  |     ASSERT_POSITION(h264_parameter_set, 0x58); | ||||||
|  |     ASSERT_POSITION(weight_scale, 0x1C0); | ||||||
|  | #undef ASSERT_POSITION | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Decoder
 | } // namespace Decoder
 | ||||||
|  |  | ||||||
|  | @ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { | Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { | ||||||
|     PictureInfo picture_info{}; |     PictureInfo picture_info; | ||||||
|     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); |     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | ||||||
|     Vp9PictureInfo vp9_info = picture_info.Convert(); |     Vp9PictureInfo vp9_info = picture_info.Convert(); | ||||||
| 
 | 
 | ||||||
|  | @ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | ||||||
|     EntropyProbs entropy{}; |     EntropyProbs entropy; | ||||||
|     gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); |     gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); | ||||||
|     entropy.Convert(dst); |     entropy.Convert(dst); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -15,10 +15,10 @@ class GPU; | ||||||
| 
 | 
 | ||||||
| namespace Decoder { | namespace Decoder { | ||||||
| struct Vp9FrameDimensions { | struct Vp9FrameDimensions { | ||||||
|     s16 width{}; |     s16 width; | ||||||
|     s16 height{}; |     s16 height; | ||||||
|     s16 luma_pitch{}; |     s16 luma_pitch; | ||||||
|     s16 chroma_pitch{}; |     s16 chroma_pitch; | ||||||
| }; | }; | ||||||
| static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); | static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); | ||||||
| 
 | 
 | ||||||
|  | @ -49,87 +49,87 @@ enum class TxMode { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct Segmentation { | struct Segmentation { | ||||||
|     u8 enabled{}; |     u8 enabled; | ||||||
|     u8 update_map{}; |     u8 update_map; | ||||||
|     u8 temporal_update{}; |     u8 temporal_update; | ||||||
|     u8 abs_delta{}; |     u8 abs_delta; | ||||||
|     std::array<u32, 8> feature_mask{}; |     std::array<u32, 8> feature_mask; | ||||||
|     std::array<std::array<s16, 4>, 8> feature_data{}; |     std::array<std::array<s16, 4>, 8> feature_data; | ||||||
| }; | }; | ||||||
| static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); | static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); | ||||||
| 
 | 
 | ||||||
| struct LoopFilter { | struct LoopFilter { | ||||||
|     u8 mode_ref_delta_enabled{}; |     u8 mode_ref_delta_enabled; | ||||||
|     std::array<s8, 4> ref_deltas{}; |     std::array<s8, 4> ref_deltas; | ||||||
|     std::array<s8, 2> mode_deltas{}; |     std::array<s8, 2> mode_deltas; | ||||||
| }; | }; | ||||||
| static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); | static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); | ||||||
| 
 | 
 | ||||||
| struct Vp9EntropyProbs { | struct Vp9EntropyProbs { | ||||||
|     std::array<u8, 36> y_mode_prob{}; |     std::array<u8, 36> y_mode_prob;           ///< 0x0000
 | ||||||
|     std::array<u8, 64> partition_prob{}; |     std::array<u8, 64> partition_prob;        ///< 0x0024
 | ||||||
|     std::array<u8, 1728> coef_probs{}; |     std::array<u8, 1728> coef_probs;          ///< 0x0064
 | ||||||
|     std::array<u8, 8> switchable_interp_prob{}; |     std::array<u8, 8> switchable_interp_prob; ///< 0x0724
 | ||||||
|     std::array<u8, 28> inter_mode_prob{}; |     std::array<u8, 28> inter_mode_prob;       ///< 0x072C
 | ||||||
|     std::array<u8, 4> intra_inter_prob{}; |     std::array<u8, 4> intra_inter_prob;       ///< 0x0748
 | ||||||
|     std::array<u8, 5> comp_inter_prob{}; |     std::array<u8, 5> comp_inter_prob;        ///< 0x074C
 | ||||||
|     std::array<u8, 10> single_ref_prob{}; |     std::array<u8, 10> single_ref_prob;       ///< 0x0751
 | ||||||
|     std::array<u8, 5> comp_ref_prob{}; |     std::array<u8, 5> comp_ref_prob;          ///< 0x075B
 | ||||||
|     std::array<u8, 6> tx_32x32_prob{}; |     std::array<u8, 6> tx_32x32_prob;          ///< 0x0760
 | ||||||
|     std::array<u8, 4> tx_16x16_prob{}; |     std::array<u8, 4> tx_16x16_prob;          ///< 0x0766
 | ||||||
|     std::array<u8, 2> tx_8x8_prob{}; |     std::array<u8, 2> tx_8x8_prob;            ///< 0x076A
 | ||||||
|     std::array<u8, 3> skip_probs{}; |     std::array<u8, 3> skip_probs;             ///< 0x076C
 | ||||||
|     std::array<u8, 3> joints{}; |     std::array<u8, 3> joints;                 ///< 0x076F
 | ||||||
|     std::array<u8, 2> sign{}; |     std::array<u8, 2> sign;                   ///< 0x0772
 | ||||||
|     std::array<u8, 20> classes{}; |     std::array<u8, 20> classes;               ///< 0x0774
 | ||||||
|     std::array<u8, 2> class_0{}; |     std::array<u8, 2> class_0;                ///< 0x0788
 | ||||||
|     std::array<u8, 20> prob_bits{}; |     std::array<u8, 20> prob_bits;             ///< 0x078A
 | ||||||
|     std::array<u8, 12> class_0_fr{}; |     std::array<u8, 12> class_0_fr;            ///< 0x079E
 | ||||||
|     std::array<u8, 6> fr{}; |     std::array<u8, 6> fr;                     ///< 0x07AA
 | ||||||
|     std::array<u8, 2> class_0_hp{}; |     std::array<u8, 2> class_0_hp;             ///< 0x07B0
 | ||||||
|     std::array<u8, 2> high_precision{}; |     std::array<u8, 2> high_precision;         ///< 0x07B2
 | ||||||
| }; | }; | ||||||
| static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); | static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); | ||||||
| 
 | 
 | ||||||
| struct Vp9PictureInfo { | struct Vp9PictureInfo { | ||||||
|     bool is_key_frame{}; |     bool is_key_frame; | ||||||
|     bool intra_only{}; |     bool intra_only; | ||||||
|     bool last_frame_was_key{}; |     bool last_frame_was_key; | ||||||
|     bool frame_size_changed{}; |     bool frame_size_changed; | ||||||
|     bool error_resilient_mode{}; |     bool error_resilient_mode; | ||||||
|     bool last_frame_shown{}; |     bool last_frame_shown; | ||||||
|     bool show_frame{}; |     bool show_frame; | ||||||
|     std::array<s8, 4> ref_frame_sign_bias{}; |     std::array<s8, 4> ref_frame_sign_bias; | ||||||
|     s32 base_q_index{}; |     s32 base_q_index; | ||||||
|     s32 y_dc_delta_q{}; |     s32 y_dc_delta_q; | ||||||
|     s32 uv_dc_delta_q{}; |     s32 uv_dc_delta_q; | ||||||
|     s32 uv_ac_delta_q{}; |     s32 uv_ac_delta_q; | ||||||
|     bool lossless{}; |     bool lossless; | ||||||
|     s32 transform_mode{}; |     s32 transform_mode; | ||||||
|     bool allow_high_precision_mv{}; |     bool allow_high_precision_mv; | ||||||
|     s32 interp_filter{}; |     s32 interp_filter; | ||||||
|     s32 reference_mode{}; |     s32 reference_mode; | ||||||
|     s8 comp_fixed_ref{}; |     s8 comp_fixed_ref; | ||||||
|     std::array<s8, 2> comp_var_ref{}; |     std::array<s8, 2> comp_var_ref; | ||||||
|     s32 log2_tile_cols{}; |     s32 log2_tile_cols; | ||||||
|     s32 log2_tile_rows{}; |     s32 log2_tile_rows; | ||||||
|     bool segment_enabled{}; |     bool segment_enabled; | ||||||
|     bool segment_map_update{}; |     bool segment_map_update; | ||||||
|     bool segment_map_temporal_update{}; |     bool segment_map_temporal_update; | ||||||
|     s32 segment_abs_delta{}; |     s32 segment_abs_delta; | ||||||
|     std::array<u32, 8> segment_feature_enable{}; |     std::array<u32, 8> segment_feature_enable; | ||||||
|     std::array<std::array<s16, 4>, 8> segment_feature_data{}; |     std::array<std::array<s16, 4>, 8> segment_feature_data; | ||||||
|     bool mode_ref_delta_enabled{}; |     bool mode_ref_delta_enabled; | ||||||
|     bool use_prev_in_find_mv_refs{}; |     bool use_prev_in_find_mv_refs; | ||||||
|     std::array<s8, 4> ref_deltas{}; |     std::array<s8, 4> ref_deltas; | ||||||
|     std::array<s8, 2> mode_deltas{}; |     std::array<s8, 2> mode_deltas; | ||||||
|     Vp9EntropyProbs entropy{}; |     Vp9EntropyProbs entropy; | ||||||
|     Vp9FrameDimensions frame_size{}; |     Vp9FrameDimensions frame_size; | ||||||
|     u8 first_level{}; |     u8 first_level; | ||||||
|     u8 sharpness_level{}; |     u8 sharpness_level; | ||||||
|     u32 bitstream_size{}; |     u32 bitstream_size; | ||||||
|     std::array<u64, 4> frame_offsets{}; |     std::array<u64, 4> frame_offsets; | ||||||
|     std::array<bool, 4> refresh_frame{}; |     std::array<bool, 4> refresh_frame; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct Vp9FrameContainer { | struct Vp9FrameContainer { | ||||||
|  | @ -138,35 +138,35 @@ struct Vp9FrameContainer { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct PictureInfo { | struct PictureInfo { | ||||||
|     INSERT_PADDING_WORDS(12); |     INSERT_PADDING_WORDS_NOINIT(12);       ///< 0x00
 | ||||||
|     u32 bitstream_size{}; |     u32 bitstream_size;                    ///< 0x30
 | ||||||
|     INSERT_PADDING_WORDS(5); |     INSERT_PADDING_WORDS_NOINIT(5);        ///< 0x34
 | ||||||
|     Vp9FrameDimensions last_frame_size{}; |     Vp9FrameDimensions last_frame_size;    ///< 0x48
 | ||||||
|     Vp9FrameDimensions golden_frame_size{}; |     Vp9FrameDimensions golden_frame_size;  ///< 0x50
 | ||||||
|     Vp9FrameDimensions alt_frame_size{}; |     Vp9FrameDimensions alt_frame_size;     ///< 0x58
 | ||||||
|     Vp9FrameDimensions current_frame_size{}; |     Vp9FrameDimensions current_frame_size; ///< 0x60
 | ||||||
|     u32 vp9_flags{}; |     u32 vp9_flags;                         ///< 0x68
 | ||||||
|     std::array<s8, 4> ref_frame_sign_bias{}; |     std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
 | ||||||
|     u8 first_level{}; |     u8 first_level;                        ///< 0x70
 | ||||||
|     u8 sharpness_level{}; |     u8 sharpness_level;                    ///< 0x71
 | ||||||
|     u8 base_q_index{}; |     u8 base_q_index;                       ///< 0x72
 | ||||||
|     u8 y_dc_delta_q{}; |     u8 y_dc_delta_q;                       ///< 0x73
 | ||||||
|     u8 uv_ac_delta_q{}; |     u8 uv_ac_delta_q;                      ///< 0x74
 | ||||||
|     u8 uv_dc_delta_q{}; |     u8 uv_dc_delta_q;                      ///< 0x75
 | ||||||
|     u8 lossless{}; |     u8 lossless;                           ///< 0x76
 | ||||||
|     u8 tx_mode{}; |     u8 tx_mode;                            ///< 0x77
 | ||||||
|     u8 allow_high_precision_mv{}; |     u8 allow_high_precision_mv;            ///< 0x78
 | ||||||
|     u8 interp_filter{}; |     u8 interp_filter;                      ///< 0x79
 | ||||||
|     u8 reference_mode{}; |     u8 reference_mode;                     ///< 0x7A
 | ||||||
|     s8 comp_fixed_ref{}; |     s8 comp_fixed_ref;                     ///< 0x7B
 | ||||||
|     std::array<s8, 2> comp_var_ref{}; |     std::array<s8, 2> comp_var_ref;        ///< 0x7C
 | ||||||
|     u8 log2_tile_cols{}; |     u8 log2_tile_cols;                     ///< 0x7E
 | ||||||
|     u8 log2_tile_rows{}; |     u8 log2_tile_rows;                     ///< 0x7F
 | ||||||
|     Segmentation segmentation{}; |     Segmentation segmentation;             ///< 0x80
 | ||||||
|     LoopFilter loop_filter{}; |     LoopFilter loop_filter;                ///< 0xE4
 | ||||||
|     INSERT_PADDING_BYTES(5); |     INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB
 | ||||||
|     u32 surface_params{}; |     u32 surface_params;                    ///< 0xF0
 | ||||||
|     INSERT_PADDING_WORDS(3); |     INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4
 | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] Vp9PictureInfo Convert() const { |     [[nodiscard]] Vp9PictureInfo Convert() const { | ||||||
|         return { |         return { | ||||||
|  | @ -176,6 +176,7 @@ struct PictureInfo { | ||||||
|             .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, |             .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, | ||||||
|             .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, |             .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, | ||||||
|             .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, |             .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, | ||||||
|  |             .show_frame = false, | ||||||
|             .ref_frame_sign_bias = ref_frame_sign_bias, |             .ref_frame_sign_bias = ref_frame_sign_bias, | ||||||
|             .base_q_index = base_q_index, |             .base_q_index = base_q_index, | ||||||
|             .y_dc_delta_q = y_dc_delta_q, |             .y_dc_delta_q = y_dc_delta_q, | ||||||
|  | @ -204,45 +205,48 @@ struct PictureInfo { | ||||||
|                                         !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), |                                         !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), | ||||||
|             .ref_deltas = loop_filter.ref_deltas, |             .ref_deltas = loop_filter.ref_deltas, | ||||||
|             .mode_deltas = loop_filter.mode_deltas, |             .mode_deltas = loop_filter.mode_deltas, | ||||||
|  |             .entropy{}, | ||||||
|             .frame_size = current_frame_size, |             .frame_size = current_frame_size, | ||||||
|             .first_level = first_level, |             .first_level = first_level, | ||||||
|             .sharpness_level = sharpness_level, |             .sharpness_level = sharpness_level, | ||||||
|             .bitstream_size = bitstream_size, |             .bitstream_size = bitstream_size, | ||||||
|  |             .frame_offsets{}, | ||||||
|  |             .refresh_frame{}, | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); | static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); | ||||||
| 
 | 
 | ||||||
| struct EntropyProbs { | struct EntropyProbs { | ||||||
|     INSERT_PADDING_BYTES(1024); |     INSERT_PADDING_BYTES_NOINIT(1024);                 ///< 0x0000
 | ||||||
|     std::array<u8, 28> inter_mode_prob{}; |     std::array<u8, 28> inter_mode_prob;                ///< 0x0400
 | ||||||
|     std::array<u8, 4> intra_inter_prob{}; |     std::array<u8, 4> intra_inter_prob;                ///< 0x041C
 | ||||||
|     INSERT_PADDING_BYTES(80); |     INSERT_PADDING_BYTES_NOINIT(80);                   ///< 0x0420
 | ||||||
|     std::array<u8, 2> tx_8x8_prob{}; |     std::array<u8, 2> tx_8x8_prob;                     ///< 0x0470
 | ||||||
|     std::array<u8, 4> tx_16x16_prob{}; |     std::array<u8, 4> tx_16x16_prob;                   ///< 0x0472
 | ||||||
|     std::array<u8, 6> tx_32x32_prob{}; |     std::array<u8, 6> tx_32x32_prob;                   ///< 0x0476
 | ||||||
|     std::array<u8, 4> y_mode_prob_e8{}; |     std::array<u8, 4> y_mode_prob_e8;                  ///< 0x047C
 | ||||||
|     std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; |     std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
 | ||||||
|     INSERT_PADDING_BYTES(64); |     INSERT_PADDING_BYTES_NOINIT(64);                   ///< 0x04A0
 | ||||||
|     std::array<u8, 64> partition_prob{}; |     std::array<u8, 64> partition_prob;                 ///< 0x04E0
 | ||||||
|     INSERT_PADDING_BYTES(10); |     INSERT_PADDING_BYTES_NOINIT(10);                   ///< 0x0520
 | ||||||
|     std::array<u8, 8> switchable_interp_prob{}; |     std::array<u8, 8> switchable_interp_prob;          ///< 0x052A
 | ||||||
|     std::array<u8, 5> comp_inter_prob{}; |     std::array<u8, 5> comp_inter_prob;                 ///< 0x0532
 | ||||||
|     std::array<u8, 3> skip_probs{}; |     std::array<u8, 3> skip_probs;                      ///< 0x0537
 | ||||||
|     INSERT_PADDING_BYTES(1); |     INSERT_PADDING_BYTES_NOINIT(1);                    ///< 0x053A
 | ||||||
|     std::array<u8, 3> joints{}; |     std::array<u8, 3> joints;                          ///< 0x053B
 | ||||||
|     std::array<u8, 2> sign{}; |     std::array<u8, 2> sign;                            ///< 0x053E
 | ||||||
|     std::array<u8, 2> class_0{}; |     std::array<u8, 2> class_0;                         ///< 0x0540
 | ||||||
|     std::array<u8, 6> fr{}; |     std::array<u8, 6> fr;                              ///< 0x0542
 | ||||||
|     std::array<u8, 2> class_0_hp{}; |     std::array<u8, 2> class_0_hp;                      ///< 0x0548
 | ||||||
|     std::array<u8, 2> high_precision{}; |     std::array<u8, 2> high_precision;                  ///< 0x054A
 | ||||||
|     std::array<u8, 20> classes{}; |     std::array<u8, 20> classes;                        ///< 0x054C
 | ||||||
|     std::array<u8, 12> class_0_fr{}; |     std::array<u8, 12> class_0_fr;                     ///< 0x0560
 | ||||||
|     std::array<u8, 20> pred_bits{}; |     std::array<u8, 20> pred_bits;                      ///< 0x056C
 | ||||||
|     std::array<u8, 10> single_ref_prob{}; |     std::array<u8, 10> single_ref_prob;                ///< 0x0580
 | ||||||
|     std::array<u8, 5> comp_ref_prob{}; |     std::array<u8, 5> comp_ref_prob;                   ///< 0x058A
 | ||||||
|     INSERT_PADDING_BYTES(17); |     INSERT_PADDING_BYTES_NOINIT(17);                   ///< 0x058F
 | ||||||
|     std::array<u8, 2304> coef_probs{}; |     std::array<u8, 2304> coef_probs;                   ///< 0x05A0
 | ||||||
| 
 | 
 | ||||||
|     void Convert(Vp9EntropyProbs& fc) { |     void Convert(Vp9EntropyProbs& fc) { | ||||||
|         fc.inter_mode_prob = inter_mode_prob; |         fc.inter_mode_prob = inter_mode_prob; | ||||||
|  | @ -293,10 +297,45 @@ struct RefPoolElement { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct FrameContexts { | struct FrameContexts { | ||||||
|     s64 from{}; |     s64 from; | ||||||
|     bool adapted{}; |     bool adapted; | ||||||
|     Vp9EntropyProbs probs{}; |     Vp9EntropyProbs probs; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | #define ASSERT_POSITION(field_name, position)                                                      \ | ||||||
|  |     static_assert(offsetof(Vp9EntropyProbs, field_name) == position,                               \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  | ASSERT_POSITION(partition_prob, 0x0024); | ||||||
|  | ASSERT_POSITION(switchable_interp_prob, 0x0724); | ||||||
|  | ASSERT_POSITION(sign, 0x0772); | ||||||
|  | ASSERT_POSITION(class_0_fr, 0x079E); | ||||||
|  | ASSERT_POSITION(high_precision, 0x07B2); | ||||||
|  | #undef ASSERT_POSITION | ||||||
|  | 
 | ||||||
|  | #define ASSERT_POSITION(field_name, position)                                                      \ | ||||||
|  |     static_assert(offsetof(PictureInfo, field_name) == position,                                   \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  | ASSERT_POSITION(bitstream_size, 0x30); | ||||||
|  | ASSERT_POSITION(last_frame_size, 0x48); | ||||||
|  | ASSERT_POSITION(first_level, 0x70); | ||||||
|  | ASSERT_POSITION(segmentation, 0x80); | ||||||
|  | ASSERT_POSITION(loop_filter, 0xE4); | ||||||
|  | ASSERT_POSITION(surface_params, 0xF0); | ||||||
|  | #undef ASSERT_POSITION | ||||||
|  | 
 | ||||||
|  | #define ASSERT_POSITION(field_name, position)                                                      \ | ||||||
|  |     static_assert(offsetof(EntropyProbs, field_name) == position,                                  \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  | ASSERT_POSITION(inter_mode_prob, 0x400); | ||||||
|  | ASSERT_POSITION(tx_8x8_prob, 0x470); | ||||||
|  | ASSERT_POSITION(partition_prob, 0x4E0); | ||||||
|  | ASSERT_POSITION(class_0, 0x540); | ||||||
|  | ASSERT_POSITION(class_0_fr, 0x560); | ||||||
|  | ASSERT_POSITION(coef_probs, 0x5A0); | ||||||
|  | #undef ASSERT_POSITION | ||||||
|  | 
 | ||||||
| }; // namespace Decoder
 | }; // namespace Decoder
 | ||||||
| }; // namespace Tegra
 | }; // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -8,22 +8,21 @@ | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
| Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} | #define NVDEC_REG_INDEX(field_name)                                                                \ | ||||||
|  |     (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) | ||||||
|  | 
 | ||||||
|  | Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} | ||||||
| 
 | 
 | ||||||
| Nvdec::~Nvdec() = default; | Nvdec::~Nvdec() = default; | ||||||
| 
 | 
 | ||||||
| void Nvdec::ProcessMethod(Method method, u32 argument) { | void Nvdec::ProcessMethod(u32 method, u32 argument) { | ||||||
|     if (method == Method::SetVideoCodec) { |     state.reg_array[method] = static_cast<u64>(argument) << 8; | ||||||
|         codec->StateWrite(static_cast<u32>(method), argument); |  | ||||||
|     } else { |  | ||||||
|         codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     switch (method) { |     switch (method) { | ||||||
|     case Method::SetVideoCodec: |     case NVDEC_REG_INDEX(set_codec_id): | ||||||
|         codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); |         codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); | ||||||
|         break; |         break; | ||||||
|     case Method::Execute: |     case NVDEC_REG_INDEX(execute): | ||||||
|         Execute(); |         Execute(); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -14,16 +14,11 @@ class GPU; | ||||||
| 
 | 
 | ||||||
| class Nvdec { | class Nvdec { | ||||||
| public: | public: | ||||||
|     enum class Method : u32 { |  | ||||||
|         SetVideoCodec = 0x80, |  | ||||||
|         Execute = 0xc0, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     explicit Nvdec(GPU& gpu); |     explicit Nvdec(GPU& gpu); | ||||||
|     ~Nvdec(); |     ~Nvdec(); | ||||||
| 
 | 
 | ||||||
|     /// Writes the method into the state, Invoke Execute() if encountered
 |     /// Writes the method into the state, Invoke Execute() if encountered
 | ||||||
|     void ProcessMethod(Method method, u32 argument); |     void ProcessMethod(u32 method, u32 argument); | ||||||
| 
 | 
 | ||||||
|     /// Return most recently decoded frame
 |     /// Return most recently decoded frame
 | ||||||
|     [[nodiscard]] AVFramePtr GetFrame(); |     [[nodiscard]] AVFramePtr GetFrame(); | ||||||
|  | @ -33,6 +28,7 @@ private: | ||||||
|     void Execute(); |     void Execute(); | ||||||
| 
 | 
 | ||||||
|     GPU& gpu; |     GPU& gpu; | ||||||
|  |     NvdecCommon::NvdecRegisters state; | ||||||
|     std::unique_ptr<Codec> codec; |     std::unique_ptr<Codec> codec; | ||||||
| }; | }; | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -4,40 +4,13 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::NvdecCommon { | namespace Tegra::NvdecCommon { | ||||||
| 
 | 
 | ||||||
| struct NvdecRegisters { | enum class VideoCodec : u64 { | ||||||
|     INSERT_PADDING_WORDS(256); |  | ||||||
|     u64 set_codec_id{}; |  | ||||||
|     INSERT_PADDING_WORDS(254); |  | ||||||
|     u64 set_platform_id{}; |  | ||||||
|     u64 picture_info_offset{}; |  | ||||||
|     u64 frame_bitstream_offset{}; |  | ||||||
|     u64 frame_number{}; |  | ||||||
|     u64 h264_slice_data_offsets{}; |  | ||||||
|     u64 h264_mv_dump_offset{}; |  | ||||||
|     INSERT_PADDING_WORDS(6); |  | ||||||
|     u64 frame_stats_offset{}; |  | ||||||
|     u64 h264_last_surface_luma_offset{}; |  | ||||||
|     u64 h264_last_surface_chroma_offset{}; |  | ||||||
|     std::array<u64, 17> surface_luma_offset{}; |  | ||||||
|     std::array<u64, 17> surface_chroma_offset{}; |  | ||||||
|     INSERT_PADDING_WORDS(132); |  | ||||||
|     u64 vp9_entropy_probs_offset{}; |  | ||||||
|     u64 vp9_backward_updates_offset{}; |  | ||||||
|     u64 vp9_last_frame_segmap_offset{}; |  | ||||||
|     u64 vp9_curr_frame_segmap_offset{}; |  | ||||||
|     INSERT_PADDING_WORDS(2); |  | ||||||
|     u64 vp9_last_frame_mvs_offset{}; |  | ||||||
|     u64 vp9_curr_frame_mvs_offset{}; |  | ||||||
|     INSERT_PADDING_WORDS(2); |  | ||||||
| }; |  | ||||||
| static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); |  | ||||||
| 
 |  | ||||||
| enum class VideoCodec : u32 { |  | ||||||
|     None = 0x0, |     None = 0x0, | ||||||
|     H264 = 0x3, |     H264 = 0x3, | ||||||
|     Vp8 = 0x5, |     Vp8 = 0x5, | ||||||
|  | @ -45,4 +18,76 @@ enum class VideoCodec : u32 { | ||||||
|     Vp9 = 0x9, |     Vp9 = 0x9, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | // NVDEC should use a 32-bit address space, but is mapped to 64-bit,
 | ||||||
|  | // doubling the sizes here is compensating for that.
 | ||||||
|  | struct NvdecRegisters { | ||||||
|  |     static constexpr std::size_t NUM_REGS = 0x178; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         struct { | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
 | ||||||
|  |             VideoCodec set_codec_id;          ///< 0x0400
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
 | ||||||
|  |             u64 execute;                      ///< 0x0600
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
 | ||||||
|  |             struct {                          ///< 0x0800
 | ||||||
|  |                 union { | ||||||
|  |                     BitField<0, 3, VideoCodec> codec; | ||||||
|  |                     BitField<4, 1, u64> gp_timer_on; | ||||||
|  |                     BitField<13, 1, u64> mb_timer_on; | ||||||
|  |                     BitField<14, 1, u64> intra_frame_pslc; | ||||||
|  |                     BitField<17, 1, u64> all_intra_frame; | ||||||
|  |                 }; | ||||||
|  |             } control_params; | ||||||
|  |             u64 picture_info_offset;                   ///< 0x0808
 | ||||||
|  |             u64 frame_bitstream_offset;                ///< 0x0810
 | ||||||
|  |             u64 frame_number;                          ///< 0x0818
 | ||||||
|  |             u64 h264_slice_data_offsets;               ///< 0x0820
 | ||||||
|  |             u64 h264_mv_dump_offset;                   ///< 0x0828
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(6);            ///< 0x0830
 | ||||||
|  |             u64 frame_stats_offset;                    ///< 0x0848
 | ||||||
|  |             u64 h264_last_surface_luma_offset;         ///< 0x0850
 | ||||||
|  |             u64 h264_last_surface_chroma_offset;       ///< 0x0858
 | ||||||
|  |             std::array<u64, 17> surface_luma_offset;   ///< 0x0860
 | ||||||
|  |             std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(132);          ///< 0x0970
 | ||||||
|  |             u64 vp9_entropy_probs_offset;              ///< 0x0B80
 | ||||||
|  |             u64 vp9_backward_updates_offset;           ///< 0x0B88
 | ||||||
|  |             u64 vp9_last_frame_segmap_offset;          ///< 0x0B90
 | ||||||
|  |             u64 vp9_curr_frame_segmap_offset;          ///< 0x0B98
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BA0
 | ||||||
|  |             u64 vp9_last_frame_mvs_offset;             ///< 0x0BA8
 | ||||||
|  |             u64 vp9_curr_frame_mvs_offset;             ///< 0x0BB0
 | ||||||
|  |             INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BB8
 | ||||||
|  |         }; | ||||||
|  |         std::array<u64, NUM_REGS> reg_array; | ||||||
|  |     }; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); | ||||||
|  | 
 | ||||||
|  | #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||||
|  |     static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64),                  \ | ||||||
|  |                   "Field " #field_name " has invalid position") | ||||||
|  | 
 | ||||||
|  | ASSERT_REG_POSITION(set_codec_id, 0x80); | ||||||
|  | ASSERT_REG_POSITION(execute, 0xC0); | ||||||
|  | ASSERT_REG_POSITION(control_params, 0x100); | ||||||
|  | ASSERT_REG_POSITION(picture_info_offset, 0x101); | ||||||
|  | ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); | ||||||
|  | ASSERT_REG_POSITION(frame_number, 0x103); | ||||||
|  | ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); | ||||||
|  | ASSERT_REG_POSITION(frame_stats_offset, 0x109); | ||||||
|  | ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); | ||||||
|  | ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); | ||||||
|  | ASSERT_REG_POSITION(surface_luma_offset, 0x10C); | ||||||
|  | ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); | ||||||
|  | ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); | ||||||
|  | ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); | ||||||
|  | ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); | ||||||
|  | ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); | ||||||
|  | ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); | ||||||
|  | ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); | ||||||
|  | 
 | ||||||
|  | #undef ASSERT_REG_POSITION | ||||||
|  | 
 | ||||||
| } // namespace Tegra::NvdecCommon
 | } // namespace Tegra::NvdecCommon
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ameer J
						Ameer J