add syncpoint to codecs
Some checks failed
eden-license / license-header (pull_request) Failing after 28s
Some checks failed
eden-license / license-header (pull_request) Failing after 28s
This commit is contained in:
parent
020f1cdb1f
commit
c58af7b556
10 changed files with 224 additions and 32 deletions
|
@ -35,12 +35,115 @@
|
||||||
|
|
||||||
namespace Core::Memory {
|
namespace Core::Memory {
|
||||||
|
|
||||||
static inline bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessAddress addr,
|
namespace {
|
||||||
|
|
||||||
|
/**
 * Copies `size` bytes from `src` to `dst`, with dedicated branches for common small sizes.
 *
 * Every branch goes through std::memcpy with a compile-time-constant length. Optimizing
 * compilers typically lower such calls to plain load/store instructions, and unlike
 * dereferencing the buffers through casted u16/u32/u64 pointers this stays well-defined
 * when `dst` or `src` is not suitably aligned for those types, or when the bytes being
 * copied belong to objects of a different type (strict aliasing).
 *
 * @param dst  Destination buffer; must be writable for `size` bytes.
 * @param src  Source buffer; must be readable for `size` bytes and must not overlap `dst`.
 * @param size Number of bytes to copy. Zero copies nothing.
 */
inline void FastMemcpy(void* dst, const void* src, std::size_t size) {
    switch (size) {
    case 1:
        std::memcpy(dst, src, 1);
        break;
    case 2:
        std::memcpy(dst, src, 2);
        break;
    case 4:
        std::memcpy(dst, src, 4);
        break;
    case 8:
        std::memcpy(dst, src, 8);
        break;
    case 16:
        std::memcpy(dst, src, 16);
        break;
    case 32:
        std::memcpy(dst, src, 32);
        break;
    case 64:
        std::memcpy(dst, src, 64);
        break;
    default:
        // Any other length: defer to the library routine with the runtime size.
        std::memcpy(dst, src, size);
        break;
    }
}
|
||||||
|
|
||||||
|
/**
 * Fills `size` bytes at `dst` with the byte value `value`, with dedicated branches for
 * common small sizes.
 *
 * Semantics match std::memset: `value` is converted to unsigned char and that single byte
 * is replicated across the whole range. Storing `value` as one u16/u32/u64 would only set
 * the lowest byte and zero the others, which is not a byte fill. Each branch uses
 * std::memset with a compile-time-constant length, which optimizing compilers typically
 * lower to plain stores and which stays well-defined for unaligned destinations.
 *
 * @param dst   Destination buffer; must be writable for `size` bytes.
 * @param value Fill value; only its low 8 bits are used.
 * @param size  Number of bytes to fill. Zero writes nothing.
 */
inline void FastMemset(void* dst, int value, std::size_t size) {
    switch (size) {
    case 1:
        std::memset(dst, value, 1);
        break;
    case 2:
        std::memset(dst, value, 2);
        break;
    case 4:
        std::memset(dst, value, 4);
        break;
    case 8:
        std::memset(dst, value, 8);
        break;
    case 16:
        std::memset(dst, value, 16);
        break;
    default:
        // Any other length (including small zero-fills): defer to the library routine.
        std::memset(dst, value, size);
        break;
    }
}
|
||||||
|
|
||||||
|
bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessAddress addr,
|
||||||
const std::size_t size) {
|
const std::size_t size) {
|
||||||
const Common::ProcessAddress max_addr = 1ULL << table.GetAddressSpaceBits();
|
const Common::ProcessAddress max_addr = 1ULL << table.GetAddressSpaceBits();
|
||||||
return addr + size >= addr && addr + size <= max_addr;
|
return addr + size >= addr && addr + size <= max_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// Implementation class used to keep the specifics of the memory subsystem hidden
|
// Implementation class used to keep the specifics of the memory subsystem hidden
|
||||||
// from outside classes. This also allows modification to the internals of the memory
|
// from outside classes. This also allows modification to the internals of the memory
|
||||||
// subsystem without needing to rebuild all files that make use of the memory interface.
|
// subsystem without needing to rebuild all files that make use of the memory interface.
|
||||||
|
@ -313,28 +416,70 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory,
|
LOG_ERROR(HW_Memory,
|
||||||
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||||
GetInteger(current_vaddr), GetInteger(src_addr), size);
|
GetInteger(current_vaddr), GetInteger(src_addr), size);
|
||||||
std::memset(dest_buffer, 0, copy_amount);
|
FastMemset(dest_buffer, 0, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const std::size_t copy_amount, const u8* const src_ptr) {
|
[&](const std::size_t copy_amount, const u8* const src_ptr) {
|
||||||
std::memcpy(dest_buffer, src_ptr, copy_amount);
|
FastMemcpy(dest_buffer, src_ptr, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
||||||
const u8* const host_ptr) {
|
const u8* const host_ptr) {
|
||||||
if constexpr (!UNSAFE) {
|
if constexpr (!UNSAFE) {
|
||||||
HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount);
|
HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount);
|
||||||
}
|
}
|
||||||
std::memcpy(dest_buffer, host_ptr, copy_amount);
|
FastMemcpy(dest_buffer, host_ptr, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const std::size_t copy_amount) {
|
[&](const std::size_t copy_amount) {
|
||||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads `size` bytes starting at `src_addr` into `dest_buffer`, splitting the range into
 * contiguous chunks that are each read by their own std::thread.
 *
 * The range is divided into at most `thread_count` chunks of equal size (the last one may
 * be shorter). Each launched worker calls ReadBlockImpl<false> on its chunk, and all
 * workers are joined before returning.
 *
 * @param src_addr    Address of the first byte to read.
 * @param dest_buffer Destination buffer; must be writable for `size` bytes.
 * @param size        Number of bytes to read. Zero launches no workers and returns true.
 * @return true only if every chunk's ReadBlockImpl call returned true.
 */
bool ReadBlockParallel(const Common::ProcessAddress src_addr, void* dest_buffer,
                       const std::size_t size) {
    // Clamp to at least one worker so the chunk-size division cannot divide by zero.
    const std::size_t worker_count = std::max<std::size_t>(thread_count, 1);
    const std::size_t chunk_size = (size + worker_count - 1) / worker_count;

    std::vector<std::thread> threads;
    threads.reserve(worker_count);

    // One full byte per worker. std::vector<bool> packs its elements into shared words,
    // so concurrent writes to different indices would be a data race.
    std::vector<u8> results(worker_count, 1);

    for (std::size_t i = 0; i < worker_count; ++i) {
        const std::size_t offset = i * chunk_size;
        if (offset >= size) {
            // Fewer chunks than workers; the remaining result slots stay "success".
            break;
        }

        const std::size_t current_chunk_size = std::min(chunk_size, size - offset);
        const Common::ProcessAddress current_addr = src_addr + offset;
        void* const current_dest = static_cast<u8*>(dest_buffer) + offset;
        u8* const result_slot = &results[i];

        threads.emplace_back([this, current_addr, current_dest, current_chunk_size, result_slot] {
            *result_slot =
                ReadBlockImpl<false>(current_addr, current_dest, current_chunk_size) ? 1 : 0;
        });
    }

    // Joining also makes the workers' result writes visible to this thread.
    for (auto& thread : threads) {
        thread.join();
    }

    return std::all_of(results.begin(), results.end(), [](u8 ok) { return ok != 0; });
}
|
||||||
|
|
||||||
bool ReadBlock(const Common::ProcessAddress src_addr, void* dest_buffer,
|
bool ReadBlock(const Common::ProcessAddress src_addr, void* dest_buffer,
|
||||||
const std::size_t size) {
|
const std::size_t size) {
|
||||||
// TODO: If you want a proper multithreaded implementation (w/o cache coherency fights)
|
// For small reads, use the regular implementation
|
||||||
// use TBB or something that splits the job properly
|
if (size < PARALLEL_THRESHOLD) {
|
||||||
return ReadBlockImpl<false>(src_addr, dest_buffer, size);
|
return ReadBlockImpl<false>(src_addr, dest_buffer, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For large reads, use parallel implementation
|
||||||
|
return ReadBlockParallel(src_addr, dest_buffer, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_buffer,
|
bool ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_buffer,
|
||||||
|
@ -370,25 +515,67 @@ struct Memory::Impl {
|
||||||
GetInteger(current_vaddr), GetInteger(dest_addr), size);
|
GetInteger(current_vaddr), GetInteger(dest_addr), size);
|
||||||
},
|
},
|
||||||
[&](const std::size_t copy_amount, u8* const dest_ptr) {
|
[&](const std::size_t copy_amount, u8* const dest_ptr) {
|
||||||
std::memcpy(dest_ptr, src_buffer, copy_amount);
|
FastMemcpy(dest_ptr, src_buffer, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
||||||
u8* const host_ptr) {
|
u8* const host_ptr) {
|
||||||
if constexpr (!UNSAFE) {
|
if constexpr (!UNSAFE) {
|
||||||
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
|
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
|
||||||
}
|
}
|
||||||
std::memcpy(host_ptr, src_buffer, copy_amount);
|
FastMemcpy(host_ptr, src_buffer, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const std::size_t copy_amount) {
|
[&](const std::size_t copy_amount) {
|
||||||
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
|
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Writes `size` bytes from `src_buffer` to the range starting at `dest_addr`, splitting
 * the range into contiguous chunks that are each written by their own std::thread.
 *
 * The range is divided into at most `thread_count` chunks of equal size (the last one may
 * be shorter). Each launched worker calls WriteBlockImpl<false> on its chunk, and all
 * workers are joined before returning.
 *
 * @param dest_addr  Address of the first byte to write.
 * @param src_buffer Source buffer; must be readable for `size` bytes.
 * @param size       Number of bytes to write. Zero launches no workers and returns true.
 * @return true only if every chunk's WriteBlockImpl call returned true.
 */
bool WriteBlockParallel(const Common::ProcessAddress dest_addr, const void* src_buffer,
                        const std::size_t size) {
    // Clamp to at least one worker so the chunk-size division cannot divide by zero.
    const std::size_t worker_count = std::max<std::size_t>(thread_count, 1);
    const std::size_t chunk_size = (size + worker_count - 1) / worker_count;

    std::vector<std::thread> threads;
    threads.reserve(worker_count);

    // One full byte per worker. std::vector<bool> packs its elements into shared words,
    // so concurrent writes to different indices would be a data race.
    std::vector<u8> results(worker_count, 1);

    for (std::size_t i = 0; i < worker_count; ++i) {
        const std::size_t offset = i * chunk_size;
        if (offset >= size) {
            // Fewer chunks than workers; the remaining result slots stay "success".
            break;
        }

        const std::size_t current_chunk_size = std::min(chunk_size, size - offset);
        const Common::ProcessAddress current_addr = dest_addr + offset;
        const void* const current_src = static_cast<const u8*>(src_buffer) + offset;
        u8* const result_slot = &results[i];

        threads.emplace_back([this, current_addr, current_src, current_chunk_size, result_slot] {
            *result_slot =
                WriteBlockImpl<false>(current_addr, current_src, current_chunk_size) ? 1 : 0;
        });
    }

    // Joining also makes the workers' result writes visible to this thread.
    for (auto& thread : threads) {
        thread.join();
    }

    return std::all_of(results.begin(), results.end(), [](u8 ok) { return ok != 0; });
}
|
||||||
|
|
||||||
bool WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
|
bool WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
|
||||||
const std::size_t size) {
|
const std::size_t size) {
|
||||||
// TODO: If you want a proper multithreaded implementation (w/o cache coherency fights)
|
// For small writes, use the regular implementation
|
||||||
// use TBB or something that splits the job properly
|
if (size < PARALLEL_THRESHOLD) {
|
||||||
return WriteBlockImpl<false>(dest_addr, src_buffer, size);
|
return WriteBlockImpl<false>(dest_addr, src_buffer, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For large writes, use parallel implementation
|
||||||
|
return WriteBlockParallel(dest_addr, src_buffer, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteBlockUnsafe(const Common::ProcessAddress dest_addr, const void* src_buffer,
|
bool WriteBlockUnsafe(const Common::ProcessAddress dest_addr, const void* src_buffer,
|
||||||
|
@ -406,12 +593,12 @@ struct Memory::Impl {
|
||||||
GetInteger(current_vaddr), GetInteger(dest_addr), size);
|
GetInteger(current_vaddr), GetInteger(dest_addr), size);
|
||||||
},
|
},
|
||||||
[](const std::size_t copy_amount, u8* const dest_ptr) {
|
[](const std::size_t copy_amount, u8* const dest_ptr) {
|
||||||
std::memset(dest_ptr, 0, copy_amount);
|
FastMemset(dest_ptr, 0, copy_amount);
|
||||||
},
|
},
|
||||||
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
|
||||||
u8* const host_ptr) {
|
u8* const host_ptr) {
|
||||||
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
|
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
|
||||||
std::memset(host_ptr, 0, copy_amount);
|
FastMemset(host_ptr, 0, copy_amount);
|
||||||
},
|
},
|
||||||
[](const std::size_t copy_amount) {});
|
[](const std::size_t copy_amount) {});
|
||||||
}
|
}
|
||||||
|
@ -806,7 +993,7 @@ struct Memory::Impl {
|
||||||
},
|
},
|
||||||
[&]() { HandleRasterizerDownload(addr, sizeof(T)); });
|
[&]() { HandleRasterizerDownload(addr, sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
std::memcpy(&result, ptr, sizeof(T));
|
FastMemcpy(&result, ptr, sizeof(T));
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -893,7 +1080,7 @@ struct Memory::Impl {
|
||||||
},
|
},
|
||||||
[&]() { HandleRasterizerWrite(addr, sizeof(T)); });
|
[&]() { HandleRasterizerWrite(addr, sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
std::memcpy(ptr, &data, sizeof(T));
|
FastMemcpy(ptr, &data, sizeof(T));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1016,7 +1203,7 @@ struct Memory::Impl {
|
||||||
unsigned int thread_count = 2;
|
unsigned int thread_count = 2;
|
||||||
|
|
||||||
// Minimum size in bytes for which parallel processing is beneficial
|
// Minimum size in bytes for which parallel processing is beneficial
|
||||||
//size_t PARALLEL_THRESHOLD = (L3 CACHE * NUM PHYSICAL CORES); // 64 KB
|
static constexpr size_t PARALLEL_THRESHOLD = 64 * 1024; // 64 KB
|
||||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
||||||
rasterizer_read_areas{};
|
rasterizer_read_areas{};
|
||||||
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
||||||
|
|
|
@ -12,9 +12,10 @@
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_,
|
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, u32 syncpoint_,
|
||||||
|
const Host1x::NvdecCommon::NvdecRegisters& regs_,
|
||||||
Host1x::FrameQueue& frame_queue_)
|
Host1x::FrameQueue& frame_queue_)
|
||||||
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{
|
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, syncpoint{syncpoint_},id{id_}, frame_queue{
|
||||||
frame_queue_} {}
|
frame_queue_} {}
|
||||||
|
|
||||||
Decoder::~Decoder() = default;
|
Decoder::~Decoder() = default;
|
||||||
|
|
|
@ -41,7 +41,7 @@ public:
|
||||||
[[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0;
|
[[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit Decoder(Host1x::Host1x& host1x, s32 id,
|
explicit Decoder(Host1x::Host1x& host1x, s32 id, u32 syncpoint,
|
||||||
const Host1x::NvdecCommon::NvdecRegisters& regs,
|
const Host1x::NvdecCommon::NvdecRegisters& regs,
|
||||||
Host1x::FrameQueue& frame_queue);
|
Host1x::FrameQueue& frame_queue);
|
||||||
|
|
||||||
|
@ -53,6 +53,7 @@ protected:
|
||||||
Host1x::Host1x& host1x;
|
Host1x::Host1x& host1x;
|
||||||
Tegra::MemoryManager& memory_manager;
|
Tegra::MemoryManager& memory_manager;
|
||||||
const Host1x::NvdecCommon::NvdecRegisters& regs;
|
const Host1x::NvdecCommon::NvdecRegisters& regs;
|
||||||
|
u32 syncpoint;
|
||||||
s32 id;
|
s32 id;
|
||||||
Host1x::FrameQueue& frame_queue;
|
Host1x::FrameQueue& frame_queue;
|
||||||
Host1x::NvdecCommon::VideoCodec codec;
|
Host1x::NvdecCommon::VideoCodec codec;
|
||||||
|
|
|
@ -29,8 +29,9 @@ constexpr std::array<u8, 16> zig_zag_scan{
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||||
|
u32 syncpoint_,
|
||||||
Host1x::FrameQueue& frame_queue_)
|
Host1x::FrameQueue& frame_queue_)
|
||||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
: Decoder{host1x_, id_, syncpoint_, regs_, frame_queue_} {
|
||||||
codec = Host1x::NvdecCommon::VideoCodec::H264;
|
codec = Host1x::NvdecCommon::VideoCodec::H264;
|
||||||
initialized = decode_api.Initialize(codec);
|
initialized = decode_api.Initialize(codec);
|
||||||
}
|
}
|
||||||
|
|
|
@ -242,7 +242,7 @@ ASSERT_POSITION(weight_scale_4x4, 0x1C0);
|
||||||
class H264 final : public Decoder {
|
class H264 final : public Decoder {
|
||||||
public:
|
public:
|
||||||
explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
||||||
Host1x::FrameQueue& frame_queue);
|
u32 syncpoint, Host1x::FrameQueue& frame_queue);
|
||||||
~H264() override;
|
~H264() override;
|
||||||
|
|
||||||
H264(const H264&) = delete;
|
H264(const H264&) = delete;
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
|
|
||||||
namespace Tegra::Decoders {
|
namespace Tegra::Decoders {
|
||||||
VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||||
Host1x::FrameQueue& frame_queue_)
|
u32 syncpoint_, Host1x::FrameQueue& frame_queue_)
|
||||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
: Decoder{host1x_, id_, syncpoint_, regs_, frame_queue_} {
|
||||||
codec = Host1x::NvdecCommon::VideoCodec::VP8;
|
codec = Host1x::NvdecCommon::VideoCodec::VP8;
|
||||||
initialized = decode_api.Initialize(codec);
|
initialized = decode_api.Initialize(codec);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@ enum class Vp8SurfaceIndex : u32 {
|
||||||
class VP8 final : public Decoder {
|
class VP8 final : public Decoder {
|
||||||
public:
|
public:
|
||||||
explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
||||||
Host1x::FrameQueue& frame_queue);
|
u32 syncpoint, Host1x::FrameQueue& frame_queue);
|
||||||
~VP8() override;
|
~VP8() override;
|
||||||
|
|
||||||
VP8(const VP8&) = delete;
|
VP8(const VP8&) = delete;
|
||||||
|
|
|
@ -242,8 +242,8 @@ constexpr std::array<u8, 254> map_lut{
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||||
Host1x::FrameQueue& frame_queue_)
|
u32 syncpoint_, Host1x::FrameQueue& frame_queue_)
|
||||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
: Decoder{host1x_, id_, syncpoint_, regs_, frame_queue_} {
|
||||||
codec = Host1x::NvdecCommon::VideoCodec::VP9;
|
codec = Host1x::NvdecCommon::VideoCodec::VP9;
|
||||||
initialized = decode_api.Initialize(codec);
|
initialized = decode_api.Initialize(codec);
|
||||||
}
|
}
|
||||||
|
@ -900,6 +900,8 @@ std::span<const u8> VP9::ComposeFrame() {
|
||||||
|
|
||||||
vp9_hidden_frame = WasFrameHidden();
|
vp9_hidden_frame = WasFrameHidden();
|
||||||
|
|
||||||
|
host1x.GetSyncpointManager().IncrementGuest(syncpoint);
|
||||||
|
|
||||||
return GetFrameBytes();
|
return GetFrameBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -113,8 +113,8 @@ private:
|
||||||
|
|
||||||
class VP9 final : public Decoder {
|
class VP9 final : public Decoder {
|
||||||
public:
|
public:
|
||||||
explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||||
Host1x::FrameQueue& frame_queue);
|
u32 syncpoint_, Host1x::FrameQueue& frame_queue_);
|
||||||
~VP9() override;
|
~VP9() override;
|
||||||
|
|
||||||
VP9(const VP9&) = delete;
|
VP9(const VP9&) = delete;
|
||||||
|
|
|
@ -48,13 +48,13 @@ void Nvdec::CreateDecoder(NvdecCommon::VideoCodec codec) {
|
||||||
}
|
}
|
||||||
switch (codec) {
|
switch (codec) {
|
||||||
case NvdecCommon::VideoCodec::H264:
|
case NvdecCommon::VideoCodec::H264:
|
||||||
decoder = std::make_unique<Decoders::H264>(host1x, regs, id, frame_queue);
|
decoder = std::make_unique<Decoders::H264>(host1x, regs, id, syncpoint, frame_queue);
|
||||||
break;
|
break;
|
||||||
case NvdecCommon::VideoCodec::VP8:
|
case NvdecCommon::VideoCodec::VP8:
|
||||||
decoder = std::make_unique<Decoders::VP8>(host1x, regs, id, frame_queue);
|
decoder = std::make_unique<Decoders::VP8>(host1x, regs, id, syncpoint, frame_queue);
|
||||||
break;
|
break;
|
||||||
case NvdecCommon::VideoCodec::VP9:
|
case NvdecCommon::VideoCodec::VP9:
|
||||||
decoder = std::make_unique<Decoders::VP9>(host1x, regs, id, frame_queue);
|
decoder = std::make_unique<Decoders::VP9>(host1x, regs, id, syncpoint, frame_queue);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
|
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue