forked from eden-emu/eden
		
	Merge pull request #3631 from ReinUsesLisp/more-astc
texture/astc: More small ASTC optimizations
This commit is contained in:
		
						commit
						c4001225f6
					
				
					 1 changed files with 159 additions and 82 deletions
				
			
		|  | @ -20,6 +20,8 @@ | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include <vector> | #include <vector> | ||||||
| 
 | 
 | ||||||
|  | #include <boost/container/static_vector.hpp> | ||||||
|  | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| 
 | 
 | ||||||
| #include "video_core/textures/astc.h" | #include "video_core/textures/astc.h" | ||||||
|  | @ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) { | ||||||
| 
 | 
 | ||||||
| class InputBitStream { | class InputBitStream { | ||||||
| public: | public: | ||||||
|     explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |     constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) | ||||||
|         : m_CurByte(ptr), m_NextBit(start_offset % 8) {} |         : cur_byte{ptr}, next_bit{start_offset % 8} {} | ||||||
| 
 | 
 | ||||||
|     std::size_t GetBitsRead() const { |     constexpr std::size_t GetBitsRead() const { | ||||||
|         return m_BitsRead; |         return bits_read; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u32 ReadBit() { |     constexpr bool ReadBit() { | ||||||
|         u32 bit = *m_CurByte >> m_NextBit++; |         const bool bit = (*cur_byte >> next_bit++) & 1; | ||||||
|         while (m_NextBit >= 8) { |         while (next_bit >= 8) { | ||||||
|             m_NextBit -= 8; |             next_bit -= 8; | ||||||
|             m_CurByte++; |             cur_byte++; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         m_BitsRead++; |         bits_read++; | ||||||
|         return bit & 1; |         return bit; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u32 ReadBits(std::size_t nBits) { |     constexpr u32 ReadBits(std::size_t nBits) { | ||||||
|         u32 ret = 0; |         u32 ret = 0; | ||||||
|         for (std::size_t i = 0; i < nBits; ++i) { |         for (std::size_t i = 0; i < nBits; ++i) { | ||||||
|             ret |= (ReadBit() & 1) << i; |             ret |= (ReadBit() & 1) << i; | ||||||
|  | @ -66,7 +68,7 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     template <std::size_t nBits> |     template <std::size_t nBits> | ||||||
|     u32 ReadBits() { |     constexpr u32 ReadBits() { | ||||||
|         u32 ret = 0; |         u32 ret = 0; | ||||||
|         for (std::size_t i = 0; i < nBits; ++i) { |         for (std::size_t i = 0; i < nBits; ++i) { | ||||||
|             ret |= (ReadBit() & 1) << i; |             ret |= (ReadBit() & 1) << i; | ||||||
|  | @ -75,64 +77,58 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     const u8* m_CurByte; |     const u8* cur_byte; | ||||||
|     std::size_t m_NextBit = 0; |     std::size_t next_bit = 0; | ||||||
|     std::size_t m_BitsRead = 0; |     std::size_t bits_read = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class OutputBitStream { | class OutputBitStream { | ||||||
| public: | public: | ||||||
|     explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) |     constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) | ||||||
|         : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |         : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} | ||||||
| 
 | 
 | ||||||
|     ~OutputBitStream() = default; |     constexpr std::size_t GetBitsWritten() const { | ||||||
| 
 |         return bits_written; | ||||||
|     s32 GetBitsWritten() const { |  | ||||||
|         return m_BitsWritten; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void WriteBitsR(u32 val, u32 nBits) { |     constexpr void WriteBitsR(u32 val, u32 nBits) { | ||||||
|         for (u32 i = 0; i < nBits; i++) { |         for (u32 i = 0; i < nBits; i++) { | ||||||
|             WriteBit((val >> (nBits - i - 1)) & 1); |             WriteBit((val >> (nBits - i - 1)) & 1); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void WriteBits(u32 val, u32 nBits) { |     constexpr void WriteBits(u32 val, u32 nBits) { | ||||||
|         for (u32 i = 0; i < nBits; i++) { |         for (u32 i = 0; i < nBits; i++) { | ||||||
|             WriteBit((val >> i) & 1); |             WriteBit((val >> i) & 1); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void WriteBit(s32 b) { |     constexpr void WriteBit(bool b) { | ||||||
| 
 |         if (bits_written >= num_bits) { | ||||||
|         if (done) |  | ||||||
|             return; |             return; | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         const u32 mask = 1 << m_NextBit++; |         const u32 mask = 1 << next_bit++; | ||||||
| 
 | 
 | ||||||
|         // clear the bit
 |         // clear the bit
 | ||||||
|         *m_CurByte &= static_cast<u8>(~mask); |         *cur_byte &= static_cast<u8>(~mask); | ||||||
| 
 | 
 | ||||||
|         // Write the bit, if necessary
 |         // Write the bit, if necessary
 | ||||||
|         if (b) |         if (b) | ||||||
|             *m_CurByte |= static_cast<u8>(mask); |             *cur_byte |= static_cast<u8>(mask); | ||||||
| 
 | 
 | ||||||
|         // Next byte?
 |         // Next byte?
 | ||||||
|         if (m_NextBit >= 8) { |         if (next_bit >= 8) { | ||||||
|             m_CurByte += 1; |             cur_byte += 1; | ||||||
|             m_NextBit = 0; |             next_bit = 0; | ||||||
|         } |         } | ||||||
| 
 |  | ||||||
|         done = done || ++m_BitsWritten >= m_NumBits; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     s32 m_BitsWritten = 0; |     u8* cur_byte; | ||||||
|     const s32 m_NumBits; |     std::size_t num_bits; | ||||||
|     u8* m_CurByte; |     std::size_t bits_written = 0; | ||||||
|     s32 m_NextBit = 0; |     std::size_t next_bit = 0; | ||||||
| 
 |  | ||||||
|     bool done = false; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename IntType> | template <typename IntType> | ||||||
|  | @ -195,9 +191,13 @@ struct IntegerEncodedValue { | ||||||
|         u32 trit_value; |         u32 trit_value; | ||||||
|     }; |     }; | ||||||
| }; | }; | ||||||
|  | using IntegerEncodedVector = boost::container::static_vector< | ||||||
|  |     IntegerEncodedValue, 64, | ||||||
|  |     boost::container::static_vector_options< | ||||||
|  |         boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, | ||||||
|  |         boost::container::throw_on_overflow<false>>::type>; | ||||||
| 
 | 
 | ||||||
| static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { | ||||||
|                             u32 nBitsPerValue) { |  | ||||||
|     // Implement the algorithm in section C.2.12
 |     // Implement the algorithm in section C.2.12
 | ||||||
|     u32 m[5]; |     u32 m[5]; | ||||||
|     u32 t[5]; |     u32 t[5]; | ||||||
|  | @ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, | ||||||
|                              u32 nBitsPerValue) { |                              u32 nBitsPerValue) { | ||||||
|     // Implement the algorithm in section C.2.12
 |     // Implement the algorithm in section C.2.12
 | ||||||
|     u32 m[3]; |     u32 m[3]; | ||||||
|  | @ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues(); | ||||||
| // Fills result with the values that are encoded in the given
 | // Fills result with the values that are encoded in the given
 | ||||||
| // bitstream. We must know beforehand what the maximum possible
 | // bitstream. We must know beforehand what the maximum possible
 | ||||||
| // value is, and how many values we're decoding.
 | // value is, and how many values we're decoding.
 | ||||||
| static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, | ||||||
|                                   u32 maxRange, u32 nValues) { |                                   u32 nValues) { | ||||||
|     // Determine encoding parameters
 |     // Determine encoding parameters
 | ||||||
|     IntegerEncodedValue val = EncodingsValues[maxRange]; |     IntegerEncodedValue val = EncodingsValues[maxRange]; | ||||||
| 
 | 
 | ||||||
|  | @ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | ||||||
| // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
 | ||||||
| // is the same as [(numBits - 1):0] and repeats all the way down.
 | // is the same as [(numBits - 1):0] and repeats all the way down.
 | ||||||
| template <typename IntType> | template <typename IntType> | ||||||
| static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) { | ||||||
|     if (numBits == 0) |     if (numBits == 0) { | ||||||
|         return 0; |         return 0; | ||||||
|     if (toBit == 0) |     } | ||||||
|  |     if (toBit == 0) { | ||||||
|         return 0; |         return 0; | ||||||
|     IntType v = val & static_cast<IntType>((1 << numBits) - 1); |     } | ||||||
|  |     const IntType v = val & static_cast<IntType>((1 << numBits) - 1); | ||||||
|     IntType res = v; |     IntType res = v; | ||||||
|     u32 reslen = numBits; |     u32 reslen = numBits; | ||||||
|     while (reslen < toBit) { |     while (reslen < toBit) { | ||||||
|  | @ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | ||||||
|     return res; |     return res; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static constexpr std::size_t NumReplicateEntries(u32 num_bits) { | ||||||
|  |     return std::size_t(1) << num_bits; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <typename IntType, u32 num_bits, u32 to_bit> | ||||||
|  | static constexpr auto MakeReplicateTable() { | ||||||
|  |     std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||||||
|  |     for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { | ||||||
|  |         table[value] = Replicate(value, num_bits, to_bit); | ||||||
|  |     } | ||||||
|  |     return table; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||||||
|  | static constexpr u32 ReplicateByteTo16(std::size_t value) { | ||||||
|  |     return REPLICATE_BYTE_TO_16_TABLE[value]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); | ||||||
|  | static constexpr u32 ReplicateBitTo7(std::size_t value) { | ||||||
|  |     return REPLICATE_BIT_TO_7_TABLE[value]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); | ||||||
|  | static constexpr u32 ReplicateBitTo9(std::size_t value) { | ||||||
|  |     return REPLICATE_BIT_TO_9_TABLE[value]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||||||
|  | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||||||
|  | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||||||
|  | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||||||
|  | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||||||
|  | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||||||
|  | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||||||
|  | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||||||
|  | /// Use a precompiled table for the most common usages; if num_bits is outside the expected range,
 | ||||||
|  | /// fall back to the runtime implementation.
 | ||||||
|  | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||||||
|  |     switch (num_bits) { | ||||||
|  |     case 1: | ||||||
|  |         return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 2: | ||||||
|  |         return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 3: | ||||||
|  |         return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 4: | ||||||
|  |         return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 5: | ||||||
|  |         return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 6: | ||||||
|  |         return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 7: | ||||||
|  |         return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||||||
|  |     case 8: | ||||||
|  |         return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||||||
|  |     default: | ||||||
|  |         return Replicate(value, num_bits, 8); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||||||
|  | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||||||
|  | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||||||
|  | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||||||
|  | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||||||
|  | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||||||
|  |     switch (num_bits) { | ||||||
|  |     case 1: | ||||||
|  |         return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||||||
|  |     case 2: | ||||||
|  |         return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||||||
|  |     case 3: | ||||||
|  |         return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||||||
|  |     case 4: | ||||||
|  |         return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||||||
|  |     case 5: | ||||||
|  |         return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||||||
|  |     default: | ||||||
|  |         return Replicate(value, num_bits, 6); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| class Pixel { | class Pixel { | ||||||
| protected: | protected: | ||||||
|     using ChannelType = s16; |     using ChannelType = s16; | ||||||
|  | @ -674,10 +759,10 @@ public: | ||||||
|     // significant bits when going from larger to smaller bit depth
 |     // significant bits when going from larger to smaller bit depth
 | ||||||
|     // or by repeating the most significant bits when going from
 |     // or by repeating the most significant bits when going from
 | ||||||
|     // smaller to larger bit depths.
 |     // smaller to larger bit depths.
 | ||||||
|     void ChangeBitDepth(const u8 (&depth)[4]) { |     void ChangeBitDepth() { | ||||||
|         for (u32 i = 0; i < 4; i++) { |         for (u32 i = 0; i < 4; i++) { | ||||||
|             Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); |             Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); | ||||||
|             m_BitDepth[i] = depth[i]; |             m_BitDepth[i] = 8; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -689,28 +774,23 @@ public: | ||||||
| 
 | 
 | ||||||
|     // Changes the bit depth of a single component. See the comment
 |     // Changes the bit depth of a single component. See the comment
 | ||||||
|     // above for how we do this.
 |     // above for how we do this.
 | ||||||
|     static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { |     static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { | ||||||
|         assert(newDepth <= 8); |  | ||||||
|         assert(oldDepth <= 8); |         assert(oldDepth <= 8); | ||||||
| 
 | 
 | ||||||
|         if (oldDepth == newDepth) { |         if (oldDepth == 8) { | ||||||
|             // Do nothing
 |             // Do nothing
 | ||||||
|             return val; |             return val; | ||||||
|         } else if (oldDepth == 0 && newDepth != 0) { |         } else if (oldDepth == 0) { | ||||||
|             return static_cast<ChannelType>((1 << newDepth) - 1); |             return static_cast<ChannelType>((1 << 8) - 1); | ||||||
|         } else if (newDepth > oldDepth) { |         } else if (8 > oldDepth) { | ||||||
|             return Replicate(val, oldDepth, newDepth); |             return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); | ||||||
|         } else { |         } else { | ||||||
|             // oldDepth > newDepth
 |             // oldDepth > 8
 | ||||||
|             if (newDepth == 0) { |             const u8 bitsWasted = static_cast<u8>(oldDepth - 8); | ||||||
|                 return 0xFF; |             u16 v = static_cast<u16>(val); | ||||||
|             } else { |             v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | ||||||
|                 u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); |             v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); | ||||||
|                 u16 v = static_cast<u16>(val); |             return static_cast<u8>(v); | ||||||
|                 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |  | ||||||
|                 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); |  | ||||||
|                 return static_cast<u8>(v); |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         assert(false && "We shouldn't get here."); |         assert(false && "We shouldn't get here."); | ||||||
|  | @ -760,8 +840,7 @@ public: | ||||||
|     // up in the most-significant byte.
 |     // up in the most-significant byte.
 | ||||||
|     u32 Pack() const { |     u32 Pack() const { | ||||||
|         Pixel eightBit(*this); |         Pixel eightBit(*this); | ||||||
|         const u8 eightBitDepth[4] = {8, 8, 8, 8}; |         eightBit.ChangeBitDepth(); | ||||||
|         eightBit.ChangeBitDepth(eightBitDepth); |  | ||||||
| 
 | 
 | ||||||
|         u32 r = 0; |         u32 r = 0; | ||||||
|         r |= eightBit.A(); |         r |= eightBit.A(); | ||||||
|  | @ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // We now have enough to decode our integer sequence.
 |     // We now have enough to decode our integer sequence.
 | ||||||
|     std::vector<IntegerEncodedValue> decodedColorValues; |     IntegerEncodedVector decodedColorValues; | ||||||
|     decodedColorValues.reserve(32); |  | ||||||
| 
 | 
 | ||||||
|     InputBitStream colorStream(data); |     InputBitStream colorStream(data); | ||||||
|     DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |     DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | ||||||
|  | @ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | ||||||
| 
 | 
 | ||||||
|         u32 A = 0, B = 0, C = 0, D = 0; |         u32 A = 0, B = 0, C = 0, D = 0; | ||||||
|         // A is just the lsb replicated 9 times.
 |         // A is just the lsb replicated 9 times.
 | ||||||
|         A = Replicate(bitval & 1, 1, 9); |         A = ReplicateBitTo9(bitval & 1); | ||||||
| 
 | 
 | ||||||
|         switch (val.encoding) { |         switch (val.encoding) { | ||||||
|         // Replicate bits
 |         // Replicate bits
 | ||||||
|         case IntegerEncoding::JustBits: |         case IntegerEncoding::JustBits: | ||||||
|             out[outIdx++] = Replicate(bitval, bitlen, 8); |             out[outIdx++] = FastReplicateTo8(bitval, bitlen); | ||||||
|             break; |             break; | ||||||
| 
 | 
 | ||||||
|         // Use algorithm in C.2.13
 |         // Use algorithm in C.2.13
 | ||||||
|  | @ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | ||||||
|     u32 bitval = val.bit_value; |     u32 bitval = val.bit_value; | ||||||
|     u32 bitlen = val.num_bits; |     u32 bitlen = val.num_bits; | ||||||
| 
 | 
 | ||||||
|     u32 A = Replicate(bitval & 1, 1, 7); |     u32 A = ReplicateBitTo7(bitval & 1); | ||||||
|     u32 B = 0, C = 0, D = 0; |     u32 B = 0, C = 0, D = 0; | ||||||
| 
 | 
 | ||||||
|     u32 result = 0; |     u32 result = 0; | ||||||
|     switch (val.encoding) { |     switch (val.encoding) { | ||||||
|     case IntegerEncoding::JustBits: |     case IntegerEncoding::JustBits: | ||||||
|         result = Replicate(bitval, bitlen, 6); |         result = FastReplicateTo6(bitval, bitlen); | ||||||
|         break; |         break; | ||||||
| 
 | 
 | ||||||
|     case IntegerEncoding::Trit: { |     case IntegerEncoding::Trit: { | ||||||
|  | @ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, | static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, | ||||||
|                                    const TexelWeightParams& params, const u32 blockWidth, |                                    const TexelWeightParams& params, const u32 blockWidth, | ||||||
|                                    const u32 blockHeight) { |                                    const u32 blockHeight) { | ||||||
|     u32 weightIdx = 0; |     u32 weightIdx = 0; | ||||||
|  | @ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | ||||||
|         static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |         static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | ||||||
|     memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |     memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | ||||||
| 
 | 
 | ||||||
|     std::vector<IntegerEncodedValue> texelWeightValues; |     IntegerEncodedVector texelWeightValues; | ||||||
|     texelWeightValues.reserve(64); |  | ||||||
| 
 | 
 | ||||||
|     InputBitStream weightStream(texelWeightData); |     InputBitStream weightStream(texelWeightData); | ||||||
| 
 | 
 | ||||||
|  | @ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | ||||||
|             Pixel p; |             Pixel p; | ||||||
|             for (u32 c = 0; c < 4; c++) { |             for (u32 c = 0; c < 4; c++) { | ||||||
|                 u32 C0 = endpos32s[partition][0].Component(c); |                 u32 C0 = endpos32s[partition][0].Component(c); | ||||||
|                 C0 = Replicate(C0, 8, 16); |                 C0 = ReplicateByteTo16(C0); | ||||||
|                 u32 C1 = endpos32s[partition][1].Component(c); |                 u32 C1 = endpos32s[partition][1].Component(c); | ||||||
|                 C1 = Replicate(C1, 8, 16); |                 C1 = ReplicateByteTo16(C1); | ||||||
| 
 | 
 | ||||||
|                 u32 plane = 0; |                 u32 plane = 0; | ||||||
|                 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |                 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Mat M
						Mat M