Pica: Implement stencil testing.
This commit is contained in:
		
							parent
							
								
									302e9a20f3
								
							
						
					
					
						commit
						5e79706db2
					
				
					 3 changed files with 199 additions and 13 deletions
				
			
		|  | @ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) { | ||||||
|  * @param bytes Pointer where to store the encoded value |  * @param bytes Pointer where to store the encoded value | ||||||
|  */ |  */ | ||||||
| inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { | inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { | ||||||
|     *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth; |     bytes[0] = depth & 0xFF; | ||||||
|  |     bytes[1] = (depth >> 8) & 0xFF; | ||||||
|  |     bytes[2] = (depth >> 16) & 0xFF; | ||||||
|  |     bytes[3] = stencil; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused) | ||||||
|  |  * @param depth 24 bit source depth value to encode | ||||||
|  |  * @param bytes Pointer where to store the encoded value | ||||||
|  |  * @note unused bits will not be modified | ||||||
|  |  */ | ||||||
|  | inline void EncodeD24X8(u32 depth, u8* bytes) { | ||||||
|  |     bytes[0] = depth & 0xFF; | ||||||
|  |     bytes[1] = (depth >> 8) & 0xFF; | ||||||
|  |     bytes[2] = (depth >> 16) & 0xFF; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused) | ||||||
|  |  * @param stencil 8 bit source stencil value to encode | ||||||
|  |  * @param bytes Pointer where to store the encoded value | ||||||
|  |  * @note unused bits will not be modified | ||||||
|  |  */ | ||||||
|  | inline void EncodeX24S8(u8 stencil, u8* bytes) { | ||||||
|  |     bytes[3] = stencil; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
|  |  | ||||||
|  | @ -420,6 +420,11 @@ struct Regs { | ||||||
|         GreaterThanOrEqual = 7, |         GreaterThanOrEqual = 7, | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|  |     enum class StencilAction : u32 { | ||||||
|  |         Keep = 0, | ||||||
|  |         Xor  = 5, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|     struct { |     struct { | ||||||
|         union { |         union { | ||||||
|             // If false, logic blending is used
 |             // If false, logic blending is used
 | ||||||
|  | @ -454,15 +459,35 @@ struct Regs { | ||||||
|             BitField< 8, 8, u32> ref; |             BitField< 8, 8, u32> ref; | ||||||
|         } alpha_test; |         } alpha_test; | ||||||
| 
 | 
 | ||||||
|  |         struct { | ||||||
|             union { |             union { | ||||||
|             BitField< 0, 1, u32> stencil_test_enable; |                 // If true, enable stencil testing
 | ||||||
|             BitField< 4, 3, CompareFunc> stencil_test_func; |                 BitField< 0, 1, u32> enable; | ||||||
|             BitField< 8, 8, u32> stencil_replacement_value; |  | ||||||
|             BitField<16, 8, u32> stencil_reference_value; |  | ||||||
|             BitField<24, 8, u32> stencil_mask; |  | ||||||
|         } stencil_test; |  | ||||||
| 
 | 
 | ||||||
|         INSERT_PADDING_WORDS(0x1); |                 // Comparison operation for stencil testing
 | ||||||
|  |                 BitField< 4, 3, CompareFunc> func; | ||||||
|  | 
 | ||||||
|  |                 // Value to calculate the new stencil value from
 | ||||||
|  |                 BitField< 8, 8, u32> replacement_value; | ||||||
|  | 
 | ||||||
|  |                 // Value to compare against for stencil testing
 | ||||||
|  |                 BitField<16, 8, u32> reference_value; | ||||||
|  | 
 | ||||||
|  |                 // Mask to apply on stencil test inputs
 | ||||||
|  |                 BitField<24, 8, u32> mask; | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             union { | ||||||
|  |                 // Action to perform when the stencil test fails
 | ||||||
|  |                 BitField< 0, 3, StencilAction> action_stencil_fail; | ||||||
|  | 
 | ||||||
|  |                 // Action to perform when stencil testing passes but depth testing fails
 | ||||||
|  |                 BitField< 4, 3, StencilAction> action_depth_fail; | ||||||
|  | 
 | ||||||
|  |                 // Action to perform when both stencil and depth testing pass
 | ||||||
|  |                 BitField< 8, 3, StencilAction> action_depth_pass; | ||||||
|  |             }; | ||||||
|  |         } stencil_test; | ||||||
| 
 | 
 | ||||||
|         union { |         union { | ||||||
|             BitField< 0, 1, u32> depth_test_enable; |             BitField< 0, 1, u32> depth_test_enable; | ||||||
|  | @ -512,7 +537,7 @@ struct Regs { | ||||||
|     struct { |     struct { | ||||||
|         INSERT_PADDING_WORDS(0x6); |         INSERT_PADDING_WORDS(0x6); | ||||||
| 
 | 
 | ||||||
|         DepthFormat depth_format; |         DepthFormat depth_format; // TODO: Should be a BitField!
 | ||||||
|         BitField<16, 3, ColorFormat> color_format; |         BitField<16, 3, ColorFormat> color_format; | ||||||
| 
 | 
 | ||||||
|         INSERT_PADDING_WORDS(0x4); |         INSERT_PADDING_WORDS(0x4); | ||||||
|  |  | ||||||
|  | @ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static u8 GetStencil(int x, int y) { /* Reads the stencil value of pixel (x, y) from the depth buffer; logs a warning and returns 0 unless the depth format is D24S8. */ | ||||||
|  |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|  |     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||||
|  | 
 | ||||||
|  |     y = framebuffer.height - y; /* flip y relative to the framebuffer height */ | ||||||
|  | 
 | ||||||
|  |     const u32 coarse_y = y & ~7; /* y rounded down to a multiple of 8 */ | ||||||
|  |     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|  |     u32 stride = framebuffer.width * bytes_per_pixel; /* bytes per framebuffer row */ | ||||||
|  | 
 | ||||||
|  |     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; /* GetMortonOffset result plus coarse_y whole rows; presumably an 8x8 tiled layout -- confirm against GetMortonOffset */ | ||||||
|  |     u8* src_pixel = depth_buffer + src_offset; | ||||||
|  | 
 | ||||||
|  |     switch (framebuffer.depth_format) { | ||||||
|  |         case Regs::DepthFormat::D24S8: | ||||||
|  |             return Color::DecodeD24S8(src_pixel).y; /* the .y member of the decoded value is returned as the stencil */ | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |             LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); /* NOTE(review): "function" in this message presumably means "format" */ | ||||||
|  |             return 0; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void SetDepth(int x, int y, u32 value) { | static void SetDepth(int x, int y, u32 value) { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer; |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|  | @ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) { | ||||||
|         case Regs::DepthFormat::D16: |         case Regs::DepthFormat::D16: | ||||||
|             Color::EncodeD16(value, dst_pixel); |             Color::EncodeD16(value, dst_pixel); | ||||||
|             break; |             break; | ||||||
|  | 
 | ||||||
|         case Regs::DepthFormat::D24: |         case Regs::DepthFormat::D24: | ||||||
|             Color::EncodeD24(value, dst_pixel); |             Color::EncodeD24(value, dst_pixel); | ||||||
|             break; |             break; | ||||||
|  | 
 | ||||||
|         case Regs::DepthFormat::D24S8: |         case Regs::DepthFormat::D24S8: | ||||||
|             // TODO(Subv): Implement the stencil buffer
 |             Color::EncodeD24X8(value, dst_pixel); | ||||||
|             Color::EncodeD24S8(value, 0, dst_pixel); |  | ||||||
|             break; |             break; | ||||||
|  | 
 | ||||||
|         default: |         default: | ||||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||||
|             UNIMPLEMENTED(); |             UNIMPLEMENTED(); | ||||||
|  | @ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void SetStencil(int x, int y, u8 value) { /* Stores value as the stencil of pixel (x, y) in the depth buffer; only the D24S8 format is written, D16 and D24 are left untouched. */ | ||||||
|  |     const auto& framebuffer = g_state.regs.framebuffer; | ||||||
|  |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|  |     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||||
|  | 
 | ||||||
|  |     y = framebuffer.height - y; /* flip y relative to the framebuffer height */ | ||||||
|  | 
 | ||||||
|  |     const u32 coarse_y = y & ~7; /* y rounded down to a multiple of 8 */ | ||||||
|  |     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|  |     u32 stride = framebuffer.width * bytes_per_pixel; /* bytes per framebuffer row */ | ||||||
|  | 
 | ||||||
|  |     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; /* same addressing as GetStencil */ | ||||||
|  |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
|  | 
 | ||||||
|  |     switch (framebuffer.depth_format) { | ||||||
|  |         case Pica::Regs::DepthFormat::D16: | ||||||
|  |         case Pica::Regs::DepthFormat::D24: | ||||||
|  |             // Nothing to do: no stencil byte is written for these formats
 | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         case Pica::Regs::DepthFormat::D24S8: | ||||||
|  |             Color::EncodeX24S8(value, dst_pixel); /* writes only bytes[3], so the three depth bytes are preserved */ | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||||
|  |             UNIMPLEMENTED(); | ||||||
|  |             break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
 | ||||||
|  | static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { /* Returns the new stencil value produced by applying action to the current value dest and the operand ref; callers in this file pass the unmasked stored stencil and stencil_test.replacement_value. */ | ||||||
|  |     switch (action) { | ||||||
|  |     case Regs::StencilAction::Keep: | ||||||
|  |         return dest; /* stencil value is left unchanged */ | ||||||
|  | 
 | ||||||
|  |     case Regs::StencilAction::Xor: | ||||||
|  |         return dest ^ ref; /* bitwise XOR of the current value and ref */ | ||||||
|  | 
 | ||||||
|  |     default: /* only Keep and Xor are handled; any other action is logged and yields 0 */ | ||||||
|  |         LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); | ||||||
|  |         UNIMPLEMENTED(); | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||||
| struct Fix12P4 { | struct Fix12P4 { | ||||||
|     Fix12P4() {} |     Fix12P4() {} | ||||||
|  | @ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|     auto textures = regs.GetTextures(); |     auto textures = regs.GetTextures(); | ||||||
|     auto tev_stages = regs.GetTevStages(); |     auto tev_stages = regs.GetTevStages(); | ||||||
| 
 | 
 | ||||||
|  |     bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | ||||||
|  |     const auto stencil_test = g_state.regs.output_merger.stencil_test; | ||||||
|  | 
 | ||||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
 |     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
 | ||||||
|     // TODO: Not sure if looping through x first might be faster
 |     // TODO: Not sure if looping through x first might be faster
 | ||||||
|     for (u16 y = min_y + 8; y < max_y; y += 0x10) { |     for (u16 y = min_y + 8; y < max_y; y += 0x10) { | ||||||
|  | @ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             const auto& output_merger = regs.output_merger; |             const auto& output_merger = regs.output_merger; | ||||||
|  |             // TODO: Does alpha testing happen before or after stencil?
 | ||||||
|             if (output_merger.alpha_test.enable) { |             if (output_merger.alpha_test.enable) { | ||||||
|                 bool pass = false; |                 bool pass = false; | ||||||
| 
 | 
 | ||||||
|  | @ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                     continue; |                     continue; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  |             u8 old_stencil = 0; | ||||||
|  |             if (stencil_action_enable) { | ||||||
|  |                 old_stencil = GetStencil(x >> 4, y >> 4); | ||||||
|  |                 u8 dest = old_stencil & stencil_test.mask; | ||||||
|  |                 u8 ref = stencil_test.reference_value & stencil_test.mask; | ||||||
|  | 
 | ||||||
|  |                 bool pass = false; | ||||||
|  |                 switch (stencil_test.func) { | ||||||
|  |                 case Regs::CompareFunc::Never: | ||||||
|  |                     pass = false; | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::Always: | ||||||
|  |                     pass = true; | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::Equal: | ||||||
|  |                     pass = (ref == dest); | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::NotEqual: | ||||||
|  |                     pass = (ref != dest); | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::LessThan: | ||||||
|  |                     pass = (ref < dest); | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::LessThanOrEqual: | ||||||
|  |                     pass = (ref <= dest); | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::GreaterThan: | ||||||
|  |                     pass = (ref > dest); | ||||||
|  |                     break; | ||||||
|  | 
 | ||||||
|  |                 case Regs::CompareFunc::GreaterThanOrEqual: | ||||||
|  |                     pass = (ref >= dest); | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 if (!pass) { | ||||||
|  |                     u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); | ||||||
|  |                     SetStencil(x >> 4, y >> 4, new_stencil); | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             // TODO: Does depth indeed only get written if depth testing is enabled?
 |             // TODO: Does depth indeed only get written if depth testing is enabled?
 | ||||||
|             if (output_merger.depth_test_enable) { |             if (output_merger.depth_test_enable) { | ||||||
|                 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); |                 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); | ||||||
|  | @ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (!pass) |                 if (!pass) { | ||||||
|  |                     if (stencil_action_enable) { | ||||||
|  |                         u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); | ||||||
|  |                         SetStencil(x >> 4, y >> 4, new_stencil); | ||||||
|  |                     } | ||||||
|                     continue; |                     continue; | ||||||
|  |                 } | ||||||
| 
 | 
 | ||||||
|                 if (output_merger.depth_write_enable) |                 if (output_merger.depth_write_enable) | ||||||
|                     SetDepth(x >> 4, y >> 4, z); |                     SetDepth(x >> 4, y >> 4, z); | ||||||
|  | 
 | ||||||
|  |                 if (stencil_action_enable) { | ||||||
|  |                     // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
 | ||||||
|  |                     u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); | ||||||
|  |                     SetStencil(x >> 4, y >> 4, new_stencil); | ||||||
|  |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             auto dest = GetPixel(x >> 4, y >> 4); |             auto dest = GetPixel(x >> 4, y >> 4); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Tony Wasserka
						Tony Wasserka