mirror of
				https://git.tardis.systems/mirrors/yuzu
				synced 2025-10-31 18:54:14 +01:00 
			
		
		
		
	Merge pull request #12066 from ameerj/nvidia-nsanity
shader_recompiler: add byteswap pattern workaround for Nvidia
This commit is contained in:
		
						commit
						4458920799
					
				| @ -231,6 +231,7 @@ add_library(shader_recompiler STATIC | ||||
|     ir_opt/rescaling_pass.cpp | ||||
|     ir_opt/ssa_rewrite_pass.cpp | ||||
|     ir_opt/texture_pass.cpp | ||||
|     ir_opt/vendor_workaround_pass.cpp | ||||
|     ir_opt/verification_pass.cpp | ||||
|     object_pool.h | ||||
|     precompiled_headers.h | ||||
|  | ||||
| @ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     } | ||||
|     Optimization::CollectShaderInfoPass(env, program); | ||||
|     Optimization::LayerPass(program, host_info); | ||||
|     Optimization::VendorWorkaroundPass(program); | ||||
| 
 | ||||
|     CollectInterpolationInfo(env, program); | ||||
|     AddNVNStorageBuffers(program); | ||||
|  | ||||
| @ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program); | ||||
| void PositionPass(Environment& env, IR::Program& program); | ||||
| void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); | ||||
| void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); | ||||
| void VendorWorkaroundPass(IR::Program& program); | ||||
| void VerificationPass(const IR::Program& program); | ||||
| 
 | ||||
| // Dual Vertex
 | ||||
|  | ||||
							
								
								
									
										79
									
								
								src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,79 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| 
 | ||||
| namespace { | ||||
| void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) { | ||||
|     /*
 | ||||
|      * Workaround for an NVIDIA bug seen in Super Mario RPG | ||||
|      * | ||||
|      * We are looking for this pattern: | ||||
|      *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16 | ||||
|      *   %lhs_mul = IMul32 %lhs_bfe, %factor_b           // potentially optional?
 | ||||
|      *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 | ||||
|      *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16 | ||||
|      *   %result  = IAdd32 %lhs_shl, %rhs_bfe | ||||
|      * | ||||
|      * And replacing the IAdd32 with a BitwiseOr32 | ||||
|      *   %result  = BitwiseOr32 %lhs_shl, %rhs_bfe | ||||
|      * | ||||
|      */ | ||||
|     IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()}; | ||||
|     IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()}; | ||||
|     if (!lhs_shl || !rhs_bfe) { | ||||
|         return; | ||||
|     } | ||||
|     if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || | ||||
|         lhs_shl->Arg(1) != IR::Value{16U}) { | ||||
|         return; | ||||
|     } | ||||
|     if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} || | ||||
|         rhs_bfe->Arg(2) != IR::Value{16U}) { | ||||
|         return; | ||||
|     } | ||||
|     IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()}; | ||||
|     if (!lhs_mul) { | ||||
|         return; | ||||
|     } | ||||
|     const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract}; | ||||
|     if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 && | ||||
|         lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||||
|         return; | ||||
|     } | ||||
|     IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()}; | ||||
|     if (!lhs_bfe) { | ||||
|         return; | ||||
|     } | ||||
|     if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||||
|         return; | ||||
|     } | ||||
|     if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) { | ||||
|         return; | ||||
|     } | ||||
|     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|     inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)})); | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void VendorWorkaroundPass(IR::Program& program) { | ||||
|     for (IR::Block* const block : program.post_order_blocks) { | ||||
|         for (IR::Inst& inst : block->Instructions()) { | ||||
|             switch (inst.GetOpcode()) { | ||||
|             case IR::Opcode::IAdd32: | ||||
|                 AddingByteSwapsWorkaround(*block, inst); | ||||
|                 break; | ||||
|             default: | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user