mirror of
				https://git.tardis.systems/mirrors/yuzu
				synced 2025-10-31 02:34:11 +01:00 
			
		
		
		
	Merge pull request #11789 from Kelebek1/spirv_shift_right
Manually robust on Maxwell and earlier
This commit is contained in:
		
						commit
						4b06bcc82c
					
				| @ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, | ||||
|     } else if (element_size > 1) { | ||||
|         const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||||
|         const Id shift{ctx.Const(log2_element_size)}; | ||||
|         buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); | ||||
|         buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift); | ||||
|     } else { | ||||
|         buffer_offset = ctx.Def(offset); | ||||
|     } | ||||
|     if (!binding.IsImmediate()) { | ||||
|         return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); | ||||
|     } | ||||
| 
 | ||||
|     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||||
|     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; | ||||
|     return ctx.OpLoad(result_type, access_chain); | ||||
|     const Id val = ctx.OpLoad(result_type, access_chain); | ||||
| 
 | ||||
|     if (offset.IsImmediate() || !ctx.profile.has_broken_robust) { | ||||
|         return val; | ||||
|     } | ||||
| 
 | ||||
|     const auto is_float = UniformDefinitions::IsFloat(member_ptr); | ||||
|     const auto num_elements = UniformDefinitions::NumElements(member_ptr); | ||||
|     const std::array zero_vec{ | ||||
|         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||
|         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||
|         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||
|         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||
|     }; | ||||
|     const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); | ||||
|     const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); | ||||
|     return ctx.OpSelect(result_type, cond, val, zero); | ||||
| } | ||||
| 
 | ||||
| Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||||
| @ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde | ||||
|         const u32 element{(offset.U32() / 4) % 4 + index_offset}; | ||||
|         return ctx.OpCompositeExtract(ctx.U32[1], vector, element); | ||||
|     } | ||||
|     const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | ||||
|     const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | ||||
|     Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; | ||||
|     if (index_offset > 0) { | ||||
|         element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); | ||||
|  | ||||
| @ -64,6 +64,42 @@ struct UniformDefinitions { | ||||
|     Id F32{}; | ||||
|     Id U32x2{}; | ||||
|     Id U32x4{}; | ||||
| 
 | ||||
|     constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) { | ||||
|         if (member_ptr == &UniformDefinitions::U8) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::S8) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::U16) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::S16) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::U32) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::F32) { | ||||
|             return 1; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::U32x2) { | ||||
|             return 2; | ||||
|         } | ||||
|         if (member_ptr == &UniformDefinitions::U32x4) { | ||||
|             return 4; | ||||
|         } | ||||
|         ASSERT(false); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) { | ||||
|         if (member_ptr == &UniformDefinitions::F32) { | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct StorageTypeDefinition { | ||||
|  | ||||
| @ -9,7 +9,6 @@ namespace Shader { | ||||
| 
 | ||||
| struct Profile { | ||||
|     u32 supported_spirv{0x00010000}; | ||||
| 
 | ||||
|     bool unified_descriptor_binding{}; | ||||
|     bool support_descriptor_aliasing{}; | ||||
|     bool support_int8{}; | ||||
| @ -82,6 +81,9 @@ struct Profile { | ||||
|     bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; | ||||
| 
 | ||||
|     u32 gl_max_compute_smem_size{}; | ||||
| 
 | ||||
|     /// Maxwell and earlier nVidia architectures have broken robust support
 | ||||
|     bool has_broken_robust{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Shader
 | ||||
|  | ||||
| @ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | ||||
|         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, | ||||
|         .ignore_nan_fp_comparisons = false, | ||||
|         .has_broken_spirv_subgroup_mask_vector_extract_dynamic = | ||||
|             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; | ||||
|             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | ||||
|         .has_broken_robust = | ||||
|             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell, | ||||
|     }; | ||||
| 
 | ||||
|     host_info = Shader::HostTranslateInfo{ | ||||
|         .support_float64 = device.IsFloat64Supported(), | ||||
|         .support_float16 = device.IsFloat16Supported(), | ||||
|  | ||||
| @ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ | ||||
| 
 | ||||
| } // namespace Alternatives
 | ||||
| 
 | ||||
| enum class NvidiaArchitecture { | ||||
|     KeplerOrOlder, | ||||
|     Maxwell, | ||||
|     Pascal, | ||||
|     Volta, | ||||
|     Turing, | ||||
|     AmpereOrNewer, | ||||
| }; | ||||
| 
 | ||||
| template <typename T> | ||||
| void SetNext(void**& next, T& data) { | ||||
|     *next = &data; | ||||
| @ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||
|         if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { | ||||
|             // Only Ampere and newer support this feature
 | ||||
|             // TODO: Find a way to differentiate Ampere and Ada
 | ||||
|             return NvidiaArchitecture::AmpereOrNewer; | ||||
|             return NvidiaArchitecture::Arch_AmpereOrNewer; | ||||
|         } | ||||
|         return NvidiaArchitecture::Turing; | ||||
|         return NvidiaArchitecture::Arch_Turing; | ||||
|     } | ||||
| 
 | ||||
|     if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { | ||||
| @ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||
|         physical_properties.pNext = &advanced_blending_props; | ||||
|         physical.GetProperties2(physical_properties); | ||||
|         if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { | ||||
|             return NvidiaArchitecture::Maxwell; | ||||
|             return NvidiaArchitecture::Arch_Maxwell; | ||||
|         } | ||||
| 
 | ||||
|         if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { | ||||
| @ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||
|             physical_properties.pNext = &conservative_raster_props; | ||||
|             physical.GetProperties2(physical_properties); | ||||
|             if (conservative_raster_props.degenerateLinesRasterized) { | ||||
|                 return NvidiaArchitecture::Volta; | ||||
|                 return NvidiaArchitecture::Arch_Volta; | ||||
|             } | ||||
|             return NvidiaArchitecture::Pascal; | ||||
|             return NvidiaArchitecture::Arch_Pascal; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return NvidiaArchitecture::KeplerOrOlder; | ||||
|     return NvidiaArchitecture::Arch_KeplerOrOlder; | ||||
| } | ||||
| 
 | ||||
| std::vector<const char*> ExtensionListForVulkan( | ||||
| @ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | ||||
|     } | ||||
| 
 | ||||
|     if (is_nvidia) { | ||||
|         nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions); | ||||
|     } | ||||
| 
 | ||||
|     SetupFamilies(surface); | ||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||
| 
 | ||||
| @ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
| 
 | ||||
|     if (is_nvidia) { | ||||
|         const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; | ||||
|         const auto arch = GetNvidiaArchitecture(physical, supported_extensions); | ||||
|         if (arch >= NvidiaArchitecture::AmpereOrNewer) { | ||||
|         const auto arch = GetNvidiaArch(); | ||||
|         if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) { | ||||
|             LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); | ||||
|             features.shader_float16_int8.shaderFloat16 = false; | ||||
|         } else if (arch <= NvidiaArchitecture::Volta) { | ||||
|         } else if (arch <= NvidiaArchitecture::Arch_Volta) { | ||||
|             if (nv_major_version < 527) { | ||||
|                 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); | ||||
|                 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||
| @ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||
|         } | ||||
|     } else if (extensions.push_descriptor && is_nvidia) { | ||||
|         const auto arch = GetNvidiaArchitecture(physical, supported_extensions); | ||||
|         if (arch <= NvidiaArchitecture::Pascal) { | ||||
|         const auto arch = GetNvidiaArch(); | ||||
|         if (arch <= NvidiaArchitecture::Arch_Pascal) { | ||||
|             LOG_WARNING(Render_Vulkan, | ||||
|                         "Pascal and older architectures have broken VK_KHR_push_descriptor"); | ||||
|             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||
|  | ||||
| @ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer }; | ||||
| /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
 | ||||
| const u32 GuestWarpSize = 32; | ||||
| 
 | ||||
/// NVIDIA GPU generations, ordered from oldest to newest.
/// The ordering is significant: callers use relational comparisons
/// (e.g. arch <= Arch_Maxwell, arch >= Arch_AmpereOrNewer) to select
/// "this generation and older/newer" workarounds, so any new enumerator
/// must preserve the chronological order.
enum class NvidiaArchitecture {
    Arch_KeplerOrOlder,
    Arch_Maxwell,
    Arch_Pascal,
    Arch_Volta,
    Arch_Turing,
    Arch_AmpereOrNewer,
};
| 
 | ||||
| /// Handles data specific to a physical device.
 | ||||
| class Device { | ||||
| public: | ||||
| @ -670,6 +679,14 @@ public: | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
    /// Returns true when the physical device reports the proprietary
    /// NVIDIA driver (VK_DRIVER_ID_NVIDIA_PROPRIETARY).
    bool IsNvidia() const noexcept {
        return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
    }
| 
 | ||||
    /// Returns the NVIDIA architecture detected at device creation.
    /// Only meaningful when IsNvidia() is true: nvidia_arch is assigned in
    /// the constructor only for NVIDIA devices and otherwise keeps its
    /// default of Arch_AmpereOrNewer.
    NvidiaArchitecture GetNvidiaArch() const noexcept {
        return nvidia_arch;
    }
| 
 | ||||
| private: | ||||
|     /// Checks if the physical device is suitable and configures the object state
 | ||||
|     /// with all necessary info about its properties.
 | ||||
| @ -788,6 +805,7 @@ private: | ||||
|     bool supports_conditional_barriers{};      ///< Allows barriers in conditional control flow.
 | ||||
|     u64 device_access_memory{};                ///< Total size of device local memory in bytes.
 | ||||
|     u32 sets_per_pool{};                       ///< Sets per Description Pool
 | ||||
|     NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; | ||||
| 
 | ||||
|     // Telemetry parameters
 | ||||
|     std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user