mirror of
				https://git.tardis.systems/mirrors/yuzu
				synced 2025-10-31 18:54:14 +01:00 
			
		
		
		
	Merge pull request #12412 from ameerj/gl-query-prims
OpenGL: Add GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries
This commit is contained in:
		
						commit
						91290b9be4
					
				| @ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() { | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessCounterReset() { | ||||
|     switch (regs.clear_report_value) { | ||||
|     case Regs::ClearReport::ZPassPixelCount: | ||||
|         rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | ||||
|         break; | ||||
|     default: | ||||
|         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | ||||
|         break; | ||||
|     } | ||||
|     const auto query_type = [clear_report = regs.clear_report_value]() { | ||||
|         switch (clear_report) { | ||||
|         case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount: | ||||
|             return VideoCommon::QueryType::ZPassPixelCount64; | ||||
|         case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded: | ||||
|             return VideoCommon::QueryType::StreamingPrimitivesSucceeded; | ||||
|         case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated: | ||||
|             return VideoCommon::QueryType::PrimitivesGenerated; | ||||
|         case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut: | ||||
|             return VideoCommon::QueryType::VtgPrimitivesOut; | ||||
|         default: | ||||
|             LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report); | ||||
|             return VideoCommon::QueryType::Payload; | ||||
|         } | ||||
|     }(); | ||||
|     rasterizer->ResetCounter(query_type); | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessSyncPoint() { | ||||
|  | ||||
| @ -28,8 +28,11 @@ | ||||
| namespace VideoCore { | ||||
| enum class QueryType { | ||||
|     SamplesPassed, | ||||
|     PrimitivesGenerated, | ||||
|     TfbPrimitivesWritten, | ||||
|     Count, | ||||
| }; | ||||
| constexpr std::size_t NumQueryTypes = 1; | ||||
| constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count); | ||||
| } // namespace VideoCore
 | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| @ -44,15 +47,6 @@ public: | ||||
|     explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) | ||||
|         : cache{cache_}, type{type_} {} | ||||
| 
 | ||||
|     /// Updates the state of the stream, enabling or disabling as needed.
 | ||||
|     void Update(bool enabled) { | ||||
|         if (enabled) { | ||||
|             Enable(); | ||||
|         } else { | ||||
|             Disable(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Resets the stream to zero. It doesn't disable the query after resetting.
 | ||||
|     void Reset() { | ||||
|         if (current) { | ||||
| @ -80,7 +74,6 @@ public: | ||||
|         return current != nullptr; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     /// Enables the stream.
 | ||||
|     void Enable() { | ||||
|         if (current) { | ||||
| @ -97,6 +90,7 @@ private: | ||||
|         last = std::exchange(current, nullptr); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     QueryCache& cache; | ||||
|     const VideoCore::QueryType type; | ||||
| 
 | ||||
| @ -112,8 +106,14 @@ public: | ||||
|         : rasterizer{rasterizer_}, | ||||
|           // Use reinterpret_cast instead of static_cast as workaround for
 | ||||
|           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
 | ||||
|           cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|                                                           VideoCore::QueryType::SamplesPassed}}} { | ||||
|           cpu_memory{cpu_memory_}, streams{{ | ||||
|                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|                                                       VideoCore::QueryType::SamplesPassed}}, | ||||
|                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|                                                       VideoCore::QueryType::PrimitivesGenerated}}, | ||||
|                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|                                                       VideoCore::QueryType::TfbPrimitivesWritten}}, | ||||
|                                    }} { | ||||
|         (void)slot_async_jobs.insert(); // Null value
 | ||||
|     } | ||||
| 
 | ||||
| @ -157,12 +157,11 @@ public: | ||||
|         AsyncFlushQuery(query, timestamp, lock); | ||||
|     } | ||||
| 
 | ||||
|     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
 | ||||
|     void UpdateCounters() { | ||||
|     /// Enables all available GPU counters
 | ||||
|     void EnableCounters() { | ||||
|         std::unique_lock lock{mutex}; | ||||
|         if (maxwell3d) { | ||||
|             const auto& regs = maxwell3d->regs; | ||||
|             Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable); | ||||
|         for (auto& stream : streams) { | ||||
|             stream.Enable(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -176,7 +175,7 @@ public: | ||||
|     void DisableStreams() { | ||||
|         std::unique_lock lock{mutex}; | ||||
|         for (auto& stream : streams) { | ||||
|             stream.Update(false); | ||||
|             stream.Disable(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -353,7 +352,7 @@ private: | ||||
| 
 | ||||
|     std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; | ||||
|     std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; | ||||
| }; | ||||
| }; // namespace VideoCommon
 | ||||
| 
 | ||||
| template <class QueryCache, class HostCounter> | ||||
| class HostCounterBase { | ||||
|  | ||||
| @ -18,16 +18,27 @@ namespace OpenGL { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; | ||||
| 
 | ||||
| constexpr GLenum GetTarget(VideoCore::QueryType type) { | ||||
|     return QueryTargets[static_cast<std::size_t>(type)]; | ||||
|     switch (type) { | ||||
|     case VideoCore::QueryType::SamplesPassed: | ||||
|         return GL_SAMPLES_PASSED; | ||||
|     case VideoCore::QueryType::PrimitivesGenerated: | ||||
|         return GL_PRIMITIVES_GENERATED; | ||||
|     case VideoCore::QueryType::TfbPrimitivesWritten: | ||||
|         return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|     UNIMPLEMENTED_MSG("Query type {}", type); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | ||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | ||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { | ||||
|     EnableCounters(); | ||||
| } | ||||
| 
 | ||||
| QueryCache::~QueryCache() = default; | ||||
| 
 | ||||
| @ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) { | ||||
|     auto& stream = cache->Stream(type); | ||||
|     const bool slice_counter = WaitPending() && stream.IsEnabled(); | ||||
|     if (slice_counter) { | ||||
|         stream.Update(false); | ||||
|         stream.Disable(); | ||||
|     } | ||||
| 
 | ||||
|     auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); | ||||
| 
 | ||||
|     if (slice_counter) { | ||||
|         stream.Update(true); | ||||
|         stream.Enable(); | ||||
|     } | ||||
| 
 | ||||
|     return result; | ||||
|  | ||||
| @ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | ||||
| void oglEnable(GLenum cap, bool state) { | ||||
|     (state ? glEnable : glDisable)(cap); | ||||
| } | ||||
| 
 | ||||
| std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) { | ||||
|     switch (type) { | ||||
|     case VideoCommon::QueryType::PrimitivesGenerated: | ||||
|     case VideoCommon::QueryType::VtgPrimitivesOut: | ||||
|         return VideoCore::QueryType::PrimitivesGenerated; | ||||
|     case VideoCommon::QueryType::ZPassPixelCount64: | ||||
|         return VideoCore::QueryType::SamplesPassed; | ||||
|     case VideoCommon::QueryType::StreamingPrimitivesSucceeded: | ||||
|         // case VideoCommon::QueryType::StreamingByteCount:
 | ||||
|         // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
 | ||||
|         return VideoCore::QueryType::TfbPrimitivesWritten; | ||||
|     default: | ||||
|         return std::nullopt; | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
| @ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | ||||
| 
 | ||||
|     SCOPE_EXIT({ gpu.TickWork(); }); | ||||
|     gpu_memory->FlushCaching(); | ||||
|     query_cache.UpdateCounters(); | ||||
| 
 | ||||
|     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; | ||||
|     if (!pipeline) { | ||||
| @ -334,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Drawing); | ||||
| 
 | ||||
|     SCOPE_EXIT({ gpu.TickWork(); }); | ||||
|     query_cache.UpdateCounters(); | ||||
| 
 | ||||
|     texture_cache.SynchronizeGraphicsDescriptors(); | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
| @ -401,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() { | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | ||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||
|         query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); | ||||
|     const auto query_cache_type = MaxwellToVideoCoreQuery(type); | ||||
|     if (!query_cache_type.has_value()) { | ||||
|         UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type); | ||||
|         return; | ||||
|     } | ||||
|     query_cache.ResetCounter(*query_cache_type); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||
|         if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | ||||
|         } else { | ||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); | ||||
|         } | ||||
|         return; | ||||
|     const auto query_cache_type = MaxwellToVideoCoreQuery(type); | ||||
|     if (!query_cache_type.has_value()) { | ||||
|         return QueryFallback(gpu_addr, type, flags, payload, subreport); | ||||
|     } | ||||
|     const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout); | ||||
|     const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt; | ||||
|     query_cache.Query(gpu_addr, *query_cache_type, timestamp); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                                      VideoCommon::QueryPropertiesFlags flags, u32 payload, | ||||
|                                      u32 subreport) { | ||||
|     if (type != VideoCommon::QueryType::Payload) { | ||||
|         payload = 1u; | ||||
|     } | ||||
|  | ||||
| @ -225,6 +225,9 @@ private: | ||||
|     /// End a transform feedback
 | ||||
|     void EndTransformFeedback(); | ||||
| 
 | ||||
|     void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | ||||
| 
 | ||||
|     Tegra::GPU& gpu; | ||||
| 
 | ||||
|     const Device& device; | ||||
|  | ||||
| @ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() { | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | ||||
|     if (type != VideoCommon::QueryType::ZPassPixelCount64) { | ||||
|         LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); | ||||
|         return; | ||||
|     } | ||||
|     query_cache.CounterReset(type); | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user