mirror of
				https://git.tardis.systems/mirrors/yuzu
				synced 2025-11-04 04:34:07 +01:00 
			
		
		
		
	Merge pull request #3409 from ReinUsesLisp/host-queries
query_cache: Implement a query cache and query 21 (samples passed)
This commit is contained in:
		
						commit
						93acfbd3a5
					
				@ -37,6 +37,7 @@ add_library(video_core STATIC
 | 
			
		||||
    memory_manager.h
 | 
			
		||||
    morton.cpp
 | 
			
		||||
    morton.h
 | 
			
		||||
    query_cache.h
 | 
			
		||||
    rasterizer_accelerated.cpp
 | 
			
		||||
    rasterizer_accelerated.h
 | 
			
		||||
    rasterizer_cache.cpp
 | 
			
		||||
@ -74,6 +75,8 @@ add_library(video_core STATIC
 | 
			
		||||
    renderer_opengl/gl_stream_buffer.h
 | 
			
		||||
    renderer_opengl/gl_texture_cache.cpp
 | 
			
		||||
    renderer_opengl/gl_texture_cache.h
 | 
			
		||||
    renderer_opengl/gl_query_cache.cpp
 | 
			
		||||
    renderer_opengl/gl_query_cache.h
 | 
			
		||||
    renderer_opengl/maxwell_to_gl.h
 | 
			
		||||
    renderer_opengl/renderer_opengl.cpp
 | 
			
		||||
    renderer_opengl/renderer_opengl.h
 | 
			
		||||
@ -177,6 +180,8 @@ if (ENABLE_VULKAN)
 | 
			
		||||
        renderer_vulkan/vk_memory_manager.h
 | 
			
		||||
        renderer_vulkan/vk_pipeline_cache.cpp
 | 
			
		||||
        renderer_vulkan/vk_pipeline_cache.h
 | 
			
		||||
        renderer_vulkan/vk_query_cache.cpp
 | 
			
		||||
        renderer_vulkan/vk_query_cache.h
 | 
			
		||||
        renderer_vulkan/vk_rasterizer.cpp
 | 
			
		||||
        renderer_vulkan/vk_rasterizer.h
 | 
			
		||||
        renderer_vulkan/vk_renderpass_cache.cpp
 | 
			
		||||
 | 
			
		||||
@ -4,6 +4,7 @@
 | 
			
		||||
 | 
			
		||||
#include <cinttypes>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/core_timing.h"
 | 
			
		||||
@ -16,6 +17,8 @@
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines {
 | 
			
		||||
 | 
			
		||||
using VideoCore::QueryType;
 | 
			
		||||
 | 
			
		||||
/// First register id that is actually a Macro call.
 | 
			
		||||
constexpr u32 MacroRegistersStart = 0xE00;
 | 
			
		||||
 | 
			
		||||
@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 | 
			
		||||
        ProcessQueryCondition();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(counter_reset): {
 | 
			
		||||
        ProcessCounterReset();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(sync_info): {
 | 
			
		||||
        ProcessSyncPoint();
 | 
			
		||||
        break;
 | 
			
		||||
@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() {
 | 
			
		||||
               "Units other than CROP are unimplemented");
 | 
			
		||||
 | 
			
		||||
    switch (regs.query.query_get.operation) {
 | 
			
		||||
    case Regs::QueryOperation::Release: {
 | 
			
		||||
        const u64 result = regs.query.query_sequence;
 | 
			
		||||
        StampQueryResult(result, regs.query.query_get.short_query == 0);
 | 
			
		||||
    case Regs::QueryOperation::Release:
 | 
			
		||||
        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Acquire: {
 | 
			
		||||
        // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
 | 
			
		||||
        // to write a value that matches the current payload.
 | 
			
		||||
    case Regs::QueryOperation::Acquire:
 | 
			
		||||
        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | 
			
		||||
        // matches the current payload.
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Counter: {
 | 
			
		||||
        u64 result{};
 | 
			
		||||
        switch (regs.query.query_get.select) {
 | 
			
		||||
        case Regs::QuerySelect::Zero:
 | 
			
		||||
            result = 0;
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
            result = 1;
 | 
			
		||||
            UNIMPLEMENTED_MSG("Unimplemented query select type {}",
 | 
			
		||||
                              static_cast<u32>(regs.query.query_get.select.Value()));
 | 
			
		||||
    case Regs::QueryOperation::Counter:
 | 
			
		||||
        if (const std::optional<u64> result = GetQueryResult()) {
 | 
			
		||||
            // If the query returns an empty optional it means it's cached and deferred.
 | 
			
		||||
            // In this case we have a non-empty result, so we stamp it immediately.
 | 
			
		||||
            StampQueryResult(*result, regs.query.query_get.short_query == 0);
 | 
			
		||||
        }
 | 
			
		||||
        StampQueryResult(result, regs.query.query_get.short_query == 0);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Trap: {
 | 
			
		||||
    case Regs::QueryOperation::Trap:
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    default: {
 | 
			
		||||
    default:
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unknown query operation");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::ResNonZero: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::Equal: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on =
 | 
			
		||||
            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::NotEqual: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on =
 | 
			
		||||
            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
 | 
			
		||||
        break;
 | 
			
		||||
@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessCounterReset() {
 | 
			
		||||
    switch (regs.counter_reset) {
 | 
			
		||||
    case Regs::CounterReset::SampleCnt:
 | 
			
		||||
        rasterizer.ResetCounter(QueryType::SamplesPassed);
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
 | 
			
		||||
                    static_cast<int>(regs.counter_reset));
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessSyncPoint() {
 | 
			
		||||
    const u32 sync_point = regs.sync_info.sync_point.Value();
 | 
			
		||||
    const u32 increment = regs.sync_info.increment.Value();
 | 
			
		||||
@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::optional<u64> Maxwell3D::GetQueryResult() {
 | 
			
		||||
    switch (regs.query.query_get.select) {
 | 
			
		||||
    case Regs::QuerySelect::Zero:
 | 
			
		||||
        return 0;
 | 
			
		||||
    case Regs::QuerySelect::SamplesPassed:
 | 
			
		||||
        // Deferred.
 | 
			
		||||
        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
 | 
			
		||||
                         system.GPU().GetTicks());
 | 
			
		||||
        return {};
 | 
			
		||||
    default:
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
 | 
			
		||||
                          static_cast<u32>(regs.query.query_get.select.Value()));
 | 
			
		||||
        return 1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
 | 
			
		||||
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
 | 
			
		||||
    auto& shader = state.shader_stages[stage_index];
 | 
			
		||||
 | 
			
		||||
@ -6,6 +6,7 @@
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
#include <vector>
 | 
			
		||||
@ -409,6 +410,27 @@ public:
 | 
			
		||||
            Linear = 1,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        /// Values accepted by the counter_reset register. Names prefixed with Unk are values
        /// whose meaning is not identified here.
        enum class CounterReset : u32 {
            SampleCnt = 0x01,
            Unk02 = 0x02,
            Unk03 = 0x03,
            Unk04 = 0x04,

            EmittedPrimitives = 0x10, // Not tested
            Unk11 = 0x11,
            Unk12 = 0x12,
            Unk13 = 0x13,
            Unk15 = 0x15,
            Unk16 = 0x16,
            Unk17 = 0x17,
            Unk18 = 0x18,
            Unk1A = 0x1A,
            Unk1B = 0x1B,
            Unk1C = 0x1C,
            Unk1D = 0x1D,
            Unk1E = 0x1E,
            GeneratedPrimitives = 0x1F,
        };
 | 
			
		||||
 | 
			
		||||
        struct Cull {
 | 
			
		||||
            enum class FrontFace : u32 {
 | 
			
		||||
                ClockWise = 0x0900,
 | 
			
		||||
@ -857,7 +879,7 @@ public:
 | 
			
		||||
                    BitField<7, 1, u32> c7;
 | 
			
		||||
                } clip_distance_enabled;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x1);
 | 
			
		||||
                u32 samplecnt_enable;
 | 
			
		||||
 | 
			
		||||
                float point_size;
 | 
			
		||||
 | 
			
		||||
@ -865,7 +887,11 @@ public:
 | 
			
		||||
 | 
			
		||||
                u32 point_sprite_enable;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x5);
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x3);
 | 
			
		||||
 | 
			
		||||
                CounterReset counter_reset;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x1);
 | 
			
		||||
 | 
			
		||||
                u32 zeta_enable;
 | 
			
		||||
 | 
			
		||||
@ -1412,12 +1438,15 @@ private:
 | 
			
		||||
    /// Handles a write to the QUERY_GET register.
 | 
			
		||||
    void ProcessQueryGet();
 | 
			
		||||
 | 
			
		||||
    // Writes the query result accordingly
 | 
			
		||||
    /// Writes the query result accordingly.
 | 
			
		||||
    void StampQueryResult(u64 payload, bool long_query);
 | 
			
		||||
 | 
			
		||||
    // Handles Conditional Rendering
 | 
			
		||||
    /// Handles conditional rendering.
 | 
			
		||||
    void ProcessQueryCondition();
 | 
			
		||||
 | 
			
		||||
    /// Handles counter resets.
 | 
			
		||||
    void ProcessCounterReset();
 | 
			
		||||
 | 
			
		||||
    /// Handles writes to syncing register.
 | 
			
		||||
    void ProcessSyncPoint();
 | 
			
		||||
 | 
			
		||||
@ -1434,6 +1463,9 @@ private:
 | 
			
		||||
 | 
			
		||||
    // Handles an instance draw call from the MME.
 | 
			
		||||
    void StepInstance(MMEDrawMode expected_mode, u32 count);
 | 
			
		||||
 | 
			
		||||
    /// Returns a query's value or an empty object if the value will be deferred through a cache.
 | 
			
		||||
    std::optional<u64> GetQueryResult();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define ASSERT_REG_POSITION(field_name, position)                                                  \
 | 
			
		||||
@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 | 
			
		||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
 | 
			
		||||
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 | 
			
		||||
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
 | 
			
		||||
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 | 
			
		||||
ASSERT_REG_POSITION(point_size, 0x546);
 | 
			
		||||
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 | 
			
		||||
ASSERT_REG_POSITION(counter_reset, 0x54C);
 | 
			
		||||
ASSERT_REG_POSITION(zeta_enable, 0x54E);
 | 
			
		||||
ASSERT_REG_POSITION(multisample_control, 0x54F);
 | 
			
		||||
ASSERT_REG_POSITION(condition, 0x554);
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										359
									
								
								src/video_core/query_cache.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										359
									
								
								src/video_core/query_cache.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,359 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <iterator>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <mutex>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/gpu.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon {
 | 
			
		||||
 | 
			
		||||
template <class QueryCache, class HostCounter>
 | 
			
		||||
class CounterStreamBase {
 | 
			
		||||
public:
 | 
			
		||||
    explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
 | 
			
		||||
        : cache{cache}, type{type} {}
 | 
			
		||||
 | 
			
		||||
    /// Updates the state of the stream, enabling or disabling as needed.
 | 
			
		||||
    void Update(bool enabled) {
 | 
			
		||||
        if (enabled) {
 | 
			
		||||
            Enable();
 | 
			
		||||
        } else {
 | 
			
		||||
            Disable();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Resets the stream to zero. It doesn't disable the query after resetting.
 | 
			
		||||
    void Reset() {
 | 
			
		||||
        if (current) {
 | 
			
		||||
            current->EndQuery();
 | 
			
		||||
 | 
			
		||||
            // Immediately start a new query to avoid disabling its state.
 | 
			
		||||
            current = cache.Counter(nullptr, type);
 | 
			
		||||
        }
 | 
			
		||||
        last = nullptr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the current counter slicing as needed.
 | 
			
		||||
    std::shared_ptr<HostCounter> Current() {
 | 
			
		||||
        if (!current) {
 | 
			
		||||
            return nullptr;
 | 
			
		||||
        }
 | 
			
		||||
        current->EndQuery();
 | 
			
		||||
        last = std::move(current);
 | 
			
		||||
        current = cache.Counter(last, type);
 | 
			
		||||
        return last;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns true when the counter stream is enabled.
 | 
			
		||||
    bool IsEnabled() const {
 | 
			
		||||
        return current != nullptr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Enables the stream.
 | 
			
		||||
    void Enable() {
 | 
			
		||||
        if (current) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        current = cache.Counter(last, type);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Disables the stream.
 | 
			
		||||
    void Disable() {
 | 
			
		||||
        if (current) {
 | 
			
		||||
            current->EndQuery();
 | 
			
		||||
        }
 | 
			
		||||
        last = std::exchange(current, nullptr);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    QueryCache& cache;
 | 
			
		||||
    const VideoCore::QueryType type;
 | 
			
		||||
 | 
			
		||||
    std::shared_ptr<HostCounter> current;
 | 
			
		||||
    std::shared_ptr<HostCounter> last;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
 | 
			
		||||
          class QueryPool>
 | 
			
		||||
class QueryCacheBase {
 | 
			
		||||
public:
 | 
			
		||||
    explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
 | 
			
		||||
        : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
 | 
			
		||||
                                                      static_cast<QueryCache&>(*this),
 | 
			
		||||
                                                      VideoCore::QueryType::SamplesPassed}}} {}
 | 
			
		||||
 | 
			
		||||
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        FlushAndRemoveRegion(addr, size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void FlushRegion(CacheAddr addr, std::size_t size) {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        FlushAndRemoveRegion(addr, size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Records a query in GPU mapped memory, potentially marked with a timestamp.
 | 
			
		||||
     * @param gpu_addr  GPU address to flush to when the mapped memory is read.
 | 
			
		||||
     * @param type      Query type, e.g. SamplesPassed.
 | 
			
		||||
     * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
 | 
			
		||||
     */
 | 
			
		||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        auto& memory_manager = system.GPU().MemoryManager();
 | 
			
		||||
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
			
		||||
 | 
			
		||||
        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
 | 
			
		||||
        if (!query) {
 | 
			
		||||
            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
 | 
			
		||||
            ASSERT_OR_EXECUTE(cpu_addr, return;);
 | 
			
		||||
 | 
			
		||||
            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        query->BindCounter(Stream(type).Current(), timestamp);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
 | 
			
		||||
    void UpdateCounters() {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        const auto& regs = system.GPU().Maxwell3D().regs;
 | 
			
		||||
        Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Resets a counter to zero. It doesn't disable the query after resetting.
 | 
			
		||||
    void ResetCounter(VideoCore::QueryType type) {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        Stream(type).Reset();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Disable all active streams. Expected to be called at the end of a command buffer.
 | 
			
		||||
    void DisableStreams() {
 | 
			
		||||
        std::unique_lock lock{mutex};
 | 
			
		||||
        for (auto& stream : streams) {
 | 
			
		||||
            stream.Update(false);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns a new host counter.
 | 
			
		||||
    std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
 | 
			
		||||
                                         VideoCore::QueryType type) {
 | 
			
		||||
        return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
 | 
			
		||||
                                             type);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the counter stream of the specified type.
 | 
			
		||||
    CounterStream& Stream(VideoCore::QueryType type) {
 | 
			
		||||
        return streams[static_cast<std::size_t>(type)];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the counter stream of the specified type.
 | 
			
		||||
    const CounterStream& Stream(VideoCore::QueryType type) const {
 | 
			
		||||
        return streams[static_cast<std::size_t>(type)];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Flushes a memory range to guest memory and removes it from the cache.
 | 
			
		||||
    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
 | 
			
		||||
        const u64 addr_begin = static_cast<u64>(addr);
 | 
			
		||||
        const u64 addr_end = addr_begin + static_cast<u64>(size);
 | 
			
		||||
        const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
 | 
			
		||||
            const u64 cache_begin = query.GetCacheAddr();
 | 
			
		||||
            const u64 cache_end = cache_begin + query.SizeInBytes();
 | 
			
		||||
            return cache_begin < addr_end && addr_begin < cache_end;
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        const u64 page_end = addr_end >> PAGE_SHIFT;
 | 
			
		||||
        for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
 | 
			
		||||
            const auto& it = cached_queries.find(page);
 | 
			
		||||
            if (it == std::end(cached_queries)) {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            auto& contents = it->second;
 | 
			
		||||
            for (auto& query : contents) {
 | 
			
		||||
                if (!in_range(query)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
 | 
			
		||||
                query.Flush();
 | 
			
		||||
            }
 | 
			
		||||
            contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
 | 
			
		||||
                           std::end(contents));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
 | 
			
		||||
    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
 | 
			
		||||
        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
 | 
			
		||||
        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
 | 
			
		||||
        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
 | 
			
		||||
                                                  host_ptr);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Tries to a get a cached query. Returns nullptr on failure.
 | 
			
		||||
    CachedQuery* TryGet(CacheAddr addr) {
 | 
			
		||||
        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
 | 
			
		||||
        const auto it = cached_queries.find(page);
 | 
			
		||||
        if (it == std::end(cached_queries)) {
 | 
			
		||||
            return nullptr;
 | 
			
		||||
        }
 | 
			
		||||
        auto& contents = it->second;
 | 
			
		||||
        const auto found =
 | 
			
		||||
            std::find_if(std::begin(contents), std::end(contents),
 | 
			
		||||
                         [addr](auto& query) { return query.GetCacheAddr() == addr; });
 | 
			
		||||
        return found != std::end(contents) ? &*found : nullptr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static constexpr std::uintptr_t PAGE_SIZE = 4096;
 | 
			
		||||
    static constexpr unsigned PAGE_SHIFT = 12;
 | 
			
		||||
 | 
			
		||||
    Core::System& system;
 | 
			
		||||
    VideoCore::RasterizerInterface& rasterizer;
 | 
			
		||||
 | 
			
		||||
    std::recursive_mutex mutex;
 | 
			
		||||
 | 
			
		||||
    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 | 
			
		||||
 | 
			
		||||
    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class QueryCache, class HostCounter>
 | 
			
		||||
class HostCounterBase {
 | 
			
		||||
public:
 | 
			
		||||
    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
 | 
			
		||||
        : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
 | 
			
		||||
        // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
 | 
			
		||||
        constexpr u64 depth_threshold = 96;
 | 
			
		||||
        if (depth > depth_threshold) {
 | 
			
		||||
            depth = 0;
 | 
			
		||||
            base_result = dependency->Query();
 | 
			
		||||
            dependency = nullptr;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    virtual ~HostCounterBase() = default;
 | 
			
		||||
 | 
			
		||||
    /// Returns the current value of the query.
 | 
			
		||||
    u64 Query() {
 | 
			
		||||
        if (result) {
 | 
			
		||||
            return *result;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        u64 value = BlockingQuery() + base_result;
 | 
			
		||||
        if (dependency) {
 | 
			
		||||
            value += dependency->Query();
 | 
			
		||||
            dependency = nullptr;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        result = value;
 | 
			
		||||
        return *result;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns true when flushing this query will potentially wait.
 | 
			
		||||
    bool WaitPending() const noexcept {
 | 
			
		||||
        return result.has_value();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u64 Depth() const noexcept {
 | 
			
		||||
        return depth;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    /// Returns the value of query from the backend API blocking as needed.
 | 
			
		||||
    virtual u64 BlockingQuery() const = 0;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
 | 
			
		||||
    std::optional<u64> result;               ///< Filled with the already returned value.
 | 
			
		||||
    u64 depth;                               ///< Number of nested dependencies.
 | 
			
		||||
    u64 base_result = 0;                     ///< Equivalent to nested dependencies value.
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class HostCounter>
 | 
			
		||||
class CachedQueryBase {
 | 
			
		||||
public:
 | 
			
		||||
    explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
 | 
			
		||||
        : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
 | 
			
		||||
    virtual ~CachedQueryBase() = default;
 | 
			
		||||
 | 
			
		||||
    CachedQueryBase(CachedQueryBase&&) noexcept = default;
 | 
			
		||||
    CachedQueryBase(const CachedQueryBase&) = delete;
 | 
			
		||||
 | 
			
		||||
    CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
 | 
			
		||||
    CachedQueryBase& operator=(const CachedQueryBase&) = delete;
 | 
			
		||||
 | 
			
		||||
    /// Flushes the query to guest memory.
 | 
			
		||||
    virtual void Flush() {
 | 
			
		||||
        // When counter is nullptr it means that it's just been reseted. We are supposed to write a
 | 
			
		||||
        // zero in these cases.
 | 
			
		||||
        const u64 value = counter ? counter->Query() : 0;
 | 
			
		||||
        std::memcpy(host_ptr, &value, sizeof(u64));
 | 
			
		||||
 | 
			
		||||
        if (timestamp) {
 | 
			
		||||
            std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Binds a counter to this query.
 | 
			
		||||
    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
 | 
			
		||||
        if (counter) {
 | 
			
		||||
            // If there's an old counter set it means the query is being rewritten by the game.
 | 
			
		||||
            // To avoid losing the data forever, flush here.
 | 
			
		||||
            Flush();
 | 
			
		||||
        }
 | 
			
		||||
        counter = std::move(counter_);
 | 
			
		||||
        timestamp = timestamp_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    VAddr CpuAddr() const noexcept {
 | 
			
		||||
        return cpu_addr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    CacheAddr GetCacheAddr() const noexcept {
 | 
			
		||||
        return ToCacheAddr(host_ptr);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u64 SizeInBytes() const noexcept {
 | 
			
		||||
        return SizeInBytes(timestamp.has_value());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
 | 
			
		||||
        return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    /// Returns true when querying the counter may potentially block.
 | 
			
		||||
    bool WaitPending() const noexcept {
 | 
			
		||||
        return counter && counter->WaitPending();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    static constexpr std::size_t SMALL_QUERY_SIZE = 8;   // Query size without timestamp.
 | 
			
		||||
    static constexpr std::size_t LARGE_QUERY_SIZE = 16;  // Query size with timestamp.
 | 
			
		||||
    static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
 | 
			
		||||
 | 
			
		||||
    VAddr cpu_addr;                       ///< Guest CPU address.
 | 
			
		||||
    u8* host_ptr;                         ///< Writable host pointer.
 | 
			
		||||
    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
 | 
			
		||||
    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCommon
 | 
			
		||||
@ -6,6 +6,7 @@
 | 
			
		||||
 | 
			
		||||
#include <atomic>
 | 
			
		||||
#include <functional>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/engines/fermi_2d.h"
 | 
			
		||||
#include "video_core/gpu.h"
 | 
			
		||||
@ -17,6 +18,11 @@ class MemoryManager;
 | 
			
		||||
 | 
			
		||||
namespace VideoCore {
 | 
			
		||||
 | 
			
		||||
/// Kinds of GPU queries a rasterizer can reset and record.
enum class QueryType {
    SamplesPassed = 0, ///< Values are used as indices into per-type tables.
};
 | 
			
		||||
/// Number of enumerators in QueryType; used to size per-type arrays.
constexpr std::size_t NumQueryTypes = 1;
 | 
			
		||||
 | 
			
		||||
enum class LoadCallbackStage {
 | 
			
		||||
    Prepare,
 | 
			
		||||
    Decompile,
 | 
			
		||||
@ -41,6 +47,12 @@ public:
 | 
			
		||||
    /// Dispatches a compute shader invocation
 | 
			
		||||
    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
 | 
			
		||||
 | 
			
		||||
    /// Resets the counter of a query
 | 
			
		||||
    virtual void ResetCounter(QueryType type) = 0;
 | 
			
		||||
 | 
			
		||||
    /// Records a GPU query and caches it
 | 
			
		||||
    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 | 
			
		||||
 | 
			
		||||
    /// Notify rasterizer that all caches should be flushed to Switch memory
 | 
			
		||||
    virtual void FlushAll() = 0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										120
									
								
								src/video_core/renderer_opengl/gl_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								src/video_core/renderer_opengl/gl_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,120 @@
 | 
			
		||||
// Copyright 2019 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
			
		||||
 | 
			
		||||
namespace OpenGL {
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
 | 
			
		||||
 | 
			
		||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
 | 
			
		||||
    return QueryTargets[static_cast<std::size_t>(type)];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
 | 
			
		||||
    : VideoCommon::QueryCacheBase<
 | 
			
		||||
          QueryCache, CachedQuery, CounterStream, HostCounter,
 | 
			
		||||
          std::vector<OGLQuery>>{system,
 | 
			
		||||
                                 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
 | 
			
		||||
      gl_rasterizer{gl_rasterizer} {}
 | 
			
		||||
 | 
			
		||||
/// Destroys the cache; cleanup is left to the compiler-generated destructor.
QueryCache::~QueryCache() = default;
 | 
			
		||||
 | 
			
		||||
/// Returns a query object for the given type, recycling a reserved one when possible.
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    OGLQuery result;
    if (!pool.empty()) {
        // Recycle the most recently reserved query object.
        result = std::move(pool.back());
        pool.pop_back();
    } else {
        // Nothing to recycle, create a new OpenGL query object.
        result.Create(GetTarget(type));
    }
    return result;
}
 | 
			
		||||
 | 
			
		||||
/// Stores a query object in the pool of its type so a later AllocateQuery can reuse it.
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    pool.push_back(std::move(query));
}
 | 
			
		||||
 | 
			
		||||
bool QueryCache::AnyCommandQueued() const noexcept {
 | 
			
		||||
    return gl_rasterizer.AnyCommandQueued();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Obtains a query object from the cache and begins the OpenGL query for the given type.
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type)
    : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
      type{type}, query{cache.AllocateQuery(type)} {
    const GLenum target = GetTarget(type);
    glBeginQuery(target, query.handle);
}
 | 
			
		||||
 | 
			
		||||
/// Returns the query object to the cache so it can be reused by another counter.
HostCounter::~HostCounter() {
    OGLQuery& released = query;
    cache.Reserve(type, std::move(released));
}
 | 
			
		||||
 | 
			
		||||
void HostCounter::EndQuery() {
 | 
			
		||||
    if (!cache.AnyCommandQueued()) {
 | 
			
		||||
        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
 | 
			
		||||
        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
 | 
			
		||||
        // for this. Insert to the OpenGL command stream a flush.
 | 
			
		||||
        glFlush();
 | 
			
		||||
    }
 | 
			
		||||
    glEndQuery(GetTarget(type));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Reads the result of this counter's query object with GL_QUERY_RESULT.
/// @return The query result converted to an unsigned 64-bit value.
u64 HostCounter::BlockingQuery() const {
    // Zero-initialized on purpose: a GL command that generates an error does not write to its
    // output pointer, which would otherwise leave an indeterminate value to be returned.
    GLint64 value = 0;
    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
    return static_cast<u64>(value);
}
 | 
			
		||||
 | 
			
		||||
/// Creates a cached query bound to a guest address, remembering its owning cache and type.
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
    : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr},
      cache{std::addressof(cache)}, type{type} {}
 | 
			
		||||
 | 
			
		||||
/// Move constructor. The base subobject is moved; the cache pointer and the type are copied
/// from rhs afterwards, which only reads the members declared by CachedQuery itself.
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
    : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)),
      cache{rhs.cache},
      type{rhs.type} {}
 | 
			
		||||
 | 
			
		||||
/// Move assignment. Moves the base subobject, then copies the cache pointer and the type.
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
    using Base = VideoCommon::CachedQueryBase<HostCounter>;
    Base::operator=(std::move(rhs));

    // These members are declared by CachedQuery itself and are plain copies.
    cache = rhs.cache;
    type = rhs.type;
    return *this;
}
 | 
			
		||||
 | 
			
		||||
void CachedQuery::Flush() {
 | 
			
		||||
    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
 | 
			
		||||
    // To avoid this disable and re-enable keeping the dependency stream.
 | 
			
		||||
    // But we only have to do this if we have pending waits to be done.
 | 
			
		||||
    auto& stream = cache->Stream(type);
 | 
			
		||||
    const bool slice_counter = WaitPending() && stream.IsEnabled();
 | 
			
		||||
    if (slice_counter) {
 | 
			
		||||
        stream.Update(false);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    VideoCommon::CachedQueryBase<HostCounter>::Flush();
 | 
			
		||||
 | 
			
		||||
    if (slice_counter) {
 | 
			
		||||
        stream.Update(true);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
							
								
								
									
										78
									
								
								src/video_core/renderer_opengl/gl_query_cache.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								src/video_core/renderer_opengl/gl_query_cache.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,78 @@
 | 
			
		||||
// Copyright 2019 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/query_cache.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
 | 
			
		||||
namespace Core {
 | 
			
		||||
class System;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace OpenGL {
 | 
			
		||||
 | 
			
		||||
class CachedQuery;
 | 
			
		||||
class HostCounter;
 | 
			
		||||
class QueryCache;
 | 
			
		||||
class RasterizerOpenGL;
 | 
			
		||||
 | 
			
		||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 | 
			
		||||
 | 
			
		||||
/// OpenGL query cache. On top of VideoCommon::QueryCacheBase it hands out and recycles
/// OGLQuery objects.
class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
                                                            HostCounter, std::vector<OGLQuery>> {
public:
    explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
    ~QueryCache();

    /// Returns a query object for the given type, reusing a reserved one when available.
    OGLQuery AllocateQuery(VideoCore::QueryType type);

    /// Stores a query object so that a later AllocateQuery of the same type can reuse it.
    void Reserve(VideoCore::QueryType type, OGLQuery&& query);

    /// Returns true when the rasterizer reports commands queued to OpenGL.
    bool AnyCommandQueued() const noexcept;

private:
    RasterizerOpenGL& gl_rasterizer; ///< Rasterizer asked about queued commands.
};
 | 
			
		||||
 | 
			
		||||
/// Host counter backed by one OpenGL query object taken from the QueryCache.
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
    /// Allocates a query object from the cache and begins the query.
    explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type);

    /// Hands the query object back to the cache.
    ~HostCounter();

    /// Ends the query that was begun on construction.
    void EndQuery();

private:
    /// Reads the query result from OpenGL.
    u64 BlockingQuery() const override;

    QueryCache& cache;               ///< Cache the query object is taken from and returned to.
    const VideoCore::QueryType type; ///< Type of the query.
    OGLQuery query;                  ///< OpenGL query object.
};
 | 
			
		||||
 | 
			
		||||
/// Movable, non-copyable cached query of the OpenGL backend.
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
                         u8* host_ptr);

    CachedQuery(CachedQuery&& rhs) noexcept;
    CachedQuery(const CachedQuery&) = delete;

    CachedQuery& operator=(CachedQuery&& rhs) noexcept;
    CachedQuery& operator=(const CachedQuery&) = delete;

    /// Flushes the query, slicing the counter stream of its type when the flush may wait.
    void Flush() override;

private:
    QueryCache* cache;         ///< Cache used to reach the counter stream of this query's type.
    VideoCore::QueryType type; ///< Type of the query.
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
@ -25,6 +25,7 @@
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/engines/shader_type.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
 | 
			
		||||
@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
 | 
			
		||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
 | 
			
		||||
                                   ScreenInfo& info)
 | 
			
		||||
    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
 | 
			
		||||
      shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
 | 
			
		||||
      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
 | 
			
		||||
      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
 | 
			
		||||
      screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
 | 
			
		||||
    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
 | 
			
		||||
    state.draw.shader_program = 0;
 | 
			
		||||
    state.Apply();
 | 
			
		||||
@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() {
 | 
			
		||||
    } else if (use_stencil) {
 | 
			
		||||
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ++num_queued_commands;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_Drawing);
 | 
			
		||||
    auto& gpu = system.GPU().Maxwell3D();
 | 
			
		||||
    const auto& regs = gpu.regs;
 | 
			
		||||
 | 
			
		||||
    query_cache.UpdateCounters();
 | 
			
		||||
 | 
			
		||||
    SyncRasterizeEnable(state);
 | 
			
		||||
    SyncColorMask();
 | 
			
		||||
@ -638,6 +644,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
        glTextureBarrier();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ++num_queued_commands;
 | 
			
		||||
 | 
			
		||||
    const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
 | 
			
		||||
    const GLsizei num_instances =
 | 
			
		||||
        static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
 | 
			
		||||
@ -707,6 +715,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
 | 
			
		||||
    state.ApplyProgramPipeline();
 | 
			
		||||
 | 
			
		||||
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
 | 
			
		||||
    ++num_queued_commands;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Resets the counter of a query type by forwarding the request to the query cache.
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    auto& queries = query_cache;
    queries.ResetCounter(type);
}
 | 
			
		||||
 | 
			
		||||
/// Records a GPU query by forwarding address, type and optional timestamp to the query cache.
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    auto& queries = query_cache;
    queries.Query(gpu_addr, type, timestamp);
}
 | 
			
		||||
 | 
			
		||||
/// Intentionally empty: this implementation performs no work when asked to flush all caches.
void RasterizerOpenGL::FlushAll() {}
 | 
			
		||||
@ -718,6 +736,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
    }
 | 
			
		||||
    texture_cache.FlushRegion(addr, size);
 | 
			
		||||
    buffer_cache.FlushRegion(addr, size);
 | 
			
		||||
    query_cache.FlushRegion(addr, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
@ -728,6 +747,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
    texture_cache.InvalidateRegion(addr, size);
 | 
			
		||||
    shader_cache.InvalidateRegion(addr, size);
 | 
			
		||||
    buffer_cache.InvalidateRegion(addr, size);
 | 
			
		||||
    query_cache.InvalidateRegion(addr, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
@ -738,10 +758,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Issues a glFlush and resets the queued command counter, but only if something was queued.
void RasterizerOpenGL::FlushCommands() {
    // Flushing is only worthwhile when commands have been queued to OpenGL.
    if (num_queued_commands != 0) {
        num_queued_commands = 0;
        glFlush();
    }
}
 | 
			
		||||
 | 
			
		||||
/// Per-frame bookkeeping: resets the queued command counter and ticks the buffer cache.
void RasterizerOpenGL::TickFrame() {
    // Buffers are swapped when a frame is ticked, which implicitly calls glFlush, so no
    // commands remain queued from our point of view.
    num_queued_commands = 0;

    buffer_cache.TickFrame();
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -24,6 +24,7 @@
 | 
			
		||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_device.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
 | 
			
		||||
@ -61,6 +62,8 @@ public:
 | 
			
		||||
    bool DrawMultiBatch(bool is_indexed) override;
 | 
			
		||||
    void Clear() override;
 | 
			
		||||
    void DispatchCompute(GPUVAddr code_addr) override;
 | 
			
		||||
    void ResetCounter(VideoCore::QueryType type) override;
 | 
			
		||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
			
		||||
    void FlushAll() override;
 | 
			
		||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
			
		||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
			
		||||
@ -75,6 +78,11 @@ public:
 | 
			
		||||
    void LoadDiskResources(const std::atomic_bool& stop_loading,
 | 
			
		||||
                           const VideoCore::DiskResourceLoadCallback& callback) override;
 | 
			
		||||
 | 
			
		||||
    /// Returns true when there are commands queued to the OpenGL server.
 | 
			
		||||
    bool AnyCommandQueued() const {
 | 
			
		||||
        return num_queued_commands > 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Configures the color and depth framebuffer states.
 | 
			
		||||
    void ConfigureFramebuffers();
 | 
			
		||||
@ -180,10 +188,23 @@ private:
 | 
			
		||||
    /// Syncs the alpha test state to match the guest state
 | 
			
		||||
    void SyncAlphaTest();
 | 
			
		||||
 | 
			
		||||
    /// Check for extension that are not strictly required
 | 
			
		||||
    /// but are needed for correct emulation
 | 
			
		||||
    /// Check for extension that are not strictly required but are needed for correct emulation
 | 
			
		||||
    void CheckExtensions();
 | 
			
		||||
 | 
			
		||||
    std::size_t CalculateVertexArraysSize() const;
 | 
			
		||||
 | 
			
		||||
    std::size_t CalculateIndexBufferSize() const;
 | 
			
		||||
 | 
			
		||||
    /// Updates and returns a vertex array object representing current vertex format
 | 
			
		||||
    GLuint SetupVertexFormat();
 | 
			
		||||
 | 
			
		||||
    void SetupVertexBuffer(GLuint vao);
 | 
			
		||||
    void SetupVertexInstances(GLuint vao);
 | 
			
		||||
 | 
			
		||||
    GLintptr SetupIndexBuffer();
 | 
			
		||||
 | 
			
		||||
    void SetupShaders(GLenum primitive_mode);
 | 
			
		||||
 | 
			
		||||
    const Device device;
 | 
			
		||||
    OpenGLState state;
 | 
			
		||||
 | 
			
		||||
@ -191,6 +212,7 @@ private:
 | 
			
		||||
    ShaderCacheOpenGL shader_cache;
 | 
			
		||||
    SamplerCacheOpenGL sampler_cache;
 | 
			
		||||
    FramebufferCacheOpenGL framebuffer_cache;
 | 
			
		||||
    QueryCache query_cache;
 | 
			
		||||
 | 
			
		||||
    Core::System& system;
 | 
			
		||||
    ScreenInfo& screen_info;
 | 
			
		||||
@ -208,19 +230,8 @@ private:
 | 
			
		||||
    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
 | 
			
		||||
    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
 | 
			
		||||
 | 
			
		||||
    std::size_t CalculateVertexArraysSize() const;
 | 
			
		||||
 | 
			
		||||
    std::size_t CalculateIndexBufferSize() const;
 | 
			
		||||
 | 
			
		||||
    /// Updates and returns a vertex array object representing current vertex format
 | 
			
		||||
    GLuint SetupVertexFormat();
 | 
			
		||||
 | 
			
		||||
    void SetupVertexBuffer(GLuint vao);
 | 
			
		||||
    void SetupVertexInstances(GLuint vao);
 | 
			
		||||
 | 
			
		||||
    GLintptr SetupIndexBuffer();
 | 
			
		||||
 | 
			
		||||
    void SetupShaders(GLenum primitive_mode);
 | 
			
		||||
    /// Number of commands queued to the OpenGL driver. Reset on flush.
 | 
			
		||||
    std::size_t num_queued_commands = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -207,4 +207,21 @@ void OGLFramebuffer::Release() {
 | 
			
		||||
    handle = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Creates the OpenGL query object for the given target unless a handle is already held.
void OGLQuery::Create(GLenum target) {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateQueries(target, 1, &handle);
    }
}
 | 
			
		||||
 | 
			
		||||
/// Deletes the OpenGL query object, if any, and clears the stored handle.
void OGLQuery::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteQueries(1, &handle);
        handle = 0;
    }
}
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -266,4 +266,29 @@ public:
 | 
			
		||||
    GLuint handle = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class OGLQuery : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLQuery() = default;
 | 
			
		||||
 | 
			
		||||
    OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLQuery() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLQuery& operator=(OGLQuery&& o) noexcept {
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Creates a new internal OpenGL resource and stores the handle
 | 
			
		||||
    void Create(GLenum target);
 | 
			
		||||
 | 
			
		||||
    /// Deletes the internal OpenGL resource
 | 
			
		||||
    void Release();
 | 
			
		||||
 | 
			
		||||
    GLuint handle = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
 | 
			
		||||
    features.depthBiasClamp = true;
 | 
			
		||||
    features.geometryShader = true;
 | 
			
		||||
    features.tessellationShader = true;
 | 
			
		||||
    features.occlusionQueryPrecise = true;
 | 
			
		||||
    features.fragmentStoresAndAtomics = true;
 | 
			
		||||
    features.shaderImageGatherExtended = true;
 | 
			
		||||
    features.shaderStorageImageWriteWithoutFormat = true;
 | 
			
		||||
@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
 | 
			
		||||
    bit8_storage.uniformAndStorageBuffer8BitAccess = true;
 | 
			
		||||
    SetNext(next, bit8_storage);
 | 
			
		||||
 | 
			
		||||
    vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
 | 
			
		||||
    host_query_reset.hostQueryReset = true;
 | 
			
		||||
    SetNext(next, host_query_reset);
 | 
			
		||||
 | 
			
		||||
    vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
 | 
			
		||||
    if (is_float16_supported) {
 | 
			
		||||
        float16_int8.shaderFloat16 = true;
 | 
			
		||||
@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
 | 
			
		||||
        VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
 | 
			
		||||
        VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
 | 
			
		||||
        VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
 | 
			
		||||
        VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
 | 
			
		||||
    };
 | 
			
		||||
    std::bitset<required_extensions.size()> available_extensions{};
 | 
			
		||||
 | 
			
		||||
@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
 | 
			
		||||
        std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
 | 
			
		||||
        std::make_pair(features.geometryShader, "geometryShader"),
 | 
			
		||||
        std::make_pair(features.tessellationShader, "tessellationShader"),
 | 
			
		||||
        std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
 | 
			
		||||
        std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
 | 
			
		||||
        std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
 | 
			
		||||
        std::make_pair(features.shaderStorageImageWriteWithoutFormat,
 | 
			
		||||
@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    extensions.reserve(13);
 | 
			
		||||
    extensions.reserve(14);
 | 
			
		||||
    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
 | 
			
		||||
    extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
 | 
			
		||||
    extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
 | 
			
		||||
@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
 | 
			
		||||
    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
 | 
			
		||||
    extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
 | 
			
		||||
    extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
 | 
			
		||||
    extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
 | 
			
		||||
 | 
			
		||||
    [[maybe_unused]] const bool nsight =
 | 
			
		||||
        std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										122
									
								
								src/video_core/renderer_vulkan/vk_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								src/video_core/renderer_vulkan/vk_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,122 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <cstddef>
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "video_core/renderer_vulkan/declarations.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_device.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
 | 
			
		||||
 | 
			
		||||
namespace Vulkan {
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
 | 
			
		||||
 | 
			
		||||
constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
 | 
			
		||||
    return QUERY_TARGETS[static_cast<std::size_t>(type)];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
/// Constructs the pool, passing GROW_STEP to the VKFencedPool base.
QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
 | 
			
		||||
 | 
			
		||||
/// Destroys the pool; cleanup is left to the compiler-generated destructor.
QueryPool::~QueryPool() = default;
 | 
			
		||||
 | 
			
		||||
/// Stores the query type this pool serves and a pointer to the device used to create pools.
void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
    type = type_;
    device = &device_;
}
 | 
			
		||||
 | 
			
		||||
/// Commits resources until one is found that is not marked as in use, marks it, and returns
/// the Vulkan query pool that contains it together with the index inside that pool.
std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
    std::size_t index = CommitResource(fence);
    while (usage[index]) {
        // The committed slot is still held by a query, keep asking for another one.
        index = CommitResource(fence);
    }
    usage[index] = true;

    const std::size_t pool_index = index / GROW_STEP;
    const auto query_index = static_cast<std::uint32_t>(index % GROW_STEP);
    return {*pools[pool_index], query_index};
}
 | 
			
		||||
 | 
			
		||||
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
 | 
			
		||||
    usage.resize(end);
 | 
			
		||||
 | 
			
		||||
    const auto dev = device->GetLogical();
 | 
			
		||||
    const u32 size = static_cast<u32>(end - begin);
 | 
			
		||||
    const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
 | 
			
		||||
    pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Marks a query as no longer in use. The query is identified by the Vulkan query pool it
/// belongs to and its index inside that pool.
void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
    const auto it = std::find_if(pools.begin(), pools.end(),
                                 [query_pool = query.first](auto& candidate) {
                                     return query_pool == *candidate;
                                 });
    ASSERT(it != pools.end());

    // Convert the (pool, index) pair back to a position in the flat usage table.
    const std::ptrdiff_t pool_index = std::distance(pools.begin(), it);
    const auto query_index = static_cast<std::ptrdiff_t>(query.second);
    usage[pool_index * GROW_STEP + query_index] = false;
}
 | 
			
		||||
 | 
			
		||||
/// Builds the Vulkan query cache and initializes one query pool per query type.
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           const VKDevice& device, VKScheduler& scheduler)
    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
                                  QueryPool>{system, rasterizer},
      device{device}, scheduler{scheduler} {
    for (std::size_t index = 0; index < VideoCore::NumQueryTypes; ++index) {
        const auto query_type = static_cast<VideoCore::QueryType>(index);
        query_pools[index].Initialize(device, query_type);
    }
}
 | 
			
		||||
 | 
			
		||||
/// Destroys the cache; cleanup is left to the compiler-generated destructor.
VKQueryCache::~VKQueryCache() = default;
 | 
			
		||||
 | 
			
		||||
/// Commits a query from the pool of the given type, guarded by the scheduler's current fence.
std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    return pool.Commit(scheduler.GetFence());
}
 | 
			
		||||
 | 
			
		||||
/// Returns a query to the pool of the given type so that it can be committed again.
void VKQueryCache::Reserve(VideoCore::QueryType type,
                           std::pair<vk::QueryPool, std::uint32_t> query) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    pool.Reserve(query);
}
 | 
			
		||||
 | 
			
		||||
/// Allocates a query from the cache, remembers the scheduler tick it was created at, and
/// records commands that reset the query and begin it with precise results.
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type)
    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
    const auto logical = cache.Device().GetLogical();
    // The pool and index are captured by value because the recorded command runs later.
    cache.Scheduler().Record([logical, pool = query.first,
                              index = query.second](vk::CommandBuffer cmdbuf, auto& dld) {
        logical.resetQueryPoolEXT(pool, index, 1, dld);
        cmdbuf.beginQuery(pool, index, vk::QueryControlFlagBits::ePrecise, dld);
    });
}
 | 
			
		||||
 | 
			
		||||
/// Returns the query to the cache so that it can be reused by another counter.
HostCounter::~HostCounter() {
    const auto& released = query;
    cache.Reserve(type, released);
}
 | 
			
		||||
 | 
			
		||||
void HostCounter::EndQuery() {
 | 
			
		||||
    cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
 | 
			
		||||
        cmdbuf.endQuery(query.first, query.second, dld);
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reads back the 64-bit result of this counter's query, waiting until it is available.
u64 HostCounter::BlockingQuery() const {
    // When the scheduler tick has not advanced past the one captured at construction, flush the
    // scheduler first (presumably so the commands that record this query are submitted before the
    // wait below - confirm against VKScheduler::Flush).
    if (ticks >= cache.Scheduler().Ticks()) {
        cache.Scheduler().Flush();
    }

    const auto dev = cache.Device().GetLogical();
    const auto& dld = cache.Device().GetDispatchLoader();
    // Zero-initialize the destination: the status returned by getQueryPoolResults is not
    // inspected, so a failed readback must not hand an indeterminate value back to the caller.
    u64 value = 0;
    dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
                            vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
    return value;
}
 | 
			
		||||
 | 
			
		||||
} // namespace Vulkan
 | 
			
		||||
							
								
								
									
										104
									
								
								src/video_core/renderer_vulkan/vk_query_cache.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								src/video_core/renderer_vulkan/vk_query_cache.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,104 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <cstddef>
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/query_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/declarations.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
 | 
			
		||||
 | 
			
		||||
namespace VideoCore {
 | 
			
		||||
class RasterizerInterface;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Vulkan {
 | 
			
		||||
 | 
			
		||||
class CachedQuery;
 | 
			
		||||
class HostCounter;
 | 
			
		||||
class VKDevice;
 | 
			
		||||
class VKQueryCache;
 | 
			
		||||
class VKScheduler;
 | 
			
		||||
 | 
			
		||||
/// Counter stream type instantiated for the Vulkan query cache and its host counter.
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
 | 
			
		||||
 | 
			
		||||
class QueryPool final : public VKFencedPool {
 | 
			
		||||
public:
 | 
			
		||||
    explicit QueryPool();
 | 
			
		||||
    ~QueryPool() override;
 | 
			
		||||
 | 
			
		||||
    void Initialize(const VKDevice& device, VideoCore::QueryType type);
 | 
			
		||||
 | 
			
		||||
    std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
 | 
			
		||||
 | 
			
		||||
    void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    void Allocate(std::size_t begin, std::size_t end) override;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    static constexpr std::size_t GROW_STEP = 512;
 | 
			
		||||
 | 
			
		||||
    const VKDevice* device = nullptr;
 | 
			
		||||
    VideoCore::QueryType type = {};
 | 
			
		||||
 | 
			
		||||
    std::vector<UniqueQueryPool> pools;
 | 
			
		||||
    std::vector<bool> usage;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class VKQueryCache final
 | 
			
		||||
    : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
 | 
			
		||||
                                         QueryPool> {
 | 
			
		||||
public:
 | 
			
		||||
    explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
			
		||||
                          const VKDevice& device, VKScheduler& scheduler);
 | 
			
		||||
    ~VKQueryCache();
 | 
			
		||||
 | 
			
		||||
    std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
 | 
			
		||||
 | 
			
		||||
    void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
 | 
			
		||||
 | 
			
		||||
    const VKDevice& Device() const noexcept {
 | 
			
		||||
        return device;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    VKScheduler& Scheduler() const noexcept {
 | 
			
		||||
        return scheduler;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    const VKDevice& device;
 | 
			
		||||
    VKScheduler& scheduler;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Host-side counter backed by a single Vulkan query taken from VKQueryCache.
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
    /// Allocates a query of the given type and records the commands that begin it.
    explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type);
    /// Passes the query back to the cache.
    ~HostCounter();

    /// Records the command that ends the query.
    void EndQuery();

private:
    /// Reads the query result back from the device, waiting for it to become available.
    u64 BlockingQuery() const override;

    VKQueryCache& cache;
    const VideoCore::QueryType type;
    /// Query pool handle and index of this counter's query.
    const std::pair<vk::QueryPool, std::uint32_t> query;
    /// Scheduler tick sampled when the counter was constructed.
    const u64 ticks;
};
 | 
			
		||||
 | 
			
		||||
/// Cached query entry for the Vulkan backend. The cache and query type arguments are accepted but
/// unused; only the addresses are passed on to the base class.
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
        : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
};
 | 
			
		||||
 | 
			
		||||
} // namespace Vulkan
 | 
			
		||||
@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 | 
			
		||||
                    staging_pool),
 | 
			
		||||
      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
 | 
			
		||||
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
 | 
			
		||||
      sampler_cache(device) {}
 | 
			
		||||
      sampler_cache(device), query_cache(system, *this, device, scheduler) {
 | 
			
		||||
    scheduler.SetQueryCache(query_cache);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RasterizerVulkan::~RasterizerVulkan() = default;
 | 
			
		||||
 | 
			
		||||
@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
 | 
			
		||||
    FlushWork();
 | 
			
		||||
 | 
			
		||||
    query_cache.UpdateCounters();
 | 
			
		||||
 | 
			
		||||
    const auto& gpu = system.GPU().Maxwell3D();
 | 
			
		||||
    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
 | 
			
		||||
 | 
			
		||||
@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
void RasterizerVulkan::Clear() {
 | 
			
		||||
    MICROPROFILE_SCOPE(Vulkan_Clearing);
 | 
			
		||||
 | 
			
		||||
    query_cache.UpdateCounters();
 | 
			
		||||
 | 
			
		||||
    const auto& gpu = system.GPU().Maxwell3D();
 | 
			
		||||
    if (!system.GPU().Maxwell3D().ShouldExecute()) {
 | 
			
		||||
        return;
 | 
			
		||||
@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
 | 
			
		||||
    sampled_views.clear();
 | 
			
		||||
    image_views.clear();
 | 
			
		||||
 | 
			
		||||
    query_cache.UpdateCounters();
 | 
			
		||||
 | 
			
		||||
    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
 | 
			
		||||
    const ComputePipelineCacheKey key{
 | 
			
		||||
        code_addr,
 | 
			
		||||
@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Delegates the counter reset for the given query type to the query cache.
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}
 | 
			
		||||
 | 
			
		||||
// Delegates a query request (GPU address, query type and optional timestamp) to the query cache.
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}
 | 
			
		||||
 | 
			
		||||
// Empty implementation: this override performs no work.
void RasterizerVulkan::FlushAll() {}
 | 
			
		||||
 | 
			
		||||
// Flushes the given region in the texture, buffer and query caches, in that order.
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
    query_cache.FlushRegion(addr, size);
}
 | 
			
		||||
 | 
			
		||||
// Invalidates the given region in the texture, pipeline, buffer and query caches, in that order.
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
    texture_cache.InvalidateRegion(addr, size);
    pipeline_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
}
 | 
			
		||||
 | 
			
		||||
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
			
		||||
 | 
			
		||||
@ -24,6 +24,7 @@
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
 | 
			
		||||
@ -96,7 +97,7 @@ struct ImageView {
 | 
			
		||||
    vk::ImageLayout* layout = nullptr;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
 | 
			
		||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
 | 
			
		||||
public:
 | 
			
		||||
    explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
 | 
			
		||||
                              VKScreenInfo& screen_info, const VKDevice& device,
 | 
			
		||||
@ -108,6 +109,8 @@ public:
 | 
			
		||||
    bool DrawMultiBatch(bool is_indexed) override;
 | 
			
		||||
    void Clear() override;
 | 
			
		||||
    void DispatchCompute(GPUVAddr code_addr) override;
 | 
			
		||||
    void ResetCounter(VideoCore::QueryType type) override;
 | 
			
		||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
			
		||||
    void FlushAll() override;
 | 
			
		||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
			
		||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
			
		||||
@ -247,6 +250,7 @@ private:
 | 
			
		||||
    VKPipelineCache pipeline_cache;
 | 
			
		||||
    VKBufferCache buffer_cache;
 | 
			
		||||
    VKSamplerCache sampler_cache;
 | 
			
		||||
    VKQueryCache query_cache;
 | 
			
		||||
 | 
			
		||||
    std::array<View, Maxwell::NumRenderTargets> color_attachments;
 | 
			
		||||
    View zeta_attachment;
 | 
			
		||||
 | 
			
		||||
@ -6,6 +6,7 @@
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/declarations.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_device.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
 | 
			
		||||
 | 
			
		||||
@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VKScheduler::AllocateNewContext() {
 | 
			
		||||
    ++ticks;
 | 
			
		||||
 | 
			
		||||
    std::unique_lock lock{mutex};
 | 
			
		||||
    current_fence = next_fence;
 | 
			
		||||
    next_fence = &resource_manager.CommitFence();
 | 
			
		||||
@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
 | 
			
		||||
    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
 | 
			
		||||
    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
 | 
			
		||||
                         device.GetDispatchLoader());
 | 
			
		||||
    // Enable counters once again. These are disabled when a command buffer is finished.
 | 
			
		||||
    if (query_cache) {
 | 
			
		||||
        query_cache->UpdateCounters();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VKScheduler::InvalidateState() {
 | 
			
		||||
@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Disables the query streams (when a query cache is attached) and ends the current render pass.
void VKScheduler::EndPendingOperations() {
    // The query cache pointer starts out null and is only assigned through SetQueryCache, so it
    // may still be unset here. AllocateNewContext guards the same pointer in the same way.
    if (query_cache) {
        query_cache->DisableStreams();
    }
    EndRenderPass();
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -4,6 +4,7 @@
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <atomic>
 | 
			
		||||
#include <condition_variable>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <optional>
 | 
			
		||||
@ -18,6 +19,7 @@ namespace Vulkan {
 | 
			
		||||
 | 
			
		||||
class VKDevice;
 | 
			
		||||
class VKFence;
 | 
			
		||||
class VKQueryCache;
 | 
			
		||||
class VKResourceManager;
 | 
			
		||||
 | 
			
		||||
class VKFenceView {
 | 
			
		||||
@ -67,6 +69,11 @@ public:
 | 
			
		||||
    /// Binds a pipeline to the current execution context.
 | 
			
		||||
    void BindGraphicsPipeline(vk::Pipeline pipeline);
 | 
			
		||||
 | 
			
		||||
    /// Assigns the query cache.
 | 
			
		||||
    void SetQueryCache(VKQueryCache& query_cache_) {
 | 
			
		||||
        query_cache = &query_cache_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns true when viewports have been set in the current command buffer.
 | 
			
		||||
    bool TouchViewports() {
 | 
			
		||||
        return std::exchange(state.viewports, true);
 | 
			
		||||
@ -112,6 +119,11 @@ public:
 | 
			
		||||
        return current_fence;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the current command buffer tick.
 | 
			
		||||
    u64 Ticks() const {
 | 
			
		||||
        return ticks;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    class Command {
 | 
			
		||||
    public:
 | 
			
		||||
@ -205,6 +217,8 @@ private:
 | 
			
		||||
 | 
			
		||||
    const VKDevice& device;
 | 
			
		||||
    VKResourceManager& resource_manager;
 | 
			
		||||
    VKQueryCache* query_cache = nullptr;
 | 
			
		||||
 | 
			
		||||
    vk::CommandBuffer current_cmdbuf;
 | 
			
		||||
    VKFence* current_fence = nullptr;
 | 
			
		||||
    VKFence* next_fence = nullptr;
 | 
			
		||||
@ -227,6 +241,7 @@ private:
 | 
			
		||||
    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
 | 
			
		||||
    std::mutex mutex;
 | 
			
		||||
    std::condition_variable cv;
 | 
			
		||||
    std::atomic<u64> ticks = 0;
 | 
			
		||||
    bool quit = false;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user