	Buffer cache: always use async buffer downloads and fix regression.
parent 6f90dff293
commit 36c302fa32
@@ -535,12 +535,12 @@ TEST_CASE("MemoryTracker: Cached write downloads") {
     memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
     int num = 0;
     memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 1);
+    REQUIRE(num == 0);
     num = 0;
     memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
     REQUIRE(num == 0);
     REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
-    REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
+    REQUIRE(memory_track->IsRegionGpuModified(c + PAGE, PAGE));
     memory_track->FlushCachedWrites();
     REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
     REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
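
The new expectations match the fixed semantics: a page that is both cached-CPU-written (and therefore untracked) and GPU-modified no longer yields a download range, and it keeps reporting as GPU-modified until FlushCachedWrites() converts the cached write into a real CPU modification.
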
@@ -23,8 +23,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
     common_ranges.clear();
     inline_buffer_id = NULL_BUFFER_ID;

-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     if (!runtime.CanReportMemoryUsage()) {
         minimum_memory = DEFAULT_EXPECTED_MEMORY;
         critical_memory = DEFAULT_CRITICAL_MEMORY;

@@ -75,8 +73,6 @@ void BufferCache<P>::TickFrame() {
     uniform_cache_hits[0] = 0;
     uniform_cache_shots[0] = 0;

-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     const bool skip_preferred = hits * 256 < shots * 251;
     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;

@@ -491,9 +487,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {

     if (committed_ranges.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
-                async_buffers.emplace_back(std::optional<Async_Buffer>{});
-            }
+
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
     }

@@ -554,64 +549,65 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     committed_ranges.clear();
     if (downloads.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
-                async_buffers.emplace_back(std::optional<Async_Buffer>{});
-            }
+
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
     }
-    if (active_async_buffers) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-            boost::container::small_vector<BufferCopy, 4> normalized_copies;
-            IntervalSet new_async_range{};
-            runtime.PreCopyBarrier();
-            for (auto& [copy, buffer_id] : downloads) {
-                copy.dst_offset += download_staging.offset;
-                const std::array copies{copy};
-                BufferCopy second_copy{copy};
-                Buffer& buffer = slot_buffers[buffer_id];
-                second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-                VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
-                const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
-                async_downloads += std::make_pair(base_interval, 1);
-                runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-                normalized_copies.push_back(second_copy);
-            }
-            runtime.PostCopyBarrier();
-            pending_downloads.emplace_back(std::move(normalized_copies));
-            async_buffers.emplace_back(download_staging);
-        } else {
+    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+        boost::container::small_vector<BufferCopy, 4> normalized_copies;
+        IntervalSet new_async_range{};
+        runtime.PreCopyBarrier();
+        for (auto& [copy, buffer_id] : downloads) {
+            copy.dst_offset += download_staging.offset;
+            const std::array copies{copy};
+            BufferCopy second_copy{copy};
+            Buffer& buffer = slot_buffers[buffer_id];
+            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+            VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
+            const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
+            async_downloads += std::make_pair(base_interval, 1);
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+            normalized_copies.push_back(second_copy);
+        }
+        runtime.PostCopyBarrier();
+        pending_downloads.emplace_back(std::move(normalized_copies));
+        async_buffers.emplace_back(download_staging);
+    } else {
+        if (!Settings::IsGPULevelHigh()) {
             committed_ranges.clear();
             uncommitted_ranges.clear();
-        }
-    } else {
-        if constexpr (USE_MEMORY_MAPS) {
-            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-            runtime.PreCopyBarrier();
-            for (auto& [copy, buffer_id] : downloads) {
-                // Have in mind the staging buffer offset for the copy
-                copy.dst_offset += download_staging.offset;
-                const std::array copies{copy};
-                runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
-            }
-            runtime.PostCopyBarrier();
-            runtime.Finish();
-            for (const auto& [copy, buffer_id] : downloads) {
-                const Buffer& buffer = slot_buffers[buffer_id];
-                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-                // Undo the modified offset
-                const u64 dst_offset = copy.dst_offset - download_staging.offset;
-                const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-                cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
-            }
-        } else {
-            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-            for (const auto& [copy, buffer_id] : downloads) {
-                Buffer& buffer = slot_buffers[buffer_id];
-                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
-                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-                cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+        } else {
+            if constexpr (USE_MEMORY_MAPS) {
+                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+                runtime.PreCopyBarrier();
+                for (auto& [copy, buffer_id] : downloads) {
+                    // Have in mind the staging buffer offset for the copy
+                    copy.dst_offset += download_staging.offset;
+                    const std::array copies{copy};
+                    runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
+                                       false);
+                }
+                runtime.PostCopyBarrier();
+                runtime.Finish();
+                for (const auto& [copy, buffer_id] : downloads) {
+                    const Buffer& buffer = slot_buffers[buffer_id];
+                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                    // Undo the modified offset
+                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+                    cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+                }
+            } else {
+                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+                for (const auto& [copy, buffer_id] : downloads) {
+                    Buffer& buffer = slot_buffers[buffer_id];
+                    buffer.ImmediateDownload(copy.src_offset,
+                                             immediate_buffer.subspan(0, copy.size));
+                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                    cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+                }
             }
         }
     }
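
Stripped of the staging details, the new control flow reduces to a small decision tree: compile-time backend traits pick the path, and the runtime accuracy setting only matters in the non-async fallback. A minimal sketch, assuming stub flags in place of yuzu's backend traits (kImplementsAsyncDownloads, kUseMemoryMaps and IsGpuLevelHigh are illustrative placeholders, not yuzu API):

#include <cstdio>

// Stand-ins for yuzu's compile-time backend traits and the accuracy setting.
constexpr bool kImplementsAsyncDownloads = true; // IMPLEMENTS_ASYNC_DOWNLOADS
constexpr bool kUseMemoryMaps = true;            // USE_MEMORY_MAPS

bool IsGpuLevelHigh() { // Settings::IsGPULevelHigh()
    return false;
}

void CommitDownloads() {
    if constexpr (kImplementsAsyncDownloads) {
        // Always taken now: stage copies into a download buffer and let a
        // later tick write them back (pending_downloads / async_buffers).
        std::puts("async staged download");
    } else {
        if (!IsGpuLevelHigh()) {
            // No async support and accuracy below High: drop the committed
            // and uncommitted ranges instead of stalling.
            std::puts("downloads skipped");
        } else if constexpr (kUseMemoryMaps) {
            // Blocking path: staging copy, runtime.Finish(), then write-back.
            std::puts("synchronous mapped download");
        } else {
            // Last resort: per-copy ImmediateDownload into a scratch span.
            std::puts("immediate download");
        }
    }
}

int main() {
    CommitDownloads();
    return 0;
}
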
@@ -572,8 +572,6 @@ private:
     u64 critical_memory = 0;
     BufferId inline_buffer_id;

-    bool active_async_buffers = false;
-
     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
     std::vector<u8> tmp_buffer;
 };

@@ -302,6 +302,9 @@ public:
                  (pending_pointer - pending_offset) * BYTES_PER_PAGE);
         };
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if constexpr (clear) {
                 if constexpr (type == Type::CPU || type == Type::CachedCPU) {

@@ -350,8 +353,13 @@ public:
         static_assert(type != Type::Untracked);

         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         bool result = false;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word != 0) {
                 result = true;

@@ -372,9 +380,14 @@ public:
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word == 0) {
                 return;

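All three word-manager hunks apply the same one-line fix: when querying GPU state, pages that are currently untracked must not count as GPU-modified. A self-contained sketch of the bitmask idea, with a single tracking word standing in for yuzu's word manager (simplified, hypothetical types):

#include <cassert>
#include <cstdint>

// One tracking word covers 64 pages, one bit per page (simplified model of
// the state words iterated by IterateWords above).
struct TrackingWord {
    uint64_t gpu = 0;       // bit set: page was modified by the GPU
    uint64_t untracked = 0; // bit set: page is currently untracked
};

// Equivalent of the fixed query: GPU-modified bits only count where the
// page is still tracked, i.e. mask &= ~untracked.
bool IsRegionGpuModified(const TrackingWord& w, unsigned first, unsigned count) {
    uint64_t mask =
        (count >= 64) ? ~uint64_t{0} : ((uint64_t{1} << count) - 1) << first;
    mask &= ~w.untracked; // the added line in all three hunks
    return (w.gpu & mask) != 0;
}

int main() {
    TrackingWord w{};
    w.gpu = uint64_t{1} << 3;       // page 3 written by the GPU
    assert(IsRegionGpuModified(w, 0, 64));
    w.untracked = uint64_t{1} << 3; // page 3 falls back to untracked
    assert(!IsRegionGpuModified(w, 0, 64)); // fixed: no longer reported
    return 0;
}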