mirror of
				https://git.tardis.systems/mirrors/yuzu
				synced 2025-10-31 10:44:49 +01:00 
			
		
		
		
	GPU-SMMU: Estimate game leak and preallocate device region.
This commit is contained in:
		
							parent
							
								
									96fd1348ae
								
							
						
					
					
						commit
						0adc09e0af
					
				| @ -611,6 +611,8 @@ add_library(core STATIC | ||||
|     hle/service/ns/pdm_qry.h | ||||
|     hle/service/nvdrv/core/container.cpp | ||||
|     hle/service/nvdrv/core/container.h | ||||
|     hle/service/nvdrv/core/heap_mapper.cpp | ||||
|     hle/service/nvdrv/core/heap_mapper.h | ||||
|     hle/service/nvdrv/core/nvmap.cpp | ||||
|     hle/service/nvdrv/core/nvmap.h | ||||
|     hle/service/nvdrv/core/syncpoint_manager.cpp | ||||
|  | ||||
| @ -20,10 +20,10 @@ namespace Core { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| class PhysicalAddressContainer { | ||||
| class MultiAddressContainer { | ||||
| public: | ||||
|     PhysicalAddressContainer() = default; | ||||
|     ~PhysicalAddressContainer() = default; | ||||
|     MultiAddressContainer() = default; | ||||
|     ~MultiAddressContainer() = default; | ||||
| 
 | ||||
|     void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) { | ||||
|         buffer.resize(8); | ||||
| @ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator { | ||||
|     std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator> | ||||
|         pin_allocator; | ||||
|     Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator; | ||||
|     PhysicalAddressContainer multi_dev_address; | ||||
|     MultiAddressContainer multi_dev_address; | ||||
| 
 | ||||
|     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
 | ||||
|     template <bool pin_area> | ||||
|  | ||||
| @ -8,6 +8,7 @@ | ||||
| 
 | ||||
| #include "core/hle/kernel/k_process.h" | ||||
| #include "core/hle/service/nvdrv/core/container.h" | ||||
| #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||||
| #include "core/hle/service/nvdrv/core/nvmap.h" | ||||
| #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | ||||
| #include "core/memory.h" | ||||
| @ -36,6 +37,14 @@ Container::~Container() = default; | ||||
| 
 | ||||
| size_t Container::OpenSession(Kernel::KProcess* process) { | ||||
|     std::scoped_lock lk(impl->session_guard); | ||||
|     for (auto& session : impl->sessions) { | ||||
|         if (!session.is_active) { | ||||
|             continue; | ||||
|         } | ||||
|         if (session.process == process) { | ||||
|             return session.id; | ||||
|         } | ||||
|     } | ||||
|     size_t new_id{}; | ||||
|     auto* memory_interface = &process->GetMemory(); | ||||
|     auto& smmu = impl->host1x.MemoryManager(); | ||||
| @ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) { | ||||
|         impl->sessions.emplace_back(new_id, process, smmu_id); | ||||
|         new_id = impl->new_ids++; | ||||
|     } | ||||
|     LOG_CRITICAL(Debug, "Created Session {}", new_id); | ||||
|     auto& session = impl->sessions[new_id]; | ||||
|     session.is_active = true; | ||||
|     // Optimization
 | ||||
|     if (process->IsApplication()) { | ||||
|         auto& page_table = process->GetPageTable().GetBasePageTable(); | ||||
|         auto heap_start = page_table.GetHeapRegionStart(); | ||||
| 
 | ||||
|         Kernel::KProcessAddress cur_addr = heap_start; | ||||
|         size_t region_size = 0; | ||||
|         VAddr region_start = 0; | ||||
|         while (true) { | ||||
|             Kernel::KMemoryInfo mem_info{}; | ||||
|             Kernel::Svc::PageInfo page_info{}; | ||||
|             R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), | ||||
|                                           cur_addr)); | ||||
|             auto svc_mem_info = mem_info.GetSvcMemoryInfo(); | ||||
| 
 | ||||
|             // check if this memory block is heap
 | ||||
|             if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { | ||||
|                 if (svc_mem_info.size > region_size) { | ||||
|                     region_size = svc_mem_info.size; | ||||
|                     region_start = svc_mem_info.base_address; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // Check if we're done.
 | ||||
|             const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size; | ||||
|             if (next_address <= GetInteger(cur_addr)) { | ||||
|                 break; | ||||
|             } | ||||
| 
 | ||||
|             cur_addr = next_address; | ||||
|         } | ||||
|         session.has_preallocated_area = false; | ||||
|         auto start_region = (region_size >> 15) >= 1024 ? smmu.Allocate(region_size) : 0; | ||||
|         if (start_region != 0) { | ||||
|             session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size, | ||||
|                                                           smmu_id, impl->host1x); | ||||
|             session.has_preallocated_area = true; | ||||
|             LOG_CRITICAL(Debug, "Preallocation created!"); | ||||
|         } | ||||
|     } | ||||
|     return new_id; | ||||
| } | ||||
| 
 | ||||
| void Container::CloseSession(size_t id) { | ||||
|     std::scoped_lock lk(impl->session_guard); | ||||
|     auto& session = impl->sessions[id]; | ||||
|     auto& smmu = impl->host1x.MemoryManager(); | ||||
|     if (session.has_preallocated_area) { | ||||
|         const DAddr region_start = session.mapper->GetRegionStart(); | ||||
|         const size_t region_size = session.mapper->GetRegionSize(); | ||||
|         session.mapper.reset(); | ||||
|         smmu.Free(region_start, region_size); | ||||
|         session.has_preallocated_area = false; | ||||
|     } | ||||
|     session.is_active = false; | ||||
|     smmu.UnregisterProcess(impl->sessions[id].smmu_id); | ||||
|     impl->id_pool.emplace_front(id); | ||||
|     LOG_CRITICAL(Debug, "Closed Session {}", id); | ||||
| } | ||||
| 
 | ||||
| Session* Container::GetSession(size_t id) { | ||||
|  | ||||
| @ -20,6 +20,7 @@ class Host1x; | ||||
| 
 | ||||
| namespace Service::Nvidia::NvCore { | ||||
| 
 | ||||
| class HeapMapper; | ||||
| class NvMap; | ||||
| class SyncpointManager; | ||||
| 
 | ||||
| @ -29,6 +30,9 @@ struct Session { | ||||
|     size_t id; | ||||
|     Kernel::KProcess* process; | ||||
|     size_t smmu_id; | ||||
|     bool has_preallocated_area{}; | ||||
|     std::unique_ptr<HeapMapper> mapper{}; | ||||
|     bool is_active{}; | ||||
| }; | ||||
| 
 | ||||
| class Container { | ||||
|  | ||||
							
								
								
									
										172
									
								
								src/core/hle/service/nvdrv/core/heap_mapper.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								src/core/hle/service/nvdrv/core/heap_mapper.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,172 @@ | ||||
| // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #include <mutex> | ||||
| 
 | ||||
| #include <boost/container/small_vector.hpp> | ||||
| #define BOOST_NO_MT | ||||
| #include <boost/pool/detail/mutex.hpp> | ||||
| #undef BOOST_NO_MT | ||||
| #include <boost/icl/interval.hpp> | ||||
| #include <boost/icl/interval_base_set.hpp> | ||||
| #include <boost/icl/interval_set.hpp> | ||||
| #include <boost/icl/split_interval_map.hpp> | ||||
| #include <boost/pool/pool.hpp> | ||||
| #include <boost/pool/pool_alloc.hpp> | ||||
| #include <boost/pool/poolfwd.hpp> | ||||
| 
 | ||||
| #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||||
| #include "video_core/host1x/host1x.h" | ||||
| 
 | ||||
| namespace boost { | ||||
| template <typename T> | ||||
| class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; | ||||
| } | ||||
| 
 | ||||
| namespace Service::Nvidia::NvCore { | ||||
| 
 | ||||
| using IntervalCompare = std::less<DAddr>; | ||||
| using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; | ||||
| using IntervalAllocator = boost::fast_pool_allocator<DAddr>; | ||||
| using IntervalSet = boost::icl::interval_set<DAddr>; | ||||
| using IntervalType = typename IntervalSet::interval_type; | ||||
| 
 | ||||
| template <typename Type> | ||||
| struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { | ||||
|     // types
 | ||||
|     typedef counter_add_functor<Type> type; | ||||
|     typedef boost::icl::identity_based_inplace_combine<Type> base_type; | ||||
| 
 | ||||
|     // public member functions
 | ||||
|     void operator()(Type& current, const Type& added) const { | ||||
|         current += added; | ||||
|         if (current < base_type::identity_element()) { | ||||
|             current = base_type::identity_element(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // public static functions
 | ||||
|     static void version(Type&){}; | ||||
| }; | ||||
| 
 | ||||
| using OverlapCombine = counter_add_functor<int>; | ||||
| using OverlapSection = boost::icl::inter_section<int>; | ||||
| using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; | ||||
| 
 | ||||
| struct HeapMapper::HeapMapperInternal { | ||||
|     HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {} | ||||
|     ~HeapMapperInternal() = default; | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||||
|                                  Func&& func) { | ||||
|         const DAddr start_address = cpu_addr; | ||||
|         const DAddr end_address = start_address + size; | ||||
|         const IntervalType search_interval{start_address, end_address}; | ||||
|         auto it = current_range.lower_bound(search_interval); | ||||
|         if (it == current_range.end()) { | ||||
|             return; | ||||
|         } | ||||
|         auto end_it = current_range.upper_bound(search_interval); | ||||
|         for (; it != end_it; it++) { | ||||
|             auto& inter = it->first; | ||||
|             DAddr inter_addr_end = inter.upper(); | ||||
|             DAddr inter_addr = inter.lower(); | ||||
|             if (inter_addr_end > end_address) { | ||||
|                 inter_addr_end = end_address; | ||||
|             } | ||||
|             if (inter_addr < start_address) { | ||||
|                 inter_addr = start_address; | ||||
|             } | ||||
|             func(inter_addr, inter_addr_end, it->second); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void RemoveEachInOverlapCounter(OverlapCounter& current_range, | ||||
|                                     const IntervalType search_interval, int subtract_value) { | ||||
|         bool any_removals = false; | ||||
|         current_range.add(std::make_pair(search_interval, subtract_value)); | ||||
|         do { | ||||
|             any_removals = false; | ||||
|             auto it = current_range.lower_bound(search_interval); | ||||
|             if (it == current_range.end()) { | ||||
|                 return; | ||||
|             } | ||||
|             auto end_it = current_range.upper_bound(search_interval); | ||||
|             for (; it != end_it; it++) { | ||||
|                 if (it->second <= 0) { | ||||
|                     any_removals = true; | ||||
|                     current_range.erase(it); | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } while (any_removals); | ||||
|     } | ||||
| 
 | ||||
|     IntervalSet base_set; | ||||
|     OverlapCounter mapping_overlaps; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     std::mutex guard; | ||||
| }; | ||||
| 
 | ||||
| HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id, | ||||
|                        Tegra::Host1x::Host1x& host1x) | ||||
|     : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id} { | ||||
|     m_internal = std::make_unique<HeapMapperInternal>(host1x); | ||||
| } | ||||
| 
 | ||||
| HeapMapper::~HeapMapper() { | ||||
|     m_internal->device_memory.Unmap(m_daddress, m_size); | ||||
| } | ||||
| 
 | ||||
| DAddr HeapMapper::Map(VAddr start, size_t size) { | ||||
|     std::scoped_lock lk(m_internal->guard); | ||||
|     m_internal->base_set.clear(); | ||||
|     const IntervalType interval{start, start + size}; | ||||
|     m_internal->base_set.insert(interval); | ||||
|     m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int){ | ||||
|         const IntervalType other{start_addr, end_addr}; | ||||
|         m_internal->base_set.subtract(other); | ||||
|     }); | ||||
|     if (!m_internal->base_set.empty()) { | ||||
|         auto it = m_internal->base_set.begin(); | ||||
|         auto end_it = m_internal->base_set.end(); | ||||
|         for (; it != end_it; it++) { | ||||
|             const VAddr inter_addr_end = it->upper(); | ||||
|             const VAddr inter_addr = it->lower(); | ||||
|             const size_t offset = inter_addr - m_vaddress; | ||||
|             const size_t sub_size = inter_addr_end - inter_addr; | ||||
|             m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size, m_smmu_id); | ||||
|         } | ||||
|     } | ||||
|     m_internal->mapping_overlaps += std::make_pair(interval, 1); | ||||
|     m_internal->base_set.clear(); | ||||
|     return m_daddress + (start - m_vaddress); | ||||
| } | ||||
| 
 | ||||
| void HeapMapper::Unmap(VAddr start, size_t size) { | ||||
|     std::scoped_lock lk(m_internal->guard); | ||||
|     m_internal->base_set.clear(); | ||||
|     m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int value) { | ||||
|         if (value <= 1) { | ||||
|             const IntervalType other{start_addr, end_addr}; | ||||
|             m_internal->base_set.insert(other); | ||||
|         } | ||||
|     }); | ||||
|     if (!m_internal->base_set.empty()) { | ||||
|         auto it = m_internal->base_set.begin(); | ||||
|         auto end_it = m_internal->base_set.end(); | ||||
|         for (; it != end_it; it++) { | ||||
|             const VAddr inter_addr_end = it->upper(); | ||||
|             const VAddr inter_addr = it->lower(); | ||||
|             const size_t offset = inter_addr - m_vaddress; | ||||
|             const size_t sub_size = inter_addr_end - inter_addr; | ||||
|             m_internal->device_memory.Unmap(m_daddress + offset, sub_size); | ||||
|         } | ||||
|     } | ||||
|     const IntervalType to_remove{start, start + size}; | ||||
|     m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1); | ||||
|     m_internal->base_set.clear(); | ||||
| } | ||||
| 
 | ||||
| } // namespace Service::Nvidia::NvCore
 | ||||
							
								
								
									
										48
									
								
								src/core/hle/service/nvdrv/core/heap_mapper.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								src/core/hle/service/nvdrv/core/heap_mapper.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,48 @@ | ||||
| // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Tegra::Host1x { | ||||
| class Host1x; | ||||
| } // namespace Tegra::Host1x
 | ||||
| 
 | ||||
| namespace Service::Nvidia::NvCore { | ||||
| 
 | ||||
| class HeapMapper { | ||||
| public: | ||||
|     HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id, | ||||
|                Tegra::Host1x::Host1x& host1x); | ||||
|     ~HeapMapper(); | ||||
| 
 | ||||
|     bool IsInBounds(VAddr start, size_t size) const { | ||||
|         VAddr end = start + size; | ||||
|         return start >= m_vaddress && end <= (m_vaddress + m_size); | ||||
|     } | ||||
| 
 | ||||
|     DAddr Map(VAddr start, size_t size); | ||||
| 
 | ||||
|     void Unmap(VAddr start, size_t size); | ||||
| 
 | ||||
|     DAddr GetRegionStart() const { | ||||
|         return m_daddress; | ||||
|     } | ||||
| 
 | ||||
|     size_t GetRegionSize() const { | ||||
|         return m_size; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     struct HeapMapperInternal; | ||||
|     VAddr m_vaddress; | ||||
|     DAddr m_daddress; | ||||
|     size_t m_size; | ||||
|     size_t m_smmu_id; | ||||
|     std::unique_ptr<HeapMapperInternal> m_internal; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Service::Nvidia::NvCore
 | ||||
| @ -8,10 +8,12 @@ | ||||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/hle/service/nvdrv/core/container.h" | ||||
| #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||||
| #include "core/hle/service/nvdrv/core/nvmap.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/host1x/host1x.h" | ||||
| 
 | ||||
| 
 | ||||
| using Core::Memory::YUZU_PAGESIZE; | ||||
| 
 | ||||
| namespace Service::Nvidia::NvCore { | ||||
| @ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) { | ||||
|     } | ||||
| 
 | ||||
|     // Free and unmap the handle from the SMMU
 | ||||
|     auto& smmu = host1x.MemoryManager(); | ||||
|     smmu.Unmap(handle_description.d_address, handle_description.aligned_size); | ||||
|     smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size)); | ||||
|     const size_t map_size = handle_description.aligned_size; | ||||
|     if (!handle_description.in_heap) { | ||||
|         auto& smmu = host1x.MemoryManager(); | ||||
|         smmu.Unmap(handle_description.d_address, map_size); | ||||
|         smmu.Free(handle_description.d_address, static_cast<size_t>(map_size)); | ||||
|         handle_description.d_address = 0; | ||||
|         return; | ||||
|     } | ||||
|     const VAddr vaddress = handle_description.address; | ||||
|     auto* session = core.GetSession(handle_description.session_id); | ||||
|     session->mapper->Unmap(vaddress, map_size); | ||||
|     handle_description.d_address = 0; | ||||
|     handle_description.in_heap = false; | ||||
| } | ||||
| 
 | ||||
| bool NvMap::TryRemoveHandle(const Handle& handle_description) { | ||||
| @ -188,24 +199,31 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are | ||||
|         DAddr address{}; | ||||
|         auto& smmu = host1x.MemoryManager(); | ||||
|         auto* session = core.GetSession(session_id); | ||||
|         while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) { | ||||
|             // Free handles until the allocation succeeds
 | ||||
|             std::scoped_lock queueLock(unmap_queue_lock); | ||||
|             if (auto freeHandleDesc{unmap_queue.front()}) { | ||||
|                 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
 | ||||
|                 // checking if they are before unmapping
 | ||||
|                 std::scoped_lock freeLock(freeHandleDesc->mutex); | ||||
|                 if (handle_description->d_address) | ||||
|                     UnmapHandle(*freeHandleDesc); | ||||
|             } else { | ||||
|                 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | ||||
|         const VAddr vaddress = handle_description->address; | ||||
|         const size_t map_size = handle_description->aligned_size; | ||||
|         handle_description->session_id = session_id; | ||||
|         if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) { | ||||
|             handle_description->d_address = session->mapper->Map(vaddress, map_size); | ||||
|             handle_description->in_heap = true; | ||||
|         } else { | ||||
|             while ((address = smmu.Allocate(map_size)) == 0) { | ||||
|                 // Free handles until the allocation succeeds
 | ||||
|                 std::scoped_lock queueLock(unmap_queue_lock); | ||||
|                 if (auto freeHandleDesc{unmap_queue.front()}) { | ||||
|                     // Handles in the unmap queue are guaranteed not to be pinned so don't bother
 | ||||
|                     // checking if they are before unmapping
 | ||||
|                     std::scoped_lock freeLock(freeHandleDesc->mutex); | ||||
|                     if (handle_description->d_address) | ||||
|                         UnmapHandle(*freeHandleDesc); | ||||
|                 } else { | ||||
|                     LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             handle_description->d_address = address; | ||||
|             smmu.Map(address, vaddress, map_size, session->smmu_id); | ||||
|             handle_description->in_heap = false; | ||||
|         } | ||||
| 
 | ||||
|         handle_description->d_address = address; | ||||
| 
 | ||||
|         smmu.Map(address, handle_description->address, handle_description->aligned_size, | ||||
|                  session->smmu_id); | ||||
|     } | ||||
| 
 | ||||
|     if (low_area_pin) { | ||||
|  | ||||
| @ -70,6 +70,8 @@ public: | ||||
| 
 | ||||
|         u8 kind{};        //!< Used for memory compression
 | ||||
|         bool allocated{}; //!< If the handle has been allocated with `Alloc`
 | ||||
|         bool in_heap{}; | ||||
|         size_t session_id{}; | ||||
| 
 | ||||
|         DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
 | ||||
|                            //!< this can also be in the nvdrv tmem
 | ||||
|  | ||||
| @ -34,8 +34,6 @@ | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/shader_notify.h" | ||||
| 
 | ||||
| #pragma optimize("", off) | ||||
| 
 | ||||
| namespace Tegra { | ||||
| 
 | ||||
| struct GPU::Impl { | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user