 README.md                                        |   2
 src/core/hle/service/hid/controllers/npad.cpp    |   1
 src/video_core/engines/fermi_2d.cpp              |  12
 src/video_core/memory_manager.cpp                | 109
 src/video_core/memory_manager.h                  |  22
 src/video_core/rasterizer_interface.h            |   3
 src/video_core/renderer_opengl/gl_rasterizer.cpp |   7
 src/video_core/renderer_opengl/gl_rasterizer.h   |   1
 src/video_core/renderer_vulkan/vk_rasterizer.cpp |   7
 src/video_core/renderer_vulkan/vk_rasterizer.h   |   1
 src/video_core/texture_cache/image_base.cpp      |   3
 src/video_core/texture_cache/image_base.h        |  39
 src/video_core/texture_cache/texture_cache.h     | 417
 src/video_core/texture_cache/types.h             |   1
 src/video_core/texture_cache/util.cpp            |  24
 src/video_core/texture_cache/util.h              |   4
 16 files changed, 66 insertions(+), 587 deletions(-)
diff --git a/README.md b/README.md
index 5a5eb80a2..485604290 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 1840.
+This is the source code for early-access 1841.
 
 ## Legal Notice
 
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index dcbb13660..9e7aa086b 100755
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -254,7 +254,6 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
 }
 
 void Controller_NPad::OnInit() {
-    auto& kernel = system.Kernel();
     for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) {
         styleset_changed_events[i] =
             service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i));
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 5f075e20b..8107caad4 100755
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -66,18 +66,18 @@ void Fermi2D::Blit() {
         .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
     };
 
+    Surface src = regs.src;
     s32 src_address_offset = 0;
-    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(regs.src.format));
-    if (regs.src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
-        regs.src.width == config.src_x1 &&
-        config.src_x1 > static_cast<s32>(regs.src.pitch / bytes_per_pixel) && config.src_x0 > 0) {
+    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+    if (src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && src.width == config.src_x1 &&
+        config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0) {
         src_address_offset = config.src_x0 * bytes_per_pixel;
-        regs.src.width -= config.src_x0;
+        src.width -= config.src_x0;
         config.src_x1 -= config.src_x0;
         config.src_x0 = 0;
     }
 
-    if (!rasterizer->AccelerateSurfaceCopy(regs.src, src_address_offset, regs.dst, config)) {
+    if (!rasterizer->AccelerateSurfaceCopy(src, src_address_offset, regs.dst, config)) {
         UNIMPLEMENTED();
     }
 }
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 9deea9a26..02385384c 100755
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,16 +69,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     } else {
         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
     }
+    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
+    ASSERT(cpu_addr);
 
-    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
-
-    for (const auto& map : submapped_ranges) {
-        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
-        ASSERT(cpu_addr);
-
-        rasterizer->UnmapMemory(*cpu_addr, map.second);
-    }
+    rasterizer->UnmapMemory(*cpu_addr, size);
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
@@ -151,14 +146,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
 
     //// Lock the new page
     // TryLockPage(page_entry, size);
-    auto& current_page = page_table[PageEntryIndex(gpu_addr)];
 
-    if ((!current_page.IsValid() && page_entry.IsValid()) ||
-        current_page.ToAddress() != page_entry.ToAddress()) {
-        rasterizer->ModifyGPUMemory(gpu_addr, size);
-    }
-
-    current_page = page_entry;
+    page_table[PageEntryIndex(gpu_addr)] = page_entry;
 }
 
 std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -204,19 +193,6 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
     return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
-    size_t page_index{addr >> page_bits};
-    const size_t page_last{(addr + size + page_size - 1) >> page_bits};
-    while (page_index < page_last) {
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr && *page_addr != 0) {
-            return page_addr;
-        }
-        ++page_index;
-    }
-    return std::nullopt;
-}
-
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
     if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -413,79 +389,4 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::PAGE_SIZE;
 }
 
-bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
-    size_t page_index{gpu_addr >> page_bits};
-    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
-    std::optional<VAddr> old_page_addr{};
-    while (page_index != page_last) {
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (!page_addr || *page_addr == 0) {
-            return false;
-        }
-        if (old_page_addr) {
-            if (*old_page_addr + page_size != *page_addr) {
-                return false;
-            }
-        }
-        old_page_addr = page_addr;
-        ++page_index;
-    }
-    return true;
-}
-
-bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
-    size_t page_index{gpu_addr >> page_bits};
-    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
-    while (page_index < page_last) {
-        if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
-            return false;
-        }
-        ++page_index;
-    }
-    return true;
-}
-
-std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
-    GPUVAddr gpu_addr, std::size_t size) const {
-    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t remaining_size{size};
-    size_t page_offset{gpu_addr & page_mask};
-    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
-    std::optional<VAddr> old_page_addr{};
-    const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
-        if (!last_segment) {
-            GPUVAddr new_base_addr = page_index << page_bits;
-            last_segment = {new_base_addr, bytes};
-        } else {
-            last_segment->second += bytes;
-        }
-    };
-    const auto split = [this, &last_segment, &result] {
-        if (last_segment) {
-            result.push_back(*last_segment);
-            last_segment = std::nullopt;
-        }
-    };
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (!page_addr) {
-            split();
-        } else if (old_page_addr) {
-            if (*old_page_addr + page_size != *page_addr) {
-                split();
-            }
-            extend_size(num_bytes);
-        } else {
-            extend_size(num_bytes);
-        }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-    }
-    split();
-    return result;
-}
-
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 509f14f26..5d6c196fa 100755
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,8 +76,6 @@ public:
 
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
-    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
-
     template <typename T>
     [[nodiscard]] T Read(GPUVAddr addr) const;
 
@@ -114,28 +112,10 @@ public:
     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     /**
-     * Checks if a gpu region can be simply read with a pointer.
+     * IsGranularRange checks if a gpu region can be simply read with a pointer.
      */
     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
-    /**
-     * Checks if a gpu region is mapped by a single range of cpu addresses.
-     */
-    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
-
-    /**
-     * Checks if a gpu region is mapped entirely.
-     */
-    [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
-
-    /**
-     * Returns a vector with all the subranges of cpu addresses mapped beneath.
-     * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector
-     * will be returned;
-     */
-    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
-                                                                    std::size_t size) const;
-
     [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 922dc4095..c1c636ceb 100755
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -93,9 +93,6 @@ public:
     /// Unmap memory range
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
-    /// Remap GPU memory range. This means underneath backing memory changed
-    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
-
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3bc39b09b..ecf67e5ce 100755
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -631,13 +631,6 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
-    {
-        std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
-    }
-}
-
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 87d69a91c..7444ebd8d 100755
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -82,7 +82,6 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 37aba5a4a..524fb2ad4 100755
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -577,13 +577,6 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
-    {
-        std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
-    }
-}
-
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 01d7eec5d..d4a4ee58e 100755
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -74,7 +74,6 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 2aae338b6..ad69d32d1 100755
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,9 +69,6 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
     }
 }
 
-ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
-    : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
-
 std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
     if (other_addr < gpu_addr) {
         // Subresource address can't be lower than the base
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index ff1feda9b..e326cab71 100755
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,14 +25,12 @@ enum class ImageFlagBits : u32 {
     Strong = 1 << 5,      ///< Exists in the image table, the dimensions are can be trusted
     Registered = 1 << 6,  ///< True when the image is registered
     Picked = 1 << 7,      ///< Temporary flag to mark the image as picked
-    Remapped = 1 << 8,    ///< Image has been remapped.
-    Sparse = 1 << 9,      ///< Image has non continous submemory.
 
     // Garbage Collection Flags
-    BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+    BadOverlap = 1 << 8,  ///< This image overlaps other but doesn't fit, has higher
                           ///< garbage collection priority
-    Alias = 1 << 11,      ///< This image has aliases and has priority on garbage
+    Alias = 1 << 9,       ///< This image has aliases and has priority on garbage
                           ///< collection
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
@@ -59,12 +57,6 @@ struct ImageBase {
         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
     }
 
-    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
-        const VAddr overlap_end = overlap_gpu_addr + overlap_size;
-        const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
-        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
-    }
-
     void CheckBadOverlapState();
     void CheckAliasState();
 
@@ -92,29 +84,6 @@ struct ImageBase {
 
     std::vector<AliasedImage> aliased_images;
     std::vector<ImageId> overlapping_images;
-    ImageMapId map_view_id{};
-};
-
-struct ImageMapView {
-    explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
-
-    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
-        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
-        const VAddr cpu_addr_end = cpu_addr + size;
-        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
-    }
-
-    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
-        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
-        const GPUVAddr gpu_addr_end = gpu_addr + size;
-        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
-    }
-
-    GPUVAddr gpu_addr;
-    VAddr cpu_addr;
-    size_t size;
-    ImageId image_id;
-    bool picked{};
 };
 
 struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a3120a25e..917184df8 100755
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,7 +13,6 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
-#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -153,9 +152,6 @@ public:
     /// Remove images in a region
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
-    /// Remove images in a region
-    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
-
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
@@ -192,22 +188,7 @@ public:
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
-    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
-        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
-            if constexpr (RETURNS_BOOL) {
-                if (func(page)) {
-                    break;
-                }
-            } else {
-                func(page);
-            }
-        }
-    }
-
-    template <typename Func>
-    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
+    static void ForEachPage(VAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
         const u64 page_end = (addr + size - 1) >> PAGE_BITS;
         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -237,7 +218,7 @@ private:
     FramebufferId GetFramebufferId(const RenderTargets& key);
 
     /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image, ImageId image_id);
+    void RefreshContents(Image& image);
 
     /// Upload data from guest to an image
     template <typename StagingBuffer>
@@ -287,16 +268,6 @@ private:
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
-    template <typename Func>
-    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
-
-    template <typename Func>
-    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
-
-    /// Iterates over all the images in a region calling func
-    template <typename Func>
-    void ForEachSparseSegment(ImageBase& image, Func&& func);
-
     /// Find or create an image view in the given image with the passed parameters
     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
 
@@ -307,10 +278,10 @@ private:
     void UnregisterImage(ImageId image);
 
     /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image, ImageId image_id);
+    void TrackImage(ImageBase& image);
 
     /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image, ImageId image_id);
+    void UntrackImage(ImageBase& image);
 
     /// Delete image from the cache
     void DeleteImage(ImageId image);
@@ -368,13 +339,7 @@ private:
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
-    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
-    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
-
-    VAddr virtual_invalid_space{};
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
 
     bool has_deleted_images = false;
     u64 total_used_memory = 0;
@@ -383,7 +348,6 @@ private:
     u64 critical_memory;
 
     SlotVector<Image> slot_images;
-    SlotVector<ImageMapView> slot_map_views;
     SlotVector<ImageView> slot_image_views;
     SlotVector<ImageAlloc> slot_image_allocs;
     SlotVector<Sampler> slot_samplers;
@@ -494,7 +458,7 @@ void TextureCache<P>::RunGarbageCollector() {
             }
         }
         if (True(image->flags & ImageFlagBits::Tracked)) {
-            UntrackImage(*image, image_id);
+            UntrackImage(*image);
         }
         UnregisterImage(image_id);
         DeleteImage(image_id);
@@ -693,9 +657,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
             return;
         }
         image.flags |= ImageFlagBits::CpuModified;
-        if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, image_id);
-        }
+        UntrackImage(image);
     });
 }
 
@@ -732,7 +694,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, id);
+            UntrackImage(image);
         }
         UnregisterImage(id);
         DeleteImage(id);
@@ -740,23 +702,6 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
-    std::vector<ImageId> deleted_images;
-    ForEachImageInRegionGPU(gpu_addr, size,
-                            [&](ImageId id, Image&) { deleted_images.push_back(id); });
-    for (const ImageId id : deleted_images) {
-        Image& image = slot_images[id];
-        if (True(image.flags & ImageFlagBits::Remapped)) {
-            continue;
-        }
-        image.flags |= ImageFlagBits::Remapped;
-        if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, id);
-        }
-    }
-}
-
-template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
                                 const Tegra::Engines::Fermi2D::Config& copy) {
@@ -848,10 +793,9 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
     if (it == page_table.end()) {
         return nullptr;
     }
-    const auto& image_map_ids = it->second;
-    for (const ImageMapId map_id : image_map_ids) {
-        const ImageMapView& map = slot_map_views[map_id];
-        const ImageBase& image = slot_images[map.image_id];
+    const auto& image_ids = it->second;
+    for (const ImageId image_id : image_ids) {
+        const ImageBase& image = slot_images[image_id];
         if (image.cpu_addr != cpu_addr) {
             continue;
         }
@@ -931,13 +875,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
+void TextureCache<P>::RefreshContents(Image& image) {
     if (False(image.flags & ImageFlagBits::CpuModified)) {
         // Only upload modified images
         return;
     }
     image.flags &= ~ImageFlagBits::CpuModified;
-    TrackImage(image, image_id);
+    TrackImage(image);
 
     if (image.info.num_samples > 1) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -974,7 +918,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidEntry(gpu_memory, config)) {
+    if (!IsValidAddress(gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
     const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1016,20 +960,14 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
-        if (!cpu_addr) {
-            return ImageId{};
-        }
+        return ImageId{};
     }
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
-        if (True(existing_image.flags & ImageFlagBits::Remapped)) {
-            return false;
-        }
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
             const bool strict_size = False(options & RelaxedOptions::Size) &&
                                      True(existing_image.flags & ImageFlagBits::Strong);
@@ -1055,16 +993,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-    if (!cpu_addr) {
-        const auto size = CalculateGuestSizeInBytes(info);
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
-        if (!cpu_addr) {
-            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
-            virtual_invalid_space += Common::AlignUp(size, 32);
-            cpu_addr = std::optional<VAddr>(fake_addr);
-        }
-    }
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
@@ -1084,19 +1013,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     std::vector<ImageId> overlap_ids;
-    std::unordered_set<ImageId> overlaps_found;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
-    std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
-    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
-        if (True(overlap.flags & ImageFlagBits::Remapped)) {
-            ignore_textures.insert(overlap_id);
-            return;
-        }
-        if (info.type != overlap.info.type) {
-            return;
-        }
+    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
         if (info.type == ImageType::Linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
@@ -1104,7 +1024,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
-        overlaps_found.insert(overlap_id);
         static constexpr bool strict_size = true;
         const std::optional<OverlapResult> solution = ResolveOverlap(
             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1128,35 +1047,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
         }
-    };
-    ForEachImageInRegion(cpu_addr, size_bytes, region_check);
-    const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
-        if (!overlaps_found.contains(overlap_id)) {
-            ignore_textures.insert(overlap_id);
-        }
-    };
-    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+    });
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
-        new_image.flags |= ImageFlagBits::Sparse;
-    }
-
-    for (const ImageId overlap_id : ignore_textures) {
-        Image& overlap = slot_images[overlap_id];
-        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
-            UNIMPLEMENTED();
-        }
-        if (True(overlap.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(overlap, overlap_id);
-        }
-        UnregisterImage(overlap_id);
-        DeleteImage(overlap_id);
-    }
-
     // TODO: Only upload what we need
-    RefreshContents(new_image, new_image_id);
+    RefreshContents(new_image);
 
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
@@ -1168,7 +1064,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             runtime.CopyImage(new_image, overlap, copies);
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(overlap, overlap_id);
+            UntrackImage(overlap);
         }
         UnregisterImage(overlap_id);
         DeleteImage(overlap_id);
@@ -1304,8 +1200,7 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 32> images;
-    boost::container::small_vector<ImageMapId, 32> maps;
-    ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
+    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
         const auto it = page_table.find(page);
         if (it == page_table.end()) {
             if constexpr (BOOL_BREAK) {
@@ -1314,105 +1209,12 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
                 return;
             }
         }
-        for (const ImageMapId map_id : it->second) {
-            ImageMapView& map = slot_map_views[map_id];
-            if (map.picked) {
-                continue;
-            }
-            if (!map.Overlaps(cpu_addr, size)) {
-                continue;
-            }
-            map.picked = true;
-            maps.push_back(map_id);
-            Image& image = slot_images[map.image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(map.image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(map.image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(map.image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
-    for (const ImageId image_id : images) {
-        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
-    }
-    for (const ImageMapId map_id : maps) {
-        slot_map_views[map_id].picked = false;
-    }
-}
-
-template <class P>
-template <typename Func>
-void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
-    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
-    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
-    boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
-    for (const ImageId image_id : images) {
-        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
-    }
-}
-
-template <class P>
-template <typename Func>
-void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
-    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
-    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
-    boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = sparse_page_table.find(page);
-        if (it == sparse_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
         for (const ImageId image_id : it->second) {
             Image& image = slot_images[image_id];
             if (True(image.flags & ImageFlagBits::Picked)) {
                 continue;
             }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
+            if (!image.Overlaps(cpu_addr, size)) {
                 continue;
             }
             image.flags |= ImageFlagBits::Picked;
@@ -1435,27 +1237,6 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size,
 }
 
 template <class P>
-template <typename Func>
-void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
-    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
-    static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
-    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
-    for (auto& segment : segments) {
-        const auto gpu_addr = segment.first;
-        const auto size = segment.second;
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        ASSERT(cpu_addr);
-        if constexpr (RETURNS_BOOL) {
-            if (func(gpu_addr, *cpu_addr, size)) {
-                break;
-            }
-        } else {
-            func(gpu_addr, *cpu_addr, size);
-        }
-    }
-}
-
-template <class P>
 ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
     Image& image = slot_images[image_id];
     if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1472,6 +1253,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
+    ForEachPage(image.cpu_addr, image.guest_size_bytes,
+                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
     if ((IsPixelFormatASTC(image.info.format) &&
          True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1479,27 +1262,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory += Common::AlignUp(tentative_size, 1024);
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
-    if (False(image.flags & ImageFlagBits::Sparse)) {
-        auto map_id =
-            slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
-        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
-                       [this, map_id](u64 page) { page_table[page].push_back(map_id); });
-        image.map_view_id = map_id;
-        return;
-    }
-    std::vector<ImageViewId> sparse_maps{};
-    ForEachSparseSegment(
-        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
-            auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
-            ForEachCPUPage(cpu_addr, size,
-                           [this, map_id](u64 page) { page_table[page].push_back(map_id); });
-            sparse_maps.push_back(map_id);
-        });
-    sparse_views.emplace(image_id, std::move(sparse_maps));
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
 }
 
 template <class P>
@@ -1516,125 +1278,34 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
-    const auto& clear_page_table =
-        [this, image_id](
-            u64 page,
-            std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
-            const auto page_it = selected_page_table.find(page);
-            if (page_it == selected_page_table.end()) {
-                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-                return;
-            }
-            std::vector<ImageId>& image_ids = page_it->second;
-            const auto vector_it = std::ranges::find(image_ids, image_id);
-            if (vector_it == image_ids.end()) {
-                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
-                                page << PAGE_BITS);
-                return;
-            }
-            image_ids.erase(vector_it);
-        };
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
-    if (False(image.flags & ImageFlagBits::Sparse)) {
-        const auto map_id = image.map_view_id;
-        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
-            const auto page_it = page_table.find(page);
-            if (page_it == page_table.end()) {
-                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-                return;
-            }
-            std::vector<ImageMapId>& image_map_ids = page_it->second;
-            const auto vector_it = std::ranges::find(image_map_ids, map_id);
-            if (vector_it == image_map_ids.end()) {
-                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
-                                page << PAGE_BITS);
-                return;
-            }
-            image_map_ids.erase(vector_it);
-        });
-        slot_map_views.erase(map_id);
-        return;
-    }
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
-        clear_page_table(page, sparse_page_table);
-    });
-    auto it = sparse_views.find(image_id);
-    ASSERT(it != sparse_views.end());
-    auto& sparse_maps = it->second;
-    for (auto& map_view_id : sparse_maps) {
-        const auto& map_range = slot_map_views[map_view_id];
-        const VAddr cpu_addr = map_range.cpu_addr;
-        const std::size_t size = map_range.size;
-        ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
-            const auto page_it = page_table.find(page);
-            if (page_it == page_table.end()) {
-                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-                return;
-            }
-            std::vector<ImageMapId>& image_map_ids = page_it->second;
-            auto vector_it = image_map_ids.begin();
-            while (vector_it != image_map_ids.end()) {
-                ImageMapView& map = slot_map_views[*vector_it];
-                if (map.image_id != image_id) {
-                    vector_it++;
-                    continue;
-                }
-                if (!map.picked) {
-                    map.picked = true;
-                }
-                vector_it = image_map_ids.erase(vector_it);
-            }
-        });
-        slot_map_views.erase(map_view_id);
-    }
-    sparse_views.erase(it);
+    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        const auto page_it = page_table.find(page);
+        if (page_it == page_table.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+            return;
+        }
+        std::vector<ImageId>& image_ids = page_it->second;
+        const auto vector_it = std::ranges::find(image_ids, image_id);
+        if (vector_it == image_ids.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
+            return;
+        }
+        image_ids.erase(vector_it);
+    });
 }
 
 template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
+void TextureCache<P>::TrackImage(ImageBase& image) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
     image.flags |= ImageFlagBits::Tracked;
-    if (False(image.flags & ImageFlagBits::Sparse)) {
-        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
-        return;
-    }
-    if (True(image.flags & ImageFlagBits::Registered)) {
-        auto it = sparse_views.find(image_id);
-        ASSERT(it != sparse_views.end());
-        auto& sparse_maps = it->second;
-        for (auto& map_view_id : sparse_maps) {
-            const auto& map = slot_map_views[map_view_id];
-            const VAddr cpu_addr = map.cpu_addr;
-            const std::size_t size = map.size;
-            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
-        }
-        return;
-    }
-    ForEachSparseSegment(image,
-                         [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
-                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
-                         });
+    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
 }
 
 template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
+void TextureCache<P>::UntrackImage(ImageBase& image) {
     ASSERT(True(image.flags & ImageFlagBits::Tracked));
     image.flags &= ~ImageFlagBits::Tracked;
-    if (False(image.flags & ImageFlagBits::Sparse)) {
-        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
-        return;
-    }
-    ASSERT(True(image.flags & ImageFlagBits::Registered));
-    auto it = sparse_views.find(image_id);
-    ASSERT(it != sparse_views.end());
-    auto& sparse_maps = it->second;
-    for (auto& map_view_id : sparse_maps) {
-        const auto& map = slot_map_views[map_view_id];
-        const VAddr cpu_addr = map.cpu_addr;
-        const std::size_t size = map.size;
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
-    }
+    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
 }
 
 template <class P>
@@ -1776,10 +1447,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
     if (invalidate) {
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
-            TrackImage(image, image_id);
+            TrackImage(image);
         }
     } else {
-        RefreshContents(image, image_id);
+        RefreshContents(image);
         SynchronizeAliases(image_id);
     }
     if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 9fbdc1ac6..c9571f7e4 100755
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,7 +16,6 @@ constexpr size_t MAX_MIP_LEVELS = 14;
 constexpr SlotId CORRUPT_ID{0xfffffffe};
 
 using ImageId = SlotId;
-using ImageMapId = SlotId;
 using ImageViewId = SlotId;
 using ImageAllocId = SlotId;
 using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 10093a11d..4efe042b6 100755
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -664,16 +664,6 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
     return offsets;
 }
 
-LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
-    const u32 num_levels = info.resources.levels;
-    const LevelInfo level_info = MakeLevelInfo(info);
-    LevelArray sizes{};
-    for (u32 level = 0; level < num_levels; ++level) {
-        sizes[level] = CalculateLevelSize(level_info, level);
-    }
-    return sizes;
-}
-
 std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
     ASSERT(info.type == ImageType::e3D);
     std::vector<u32> offsets;
@@ -786,20 +776,14 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
     return copies;
 }
 
-bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
-    const GPUVAddr address = config.Address();
-    if (address == 0) {
+bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+    if (config.Address() == 0) {
         return false;
     }
-    if (address > (1ULL << 48)) {
+    if (config.Address() > (u64(1) << 48)) {
         return false;
     }
-    if (gpu_memory.GpuToCpuAddress(address).has_value()) {
-        return true;
-    }
-    const ImageInfo info{config};
-    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
-    return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
+    return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
 }
 
 std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 766502908..cdc5cbc75 100755
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,8 +40,6 @@ struct OverlapResult {
 
 [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
 
-[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
-
 [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
 
 [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -57,7 +55,7 @@ struct OverlapResult {
                                                            const ImageInfo& src,
                                                            SubresourceBase base);
 
-[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
 
 [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
                                                           GPUVAddr gpu_addr, const ImageInfo& info,