diff options
author | pineappleEA <pineaea@gmail.com> | 2021-07-08 04:13:21 +0200 |
---|---|---|
committer | pineappleEA <pineaea@gmail.com> | 2021-07-08 04:13:21 +0200 |
commit | 8a46c57b52760ee05b8adee50645dba3c99b1cb3 (patch) | |
tree | ab50dd3f992cfd607f8e506be82aad3acc6ad2a4 | |
parent | 67d98725f830aa0e448cfffa330e7826d3a424f8 (diff) |
early-access version 1859EA-1859
-rwxr-xr-x | README.md | 2 | ||||
-rwxr-xr-x | src/tests/video_core/buffer_base.cpp | 2 | ||||
-rwxr-xr-x | src/video_core/buffer_cache/buffer_base.h | 14 | ||||
-rwxr-xr-x | src/video_core/buffer_cache/buffer_cache.h | 239 | ||||
-rwxr-xr-x | src/video_core/dma_pusher.cpp | 10 | ||||
-rwxr-xr-x | src/video_core/fence_manager.h | 41 | ||||
-rwxr-xr-x | src/video_core/gpu.cpp | 4 | ||||
-rwxr-xr-x | src/video_core/gpu_thread.cpp | 15 | ||||
-rwxr-xr-x | src/video_core/gpu_thread.h | 5 | ||||
-rwxr-xr-x | src/video_core/rasterizer_interface.h | 3 | ||||
-rwxr-xr-x | src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 | ||||
-rwxr-xr-x | src/video_core/renderer_opengl/gl_rasterizer.h | 1 | ||||
-rwxr-xr-x | src/video_core/renderer_vulkan/vk_fence_manager.cpp | 4 | ||||
-rwxr-xr-x | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 10 | ||||
-rwxr-xr-x | src/video_core/renderer_vulkan/vk_rasterizer.h | 1 | ||||
-rwxr-xr-x | src/video_core/renderer_vulkan/vk_scheduler.h | 4 | ||||
-rwxr-xr-x | src/video_core/texture_cache/types.h | 4 |
17 files changed, 270 insertions, 97 deletions
@@ -1,7 +1,7 @@ | |||
1 | yuzu emulator early access | 1 | yuzu emulator early access |
2 | ============= | 2 | ============= |
3 | 3 | ||
4 | This is the source code for early-access 1858. | 4 | This is the source code for early-access 1859. |
5 | 5 | ||
6 | ## Legal Notice | 6 | ## Legal Notice |
7 | 7 | ||
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index edced69bb..cfcdc2253 100755 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp | |||
@@ -536,7 +536,7 @@ TEST_CASE("BufferBase: Cached write downloads") { | |||
536 | REQUIRE(rasterizer.Count() == 63); | 536 | REQUIRE(rasterizer.Count() == 63); |
537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | 537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); |
538 | int num = 0; | 538 | int num = 0; |
539 | buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 539 | buffer.ForEachDownloadRange(c, WORD, true, [&](u64 offset, u64 size) { ++num; }); |
540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); |
541 | REQUIRE(num == 0); | 541 | REQUIRE(num == 0); |
542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index b121d36a3..9e39858c8 100755 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
@@ -226,19 +226,19 @@ public: | |||
226 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | 226 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified |
227 | template <typename Func> | 227 | template <typename Func> |
228 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 228 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { |
229 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func); | 229 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func); |
230 | } | 230 | } |
231 | 231 | ||
232 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 232 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
233 | template <typename Func> | 233 | template <typename Func> |
234 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 234 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) { |
235 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func); | 235 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func); |
236 | } | 236 | } |
237 | 237 | ||
238 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 238 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
239 | template <typename Func> | 239 | template <typename Func> |
240 | void ForEachDownloadRange(Func&& func) { | 240 | void ForEachDownloadRange(Func&& func) { |
241 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func); | 241 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func); |
242 | } | 242 | } |
243 | 243 | ||
244 | /// Mark buffer as picked | 244 | /// Mark buffer as picked |
@@ -415,7 +415,7 @@ private: | |||
415 | * @param func Function to call for each turned off region | 415 | * @param func Function to call for each turned off region |
416 | */ | 416 | */ |
417 | template <Type type, typename Func> | 417 | template <Type type, typename Func> |
418 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | 418 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) { |
419 | static_assert(type != Type::Untracked); | 419 | static_assert(type != Type::Untracked); |
420 | 420 | ||
421 | const s64 difference = query_cpu_range - cpu_addr; | 421 | const s64 difference = query_cpu_range - cpu_addr; |
@@ -467,7 +467,9 @@ private: | |||
467 | bits = (bits << left_offset) >> left_offset; | 467 | bits = (bits << left_offset) >> left_offset; |
468 | 468 | ||
469 | const u64 current_word = state_words[word_index] & bits; | 469 | const u64 current_word = state_words[word_index] & bits; |
470 | state_words[word_index] &= ~bits; | 470 | if (clear) { |
471 | state_words[word_index] &= ~bits; | ||
472 | } | ||
471 | 473 | ||
472 | if constexpr (type == Type::CPU) { | 474 | if constexpr (type == Type::CPU) { |
473 | const u64 current_bits = untracked_words[word_index] & bits; | 475 | const u64 current_bits = untracked_words[word_index] & bits; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index cad7f902d..f04538dca 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <vector> | 15 | #include <vector> |
16 | 16 | ||
17 | #include <boost/container/small_vector.hpp> | 17 | #include <boost/container/small_vector.hpp> |
18 | #include <boost/icl/interval_set.hpp> | ||
18 | 19 | ||
19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
20 | #include "common/div_ceil.h" | 21 | #include "common/div_ceil.h" |
@@ -77,6 +78,9 @@ class BufferCache { | |||
77 | using Runtime = typename P::Runtime; | 78 | using Runtime = typename P::Runtime; |
78 | using Buffer = typename P::Buffer; | 79 | using Buffer = typename P::Buffer; |
79 | 80 | ||
81 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
82 | using IntervalType = typename IntervalSet::interval_type; | ||
83 | |||
80 | struct Empty {}; | 84 | struct Empty {}; |
81 | 85 | ||
82 | struct OverlapResult { | 86 | struct OverlapResult { |
@@ -148,11 +152,14 @@ public: | |||
148 | /// Return true when there are uncommitted buffers to be downloaded | 152 | /// Return true when there are uncommitted buffers to be downloaded |
149 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | 153 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
150 | 154 | ||
155 | void AccumulateFlushes(); | ||
156 | |||
151 | /// Return true when the caller should wait for async downloads | 157 | /// Return true when the caller should wait for async downloads |
152 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | 158 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
153 | 159 | ||
154 | /// Commit asynchronous downloads | 160 | /// Commit asynchronous downloads |
155 | void CommitAsyncFlushes(); | 161 | void CommitAsyncFlushes(); |
162 | void CommitAsyncFlushesHigh(); | ||
156 | 163 | ||
157 | /// Pop asynchronous downloads | 164 | /// Pop asynchronous downloads |
158 | void PopAsyncFlushes(); | 165 | void PopAsyncFlushes(); |
@@ -160,6 +167,9 @@ public: | |||
160 | /// Return true when a CPU region is modified from the GPU | 167 | /// Return true when a CPU region is modified from the GPU |
161 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 168 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
162 | 169 | ||
170 | /// Return true when a CPU region is modified from the CPU | ||
171 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
172 | |||
163 | std::mutex mutex; | 173 | std::mutex mutex; |
164 | 174 | ||
165 | private: | 175 | private: |
@@ -272,8 +282,6 @@ private: | |||
272 | 282 | ||
273 | void DeleteBuffer(BufferId buffer_id); | 283 | void DeleteBuffer(BufferId buffer_id); |
274 | 284 | ||
275 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | ||
276 | |||
277 | void NotifyBufferDeletion(); | 285 | void NotifyBufferDeletion(); |
278 | 286 | ||
279 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; | 287 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; |
@@ -327,9 +335,9 @@ private: | |||
327 | 335 | ||
328 | std::vector<BufferId> cached_write_buffer_ids; | 336 | std::vector<BufferId> cached_write_buffer_ids; |
329 | 337 | ||
330 | // TODO: This data structure is not optimal and it should be reworked | 338 | IntervalSet uncommitted_ranges; |
331 | std::vector<BufferId> uncommitted_downloads; | 339 | IntervalSet common_ranges; |
332 | std::deque<std::vector<BufferId>> committed_downloads; | 340 | std::deque<IntervalSet> committed_ranges; |
333 | 341 | ||
334 | size_t immediate_buffer_capacity = 0; | 342 | size_t immediate_buffer_capacity = 0; |
335 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 343 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
@@ -352,6 +360,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
352 | // Ensure the first slot is used for the null buffer | 360 | // Ensure the first slot is used for the null buffer |
353 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 361 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
354 | deletion_iterator = slot_buffers.end(); | 362 | deletion_iterator = slot_buffers.end(); |
363 | common_ranges.clear(); | ||
355 | } | 364 | } |
356 | 365 | ||
357 | template <class P> | 366 | template <class P> |
@@ -547,29 +556,30 @@ void BufferCache<P>::FlushCachedWrites() { | |||
547 | 556 | ||
548 | template <class P> | 557 | template <class P> |
549 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | 558 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { |
550 | return !uncommitted_downloads.empty(); | 559 | return !uncommitted_ranges.empty() || !committed_ranges.empty(); |
551 | } | 560 | } |
552 | 561 | ||
553 | template <class P> | 562 | template <class P> |
554 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | 563 | void BufferCache<P>::AccumulateFlushes() { |
555 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | 564 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { |
565 | uncommitted_ranges.clear(); | ||
566 | return; | ||
567 | } | ||
568 | if (uncommitted_ranges.empty()) { | ||
569 | return; | ||
570 | } | ||
571 | committed_ranges.emplace_back(std::move(uncommitted_ranges)); | ||
556 | } | 572 | } |
557 | 573 | ||
558 | template <class P> | 574 | template <class P> |
559 | void BufferCache<P>::CommitAsyncFlushes() { | 575 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
560 | // This is intentionally passing the value by copy | 576 | return false; |
561 | committed_downloads.push_front(uncommitted_downloads); | ||
562 | uncommitted_downloads.clear(); | ||
563 | } | 577 | } |
564 | 578 | ||
565 | template <class P> | 579 | template <class P> |
566 | void BufferCache<P>::PopAsyncFlushes() { | 580 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
567 | if (committed_downloads.empty()) { | 581 | AccumulateFlushes(); |
568 | return; | 582 | if (committed_ranges.empty()) { |
569 | } | ||
570 | auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); }); | ||
571 | const std::span<const BufferId> download_ids = committed_downloads.back(); | ||
572 | if (download_ids.empty()) { | ||
573 | return; | 583 | return; |
574 | } | 584 | } |
575 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 585 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
@@ -577,20 +587,67 @@ void BufferCache<P>::PopAsyncFlushes() { | |||
577 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | 587 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; |
578 | u64 total_size_bytes = 0; | 588 | u64 total_size_bytes = 0; |
579 | u64 largest_copy = 0; | 589 | u64 largest_copy = 0; |
580 | for (const BufferId buffer_id : download_ids) { | 590 | for (const IntervalSet& intervals : committed_ranges) { |
581 | slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) { | 591 | for (auto& interval : intervals) { |
582 | downloads.push_back({ | 592 | const std::size_t size = interval.upper() - interval.lower(); |
583 | BufferCopy{ | 593 | const VAddr cpu_addr = interval.lower(); |
584 | .src_offset = range_offset, | 594 | const VAddr cpu_addr_end = interval.upper(); |
585 | .dst_offset = total_size_bytes, | 595 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
586 | .size = range_size, | 596 | boost::container::small_vector<BufferCopy, 1> copies; |
587 | }, | 597 | buffer.ForEachDownloadRange( |
588 | buffer_id, | 598 | cpu_addr, size, true, [&](u64 range_offset, u64 range_size) { |
599 | const VAddr buffer_addr = buffer.CpuAddr(); | ||
600 | const auto add_download = [&](VAddr start, VAddr end) { | ||
601 | const u64 new_offset = start - buffer_addr; | ||
602 | const u64 new_size = end - start; | ||
603 | downloads.push_back({ | ||
604 | BufferCopy{ | ||
605 | .src_offset = new_offset, | ||
606 | .dst_offset = total_size_bytes, | ||
607 | .size = new_size, | ||
608 | }, | ||
609 | buffer_id, | ||
610 | }); | ||
611 | // Align up to avoid cache conflicts | ||
612 | constexpr u64 align = 256ULL; | ||
613 | constexpr u64 mask = ~(align - 1ULL); | ||
614 | total_size_bytes += (new_size + align - 1) & mask; | ||
615 | largest_copy = std::max(largest_copy, new_size); | ||
616 | }; | ||
617 | |||
618 | const VAddr start_address = buffer_addr + range_offset; | ||
619 | const VAddr end_address = start_address + range_size; | ||
620 | const IntervalType search_interval{cpu_addr, 1}; | ||
621 | auto it = common_ranges.lower_bound(search_interval); | ||
622 | if (it == common_ranges.end()) { | ||
623 | it = common_ranges.begin(); | ||
624 | } | ||
625 | while (it != common_ranges.end()) { | ||
626 | VAddr inter_addr_end = it->upper(); | ||
627 | VAddr inter_addr = it->lower(); | ||
628 | if (inter_addr >= end_address) { | ||
629 | break; | ||
630 | } | ||
631 | if (inter_addr_end <= start_address) { | ||
632 | it++; | ||
633 | continue; | ||
634 | } | ||
635 | if (inter_addr_end > end_address) { | ||
636 | inter_addr_end = end_address; | ||
637 | } | ||
638 | if (inter_addr < start_address) { | ||
639 | inter_addr = start_address; | ||
640 | } | ||
641 | add_download(inter_addr, inter_addr_end); | ||
642 | it++; | ||
643 | } | ||
644 | const IntervalType subtract_interval{start_address, end_address}; | ||
645 | common_ranges.subtract(subtract_interval); | ||
646 | }); | ||
589 | }); | 647 | }); |
590 | total_size_bytes += range_size; | 648 | } |
591 | largest_copy = std::max(largest_copy, range_size); | ||
592 | }); | ||
593 | } | 649 | } |
650 | committed_ranges.clear(); | ||
594 | if (downloads.empty()) { | 651 | if (downloads.empty()) { |
595 | return; | 652 | return; |
596 | } | 653 | } |
@@ -623,6 +680,19 @@ void BufferCache<P>::PopAsyncFlushes() { | |||
623 | } | 680 | } |
624 | 681 | ||
625 | template <class P> | 682 | template <class P> |
683 | void BufferCache<P>::CommitAsyncFlushes() { | ||
684 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | ||
685 | CommitAsyncFlushesHigh(); | ||
686 | } else { | ||
687 | uncommitted_ranges.clear(); | ||
688 | committed_ranges.clear(); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | template <class P> | ||
693 | void BufferCache<P>::PopAsyncFlushes() {} | ||
694 | |||
695 | template <class P> | ||
626 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 696 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
627 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); | 697 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); |
628 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | 698 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { |
@@ -642,6 +712,25 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
642 | } | 712 | } |
643 | 713 | ||
644 | template <class P> | 714 | template <class P> |
715 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | ||
716 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); | ||
717 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | ||
718 | const BufferId image_id = page_table[page]; | ||
719 | if (!image_id) { | ||
720 | ++page; | ||
721 | continue; | ||
722 | } | ||
723 | Buffer& buffer = slot_buffers[image_id]; | ||
724 | if (buffer.IsRegionCpuModified(addr, size)) { | ||
725 | return true; | ||
726 | } | ||
727 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
728 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
729 | } | ||
730 | return false; | ||
731 | } | ||
732 | |||
733 | template <class P> | ||
645 | void BufferCache<P>::BindHostIndexBuffer() { | 734 | void BufferCache<P>::BindHostIndexBuffer() { |
646 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 735 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
647 | TouchBuffer(buffer); | 736 | TouchBuffer(buffer); |
@@ -1010,16 +1099,16 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
1010 | Buffer& buffer = slot_buffers[buffer_id]; | 1099 | Buffer& buffer = slot_buffers[buffer_id]; |
1011 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1100 | buffer.MarkRegionAsGpuModified(cpu_addr, size); |
1012 | 1101 | ||
1013 | const bool is_accuracy_high = Settings::IsGPULevelHigh(); | 1102 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; |
1103 | common_ranges.add(base_interval); | ||
1104 | |||
1105 | const bool is_accuracy_high = | ||
1106 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
1014 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | 1107 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
1015 | if (!is_accuracy_high || !is_async) { | 1108 | if (!is_async && !is_accuracy_high) { |
1016 | return; | ||
1017 | } | ||
1018 | if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) { | ||
1019 | // Already inserted | ||
1020 | return; | 1109 | return; |
1021 | } | 1110 | } |
1022 | uncommitted_downloads.push_back(buffer_id); | 1111 | uncommitted_ranges.add(base_interval); |
1023 | } | 1112 | } |
1024 | 1113 | ||
1025 | template <class P> | 1114 | template <class P> |
@@ -1103,7 +1192,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
1103 | if (!copies.empty()) { | 1192 | if (!copies.empty()) { |
1104 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | 1193 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); |
1105 | } | 1194 | } |
1106 | ReplaceBufferDownloads(overlap_id, new_buffer_id); | ||
1107 | DeleteBuffer(overlap_id); | 1195 | DeleteBuffer(overlap_id); |
1108 | } | 1196 | } |
1109 | 1197 | ||
@@ -1244,14 +1332,51 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
1244 | boost::container::small_vector<BufferCopy, 1> copies; | 1332 | boost::container::small_vector<BufferCopy, 1> copies; |
1245 | u64 total_size_bytes = 0; | 1333 | u64 total_size_bytes = 0; |
1246 | u64 largest_copy = 0; | 1334 | u64 largest_copy = 0; |
1247 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1335 | buffer.ForEachDownloadRange(cpu_addr, size, true, [&](u64 range_offset, u64 range_size) { |
1248 | copies.push_back(BufferCopy{ | 1336 | const VAddr buffer_addr = buffer.CpuAddr(); |
1249 | .src_offset = range_offset, | 1337 | const auto add_download = [&](VAddr start, VAddr end) { |
1250 | .dst_offset = total_size_bytes, | 1338 | const u64 new_offset = start - buffer_addr; |
1251 | .size = range_size, | 1339 | const u64 new_size = end - start; |
1252 | }); | 1340 | copies.push_back(BufferCopy{ |
1253 | total_size_bytes += range_size; | 1341 | .src_offset = new_offset, |
1254 | largest_copy = std::max(largest_copy, range_size); | 1342 | .dst_offset = total_size_bytes, |
1343 | .size = new_size, | ||
1344 | }); | ||
1345 | // Align up to avoid cache conflicts | ||
1346 | constexpr u64 align = 256ULL; | ||
1347 | constexpr u64 mask = ~(align - 1ULL); | ||
1348 | total_size_bytes += (new_size + align - 1) & mask; | ||
1349 | largest_copy = std::max(largest_copy, new_size); | ||
1350 | }; | ||
1351 | |||
1352 | const VAddr start_address = buffer_addr + range_offset; | ||
1353 | const VAddr end_address = start_address + range_size; | ||
1354 | const IntervalType search_interval{start_address - range_size, 1}; | ||
1355 | auto it = common_ranges.lower_bound(search_interval); | ||
1356 | if (it == common_ranges.end()) { | ||
1357 | it = common_ranges.begin(); | ||
1358 | } | ||
1359 | while (it != common_ranges.end()) { | ||
1360 | VAddr inter_addr_end = it->upper(); | ||
1361 | VAddr inter_addr = it->lower(); | ||
1362 | if (inter_addr >= end_address) { | ||
1363 | break; | ||
1364 | } | ||
1365 | if (inter_addr_end <= start_address) { | ||
1366 | it++; | ||
1367 | continue; | ||
1368 | } | ||
1369 | if (inter_addr_end > end_address) { | ||
1370 | inter_addr_end = end_address; | ||
1371 | } | ||
1372 | if (inter_addr < start_address) { | ||
1373 | inter_addr = start_address; | ||
1374 | } | ||
1375 | add_download(inter_addr, inter_addr_end); | ||
1376 | it++; | ||
1377 | } | ||
1378 | const IntervalType subtract_interval{start_address, end_address}; | ||
1379 | common_ranges.subtract(subtract_interval); | ||
1255 | }); | 1380 | }); |
1256 | if (total_size_bytes == 0) { | 1381 | if (total_size_bytes == 0) { |
1257 | return; | 1382 | return; |
@@ -1316,18 +1441,6 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
1316 | } | 1441 | } |
1317 | 1442 | ||
1318 | template <class P> | 1443 | template <class P> |
1319 | void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) { | ||
1320 | const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) { | ||
1321 | std::ranges::replace(buffers, old_buffer_id, new_buffer_id); | ||
1322 | if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) { | ||
1323 | buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end()); | ||
1324 | } | ||
1325 | }; | ||
1326 | replace(uncommitted_downloads); | ||
1327 | std::ranges::for_each(committed_downloads, replace); | ||
1328 | } | ||
1329 | |||
1330 | template <class P> | ||
1331 | void BufferCache<P>::NotifyBufferDeletion() { | 1444 | void BufferCache<P>::NotifyBufferDeletion() { |
1332 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 1445 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
1333 | dirty_uniform_buffers.fill(~u32{0}); | 1446 | dirty_uniform_buffers.fill(~u32{0}); |
@@ -1349,15 +1462,9 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
1349 | if (!cpu_addr || size == 0) { | 1462 | if (!cpu_addr || size == 0) { |
1350 | return NULL_BINDING; | 1463 | return NULL_BINDING; |
1351 | } | 1464 | } |
1352 | // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range. | ||
1353 | // It exists due to some games like Astral Chain operate out of bounds. | ||
1354 | // Binding the whole map range would be technically correct, but games have large maps that make | ||
1355 | // this approach unaffordable for now. | ||
1356 | static constexpr u32 arbitrary_extra_bytes = 0xc000; | ||
1357 | const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr)); | ||
1358 | const Binding binding{ | 1465 | const Binding binding{ |
1359 | .cpu_addr = *cpu_addr, | 1466 | .cpu_addr = *cpu_addr, |
1360 | .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end), | 1467 | .size = size, |
1361 | .buffer_id = BufferId{}, | 1468 | .buffer_id = BufferId{}, |
1362 | }; | 1469 | }; |
1363 | return binding; | 1470 | return binding; |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 8b33c04ab..8d28bd884 100755 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include "common/cityhash.h" | 5 | #include "common/cityhash.h" |
6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
7 | #include "common/settings.h" | ||
7 | #include "core/core.h" | 8 | #include "core/core.h" |
8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
9 | #include "video_core/dma_pusher.h" | 10 | #include "video_core/dma_pusher.h" |
@@ -76,8 +77,13 @@ bool DmaPusher::Step() { | |||
76 | 77 | ||
77 | // Push buffer non-empty, read a word | 78 | // Push buffer non-empty, read a word |
78 | command_headers.resize(command_list_header.size); | 79 | command_headers.resize(command_list_header.size); |
79 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 80 | if (Settings::IsGPULevelHigh()) { |
80 | command_list_header.size * sizeof(u32)); | 81 | gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), |
82 | command_list_header.size * sizeof(u32)); | ||
83 | } else { | ||
84 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | ||
85 | command_list_header.size * sizeof(u32)); | ||
86 | } | ||
81 | } | 87 | } |
82 | for (std::size_t index = 0; index < command_headers.size();) { | 88 | for (std::size_t index = 0; index < command_headers.size();) { |
83 | const CommandHeader& command_header = command_headers[index]; | 89 | const CommandHeader& command_header = command_headers[index]; |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index f055b61e9..be1bc7f64 100755 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <queue> | 8 | #include <queue> |
9 | 9 | ||
10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
11 | #include "common/settings.h" | ||
11 | #include "core/core.h" | 12 | #include "core/core.h" |
12 | #include "video_core/delayed_destruction_ring.h" | 13 | #include "video_core/delayed_destruction_ring.h" |
13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
@@ -53,6 +54,12 @@ public: | |||
53 | delayed_destruction_ring.Tick(); | 54 | delayed_destruction_ring.Tick(); |
54 | } | 55 | } |
55 | 56 | ||
57 | // Unlike other fences, this one doesn't release pending fences; it only accumulates the buffer cache's flushes | ||
58 | void SignalOrdering() { | ||
59 | std::scoped_lock lock{buffer_cache.mutex}; | ||
60 | buffer_cache.AccumulateFlushes(); | ||
61 | } | ||
62 | |||
56 | void SignalSemaphore(GPUVAddr addr, u32 value) { | 63 | void SignalSemaphore(GPUVAddr addr, u32 value) { |
57 | TryReleasePendingFences(); | 64 | TryReleasePendingFences(); |
58 | const bool should_flush = ShouldFlush(); | 65 | const bool should_flush = ShouldFlush(); |
@@ -96,6 +103,23 @@ public: | |||
96 | } | 103 | } |
97 | } | 104 | } |
98 | 105 | ||
106 | void TryReleasePendingFences() { | ||
107 | while (!fences.empty()) { | ||
108 | TFence& current_fence = fences.front(); | ||
109 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | ||
110 | return; | ||
111 | } | ||
112 | PopAsyncFlushes(); | ||
113 | if (current_fence->IsSemaphore()) { | ||
114 | gpu_memory.template Write<u32>(current_fence->GetAddress(), | ||
115 | current_fence->GetPayload()); | ||
116 | } else { | ||
117 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
118 | } | ||
119 | PopFence(); | ||
120 | } | ||
121 | } | ||
122 | |||
99 | protected: | 123 | protected: |
100 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 124 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
101 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, | 125 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, |
@@ -125,23 +149,6 @@ protected: | |||
125 | TQueryCache& query_cache; | 149 | TQueryCache& query_cache; |
126 | 150 | ||
127 | private: | 151 | private: |
128 | void TryReleasePendingFences() { | ||
129 | while (!fences.empty()) { | ||
130 | TFence& current_fence = fences.front(); | ||
131 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | ||
132 | return; | ||
133 | } | ||
134 | PopAsyncFlushes(); | ||
135 | if (current_fence->IsSemaphore()) { | ||
136 | gpu_memory.template Write<u32>(current_fence->GetAddress(), | ||
137 | current_fence->GetPayload()); | ||
138 | } else { | ||
139 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
140 | } | ||
141 | PopFence(); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | bool ShouldWait() const { | 152 | bool ShouldWait() const { |
146 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 153 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
147 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | 154 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 35cc561be..f317ddc2b 100755 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
@@ -268,11 +268,13 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
268 | case BufferMethods::SemaphoreAddressHigh: | 268 | case BufferMethods::SemaphoreAddressHigh: |
269 | case BufferMethods::SemaphoreAddressLow: | 269 | case BufferMethods::SemaphoreAddressLow: |
270 | case BufferMethods::SemaphoreSequence: | 270 | case BufferMethods::SemaphoreSequence: |
271 | case BufferMethods::RefCnt: | ||
272 | case BufferMethods::UnkCacheFlush: | 271 | case BufferMethods::UnkCacheFlush: |
273 | case BufferMethods::WrcacheFlush: | 272 | case BufferMethods::WrcacheFlush: |
274 | case BufferMethods::FenceValue: | 273 | case BufferMethods::FenceValue: |
275 | break; | 274 | break; |
275 | case BufferMethods::RefCnt: | ||
276 | rasterizer->SignalReference(); | ||
277 | break; | ||
276 | case BufferMethods::FenceAction: | 278 | case BufferMethods::FenceAction: |
277 | ProcessFenceActionMethod(); | 279 | ProcessFenceActionMethod(); |
278 | break; | 280 | break; |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 46f642b19..25c0d30dd 100755 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
@@ -8,6 +8,7 @@ | |||
8 | #include "common/settings.h" | 8 | #include "common/settings.h" |
9 | #include "common/thread.h" | 9 | #include "common/thread.h" |
10 | #include "core/core.h" | 10 | #include "core/core.h" |
11 | #include "core/core_timing.h" | ||
11 | #include "core/frontend/emu_window.h" | 12 | #include "core/frontend/emu_window.h" |
12 | #include "video_core/dma_pusher.h" | 13 | #include "video_core/dma_pusher.h" |
13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
@@ -83,6 +84,17 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | |||
83 | rasterizer = renderer.ReadRasterizer(); | 84 | rasterizer = renderer.ReadRasterizer(); |
84 | thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | 85 | thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), |
85 | std::ref(dma_pusher), std::ref(state)); | 86 | std::ref(dma_pusher), std::ref(state)); |
87 | gpu_sync_event = Core::Timing::CreateEvent( | ||
88 | "GPUHostSyncCallback", [this](std::uintptr_t, std::chrono::nanoseconds) { | ||
89 | if (!state.is_running) { | ||
90 | return; | ||
91 | } | ||
92 | |||
93 | OnCommandListEnd(); | ||
94 | const auto time_interval = std::chrono::nanoseconds{500 * 1000}; | ||
95 | system.CoreTiming().ScheduleEvent(time_interval, gpu_sync_event); | ||
96 | }); | ||
97 | system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{500 * 1000}, gpu_sync_event); | ||
86 | } | 98 | } |
87 | 99 | ||
88 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 100 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
@@ -128,6 +140,9 @@ void ThreadManager::ShutDown() { | |||
128 | state.cv.notify_all(); | 140 | state.cv.notify_all(); |
129 | } | 141 | } |
130 | 142 | ||
143 | system.CoreTiming().UnscheduleEvent(gpu_sync_event, 0); | ||
144 | system.CoreTiming().RemoveEvent(gpu_sync_event); | ||
145 | |||
131 | if (!thread.joinable()) { | 146 | if (!thread.joinable()) { |
132 | return; | 147 | return; |
133 | } | 148 | } |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 11a648f38..ea98df3b1 100755 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
@@ -20,6 +20,10 @@ class DmaPusher; | |||
20 | } // namespace Tegra | 20 | } // namespace Tegra |
21 | 21 | ||
22 | namespace Core { | 22 | namespace Core { |
23 | namespace Timing { | ||
24 | class CoreTiming; | ||
25 | struct EventType; | ||
26 | } // namespace Timing | ||
23 | namespace Frontend { | 27 | namespace Frontend { |
24 | class GraphicsContext; | 28 | class GraphicsContext; |
25 | } | 29 | } |
@@ -150,6 +154,7 @@ private: | |||
150 | 154 | ||
151 | SynchState state; | 155 | SynchState state; |
152 | std::thread thread; | 156 | std::thread thread; |
157 | std::shared_ptr<Core::Timing::EventType> gpu_sync_event; | ||
153 | }; | 158 | }; |
154 | 159 | ||
155 | } // namespace VideoCommon::GPUThread | 160 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index fcbdacae3..554dd70b6 100755 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
@@ -63,6 +63,9 @@ public: | |||
63 | /// Signal a GPU based syncpoint as a fence | 63 | /// Signal a GPU based syncpoint as a fence |
64 | virtual void SignalSyncPoint(u32 value) = 0; | 64 | virtual void SignalSyncPoint(u32 value) = 0; |
65 | 65 | ||
66 | /// Signal a GPU based reference as point | ||
67 | virtual void SignalReference() = 0; | ||
68 | |||
66 | /// Release all pending fences. | 69 | /// Release all pending fences. |
67 | virtual void ReleaseFences() = 0; | 70 | virtual void ReleaseFences() = 0; |
68 | 71 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 99bd7ac9c..183861a23 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
@@ -654,6 +654,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { | |||
654 | fence_manager.SignalSyncPoint(value); | 654 | fence_manager.SignalSyncPoint(value); |
655 | } | 655 | } |
656 | 656 | ||
657 | void RasterizerOpenGL::SignalReference() { | ||
658 | if (!gpu.IsAsync()) { | ||
659 | return; | ||
660 | } | ||
661 | fence_manager.SignalOrdering(); | ||
662 | } | ||
663 | |||
657 | void RasterizerOpenGL::ReleaseFences() { | 664 | void RasterizerOpenGL::ReleaseFences() { |
658 | if (!gpu.IsAsync()) { | 665 | if (!gpu.IsAsync()) { |
659 | return; | 666 | return; |
@@ -670,6 +677,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
670 | 677 | ||
671 | void RasterizerOpenGL::WaitForIdle() { | 678 | void RasterizerOpenGL::WaitForIdle() { |
672 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | 679 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
680 | SignalReference(); | ||
673 | } | 681 | } |
674 | 682 | ||
675 | void RasterizerOpenGL::FragmentBarrier() { | 683 | void RasterizerOpenGL::FragmentBarrier() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ad7326ece..87f44cd62 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
@@ -85,6 +85,7 @@ public: | |||
85 | void ModifyGPUMemory(GPUVAddr addr, u64 size) override; | 85 | void ModifyGPUMemory(GPUVAddr addr, u64 size) override; |
86 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 86 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
87 | void SignalSyncPoint(u32 value) override; | 87 | void SignalSyncPoint(u32 value) override; |
88 | void SignalReference() override; | ||
88 | void ReleaseFences() override; | 89 | void ReleaseFences() override; |
89 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 90 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
90 | void WaitForIdle() override; | 91 | void WaitForIdle() override; |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 3bec48d14..c2d6676e7 100755 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
@@ -34,6 +34,10 @@ bool InnerFence::IsSignaled() const { | |||
34 | if (is_stubbed) { | 34 | if (is_stubbed) { |
35 | return true; | 35 | return true; |
36 | } | 36 | } |
37 | if (scheduler.IsFree(wait_tick)) { | ||
38 | return true; | ||
39 | } | ||
40 | scheduler.Refresh(); | ||
37 | return scheduler.IsFree(wait_tick); | 41 | return scheduler.IsFree(wait_tick); |
38 | } | 42 | } |
39 | 43 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8ae2202bd..4fbf93ca0 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
@@ -600,11 +600,18 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { | |||
600 | fence_manager.SignalSyncPoint(value); | 600 | fence_manager.SignalSyncPoint(value); |
601 | } | 601 | } |
602 | 602 | ||
603 | void RasterizerVulkan::SignalReference() { | ||
604 | if (!gpu.IsAsync()) { | ||
605 | return; | ||
606 | } | ||
607 | fence_manager.SignalOrdering(); | ||
608 | } | ||
609 | |||
603 | void RasterizerVulkan::ReleaseFences() { | 610 | void RasterizerVulkan::ReleaseFences() { |
604 | if (!gpu.IsAsync()) { | 611 | if (!gpu.IsAsync()) { |
605 | return; | 612 | return; |
606 | } | 613 | } |
607 | fence_manager.WaitPendingFences(); | 614 | fence_manager.TryReleasePendingFences(); |
608 | } | 615 | } |
609 | 616 | ||
610 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 617 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
@@ -632,6 +639,7 @@ void RasterizerVulkan::WaitForIdle() { | |||
632 | cmdbuf.SetEvent(event, flags); | 639 | cmdbuf.SetEvent(event, flags); |
633 | cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); | 640 | cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); |
634 | }); | 641 | }); |
642 | SignalReference(); | ||
635 | } | 643 | } |
636 | 644 | ||
637 | void RasterizerVulkan::FragmentBarrier() { | 645 | void RasterizerVulkan::FragmentBarrier() { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a29022a50..d15c36ddc 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
@@ -77,6 +77,7 @@ public: | |||
77 | void ModifyGPUMemory(GPUVAddr addr, u64 size) override; | 77 | void ModifyGPUMemory(GPUVAddr addr, u64 size) override; |
78 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 78 | void SignalSemaphore(GPUVAddr addr, u32 value) override; |
79 | void SignalSyncPoint(u32 value) override; | 79 | void SignalSyncPoint(u32 value) override; |
80 | void SignalReference() override; | ||
80 | void ReleaseFences() override; | 81 | void ReleaseFences() override; |
81 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 82 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
82 | void WaitForIdle() override; | 83 | void WaitForIdle() override; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..9e0a1d4e6 100755 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
@@ -83,6 +83,10 @@ public: | |||
83 | return master_semaphore->IsFree(tick); | 83 | return master_semaphore->IsFree(tick); |
84 | } | 84 | } |
85 | 85 | ||
86 | void Refresh() const noexcept { | ||
87 | return master_semaphore->Refresh(); | ||
88 | } | ||
89 | |||
86 | /// Waits for the given tick to trigger on the GPU. | 90 | /// Waits for the given tick to trigger on the GPU. |
87 | void Wait(u64 tick) { | 91 | void Wait(u64 tick) { |
88 | master_semaphore->Wait(tick); | 92 | master_semaphore->Wait(tick); |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 9fbdc1ac6..47a11cb2f 100755 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h | |||
@@ -133,8 +133,8 @@ struct BufferImageCopy { | |||
133 | }; | 133 | }; |
134 | 134 | ||
135 | struct BufferCopy { | 135 | struct BufferCopy { |
136 | size_t src_offset; | 136 | u64 src_offset; |
137 | size_t dst_offset; | 137 | u64 dst_offset; |
138 | size_t size; | 138 | size_t size; |
139 | }; | 139 | }; |
140 | 140 | ||