aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpineappleEA <pineaea@gmail.com>2021-07-08 19:29:20 +0200
committerpineappleEA <pineaea@gmail.com>2021-07-08 19:29:20 +0200
commit2da54f6e737aa27aa7d8475085b640f0a739f0d9 (patch)
treee8e833330a68e44c6f35f4d925fe2ff70bb4ce97
parent8a46c57b52760ee05b8adee50645dba3c99b1cb3 (diff)
early-access version 1860EA-1860
-rwxr-xr-xREADME.md2
-rwxr-xr-xsrc/audio_core/audio_renderer.cpp30
-rwxr-xr-xsrc/audio_core/command_generator.cpp95
-rwxr-xr-xsrc/audio_core/command_generator.h23
-rwxr-xr-xsrc/audio_core/sink_context.cpp15
-rwxr-xr-xsrc/audio_core/sink_context.h2
-rwxr-xr-xsrc/video_core/buffer_cache/buffer_cache.h1
-rwxr-xr-xsrc/video_core/engines/shader_bytecode.h4
-rwxr-xr-xsrc/video_core/renderer_opengl/gl_arb_decompiler.cpp82
-rwxr-xr-xsrc/video_core/renderer_opengl/gl_shader_decompiler.cpp146
-rwxr-xr-xsrc/video_core/renderer_vulkan/vk_shader_decompiler.cpp115
-rwxr-xr-xsrc/video_core/shader/control_flow.cpp159
-rwxr-xr-xsrc/video_core/shader/control_flow.h13
-rwxr-xr-xsrc/video_core/shader/decode.cpp223
-rwxr-xr-xsrc/video_core/shader/decode/other.cpp11
-rwxr-xr-xsrc/video_core/shader/decode/texture.cpp4
-rwxr-xr-xsrc/video_core/shader/node.h15
-rwxr-xr-xsrc/video_core/shader/node_helper.cpp5
-rwxr-xr-xsrc/video_core/shader/node_helper.h3
-rwxr-xr-xsrc/video_core/shader/shader_ir.h95
20 files changed, 313 insertions, 730 deletions
diff --git a/README.md b/README.md
index 42f6be0ac..ae29363e6 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
1yuzu emulator early access 1yuzu emulator early access
2============= 2=============
3 3
4This is the source code for early-access 1859. 4This is the source code for early-access 1860.
5 5
6## Legal Notice 6## Legal Notice
7 7
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index ccd5ca6cc..7dba739b4 100755
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -29,10 +29,9 @@ namespace {
29 (static_cast<float>(r_channel) * r_mix_amount))); 29 (static_cast<float>(r_channel) * r_mix_amount)));
30} 30}
31 31
32[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel, 32[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
33 s16 fc_channel, 33 s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
34 [[maybe_unused]] s16 lf_channel, 34 s16 br_channel) {
35 s16 bl_channel, s16 br_channel) {
36 // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels 35 // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels
37 // are mixed to be 36.94% 36 // are mixed to be 36.94%
38 37
@@ -57,11 +56,11 @@ namespace {
57 const std::array<float_le, 4>& coeff) { 56 const std::array<float_le, 4>& coeff) {
58 const auto left = 57 const auto left =
59 static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + 58 static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
60 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0]; 59 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
61 60
62 const auto right = 61 const auto right =
63 static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + 62 static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
64 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0]; 63 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
65 64
66 return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))}; 65 return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
67} 66}
@@ -241,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
241 const auto channel_count = buffer_offsets.size(); 240 const auto channel_count = buffer_offsets.size();
242 const auto& final_mix = mix_context.GetFinalMixInfo(); 241 const auto& final_mix = mix_context.GetFinalMixInfo();
243 const auto& in_params = final_mix.GetInParams(); 242 const auto& in_params = final_mix.GetInParams();
244 std::vector<s32*> mix_buffers(channel_count); 243 std::vector<std::span<s32>> mix_buffers(channel_count);
245 for (std::size_t i = 0; i < channel_count; i++) { 244 for (std::size_t i = 0; i < channel_count; i++) {
246 mix_buffers[i] = 245 mix_buffers[i] =
247 command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]); 246 command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -294,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
294 buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample); 293 buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
295 } else if (stream_channel_count == 2) { 294 } else if (stream_channel_count == 2) {
296 // Mix all channels into 2 channels 295 // Mix all channels into 2 channels
297 if (sink_context.HasDownMixingCoefficients()) { 296 const auto [left, right] = Mix6To2WithCoefficients(
298 const auto [left, right] = Mix6To2WithCoefficients( 297 fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
299 fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, 298 sink_context.GetDownmixCoefficients());
300 sink_context.GetDownmixCoefficients()); 299 buffer[i * stream_channel_count + 0] = left;
301 buffer[i * stream_channel_count + 0] = left; 300 buffer[i * stream_channel_count + 1] = right;
302 buffer[i * stream_channel_count + 1] = right;
303 } else {
304 const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
305 lf_sample, bl_sample, br_sample);
306 buffer[i * stream_channel_count + 0] = left;
307 buffer[i * stream_channel_count + 1] = right;
308 }
309 } else if (stream_channel_count == 6) { 301 } else if (stream_channel_count == 6) {
310 // Pass through 302 // Pass through
311 buffer[i * stream_channel_count + 0] = fl_sample; 303 buffer[i * stream_channel_count + 0] = fl_sample;
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index b3250be09..3b28806b2 100755
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
31 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; 31 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
32 32
33template <std::size_t N> 33template <std::size_t N>
34void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { 34void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
35 for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) { 35 for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
36 for (std::size_t j = 0; j < N; j++) { 36 for (std::size_t j = 0; j < N; j++) {
37 output[i + j] += 37 output[i + j] +=
@@ -40,7 +40,17 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
40 } 40 }
41} 41}
42 42
43s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) { 43s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
44 s32 sample_count) {
45 // XC2 passes in NaN mix volumes, causing further issues as we handle everything as s32 rather
46 // than float, so the NaN propogation is lost. As the samples get further modified for
47 // volume etc, they can get out of NaN range, so a later heuristic for catching this is
48 // more difficult. Handle that here by setting these samples to silence.
49 if (std::isnan(gain)) {
50 gain = 0.0f;
51 delta = 0.0f;
52 }
53
44 s32 x = 0; 54 s32 x = 0;
45 for (s32 i = 0; i < sample_count; i++) { 55 for (s32 i = 0; i < sample_count; i++) {
46 x = static_cast<s32>(static_cast<float>(input[i]) * gain); 56 x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +60,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
50 return x; 60 return x;
51} 61}
52 62
53void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) { 63void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
64 s32 sample_count) {
54 for (s32 i = 0; i < sample_count; i++) { 65 for (s32 i = 0; i < sample_count; i++) {
55 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); 66 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
56 gain += delta; 67 gain += delta;
57 } 68 }
58} 69}
59 70
60void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) { 71void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
72 s32 sample_count) {
61 for (s32 i = 0; i < sample_count; i++) { 73 for (s32 i = 0; i < sample_count; i++) {
62 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); 74 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
63 } 75 }
64} 76}
65 77
66s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { 78s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
67 const bool positive = first_sample > 0; 79 const bool positive = first_sample > 0;
68 auto final_sample = std::abs(first_sample); 80 auto final_sample = std::abs(first_sample);
69 for (s32 i = 0; i < sample_count; i++) { 81 for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +140,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
128 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; 140 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
129 141
130template <std::size_t CHANNEL_COUNT> 142template <std::size_t CHANNEL_COUNT>
131void ApplyReverbGeneric(I3dl2ReverbState& state, 143void ApplyReverbGeneric(
132 const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input, 144 I3dl2ReverbState& state,
133 const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output, 145 const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
134 s32 sample_count) { 146 const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
135 147
136 auto GetTapLookup = []() { 148 auto GetTapLookup = []() {
137 if constexpr (CHANNEL_COUNT == 1) { 149 if constexpr (CHANNEL_COUNT == 1) {
@@ -457,8 +469,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
457 "input_mix_buffer={}, output_mix_buffer={}", 469 "input_mix_buffer={}, output_mix_buffer={}",
458 node_id, input_offset, output_offset); 470 node_id, input_offset, output_offset);
459 } 471 }
460 const auto* input = GetMixBuffer(input_offset); 472 std::span<const s32> input = GetMixBuffer(input_offset);
461 auto* output = GetMixBuffer(output_offset); 473 std::span<s32> output = GetMixBuffer(output_offset);
462 474
463 // Biquad filter parameters 475 // Biquad filter parameters
464 const auto [n0, n1, n2] = params.numerator; 476 const auto [n0, n1, n2] = params.numerator;
@@ -551,8 +563,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
551 return; 563 return;
552 } 564 }
553 565
554 std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{}; 566 std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
555 std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{}; 567 std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
556 568
557 const auto status = params.status; 569 const auto status = params.status;
558 for (s32 i = 0; i < channel_count; i++) { 570 for (s32 i = 0; i < channel_count; i++) {
@@ -587,7 +599,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
587 for (s32 i = 0; i < channel_count; i++) { 599 for (s32 i = 0; i < channel_count; i++) {
588 // Only copy if the buffer input and output do not match! 600 // Only copy if the buffer input and output do not match!
589 if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { 601 if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
590 std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); 602 std::memcpy(output[i].data(), input[i].data(),
603 worker_params.sample_count * sizeof(s32));
591 } 604 }
592 } 605 }
593 } 606 }
@@ -603,8 +616,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
603 for (s32 i = 0; i < channel_count; i++) { 616 for (s32 i = 0; i < channel_count; i++) {
604 // TODO(ogniK): Actually implement biquad filter 617 // TODO(ogniK): Actually implement biquad filter
605 if (params.input[i] != params.output[i]) { 618 if (params.input[i] != params.output[i]) {
606 const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); 619 std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
607 auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); 620 std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
608 ApplyMix<1>(output, input, 32768, worker_params.sample_count); 621 ApplyMix<1>(output, input, 32768, worker_params.sample_count);
609 } 622 }
610 } 623 }
@@ -643,14 +656,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
643 656
644 if (samples_read != static_cast<int>(worker_params.sample_count) && 657 if (samples_read != static_cast<int>(worker_params.sample_count) &&
645 samples_read <= params.sample_count) { 658 samples_read <= params.sample_count) {
646 std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read); 659 std::memset(GetMixBuffer(output_index).data(), 0,
660 params.sample_count - samples_read);
647 } 661 }
648 } else { 662 } else {
649 AuxInfoDSP empty{}; 663 AuxInfoDSP empty{};
650 memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP)); 664 memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
651 memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP)); 665 memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
652 if (output_index != input_index) { 666 if (output_index != input_index) {
653 std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index), 667 std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
654 worker_params.sample_count * sizeof(s32)); 668 worker_params.sample_count * sizeof(s32));
655 } 669 }
656 } 670 }
@@ -668,7 +682,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
668} 682}
669 683
670s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, 684s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
671 const s32* data, u32 sample_count, u32 write_offset, 685 std::span<const s32> data, u32 sample_count, u32 write_offset,
672 u32 write_count) { 686 u32 write_count) {
673 if (max_samples == 0) { 687 if (max_samples == 0) {
674 return 0; 688 return 0;
@@ -678,14 +692,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
678 return 0; 692 return 0;
679 } 693 }
680 694
681 std::size_t data_offset{}; 695 s32 data_offset{};
682 u32 remaining = sample_count; 696 u32 remaining = sample_count;
683 while (remaining > 0) { 697 while (remaining > 0) {
684 // Get position in buffer 698 // Get position in buffer
685 const auto base = send_buffer + (offset * sizeof(u32)); 699 const auto base = send_buffer + (offset * sizeof(u32));
686 const auto samples_to_grab = std::min(max_samples - offset, remaining); 700 const auto samples_to_grab = std::min(max_samples - offset, remaining);
687 // Write to output 701 // Write to output
688 memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32)); 702 memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
689 offset = (offset + samples_to_grab) % max_samples; 703 offset = (offset + samples_to_grab) % max_samples;
690 remaining -= samples_to_grab; 704 remaining -= samples_to_grab;
691 data_offset += samples_to_grab; 705 data_offset += samples_to_grab;
@@ -698,7 +712,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
698} 712}
699 713
700s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, 714s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
701 s32* out_data, u32 sample_count, u32 read_offset, 715 std::span<s32> out_data, u32 sample_count, u32 read_offset,
702 u32 read_count) { 716 u32 read_count) {
703 if (max_samples == 0) { 717 if (max_samples == 0) {
704 return 0; 718 return 0;
@@ -710,15 +724,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
710 } 724 }
711 725
712 u32 remaining = sample_count; 726 u32 remaining = sample_count;
727 s32 data_offset{};
713 while (remaining > 0) { 728 while (remaining > 0) {
714 const auto base = recv_buffer + (offset * sizeof(u32)); 729 const auto base = recv_buffer + (offset * sizeof(u32));
715 const auto samples_to_grab = std::min(max_samples - offset, remaining); 730 const auto samples_to_grab = std::min(max_samples - offset, remaining);
716 std::vector<s32> buffer(samples_to_grab); 731 std::vector<s32> buffer(samples_to_grab);
717 memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32)); 732 memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
718 std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32)); 733 std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
719 out_data += samples_to_grab;
720 offset = (offset + samples_to_grab) % max_samples; 734 offset = (offset + samples_to_grab) % max_samples;
721 remaining -= samples_to_grab; 735 remaining -= samples_to_grab;
736 data_offset += samples_to_grab;
722 } 737 }
723 738
724 if (read_count != 0) { 739 if (read_count != 0) {
@@ -965,8 +980,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
965 node_id, input_offset, output_offset, volume); 980 node_id, input_offset, output_offset, volume);
966 } 981 }
967 982
968 auto* output = GetMixBuffer(output_offset); 983 std::span<s32> output = GetMixBuffer(output_offset);
969 const auto* input = GetMixBuffer(input_offset); 984 std::span<const s32> input = GetMixBuffer(input_offset);
970 985
971 const s32 gain = static_cast<s32>(volume * 32768.0f); 986 const s32 gain = static_cast<s32>(volume * 32768.0f);
972 // Mix with loop unrolling 987 // Mix with loop unrolling
@@ -1172,12 +1187,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
1172 return samples_processed; 1187 return samples_processed;
1173} 1188}
1174 1189
1175s32* CommandGenerator::GetMixBuffer(std::size_t index) { 1190std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
1176 return mix_buffer.data() + (index * worker_params.sample_count); 1191 return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count),
1192 worker_params.sample_count);
1177} 1193}
1178 1194
1179const s32* CommandGenerator::GetMixBuffer(std::size_t index) const { 1195std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
1180 return mix_buffer.data() + (index * worker_params.sample_count); 1196 return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count),
1197 worker_params.sample_count);
1181} 1198}
1182 1199
1183std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const { 1200std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1188,15 +1205,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
1188 return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT; 1205 return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
1189} 1206}
1190 1207
1191s32* CommandGenerator::GetChannelMixBuffer(s32 channel) { 1208std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
1192 return GetMixBuffer(worker_params.mix_buffer_count + channel); 1209 return GetMixBuffer(worker_params.mix_buffer_count + channel);
1193} 1210}
1194 1211
1195const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const { 1212std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
1196 return GetMixBuffer(worker_params.mix_buffer_count + channel); 1213 return GetMixBuffer(worker_params.mix_buffer_count + channel);
1197} 1214}
1198 1215
1199void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, 1216void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
1200 VoiceState& dsp_state, s32 channel, 1217 VoiceState& dsp_state, s32 channel,
1201 s32 target_sample_rate, s32 sample_count, 1218 s32 target_sample_rate, s32 sample_count,
1202 s32 node_id) { 1219 s32 node_id) {
@@ -1208,7 +1225,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1208 node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate, 1225 node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
1209 in_params.mix_id, in_params.splitter_info_id); 1226 in_params.mix_id, in_params.splitter_info_id);
1210 } 1227 }
1211 ASSERT_OR_EXECUTE(output != nullptr, { return; }); 1228 ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
1212 1229
1213 const auto resample_rate = static_cast<s32>( 1230 const auto resample_rate = static_cast<s32>(
1214 static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) * 1231 static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1225,6 +1242,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1225 } 1242 }
1226 1243
1227 std::size_t temp_mix_offset{}; 1244 std::size_t temp_mix_offset{};
1245 s32 samples_output{};
1228 auto samples_remaining = sample_count; 1246 auto samples_remaining = sample_count;
1229 while (samples_remaining > 0) { 1247 while (samples_remaining > 0) {
1230 const auto samples_to_output = std::min(samples_remaining, min_required_samples); 1248 const auto samples_to_output = std::min(samples_remaining, min_required_samples);
@@ -1328,20 +1346,21 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1328 1346
1329 if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) { 1347 if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
1330 // No need to resample 1348 // No need to resample
1331 std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32)); 1349 std::memcpy(output.data() + samples_output, sample_buffer.data(),
1350 samples_read * sizeof(s32));
1332 } else { 1351 } else {
1333 std::fill(sample_buffer.begin() + temp_mix_offset, 1352 std::fill(sample_buffer.begin() + temp_mix_offset,
1334 sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read), 1353 sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
1335 0); 1354 0);
1336 AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction, 1355 AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
1337 samples_to_output); 1356 dsp_state.fraction, samples_to_output);
1338 // Resample 1357 // Resample
1339 for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) { 1358 for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
1340 dsp_state.sample_history[i] = sample_buffer[samples_to_read + i]; 1359 dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
1341 } 1360 }
1342 } 1361 }
1343 output += samples_to_output;
1344 samples_remaining -= samples_to_output; 1362 samples_remaining -= samples_to_output;
1363 samples_output += samples_to_output;
1345 } 1364 }
1346} 1365}
1347 1366
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index f310d7317..59a33ba76 100755
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <span>
8#include "audio_core/common.h" 9#include "audio_core/common.h"
9#include "audio_core/voice_context.h" 10#include "audio_core/voice_context.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
41 void PreCommand(); 42 void PreCommand();
42 void PostCommand(); 43 void PostCommand();
43 44
44 [[nodiscard]] s32* GetChannelMixBuffer(s32 channel); 45 [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
45 [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const; 46 [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
46 [[nodiscard]] s32* GetMixBuffer(std::size_t index); 47 [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
47 [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const; 48 [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
48 [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const; 49 [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
49 50
50 [[nodiscard]] std::size_t GetTotalMixBufferCount() const; 51 [[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,10 +78,11 @@ private:
77 void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled); 78 void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
78 [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index); 79 [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
79 80
80 s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data, 81 s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
81 u32 sample_count, u32 write_offset, u32 write_count); 82 std::span<const s32> data, u32 sample_count, u32 write_offset,
82 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, 83 u32 write_count);
83 u32 sample_count, u32 read_offset, u32 read_count); 84 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
85 std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
84 86
85 void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, 87 void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
86 std::vector<u8>& work_buffer); 88 std::vector<u8>& work_buffer);
@@ -91,8 +93,9 @@ private:
91 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); 93 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
92 s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, 94 s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
93 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); 95 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
94 void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, 96 void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
95 s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); 97 VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
98 s32 sample_count, s32 node_id);
96 99
97 AudioCommon::AudioRendererParameter& worker_params; 100 AudioCommon::AudioRendererParameter& worker_params;
98 VoiceContext& voice_context; 101 VoiceContext& voice_context;
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100755
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
15void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) { 15void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
16 ASSERT(in.type == SinkTypes::Device); 16 ASSERT(in.type == SinkTypes::Device);
17 17
18 has_downmix_coefs = in.device.down_matrix_enabled; 18 if (in.device.down_matrix_enabled) {
19 if (has_downmix_coefs) {
20 downmix_coefficients = in.device.down_matrix_coef; 19 downmix_coefficients = in.device.down_matrix_coef;
20 } else {
21 downmix_coefficients = {
22 1.0f, // front
23 0.707f, // center
24 0.0f, // lfe
25 0.707f, // back
26 };
21 } 27 }
28
22 in_use = in.in_use; 29 in_use = in.in_use;
23 use_count = in.device.input_count; 30 use_count = in.device.input_count;
24 buffers = in.device.input; 31 buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
34 return buffer_ret; 41 return buffer_ret;
35} 42}
36 43
37bool SinkContext::HasDownMixingCoefficients() const {
38 return has_downmix_coefs;
39}
40
41const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const { 44const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
42 return downmix_coefficients; 45 return downmix_coefficients;
43} 46}
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 9e2b69785..254961fe2 100755
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -84,7 +84,6 @@ public:
84 [[nodiscard]] bool InUse() const; 84 [[nodiscard]] bool InUse() const;
85 [[nodiscard]] std::vector<u8> OutputBuffers() const; 85 [[nodiscard]] std::vector<u8> OutputBuffers() const;
86 86
87 [[nodiscard]] bool HasDownMixingCoefficients() const;
88 [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const; 87 [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
89 88
90private: 89private:
@@ -92,7 +91,6 @@ private:
92 s32 use_count{}; 91 s32 use_count{};
93 std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{}; 92 std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
94 std::size_t sink_count{}; 93 std::size_t sink_count{};
95 bool has_downmix_coefs{false};
96 DownmixCoefficients downmix_coefficients{}; 94 DownmixCoefficients downmix_coefficients{};
97}; 95};
98} // namespace AudioCore 96} // namespace AudioCore
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f04538dca..910909201 100755
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -591,7 +591,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
591 for (auto& interval : intervals) { 591 for (auto& interval : intervals) {
592 const std::size_t size = interval.upper() - interval.lower(); 592 const std::size_t size = interval.upper() - interval.lower();
593 const VAddr cpu_addr = interval.lower(); 593 const VAddr cpu_addr = interval.lower();
594 const VAddr cpu_addr_end = interval.upper();
595 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 594 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
596 boost::container::small_vector<BufferCopy, 1> copies; 595 boost::container::small_vector<BufferCopy, 1> copies;
597 buffer.ForEachDownloadRange( 596 buffer.ForEachDownloadRange(
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 5d659dcaf..8b45f1b62 100755
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1785,8 +1785,6 @@ public:
1785 SSY, 1785 SSY,
1786 SYNC, 1786 SYNC,
1787 BRK, 1787 BRK,
1788 CAL,
1789 RET,
1790 DEPBAR, 1788 DEPBAR,
1791 VOTE, 1789 VOTE,
1792 VOTE_VTG, 1790 VOTE_VTG,
@@ -2110,8 +2108,6 @@ private:
2110 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), 2108 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
2111 INST("111000110100----", Id::BRK, Type::Flow, "BRK"), 2109 INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
2112 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 2110 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2113 INST("111000100110----", Id::CAL, Type::Flow, "CAL"),
2114 INST("111000110010----", Id::RET, Type::Flow, "RET"),
2115 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 2111 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2116 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 2112 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2117 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), 2113 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index 84b8a3243..e8d8d2aa5 100755
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -491,9 +491,6 @@ private:
491 const Registry& registry; 491 const Registry& registry;
492 const ShaderType stage; 492 const ShaderType stage;
493 493
494 std::shared_ptr<ShaderFunctionIR> context_func;
495 u32 ast_var_base{};
496
497 std::size_t num_temporaries = 0; 494 std::size_t num_temporaries = 0;
498 std::size_t max_temporaries = 0; 495 std::size_t max_temporaries = 0;
499 496
@@ -810,33 +807,13 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
810 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { 807 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
811 DefineGlobalMemory(); 808 DefineGlobalMemory();
812 809
813 context_func = ir.GetMainFunction();
814 ast_var_base = 0;
815
816 AddLine("TEMP RC;"); 810 AddLine("TEMP RC;");
817 AddLine("TEMP FSWZA[4];"); 811 AddLine("TEMP FSWZA[4];");
818 AddLine("TEMP FSWZB[4];"); 812 AddLine("TEMP FSWZB[4];");
819 InitializeVariables(); 813 if (ir.IsDecompiled()) {
820 AddLine("main:");
821 if (context_func->IsDecompiled()) {
822 DecompileAST(); 814 DecompileAST();
823 } else { 815 } else {
824 DecompileBranchMode(); 816 DecompileBranchMode();
825 AddLine("RET;");
826 }
827
828 const auto& subfunctions = ir.GetSubFunctions();
829 auto it = subfunctions.begin();
830 while (it != subfunctions.end()) {
831 context_func = *it;
832 AddLine("func_{}:", context_func->GetId());
833 if (context_func->IsDecompiled()) {
834 DecompileAST();
835 } else {
836 DecompileBranchMode();
837 AddLine("RET;");
838 }
839 it++;
840 } 817 }
841 AddLine("END"); 818 AddLine("END");
842 819
@@ -1083,38 +1060,41 @@ void ARBDecompiler::InitializeVariables() {
1083} 1060}
1084 1061
1085void ARBDecompiler::DecompileAST() { 1062void ARBDecompiler::DecompileAST() {
1086 const u32 num_flow_variables = context_func->GetASTNumVariables(); 1063 const u32 num_flow_variables = ir.GetASTNumVariables();
1087 for (u32 i = 0; i < num_flow_variables; ++i) { 1064 for (u32 i = 0; i < num_flow_variables; ++i) {
1088 AddLine("TEMP F{};", i + ast_var_base); 1065 AddLine("TEMP F{};", i);
1089 } 1066 }
1090 for (u32 i = 0; i < num_flow_variables; ++i) { 1067 for (u32 i = 0; i < num_flow_variables; ++i) {
1091 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i + ast_var_base); 1068 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1092 } 1069 }
1093 1070
1094 VisitAST(context_func->GetASTProgram()); 1071 InitializeVariables();
1095 ast_var_base += num_flow_variables; 1072
1073 VisitAST(ir.GetASTProgram());
1096} 1074}
1097 1075
1098void ARBDecompiler::DecompileBranchMode() { 1076void ARBDecompiler::DecompileBranchMode() {
1099 static constexpr u32 FLOW_STACK_SIZE = 20; 1077 static constexpr u32 FLOW_STACK_SIZE = 20;
1100 if (!context_func->IsFlowStackDisabled()) { 1078 if (!ir.IsFlowStackDisabled()) {
1101 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); 1079 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1102 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); 1080 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1103 AddLine("TEMP SSY_TOP;"); 1081 AddLine("TEMP SSY_TOP;");
1104 AddLine("TEMP PBK_TOP;"); 1082 AddLine("TEMP PBK_TOP;");
1105 } 1083 }
1106 1084
1107 AddLine("TEMP PC{};", context_func->GetId()); 1085 AddLine("TEMP PC;");
1108 1086
1109 if (!context_func->IsFlowStackDisabled()) { 1087 if (!ir.IsFlowStackDisabled()) {
1110 AddLine("MOV.U SSY_TOP.x, 0;"); 1088 AddLine("MOV.U SSY_TOP.x, 0;");
1111 AddLine("MOV.U PBK_TOP.x, 0;"); 1089 AddLine("MOV.U PBK_TOP.x, 0;");
1112 } 1090 }
1113 1091
1114 const auto basic_block_end = context_func->GetBasicBlocks().end(); 1092 InitializeVariables();
1115 auto basic_block_it = context_func->GetBasicBlocks().begin(); 1093
1094 const auto basic_block_end = ir.GetBasicBlocks().end();
1095 auto basic_block_it = ir.GetBasicBlocks().begin();
1116 const u32 first_address = basic_block_it->first; 1096 const u32 first_address = basic_block_it->first;
1117 AddLine("MOV.U PC{}.x, {};", context_func->GetId(), first_address); 1097 AddLine("MOV.U PC.x, {};", first_address);
1118 1098
1119 AddLine("REP;"); 1099 AddLine("REP;");
1120 1100
@@ -1123,7 +1103,7 @@ void ARBDecompiler::DecompileBranchMode() {
1123 const auto& [address, bb] = *basic_block_it; 1103 const auto& [address, bb] = *basic_block_it;
1124 ++num_blocks; 1104 ++num_blocks;
1125 1105
1126 AddLine("SEQ.S.CC RC.x, PC{}.x, {};", context_func->GetId(), address); 1106 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1127 AddLine("IF NE.x;"); 1107 AddLine("IF NE.x;");
1128 1108
1129 VisitBlock(bb); 1109 VisitBlock(bb);
@@ -1134,7 +1114,7 @@ void ARBDecompiler::DecompileBranchMode() {
1134 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); 1114 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1135 if (!op || op->GetCode() != OperationCode::Branch) { 1115 if (!op || op->GetCode() != OperationCode::Branch) {
1136 const u32 next_address = basic_block_it->first; 1116 const u32 next_address = basic_block_it->first;
1137 AddLine("MOV.U PC{}.x, {};", context_func->GetId(), next_address); 1117 AddLine("MOV.U PC.x, {};", next_address);
1138 AddLine("CONT;"); 1118 AddLine("CONT;");
1139 } 1119 }
1140 } 1120 }
@@ -1172,8 +1152,7 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
1172 } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { 1152 } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1173 VisitBlock(decoded->nodes); 1153 VisitBlock(decoded->nodes);
1174 } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { 1154 } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1175 AddLine("MOV.U F{}, {};", var_set->index + ast_var_base, 1155 AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
1176 VisitExpression(var_set->condition));
1177 ResetTemporaries(); 1156 ResetTemporaries();
1178 } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { 1157 } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1179 const std::string condition = VisitExpression(do_while->condition); 1158 const std::string condition = VisitExpression(do_while->condition);
@@ -1193,11 +1172,7 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
1193 ResetTemporaries(); 1172 ResetTemporaries();
1194 } 1173 }
1195 if (ast_return->kills) { 1174 if (ast_return->kills) {
1196 if (stage == ShaderType::Fragment) { 1175 AddLine("KIL TR;");
1197 AddLine("KIL TR;");
1198 } else {
1199 AddLine("RET;");
1200 }
1201 } else { 1176 } else {
1202 Exit(); 1177 Exit();
1203 } 1178 }
@@ -1244,7 +1219,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) {
1244 return Visit(ir.GetConditionCode(expr->cc)); 1219 return Visit(ir.GetConditionCode(expr->cc));
1245 } 1220 }
1246 if (const auto expr = std::get_if<ExprVar>(&*node)) { 1221 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1247 return fmt::format("F{}.x", expr->var_index + ast_var_base); 1222 return fmt::format("F{}.x", expr->var_index);
1248 } 1223 }
1249 if (const auto expr = std::get_if<ExprBoolean>(&*node)) { 1224 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1250 return expr->value ? "0xffffffff" : "0"; 1225 return expr->value ? "0xffffffff" : "0";
@@ -1431,11 +1406,6 @@ std::string ARBDecompiler::Visit(const Node& node) {
1431 return {}; 1406 return {};
1432 } 1407 }
1433 1408
1434 if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
1435 AddLine("CAL func_{};", func_call->GetFuncId());
1436 return {};
1437 }
1438
1439 if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { 1409 if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
1440 // Uncommenting this will generate invalid code. GLASM lacks comments. 1410 // Uncommenting this will generate invalid code. GLASM lacks comments.
1441 // AddLine("// {}", cmt->GetText()); 1411 // AddLine("// {}", cmt->GetText());
@@ -1509,7 +1479,7 @@ std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1509} 1479}
1510 1480
1511void ARBDecompiler::Exit() { 1481void ARBDecompiler::Exit() {
1512 if (!context_func->IsMain() || stage != ShaderType::Fragment) { 1482 if (stage != ShaderType::Fragment) {
1513 AddLine("RET;"); 1483 AddLine("RET;");
1514 return; 1484 return;
1515 } 1485 }
@@ -2051,13 +2021,13 @@ std::string ARBDecompiler::ImageStore(Operation operation) {
2051 2021
2052std::string ARBDecompiler::Branch(Operation operation) { 2022std::string ARBDecompiler::Branch(Operation operation) {
2053 const auto target = std::get<ImmediateNode>(*operation[0]); 2023 const auto target = std::get<ImmediateNode>(*operation[0]);
2054 AddLine("MOV.U PC{}.x, {};", context_func->GetId(), target.GetValue()); 2024 AddLine("MOV.U PC.x, {};", target.GetValue());
2055 AddLine("CONT;"); 2025 AddLine("CONT;");
2056 return {}; 2026 return {};
2057} 2027}
2058 2028
2059std::string ARBDecompiler::BranchIndirect(Operation operation) { 2029std::string ARBDecompiler::BranchIndirect(Operation operation) {
2060 AddLine("MOV.U PC{}.x, {};", context_func->GetId(), Visit(operation[0])); 2030 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
2061 AddLine("CONT;"); 2031 AddLine("CONT;");
2062 return {}; 2032 return {};
2063} 2033}
@@ -2075,7 +2045,7 @@ std::string ARBDecompiler::PopFlowStack(Operation operation) {
2075 const auto stack = std::get<MetaStackClass>(operation.GetMeta()); 2045 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2076 const std::string_view stack_name = StackName(stack); 2046 const std::string_view stack_name = StackName(stack);
2077 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); 2047 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2078 AddLine("MOV.U PC{}.x, {}[{}_TOP.x].x;", context_func->GetId(), stack_name, stack_name); 2048 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
2079 AddLine("CONT;"); 2049 AddLine("CONT;");
2080 return {}; 2050 return {};
2081} 2051}
@@ -2086,10 +2056,6 @@ std::string ARBDecompiler::Exit(Operation) {
2086} 2056}
2087 2057
2088std::string ARBDecompiler::Discard(Operation) { 2058std::string ARBDecompiler::Discard(Operation) {
2089 if (stage != ShaderType::Fragment) {
2090 AddLine("RET;");
2091 return {};
2092 }
2093 AddLine("KIL TR;"); 2059 AddLine("KIL TR;");
2094 return {}; 2060 return {};
2095} 2061}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 00bd9baaf..9c28498e8 100755
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -79,11 +79,6 @@ const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); 79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80)"; 80)";
81 81
82enum class HelperFunction {
83 SignedAtomic = 0,
84 Total,
85};
86
87class ShaderWriter final { 82class ShaderWriter final {
88public: 83public:
89 void AddExpression(std::string_view text) { 84 void AddExpression(std::string_view text) {
@@ -439,28 +434,6 @@ public:
439 DeclareInternalFlags(); 434 DeclareInternalFlags();
440 DeclareCustomVariables(); 435 DeclareCustomVariables();
441 DeclarePhysicalAttributeReader(); 436 DeclarePhysicalAttributeReader();
442 DeclareHelpersForward();
443
444 const auto& subfunctions = ir.GetSubFunctions();
445 auto it = subfunctions.rbegin();
446 while (it != subfunctions.rend()) {
447 context_func = *it;
448 code.AddLine("void func_{}() {{", context_func->GetId());
449 ++code.scope;
450
451 if (context_func->IsDecompiled()) {
452 DecompileAST();
453 } else {
454 DecompileBranchMode();
455 }
456
457 --code.scope;
458 code.AddLine("}}");
459
460 it++;
461 }
462
463 context_func = ir.GetMainFunction();
464 437
465 code.AddLine("void main() {{"); 438 code.AddLine("void main() {{");
466 ++code.scope; 439 ++code.scope;
@@ -469,7 +442,7 @@ public:
469 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); 442 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
470 } 443 }
471 444
472 if (context_func->IsDecompiled()) { 445 if (ir.IsDecompiled()) {
473 DecompileAST(); 446 DecompileAST();
474 } else { 447 } else {
475 DecompileBranchMode(); 448 DecompileBranchMode();
@@ -477,9 +450,6 @@ public:
477 450
478 --code.scope; 451 --code.scope;
479 code.AddLine("}}"); 452 code.AddLine("}}");
480
481 code.AddNewLine();
482 DeclareHelpers();
483 } 453 }
484 454
485 std::string GetResult() { 455 std::string GetResult() {
@@ -492,13 +462,13 @@ private:
492 462
493 void DecompileBranchMode() { 463 void DecompileBranchMode() {
494 // VM's program counter 464 // VM's program counter
495 const auto first_address = context_func->GetBasicBlocks().begin()->first; 465 const auto first_address = ir.GetBasicBlocks().begin()->first;
496 code.AddLine("uint jmp_to = {}U;", first_address); 466 code.AddLine("uint jmp_to = {}U;", first_address);
497 467
498 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 468 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
499 // unlikely that shaders will use 20 nested SSYs and PBKs. 469 // unlikely that shaders will use 20 nested SSYs and PBKs.
500 constexpr u32 FLOW_STACK_SIZE = 20; 470 constexpr u32 FLOW_STACK_SIZE = 20;
501 if (!context_func->IsFlowStackDisabled()) { 471 if (!ir.IsFlowStackDisabled()) {
502 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { 472 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
503 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); 473 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
504 code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); 474 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -510,7 +480,7 @@ private:
510 480
511 code.AddLine("switch (jmp_to) {{"); 481 code.AddLine("switch (jmp_to) {{");
512 482
513 for (const auto& pair : context_func->GetBasicBlocks()) { 483 for (const auto& pair : ir.GetBasicBlocks()) {
514 const auto& [address, bb] = pair; 484 const auto& [address, bb] = pair;
515 code.AddLine("case 0x{:X}U: {{", address); 485 code.AddLine("case 0x{:X}U: {{", address);
516 ++code.scope; 486 ++code.scope;
@@ -629,7 +599,7 @@ private:
629 size = limit; 599 size = limit;
630 } 600 }
631 601
632 code.AddLine("shared uint {}[{}];", GetSharedMemory(), size / 4); 602 code.AddLine("shared uint smem[{}];", size / 4);
633 code.AddNewLine(); 603 code.AddNewLine();
634 } 604 }
635 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", 605 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
@@ -1013,27 +983,6 @@ private:
1013 } 983 }
1014 } 984 }
1015 985
1016 void DeclareHelpersForward() {
1017 code.AddLine("int Helpers_AtomicShared(uint offset, int value, bool is_min);");
1018 code.AddNewLine();
1019 }
1020
1021 void DeclareHelpers() {
1022 if (IsHelperEnabled(HelperFunction::SignedAtomic)) {
1023 code.AddLine(
1024 R"(int Helpers_AtomicShared(uint offset, int value, bool is_min) {{
1025 uint oldValue, newValue;
1026 do {{
1027 oldValue = {}[offset];
1028 newValue = is_min ? uint(min(int(oldValue), value)) : uint(max(int(oldValue), value));
1029 }} while (atomicCompSwap({}[offset], newValue, oldValue) != oldValue);
1030 return int(oldValue);
1031}})",
1032 GetSharedMemory(), GetSharedMemory());
1033 code.AddNewLine();
1034 }
1035 }
1036
1037 void VisitBlock(const NodeBlock& bb) { 986 void VisitBlock(const NodeBlock& bb) {
1038 for (const auto& node : bb) { 987 for (const auto& node : bb) {
1039 Visit(node).CheckVoid(); 988 Visit(node).CheckVoid();
@@ -1160,9 +1109,7 @@ private:
1160 } 1109 }
1161 1110
1162 if (const auto smem = std::get_if<SmemNode>(&*node)) { 1111 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1163 return { 1112 return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1164 fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
1165 Type::Uint};
1166 } 1113 }
1167 1114
1168 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { 1115 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1184,11 +1131,6 @@ private:
1184 return {}; 1131 return {};
1185 } 1132 }
1186 1133
1187 if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
1188 code.AddLine("func_{}();", func_call->GetFuncId());
1189 return {};
1190 }
1191
1192 if (const auto comment = std::get_if<CommentNode>(&*node)) { 1134 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1193 code.AddLine("// " + comment->GetText()); 1135 code.AddLine("// " + comment->GetText());
1194 return {}; 1136 return {};
@@ -1656,9 +1598,7 @@ private:
1656 Type::Uint}; 1598 Type::Uint};
1657 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { 1599 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1658 ASSERT(stage == ShaderType::Compute); 1600 ASSERT(stage == ShaderType::Compute);
1659 target = { 1601 target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1660 fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
1661 Type::Uint};
1662 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1602 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1663 const std::string real = Visit(gmem->GetRealAddress()).AsUint(); 1603 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1664 const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); 1604 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -2175,14 +2115,7 @@ private:
2175 UNIMPLEMENTED_IF(meta->sampler.is_array); 2115 UNIMPLEMENTED_IF(meta->sampler.is_array);
2176 const std::size_t count = operation.GetOperandsCount(); 2116 const std::size_t count = operation.GetOperandsCount();
2177 2117
2178 std::string expr = "texelFetch"; 2118 std::string expr = "texelFetch(";
2179
2180 if (!meta->aoffi.empty()) {
2181 expr += "Offset";
2182 }
2183
2184 expr += '(';
2185
2186 expr += GetSampler(meta->sampler); 2119 expr += GetSampler(meta->sampler);
2187 expr += ", "; 2120 expr += ", ";
2188 2121
@@ -2204,20 +2137,6 @@ private:
2204 expr += ", "; 2137 expr += ", ";
2205 expr += Visit(meta->lod).AsInt(); 2138 expr += Visit(meta->lod).AsInt();
2206 } 2139 }
2207
2208 if (!meta->aoffi.empty()) {
2209 expr += ", ";
2210 expr += constructors.at(meta->aoffi.size() - 1);
2211 expr += '(';
2212 for (size_t i = 0; i < meta->aoffi.size(); ++i) {
2213 if (i > 0) {
2214 expr += ", ";
2215 }
2216 expr += Visit(meta->aoffi[i]).AsInt();
2217 }
2218 expr += ')';
2219 }
2220
2221 expr += ')'; 2140 expr += ')';
2222 expr += GetSwizzle(meta->element); 2141 expr += GetSwizzle(meta->element);
2223 2142
@@ -2264,11 +2183,8 @@ private:
2264 template <const std::string_view& opname, Type type> 2183 template <const std::string_view& opname, Type type>
2265 Expression Atomic(Operation operation) { 2184 Expression Atomic(Operation operation) {
2266 if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { 2185 if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
2267 // Use a helper as a workaround due to memory being uint 2186 UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
2268 SetHelperEnabled(HelperFunction::SignedAtomic, true); 2187 return {};
2269 return {fmt::format("Helpers_AtomicShared({}, {}, {})", Visit(operation[0]).AsInt(),
2270 Visit(operation[1]).AsInt(), opname == Func::Min),
2271 Type::Int};
2272 } 2188 }
2273 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), 2189 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
2274 Visit(operation[1]).AsUint()), 2190 Visit(operation[1]).AsUint()),
@@ -2351,9 +2267,7 @@ private:
2351 } 2267 }
2352 2268
2353 Expression Exit(Operation operation) { 2269 Expression Exit(Operation operation) {
2354 if (context_func->IsMain()) { 2270 PreExit();
2355 PreExit();
2356 }
2357 code.AddLine("return;"); 2271 code.AddLine("return;");
2358 return {}; 2272 return {};
2359 } 2273 }
@@ -2363,11 +2277,7 @@ private:
2363 // about unexecuted instructions that may follow this. 2277 // about unexecuted instructions that may follow this.
2364 code.AddLine("if (true) {{"); 2278 code.AddLine("if (true) {{");
2365 ++code.scope; 2279 ++code.scope;
2366 if (stage != ShaderType::Fragment) { 2280 code.AddLine("discard;");
2367 code.AddLine("return;");
2368 } else {
2369 code.AddLine("discard;");
2370 }
2371 --code.scope; 2281 --code.scope;
2372 code.AddLine("}}"); 2282 code.AddLine("}}");
2373 return {}; 2283 return {};
@@ -2478,7 +2388,7 @@ private:
2478 } 2388 }
2479 2389
2480 Expression Barrier(Operation) { 2390 Expression Barrier(Operation) {
2481 if (!context_func->IsDecompiled()) { 2391 if (!ir.IsDecompiled()) {
2482 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); 2392 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
2483 return {}; 2393 return {};
2484 } 2394 }
@@ -2795,10 +2705,6 @@ private:
2795 } 2705 }
2796 } 2706 }
2797 2707
2798 constexpr std::string_view GetSharedMemory() const {
2799 return "shared_mem";
2800 }
2801
2802 std::string GetInternalFlag(InternalFlag flag) const { 2708 std::string GetInternalFlag(InternalFlag flag) const {
2803 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", 2709 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2804 "overflow_flag"}; 2710 "overflow_flag"};
@@ -2840,14 +2746,6 @@ private:
2840 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); 2746 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2841 } 2747 }
2842 2748
2843 void SetHelperEnabled(HelperFunction hf, bool enabled) {
2844 helper_functions_enabled[static_cast<size_t>(hf)] = enabled;
2845 }
2846
2847 bool IsHelperEnabled(HelperFunction hf) const {
2848 return helper_functions_enabled[static_cast<size_t>(hf)];
2849 }
2850
2851 const Device& device; 2749 const Device& device;
2852 const ShaderIR& ir; 2750 const ShaderIR& ir;
2853 const Registry& registry; 2751 const Registry& registry;
@@ -2857,13 +2755,9 @@ private:
2857 const Header header; 2755 const Header header;
2858 std::unordered_map<u8, VaryingTFB> transform_feedback; 2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2859 2757
2860 std::shared_ptr<ShaderFunctionIR> context_func;
2861
2862 ShaderWriter code; 2758 ShaderWriter code;
2863 2759
2864 std::optional<u32> max_input_vertices; 2760 std::optional<u32> max_input_vertices;
2865
2866 std::array<bool, static_cast<size_t>(HelperFunction::Total)> helper_functions_enabled{};
2867}; 2761};
2868 2762
2869std::string GetFlowVariable(u32 index) { 2763std::string GetFlowVariable(u32 index) {
@@ -3008,15 +2902,9 @@ public:
3008 decomp.code.scope++; 2902 decomp.code.scope++;
3009 } 2903 }
3010 if (ast.kills) { 2904 if (ast.kills) {
3011 if (decomp.stage != ShaderType::Fragment) { 2905 decomp.code.AddLine("discard;");
3012 decomp.code.AddLine("return;");
3013 } else {
3014 decomp.code.AddLine("discard;");
3015 }
3016 } else { 2906 } else {
3017 if (decomp.context_func->IsMain()) { 2907 decomp.PreExit();
3018 decomp.PreExit();
3019 }
3020 decomp.code.AddLine("return;"); 2908 decomp.code.AddLine("return;");
3021 } 2909 }
3022 if (!is_true) { 2910 if (!is_true) {
@@ -3049,13 +2937,13 @@ private:
3049}; 2937};
3050 2938
3051void GLSLDecompiler::DecompileAST() { 2939void GLSLDecompiler::DecompileAST() {
3052 const u32 num_flow_variables = context_func->GetASTNumVariables(); 2940 const u32 num_flow_variables = ir.GetASTNumVariables();
3053 for (u32 i = 0; i < num_flow_variables; i++) { 2941 for (u32 i = 0; i < num_flow_variables; i++) {
3054 code.AddLine("bool {} = false;", GetFlowVariable(i)); 2942 code.AddLine("bool {} = false;", GetFlowVariable(i));
3055 } 2943 }
3056 2944
3057 ASTDecompiler decompiler{*this}; 2945 ASTDecompiler decompiler{*this};
3058 decompiler.Visit(context_func->GetASTProgram()); 2946 decompiler.Visit(ir.GetASTProgram());
3059} 2947}
3060 2948
3061} // Anonymous namespace 2949} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 258e2f5df..c6846d886 100755
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -406,38 +406,10 @@ private:
406 binding = DeclareStorageTexels(binding); 406 binding = DeclareStorageTexels(binding);
407 binding = DeclareImages(binding); 407 binding = DeclareImages(binding);
408 408
409 const auto& subfunctions = ir.GetSubFunctions();
410
411 labels.resize(subfunctions.size() + 1);
412 other_functions.resize(subfunctions.size());
413
414 auto it = subfunctions.rbegin();
415 while (it != subfunctions.rend()) {
416 context_func = *it;
417 other_functions[context_func->GetId() - 1] =
418 OpFunction(t_void, {}, TypeFunction(t_void));
419 AddLabel();
420
421 if (context_func->IsDecompiled()) {
422 DeclareFlowVariables();
423 DecompileAST();
424 } else {
425 AllocateLabels();
426 DecompileBranchMode();
427 }
428
429 OpReturn();
430 OpFunctionEnd();
431
432 it++;
433 }
434
435 context_func = ir.GetMainFunction();
436
437 const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); 409 const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
438 AddLabel(); 410 AddLabel();
439 411
440 if (context_func->IsDecompiled()) { 412 if (ir.IsDecompiled()) {
441 DeclareFlowVariables(); 413 DeclareFlowVariables();
442 DecompileAST(); 414 DecompileAST();
443 } else { 415 } else {
@@ -469,18 +441,16 @@ private:
469 void DecompileAST(); 441 void DecompileAST();
470 442
471 void DecompileBranchMode() { 443 void DecompileBranchMode() {
472 const u32 first_address = context_func->GetBasicBlocks().begin()->first; 444 const u32 first_address = ir.GetBasicBlocks().begin()->first;
473 const u32 func_id = context_func->GetId(); 445 const Id loop_label = OpLabel("loop");
474 const std::string func_id_msg = std::to_string(func_id); 446 const Id merge_label = OpLabel("merge");
475 const Id loop_label = OpLabel("loop_" + func_id_msg);
476 const Id merge_label = OpLabel("merge_" + func_id_msg);
477 const Id dummy_label = OpLabel(); 447 const Id dummy_label = OpLabel();
478 const Id jump_label = OpLabel(); 448 const Id jump_label = OpLabel();
479 continue_label = OpLabel("continue_" + func_id_msg); 449 continue_label = OpLabel("continue");
480 450
481 std::vector<Sirit::Literal> literals; 451 std::vector<Sirit::Literal> literals;
482 std::vector<Id> branch_labels; 452 std::vector<Id> branch_labels;
483 for (const auto& [literal, label] : labels[func_id]) { 453 for (const auto& [literal, label] : labels) {
484 literals.push_back(literal); 454 literals.push_back(literal);
485 branch_labels.push_back(label); 455 branch_labels.push_back(label);
486 } 456 }
@@ -492,11 +462,11 @@ private:
492 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); 462 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
493 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); 463 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
494 464
495 Name(jmp_to, "jmp_to_" + func_id_msg); 465 Name(jmp_to, "jmp_to");
496 Name(ssy_flow_stack, "ssy_flow_stack_" + func_id_msg); 466 Name(ssy_flow_stack, "ssy_flow_stack");
497 Name(ssy_flow_stack_top, "ssy_flow_stack_top_" + func_id_msg); 467 Name(ssy_flow_stack_top, "ssy_flow_stack_top");
498 Name(pbk_flow_stack, "pbk_flow_stack_" + func_id_msg); 468 Name(pbk_flow_stack, "pbk_flow_stack");
499 Name(pbk_flow_stack_top, "pbk_flow_stack_top_" + func_id_msg); 469 Name(pbk_flow_stack_top, "pbk_flow_stack_top");
500 470
501 DefinePrologue(); 471 DefinePrologue();
502 472
@@ -514,14 +484,13 @@ private:
514 AddLabel(default_branch); 484 AddLabel(default_branch);
515 OpReturn(); 485 OpReturn();
516 486
517 for (const auto& [address, bb] : context_func->GetBasicBlocks()) { 487 for (const auto& [address, bb] : ir.GetBasicBlocks()) {
518 AddLabel(labels[func_id].at(address)); 488 AddLabel(labels.at(address));
519 489
520 VisitBasicBlock(bb); 490 VisitBasicBlock(bb);
521 491
522 const auto next_it = labels[func_id].lower_bound(address + 1); 492 const auto next_it = labels.lower_bound(address + 1);
523 const Id next_label = 493 const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
524 next_it != labels[func_id].end() ? next_it->second : default_branch;
525 OpBranch(next_label); 494 OpBranch(next_label);
526 } 495 }
527 496
@@ -539,10 +508,9 @@ private:
539 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 508 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
540 509
541 void AllocateLabels() { 510 void AllocateLabels() {
542 const u32 func_id = context_func->GetId(); 511 for (const auto& pair : ir.GetBasicBlocks()) {
543 for (const auto& pair : context_func->GetBasicBlocks()) {
544 const u32 address = pair.first; 512 const u32 address = pair.first;
545 labels[func_id].emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); 513 labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
546 } 514 }
547 } 515 }
548 516
@@ -621,14 +589,6 @@ private:
621 DeclareOutputVertex(); 589 DeclareOutputVertex();
622 } 590 }
623 591
624 void SafeKill() {
625 if (stage != ShaderType::Fragment) {
626 OpReturn();
627 return;
628 }
629 OpKill();
630 }
631
632 void DeclareFragment() { 592 void DeclareFragment() {
633 if (stage != ShaderType::Fragment) { 593 if (stage != ShaderType::Fragment) {
634 return; 594 return;
@@ -696,7 +656,7 @@ private:
696 } 656 }
697 657
698 void DeclareFlowVariables() { 658 void DeclareFlowVariables() {
699 for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) { 659 for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
700 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); 660 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
701 Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); 661 Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
702 flow_variables.emplace(i, AddGlobalVariable(id)); 662 flow_variables.emplace(i, AddGlobalVariable(id));
@@ -1373,12 +1333,6 @@ private:
1373 return {}; 1333 return {};
1374 } 1334 }
1375 1335
1376 if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
1377 const u32 func_id = func_call->GetFuncId();
1378 OpFunctionCall(t_void, other_functions[func_id - 1]);
1379 return {};
1380 }
1381
1382 if (const auto comment = std::get_if<CommentNode>(&*node)) { 1336 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1383 if (device.HasDebuggingToolAttached()) { 1337 if (device.HasDebuggingToolAttached()) {
1384 // We should insert comments with OpString instead of using named variables 1338 // We should insert comments with OpString instead of using named variables
@@ -2170,7 +2124,7 @@ private:
2170 2124
2171 OpBranchConditional(condition, true_label, discard_label); 2125 OpBranchConditional(condition, true_label, discard_label);
2172 AddLabel(discard_label); 2126 AddLabel(discard_label);
2173 SafeKill(); 2127 OpKill();
2174 AddLabel(true_label); 2128 AddLabel(true_label);
2175 } 2129 }
2176 2130
@@ -2221,9 +2175,7 @@ private:
2221 } 2175 }
2222 2176
2223 Expression Exit(Operation operation) { 2177 Expression Exit(Operation operation) {
2224 if (context_func->IsMain()) { 2178 PreExit();
2225 PreExit();
2226 }
2227 inside_branch = true; 2179 inside_branch = true;
2228 if (conditional_branch_set) { 2180 if (conditional_branch_set) {
2229 OpReturn(); 2181 OpReturn();
@@ -2240,12 +2192,12 @@ private:
2240 Expression Discard(Operation operation) { 2192 Expression Discard(Operation operation) {
2241 inside_branch = true; 2193 inside_branch = true;
2242 if (conditional_branch_set) { 2194 if (conditional_branch_set) {
2243 SafeKill(); 2195 OpKill();
2244 } else { 2196 } else {
2245 const Id dummy = OpLabel(); 2197 const Id dummy = OpLabel();
2246 OpBranch(dummy); 2198 OpBranch(dummy);
2247 AddLabel(dummy); 2199 AddLabel(dummy);
2248 SafeKill(); 2200 OpKill();
2249 AddLabel(); 2201 AddLabel();
2250 } 2202 }
2251 return {}; 2203 return {};
@@ -2324,7 +2276,7 @@ private:
2324 } 2276 }
2325 2277
2326 Expression Barrier(Operation) { 2278 Expression Barrier(Operation) {
2327 if (!context_func->IsDecompiled()) { 2279 if (!ir.IsDecompiled()) {
2328 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); 2280 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
2329 return {}; 2281 return {};
2330 } 2282 }
@@ -2818,8 +2770,6 @@ private:
2818 const Specialization& specialization; 2770 const Specialization& specialization;
2819 std::unordered_map<u8, VaryingTFB> transform_feedback; 2771 std::unordered_map<u8, VaryingTFB> transform_feedback;
2820 2772
2821 std::shared_ptr<ShaderFunctionIR> context_func;
2822
2823 const Id t_void = Name(TypeVoid(), "void"); 2773 const Id t_void = Name(TypeVoid(), "void");
2824 2774
2825 const Id t_bool = Name(TypeBool(), "bool"); 2775 const Id t_bool = Name(TypeBool(), "bool");
@@ -2946,8 +2896,7 @@ private:
2946 Id ssy_flow_stack{}; 2896 Id ssy_flow_stack{};
2947 Id pbk_flow_stack{}; 2897 Id pbk_flow_stack{};
2948 Id continue_label{}; 2898 Id continue_label{};
2949 std::vector<std::map<u32, Id>> labels; 2899 std::map<u32, Id> labels;
2950 std::vector<Id> other_functions;
2951 2900
2952 bool conditional_branch_set{}; 2901 bool conditional_branch_set{};
2953 bool inside_branch{}; 2902 bool inside_branch{};
@@ -3098,11 +3047,9 @@ public:
3098 decomp.OpBranchConditional(condition, then_label, endif_label); 3047 decomp.OpBranchConditional(condition, then_label, endif_label);
3099 decomp.AddLabel(then_label); 3048 decomp.AddLabel(then_label);
3100 if (ast.kills) { 3049 if (ast.kills) {
3101 decomp.SafeKill(); 3050 decomp.OpKill();
3102 } else { 3051 } else {
3103 if (decomp.context_func->IsMain()) { 3052 decomp.PreExit();
3104 decomp.PreExit();
3105 }
3106 decomp.OpReturn(); 3053 decomp.OpReturn();
3107 } 3054 }
3108 decomp.AddLabel(endif_label); 3055 decomp.AddLabel(endif_label);
@@ -3111,11 +3058,9 @@ public:
3111 decomp.OpBranch(next_block); 3058 decomp.OpBranch(next_block);
3112 decomp.AddLabel(next_block); 3059 decomp.AddLabel(next_block);
3113 if (ast.kills) { 3060 if (ast.kills) {
3114 decomp.SafeKill(); 3061 decomp.OpKill();
3115 } else { 3062 } else {
3116 if (decomp.context_func->IsMain()) { 3063 decomp.PreExit();
3117 decomp.PreExit();
3118 }
3119 decomp.OpReturn(); 3064 decomp.OpReturn();
3120 } 3065 }
3121 decomp.AddLabel(decomp.OpLabel()); 3066 decomp.AddLabel(decomp.OpLabel());
@@ -3152,7 +3097,7 @@ private:
3152}; 3097};
3153 3098
3154void SPIRVDecompiler::DecompileAST() { 3099void SPIRVDecompiler::DecompileAST() {
3155 const u32 num_flow_variables = context_func->GetASTNumVariables(); 3100 const u32 num_flow_variables = ir.GetASTNumVariables();
3156 for (u32 i = 0; i < num_flow_variables; i++) { 3101 for (u32 i = 0; i < num_flow_variables; i++) {
3157 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); 3102 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
3158 Name(id, fmt::format("flow_var_{}", i)); 3103 Name(id, fmt::format("flow_var_{}", i));
@@ -3161,7 +3106,7 @@ void SPIRVDecompiler::DecompileAST() {
3161 3106
3162 DefinePrologue(); 3107 DefinePrologue();
3163 3108
3164 const ASTNode program = context_func->GetASTProgram(); 3109 const ASTNode program = ir.GetASTProgram();
3165 ASTDecompiler decompiler{*this}; 3110 ASTDecompiler decompiler{*this};
3166 decompiler.Visit(program); 3111 decompiler.Visit(program);
3167 3112
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 7c8bd7e2f..43d965f2f 100755
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -7,7 +7,6 @@
7#include <set> 7#include <set>
8#include <stack> 8#include <stack>
9#include <unordered_map> 9#include <unordered_map>
10#include <unordered_set>
11#include <vector> 10#include <vector>
12 11
13#include "common/assert.h" 12#include "common/assert.h"
@@ -27,29 +26,17 @@ using Tegra::Shader::OpCode;
27 26
28constexpr s32 unassigned_branch = -2; 27constexpr s32 unassigned_branch = -2;
29 28
30enum class JumpLabel : u32 {
31 SSYClass = 0,
32 PBKClass = 1,
33};
34
35struct JumpItem {
36 JumpLabel type;
37 u32 address;
38
39 bool operator==(const JumpItem& other) const {
40 return std::tie(type, address) == std::tie(other.type, other.address);
41 }
42};
43
44struct Query { 29struct Query {
45 u32 address{}; 30 u32 address{};
46 std::stack<JumpItem> stack{}; 31 std::stack<u32> ssy_stack{};
32 std::stack<u32> pbk_stack{};
47}; 33};
48 34
49struct BlockStack { 35struct BlockStack {
50 BlockStack() = default; 36 BlockStack() = default;
51 explicit BlockStack(const Query& q) : stack{q.stack} {} 37 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
52 std::stack<JumpItem> stack{}; 38 std::stack<u32> ssy_stack{};
39 std::stack<u32> pbk_stack{};
53}; 40};
54 41
55template <typename T, typename... Args> 42template <typename T, typename... Args>
@@ -78,36 +65,20 @@ struct BlockInfo {
78 } 65 }
79}; 66};
80 67
81struct ProgramControl {
82 std::unordered_set<u32> found_functions{};
83 std::list<u32> pending_functions{};
84
85 void RegisterFunction(u32 address) {
86 if (found_functions.count(address) != 0) {
87 return;
88 }
89 found_functions.insert(address);
90 pending_functions.emplace_back(address);
91 }
92};
93
94struct CFGRebuildState { 68struct CFGRebuildState {
95 explicit CFGRebuildState(ProgramControl& control_, const ProgramCode& program_code_, u32 start_, 69 explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
96 u32 base_start_, Registry& registry_) 70 : program_code{program_code_}, registry{registry_}, start{start_} {}
97 : control{control_}, program_code{program_code_}, registry{registry_}, start{start_},
98 base_start{base_start_} {}
99 71
100 ProgramControl& control;
101 const ProgramCode& program_code; 72 const ProgramCode& program_code;
102 Registry& registry; 73 Registry& registry;
103 u32 start{}; 74 u32 start{};
104 u32 base_start{};
105 std::vector<BlockInfo> block_info; 75 std::vector<BlockInfo> block_info;
106 std::list<u32> inspect_queries; 76 std::list<u32> inspect_queries;
107 std::list<Query> queries; 77 std::list<Query> queries;
108 std::unordered_map<u32, u32> registered; 78 std::unordered_map<u32, u32> registered;
109 std::set<u32> labels; 79 std::set<u32> labels;
110 std::map<u32, JumpItem> jump_labels; 80 std::map<u32, u32> ssy_labels;
81 std::map<u32, u32> pbk_labels;
111 std::unordered_map<u32, BlockStack> stacks; 82 std::unordered_map<u32, BlockStack> stacks;
112 ASTManager* manager{}; 83 ASTManager* manager{};
113}; 84};
@@ -182,7 +153,7 @@ template <typename Result, typename TestCallable, typename PackCallable>
182std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, 153std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
183 PackCallable pack) { 154 PackCallable pack) {
184 for (; pos >= state.start; --pos) { 155 for (; pos >= state.start; --pos) {
185 if (IsSchedInstruction(pos, state.base_start)) { 156 if (IsSchedInstruction(pos, state.start)) {
186 continue; 157 continue;
187 } 158 }
188 const Instruction instr = state.program_code[pos]; 159 const Instruction instr = state.program_code[pos];
@@ -291,7 +262,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
291 single_branch.ignore = true; 262 single_branch.ignore = true;
292 break; 263 break;
293 } 264 }
294 if (IsSchedInstruction(offset, state.base_start)) { 265 if (IsSchedInstruction(offset, state.start)) {
295 offset++; 266 offset++;
296 continue; 267 continue;
297 } 268 }
@@ -303,7 +274,6 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
303 } 274 }
304 275
305 switch (opcode->get().GetId()) { 276 switch (opcode->get().GetId()) {
306 case OpCode::Id::RET:
307 case OpCode::Id::EXIT: { 277 case OpCode::Id::EXIT: {
308 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 278 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
309 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); 279 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
@@ -441,20 +411,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
441 case OpCode::Id::SSY: { 411 case OpCode::Id::SSY: {
442 const u32 target = offset + instr.bra.GetBranchTarget(); 412 const u32 target = offset + instr.bra.GetBranchTarget();
443 insert_label(state, target); 413 insert_label(state, target);
444 JumpItem it = {JumpLabel::SSYClass, target}; 414 state.ssy_labels.emplace(offset, target);
445 state.jump_labels.emplace(offset, it);
446 break; 415 break;
447 } 416 }
448 case OpCode::Id::PBK: { 417 case OpCode::Id::PBK: {
449 const u32 target = offset + instr.bra.GetBranchTarget(); 418 const u32 target = offset + instr.bra.GetBranchTarget();
450 insert_label(state, target); 419 insert_label(state, target);
451 JumpItem it = {JumpLabel::PBKClass, target}; 420 state.pbk_labels.emplace(offset, target);
452 state.jump_labels.emplace(offset, it);
453 break;
454 }
455 case OpCode::Id::CAL: {
456 const u32 target = offset + instr.bra.GetBranchTarget();
457 state.control.RegisterFunction(target);
458 break; 421 break;
459 } 422 }
460 case OpCode::Id::BRX: { 423 case OpCode::Id::BRX: {
@@ -550,7 +513,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
550} 513}
551 514
552bool TryQuery(CFGRebuildState& state) { 515bool TryQuery(CFGRebuildState& state) {
553 const auto gather_labels = [](std::stack<JumpItem>& cc, std::map<u32, JumpItem>& labels, 516 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
554 BlockInfo& block) { 517 BlockInfo& block) {
555 auto gather_start = labels.lower_bound(block.start); 518 auto gather_start = labels.lower_bound(block.start);
556 const auto gather_end = labels.upper_bound(block.end); 519 const auto gather_end = labels.upper_bound(block.end);
@@ -559,19 +522,6 @@ bool TryQuery(CFGRebuildState& state) {
559 ++gather_start; 522 ++gather_start;
560 } 523 }
561 }; 524 };
562 const auto pop_labels = [](JumpLabel type, SingleBranch* branch, Query& query) -> bool {
563 while (!query.stack.empty() && query.stack.top().type != type) {
564 query.stack.pop();
565 }
566 if (query.stack.empty()) {
567 return false;
568 }
569 if (branch->address == unassigned_branch) {
570 branch->address = query.stack.top().address;
571 }
572 query.stack.pop();
573 return true;
574 };
575 if (state.queries.empty()) { 525 if (state.queries.empty()) {
576 return false; 526 return false;
577 } 527 }
@@ -584,7 +534,8 @@ bool TryQuery(CFGRebuildState& state) {
584 // consumes a label. Schedule new queries accordingly 534 // consumes a label. Schedule new queries accordingly
585 if (block.visited) { 535 if (block.visited) {
586 BlockStack& stack = state.stacks[q.address]; 536 BlockStack& stack = state.stacks[q.address];
587 const bool all_okay = (stack.stack.empty() || q.stack == stack.stack); 537 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
538 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
588 state.queries.pop_front(); 539 state.queries.pop_front();
589 return all_okay; 540 return all_okay;
590 } 541 }
@@ -593,7 +544,8 @@ bool TryQuery(CFGRebuildState& state) {
593 544
594 Query q2(q); 545 Query q2(q);
595 state.queries.pop_front(); 546 state.queries.pop_front();
596 gather_labels(q2.stack, state.jump_labels, block); 547 gather_labels(q2.ssy_stack, state.ssy_labels, block);
548 gather_labels(q2.pbk_stack, state.pbk_labels, block);
597 if (std::holds_alternative<SingleBranch>(*block.branch)) { 549 if (std::holds_alternative<SingleBranch>(*block.branch)) {
598 auto* branch = std::get_if<SingleBranch>(block.branch.get()); 550 auto* branch = std::get_if<SingleBranch>(block.branch.get());
599 if (!branch->condition.IsUnconditional()) { 551 if (!branch->condition.IsUnconditional()) {
@@ -603,10 +555,16 @@ bool TryQuery(CFGRebuildState& state) {
603 555
604 auto& conditional_query = state.queries.emplace_back(q2); 556 auto& conditional_query = state.queries.emplace_back(q2);
605 if (branch->is_sync) { 557 if (branch->is_sync) {
606 pop_labels(JumpLabel::SSYClass, branch, conditional_query); 558 if (branch->address == unassigned_branch) {
559 branch->address = conditional_query.ssy_stack.top();
560 }
561 conditional_query.ssy_stack.pop();
607 } 562 }
608 if (branch->is_brk) { 563 if (branch->is_brk) {
609 pop_labels(JumpLabel::PBKClass, branch, conditional_query); 564 if (branch->address == unassigned_branch) {
565 branch->address = conditional_query.pbk_stack.top();
566 }
567 conditional_query.pbk_stack.pop();
610 } 568 }
611 conditional_query.address = branch->address; 569 conditional_query.address = branch->address;
612 return true; 570 return true;
@@ -688,23 +646,25 @@ void DecompileShader(CFGRebuildState& state) {
688 state.manager->Decompile(); 646 state.manager->Decompile();
689} 647}
690 648
691ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_code, 649} // Anonymous namespace
692 u32 start_address, u32 base_start, const CompilerSettings& settings, 650
693 Registry& registry) { 651std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
694 ShaderFunction result_out{}; 652 const CompilerSettings& settings,
653 Registry& registry) {
654 auto result_out = std::make_unique<ShaderCharacteristics>();
695 if (settings.depth == CompileDepth::BruteForce) { 655 if (settings.depth == CompileDepth::BruteForce) {
696 result_out.settings.depth = CompileDepth::BruteForce; 656 result_out->settings.depth = CompileDepth::BruteForce;
697 return result_out; 657 return result_out;
698 } 658 }
699 659
700 CFGRebuildState state{control, program_code, start_address, base_start, registry}; 660 CFGRebuildState state{program_code, start_address, registry};
701 // Inspect Code and generate blocks 661 // Inspect Code and generate blocks
702 state.labels.clear(); 662 state.labels.clear();
703 state.labels.emplace(start_address); 663 state.labels.emplace(start_address);
704 state.inspect_queries.push_back(state.start); 664 state.inspect_queries.push_back(state.start);
705 while (!state.inspect_queries.empty()) { 665 while (!state.inspect_queries.empty()) {
706 if (!TryInspectAddress(state)) { 666 if (!TryInspectAddress(state)) {
707 result_out.settings.depth = CompileDepth::BruteForce; 667 result_out->settings.depth = CompileDepth::BruteForce;
708 return result_out; 668 return result_out;
709 } 669 }
710 } 670 }
@@ -715,7 +675,7 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_
715 675
716 if (settings.depth != CompileDepth::FlowStack) { 676 if (settings.depth != CompileDepth::FlowStack) {
717 // Decompile Stacks 677 // Decompile Stacks
718 state.queries.push_back(Query{state.start, {}}); 678 state.queries.push_back(Query{state.start, {}, {}});
719 decompiled = true; 679 decompiled = true;
720 while (!state.queries.empty()) { 680 while (!state.queries.empty()) {
721 if (!TryQuery(state)) { 681 if (!TryQuery(state)) {
@@ -745,18 +705,19 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_
745 state.manager->ShowCurrentState("Of Shader"); 705 state.manager->ShowCurrentState("Of Shader");
746 state.manager->Clear(); 706 state.manager->Clear();
747 } else { 707 } else {
748 result_out.start = start_address; 708 auto characteristics = std::make_unique<ShaderCharacteristics>();
749 result_out.settings.depth = settings.depth; 709 characteristics->start = start_address;
750 result_out.manager = std::move(manager); 710 characteristics->settings.depth = settings.depth;
751 result_out.end = state.block_info.back().end + 1; 711 characteristics->manager = std::move(manager);
752 return result_out; 712 characteristics->end = state.block_info.back().end + 1;
713 return characteristics;
753 } 714 }
754 } 715 }
755 716
756 result_out.start = start_address; 717 result_out->start = start_address;
757 result_out.settings.depth = 718 result_out->settings.depth =
758 use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; 719 use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
759 result_out.blocks.clear(); 720 result_out->blocks.clear();
760 for (auto& block : state.block_info) { 721 for (auto& block : state.block_info) {
761 ShaderBlock new_block{}; 722 ShaderBlock new_block{};
762 new_block.start = block.start; 723 new_block.start = block.start;
@@ -765,20 +726,20 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_
765 if (!new_block.ignore_branch) { 726 if (!new_block.ignore_branch) {
766 new_block.branch = block.branch; 727 new_block.branch = block.branch;
767 } 728 }
768 result_out.end = std::max(result_out.end, block.end); 729 result_out->end = std::max(result_out->end, block.end);
769 result_out.blocks.push_back(new_block); 730 result_out->blocks.push_back(new_block);
770 } 731 }
771 if (!use_flow_stack) { 732 if (!use_flow_stack) {
772 result_out.labels = std::move(state.labels); 733 result_out->labels = std::move(state.labels);
773 return result_out; 734 return result_out;
774 } 735 }
775 736
776 auto back = result_out.blocks.begin(); 737 auto back = result_out->blocks.begin();
777 auto next = std::next(back); 738 auto next = std::next(back);
778 while (next != result_out.blocks.end()) { 739 while (next != result_out->blocks.end()) {
779 if (!state.labels.contains(next->start) && next->start == back->end + 1) { 740 if (!state.labels.contains(next->start) && next->start == back->end + 1) {
780 back->end = next->end; 741 back->end = next->end;
781 next = result_out.blocks.erase(next); 742 next = result_out->blocks.erase(next);
782 continue; 743 continue;
783 } 744 }
784 back = next; 745 back = next;
@@ -787,22 +748,4 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_
787 748
788 return result_out; 749 return result_out;
789} 750}
790
791} // Anonymous namespace
792
793std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address,
794 const CompilerSettings& settings, Registry& registry) {
795 ProgramControl control{};
796 auto result_out = std::make_unique<ShaderProgram>();
797 result_out->main =
798 ScanFunction(control, program_code, start_address, start_address, settings, registry);
799 while (!control.pending_functions.empty()) {
800 u32 address = control.pending_functions.front();
801 auto fun = ScanFunction(control, program_code, address, start_address, settings, registry);
802 result_out->subfunctions.emplace(address, std::move(fun));
803 control.pending_functions.pop_front();
804 }
805 return result_out;
806}
807
808} // namespace VideoCommon::Shader 751} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5ef2251b9..37bf96492 100755
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <list> 7#include <list>
8#include <map>
9#include <optional> 8#include <optional>
10#include <set> 9#include <set>
11#include <variant> 10#include <variant>
@@ -102,7 +101,7 @@ struct ShaderBlock {
102 } 101 }
103}; 102};
104 103
105struct ShaderFunction { 104struct ShaderCharacteristics {
106 std::list<ShaderBlock> blocks{}; 105 std::list<ShaderBlock> blocks{};
107 std::set<u32> labels{}; 106 std::set<u32> labels{};
108 u32 start{}; 107 u32 start{};
@@ -111,12 +110,8 @@ struct ShaderFunction {
111 CompilerSettings settings{}; 110 CompilerSettings settings{};
112}; 111};
113 112
114struct ShaderProgram { 113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
115 ShaderFunction main; 114 const CompilerSettings& settings,
116 std::map<u32, ShaderFunction> subfunctions; 115 Registry& registry);
117};
118
119std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address,
120 const CompilerSettings& settings, Registry& registry);
121 116
122} // namespace VideoCommon::Shader 117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 355c724a3..6576d1208 100755
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -64,52 +64,9 @@ std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
64 64
65} // Anonymous namespace 65} // Anonymous namespace
66 66
67class ExprDecoder {
68public:
69 explicit ExprDecoder(ShaderIR& ir_) : ir(ir_) {}
70
71 void operator()(const ExprAnd& expr) {
72 Visit(expr.operand1);
73 Visit(expr.operand2);
74 }
75
76 void operator()(const ExprOr& expr) {
77 Visit(expr.operand1);
78 Visit(expr.operand2);
79 }
80
81 void operator()(const ExprNot& expr) {
82 Visit(expr.operand1);
83 }
84
85 void operator()(const ExprPredicate& expr) {
86 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
87 if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
88 ir.used_predicates.insert(pred);
89 }
90 }
91
92 void operator()(const ExprCondCode& expr) {}
93
94 void operator()(const ExprVar& expr) {}
95
96 void operator()(const ExprBoolean& expr) {}
97
98 void operator()(const ExprGprEqual& expr) {
99 ir.used_registers.insert(expr.gpr);
100 }
101
102 void Visit(const Expr& node) {
103 return std::visit(*this, *node);
104 }
105
106private:
107 ShaderIR& ir;
108};
109
110class ASTDecoder { 67class ASTDecoder {
111public: 68public:
112 explicit ASTDecoder(ShaderIR& ir_) : ir(ir_), decoder(ir_) {} 69 explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
113 70
114 void operator()(ASTProgram& ast) { 71 void operator()(ASTProgram& ast) {
115 ASTNode current = ast.nodes.GetFirst(); 72 ASTNode current = ast.nodes.GetFirst();
@@ -120,7 +77,6 @@ public:
120 } 77 }
121 78
122 void operator()(ASTIfThen& ast) { 79 void operator()(ASTIfThen& ast) {
123 decoder.Visit(ast.condition);
124 ASTNode current = ast.nodes.GetFirst(); 80 ASTNode current = ast.nodes.GetFirst();
125 while (current) { 81 while (current) {
126 Visit(current); 82 Visit(current);
@@ -140,18 +96,13 @@ public:
140 96
141 void operator()(ASTBlockDecoded& ast) {} 97 void operator()(ASTBlockDecoded& ast) {}
142 98
143 void operator()(ASTVarSet& ast) { 99 void operator()(ASTVarSet& ast) {}
144 decoder.Visit(ast.condition);
145 }
146 100
147 void operator()(ASTLabel& ast) {} 101 void operator()(ASTLabel& ast) {}
148 102
149 void operator()(ASTGoto& ast) { 103 void operator()(ASTGoto& ast) {}
150 decoder.Visit(ast.condition);
151 }
152 104
153 void operator()(ASTDoWhile& ast) { 105 void operator()(ASTDoWhile& ast) {
154 decoder.Visit(ast.condition);
155 ASTNode current = ast.nodes.GetFirst(); 106 ASTNode current = ast.nodes.GetFirst();
156 while (current) { 107 while (current) {
157 Visit(current); 108 Visit(current);
@@ -159,13 +110,9 @@ public:
159 } 110 }
160 } 111 }
161 112
162 void operator()(ASTReturn& ast) { 113 void operator()(ASTReturn& ast) {}
163 decoder.Visit(ast.condition);
164 }
165 114
166 void operator()(ASTBreak& ast) { 115 void operator()(ASTBreak& ast) {}
167 decoder.Visit(ast.condition);
168 }
169 116
170 void Visit(ASTNode& node) { 117 void Visit(ASTNode& node) {
171 std::visit(*this, *node->GetInnerData()); 118 std::visit(*this, *node->GetInnerData());
@@ -178,113 +125,77 @@ public:
178 125
179private: 126private:
180 ShaderIR& ir; 127 ShaderIR& ir;
181 ExprDecoder decoder;
182}; 128};
183 129
184void ShaderIR::Decode() { 130void ShaderIR::Decode() {
185 const auto decode_function = ([this](ShaderFunction& shader_info) { 131 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
186 coverage_end = std::max<u32>(0, shader_info.end); 132
187 switch (shader_info.settings.depth) { 133 decompiled = false;
188 case CompileDepth::FlowStack: { 134 auto info = ScanFlow(program_code, main_offset, settings, registry);
189 for (const auto& block : shader_info.blocks) { 135 auto& shader_info = *info;
190 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); 136 coverage_begin = shader_info.start;
191 } 137 coverage_end = shader_info.end;
192 break; 138 switch (shader_info.settings.depth) {
139 case CompileDepth::FlowStack: {
140 for (const auto& block : shader_info.blocks) {
141 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
193 } 142 }
194 case CompileDepth::NoFlowStack: { 143 break;
195 disable_flow_stack = true; 144 }
196 const auto insert_block = [this](NodeBlock& nodes, u32 label) { 145 case CompileDepth::NoFlowStack: {
197 if (label == static_cast<u32>(exit_branch)) { 146 disable_flow_stack = true;
198 return; 147 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
199 } 148 if (label == static_cast<u32>(exit_branch)) {
200 basic_blocks.insert({label, nodes}); 149 return;
201 };
202 const auto& blocks = shader_info.blocks;
203 NodeBlock current_block;
204 u32 current_label = static_cast<u32>(exit_branch);
205 for (const auto& block : blocks) {
206 if (shader_info.labels.contains(block.start)) {
207 insert_block(current_block, current_label);
208 current_block.clear();
209 current_label = block.start;
210 }
211 if (!block.ignore_branch) {
212 DecodeRangeInner(current_block, block.start, block.end);
213 InsertControlFlow(current_block, block);
214 } else {
215 DecodeRangeInner(current_block, block.start, block.end + 1);
216 }
217 } 150 }
218 insert_block(current_block, current_label); 151 basic_blocks.insert({label, nodes});
219 break; 152 };
220 } 153 const auto& blocks = shader_info.blocks;
221 case CompileDepth::DecompileBackwards: 154 NodeBlock current_block;
222 case CompileDepth::FullDecompile: { 155 u32 current_label = static_cast<u32>(exit_branch);
223 program_manager = std::move(shader_info.manager); 156 for (const auto& block : blocks) {
224 disable_flow_stack = true; 157 if (shader_info.labels.contains(block.start)) {
225 decompiled = true; 158 insert_block(current_block, current_label);
226 ASTDecoder decoder{*this}; 159 current_block.clear();
227 ASTNode program = program_manager.GetProgram(); 160 current_label = block.start;
228 decoder.Visit(program);
229 break;
230 }
231 default:
232 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
233 [[fallthrough]];
234 case CompileDepth::BruteForce: {
235 const auto shader_end = static_cast<u32>(program_code.size());
236 coverage_begin = main_offset;
237 coverage_end = shader_end;
238 for (u32 label = main_offset; label < shader_end; ++label) {
239 basic_blocks.insert({label, DecodeRange(label, label + 1)});
240 } 161 }
241 break; 162 if (!block.ignore_branch) {
242 } 163 DecodeRangeInner(current_block, block.start, block.end);
243 } 164 InsertControlFlow(current_block, block);
244 if (settings.depth != shader_info.settings.depth) {
245 LOG_WARNING(
246 HW_GPU,
247 "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
248 CompileDepthAsString(settings.depth),
249 CompileDepthAsString(shader_info.settings.depth));
250 }
251 });
252 const auto gen_function =
253 ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr<ShaderFunctionIR> {
254 std::shared_ptr<ShaderFunctionIR> result;
255 if (decompiled) {
256 result = std::make_shared<ShaderFunctionIR>(std::move(program_manager), id,
257 shader_info.start, shader_info.end);
258 } else { 165 } else {
259 result = 166 DecodeRangeInner(current_block, block.start, block.end + 1);
260 std::make_shared<ShaderFunctionIR>(std::move(basic_blocks), disable_flow_stack,
261 id, shader_info.start, shader_info.end);
262 } 167 }
263 decompiled = false; 168 }
264 disable_flow_stack = false; 169 insert_block(current_block, current_label);
265 basic_blocks.clear(); 170 break;
266 program_manager.Clear(); 171 }
267 return result; 172 case CompileDepth::DecompileBackwards:
268 }); 173 case CompileDepth::FullDecompile: {
269 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 174 program_manager = std::move(shader_info.manager);
270 175 disable_flow_stack = true;
271 decompiled = false; 176 decompiled = true;
272 auto info = ScanFlow(program_code, main_offset, settings, registry); 177 ASTDecoder decoder{*this};
273 u32 id_start = 1; 178 ASTNode program = GetASTProgram();
274 for (auto& pair : info->subfunctions) { 179 decoder.Visit(program);
275 func_map.emplace(pair.first, id_start); 180 break;
276 id_start++; 181 }
182 default:
183 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
184 [[fallthrough]];
185 case CompileDepth::BruteForce: {
186 const auto shader_end = static_cast<u32>(program_code.size());
187 coverage_begin = main_offset;
188 coverage_end = shader_end;
189 for (u32 label = main_offset; label < shader_end; ++label) {
190 basic_blocks.insert({label, DecodeRange(label, label + 1)});
191 }
192 break;
193 }
277 } 194 }
278 coverage_begin = info->main.start; 195 if (settings.depth != shader_info.settings.depth) {
279 coverage_end = 0; 196 LOG_WARNING(
280 decode_function(info->main); 197 HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
281 main_function = gen_function(info->main, 0); 198 CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
282 subfunctions.resize(info->subfunctions.size());
283 for (auto& pair : info->subfunctions) {
284 auto& func_info = pair.second;
285 decode_function(func_info);
286 u32 id = func_map[pair.first];
287 subfunctions[id - 1] = gen_function(func_info, id);
288 } 199 }
289} 200}
290 201
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 2bc596512..5f88537bc 100755
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -33,7 +33,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
33 // With the previous preconditions, this instruction is a no-operation. 33 // With the previous preconditions, this instruction is a no-operation.
34 break; 34 break;
35 } 35 }
36 case OpCode::Id::RET:
37 case OpCode::Id::EXIT: { 36 case OpCode::Id::EXIT: {
38 const ConditionCode cc = instr.flow_condition_code; 37 const ConditionCode cc = instr.flow_condition_code;
39 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); 38 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
@@ -313,16 +312,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
313 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); 312 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
314 break; 313 break;
315 } 314 }
316 case OpCode::Id::CAL: {
317 const u32 target = pc + instr.bra.GetBranchTarget();
318 const auto it = func_map.find(target);
319 if (it == func_map.end()) {
320 UNREACHABLE();
321 break;
322 }
323 bb.push_back(FunctionCall(it->second));
324 break;
325 }
326 default: 315 default:
327 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); 316 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
328 } 317 }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 43a166b6f..c69681e8d 100755
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -339,6 +339,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
339 const TextureType texture_type{instr.tlds.GetTextureType()}; 339 const TextureType texture_type{instr.tlds.GetTextureType()};
340 const bool is_array{instr.tlds.IsArrayTexture()}; 340 const bool is_array{instr.tlds.IsArrayTexture()};
341 341
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
343 "AOFFI is not implemented");
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); 344 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
343 345
344 const Node4 components = GetTldsCode(instr, texture_type, is_array); 346 const Node4 components = GetTldsCode(instr, texture_type, is_array);
@@ -820,7 +822,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
820 for (std::size_t i = 0; i < type_coord_count; ++i) { 822 for (std::size_t i = 0; i < type_coord_count; ++i) {
821 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); 823 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
822 coords.push_back( 824 coords.push_back(
823 GetRegister(last && !aoffi_enabled ? last_coord_register : (coord_register + i))); 825 GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
824 } 826 }
825 827
826 const Node array = is_array ? GetRegister(array_register) : nullptr; 828 const Node array = is_array ? GetRegister(array_register) : nullptr;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a58e7c65e..b54d33763 100755
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -267,11 +267,10 @@ class PatchNode;
267class SmemNode; 267class SmemNode;
268class GmemNode; 268class GmemNode;
269class CommentNode; 269class CommentNode;
270class FunctionCallNode;
271 270
272using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, 271using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
273 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, 272 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
274 LmemNode, SmemNode, GmemNode, FunctionCallNode, CommentNode>; 273 LmemNode, SmemNode, GmemNode, CommentNode>;
275using Node = std::shared_ptr<NodeData>; 274using Node = std::shared_ptr<NodeData>;
276using Node4 = std::array<Node, 4>; 275using Node4 = std::array<Node, 4>;
277using NodeBlock = std::vector<Node>; 276using NodeBlock = std::vector<Node>;
@@ -495,18 +494,6 @@ private:
495 std::vector<Node> code; ///< Code to execute 494 std::vector<Node> code; ///< Code to execute
496}; 495};
497 496
498class FunctionCallNode final : public AmendNode {
499public:
500 explicit FunctionCallNode(u32 func_id_) : func_id{func_id_} {}
501
502 [[nodiscard]] u32 GetFuncId() const {
503 return func_id;
504 }
505
506private:
507 u32 func_id; ///< Id of the function to call
508};
509
510/// A general purpose register 497/// A general purpose register
511class GprNode final { 498class GprNode final {
512public: 499public:
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index cef9c26bc..6a5b6940d 100755
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -19,11 +19,6 @@ Node Comment(std::string text) {
19 return MakeNode<CommentNode>(std::move(text)); 19 return MakeNode<CommentNode>(std::move(text));
20} 20}
21 21
22/// Creates a function call
23Node FunctionCall(u32 func_id) {
24 return MakeNode<FunctionCallNode>(func_id);
25}
26
27Node Immediate(u32 value) { 22Node Immediate(u32 value) {
28 return MakeNode<ImmediateNode>(value); 23 return MakeNode<ImmediateNode>(value);
29} 24}
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 3f882cd25..1e0886185 100755
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -27,9 +27,6 @@ Node Conditional(Node condition, std::vector<Node> code);
27/// Creates a commentary node 27/// Creates a commentary node
28Node Comment(std::string text); 28Node Comment(std::string text);
29 29
30/// Creates a function call
31Node FunctionCall(u32 func_id);
32
33/// Creates an u32 immediate 30/// Creates an u32 immediate
34Node Immediate(u32 value); 31Node Immediate(u32 value);
35 32
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 94715b069..1cd7c14d7 100755
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -26,7 +26,7 @@ namespace VideoCommon::Shader {
26 26
27struct ShaderBlock; 27struct ShaderBlock;
28 28
29constexpr u32 MAX_PROGRAM_LENGTH = 0x2000; 29constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
30 30
31struct ConstBuffer { 31struct ConstBuffer {
32 constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) 32 constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
@@ -64,68 +64,16 @@ struct GlobalMemoryUsage {
64 bool is_written{}; 64 bool is_written{};
65}; 65};
66 66
67class ShaderFunctionIR final {
68public:
69 explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_,
70 u32 id_, u32 coverage_begin_, u32 coverage_end_)
71 : basic_blocks{std::move(basic_blocks_)}, decompiled{false},
72 disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_},
73 coverage_end{coverage_end_} {}
74 explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_,
75 u32 coverage_end_)
76 : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true},
77 id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {}
78
79 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
80 return basic_blocks;
81 }
82
83 [[nodiscard]] bool IsFlowStackDisabled() const {
84 return disable_flow_stack;
85 }
86
87 [[nodiscard]] bool IsDecompiled() const {
88 return decompiled;
89 }
90
91 const ASTManager& GetASTManager() const {
92 return program_manager;
93 }
94
95 [[nodiscard]] ASTNode GetASTProgram() const {
96 return program_manager.GetProgram();
97 }
98
99 [[nodiscard]] u32 GetASTNumVariables() const {
100 return program_manager.GetVariables();
101 }
102
103 [[nodiscard]] bool IsMain() const {
104 return id == 0;
105 }
106
107 [[nodiscard]] u32 GetId() const {
108 return id;
109 }
110
111private:
112 std::map<u32, NodeBlock> basic_blocks;
113 ASTManager program_manager{true, true};
114
115 bool decompiled{};
116 bool disable_flow_stack{};
117 u32 id{};
118
119 u32 coverage_begin{};
120 u32 coverage_end{};
121};
122
123class ShaderIR final { 67class ShaderIR final {
124public: 68public:
125 explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, 69 explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
126 CompilerSettings settings_, Registry& registry_); 70 CompilerSettings settings_, Registry& registry_);
127 ~ShaderIR(); 71 ~ShaderIR();
128 72
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
74 return basic_blocks;
75 }
76
129 const std::set<u32>& GetRegisters() const { 77 const std::set<u32>& GetRegisters() const {
130 return used_registers; 78 return used_registers;
131 } 79 }
@@ -207,6 +155,26 @@ public:
207 return header; 155 return header;
208 } 156 }
209 157
158 bool IsFlowStackDisabled() const {
159 return disable_flow_stack;
160 }
161
162 bool IsDecompiled() const {
163 return decompiled;
164 }
165
166 const ASTManager& GetASTManager() const {
167 return program_manager;
168 }
169
170 ASTNode GetASTProgram() const {
171 return program_manager.GetProgram();
172 }
173
174 u32 GetASTNumVariables() const {
175 return program_manager.GetVariables();
176 }
177
210 u32 ConvertAddressToNvidiaSpace(u32 address) const { 178 u32 ConvertAddressToNvidiaSpace(u32 address) const {
211 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); 179 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
212 } 180 }
@@ -222,16 +190,7 @@ public:
222 return num_custom_variables; 190 return num_custom_variables;
223 } 191 }
224 192
225 std::shared_ptr<ShaderFunctionIR> GetMainFunction() const {
226 return main_function;
227 }
228
229 const std::vector<std::shared_ptr<ShaderFunctionIR>>& GetSubFunctions() const {
230 return subfunctions;
231 }
232
233private: 193private:
234 friend class ExprDecoder;
235 friend class ASTDecoder; 194 friend class ASTDecoder;
236 195
237 struct SamplerInfo { 196 struct SamplerInfo {
@@ -494,10 +453,6 @@ private:
494 std::vector<Node> amend_code; 453 std::vector<Node> amend_code;
495 u32 num_custom_variables{}; 454 u32 num_custom_variables{};
496 455
497 std::shared_ptr<ShaderFunctionIR> main_function;
498 std::vector<std::shared_ptr<ShaderFunctionIR>> subfunctions;
499 std::unordered_map<u32, u32> func_map;
500
501 std::set<u32> used_registers; 456 std::set<u32> used_registers;
502 std::set<Tegra::Shader::Pred> used_predicates; 457 std::set<Tegra::Shader::Pred> used_predicates;
503 std::set<Tegra::Shader::Attribute::Index> used_input_attributes; 458 std::set<Tegra::Shader::Attribute::Index> used_input_attributes;