diff options
-rw-r--r-- | ARMeilleure/ARMeilleure.csproj | 1 | ||||
-rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 105 | ||||
-rw-r--r-- | ARMeilleure/CodeGen/X86/AssemblerTable.cs | 2 | ||||
-rw-r--r-- | ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | 52 | ||||
-rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 1 | ||||
-rw-r--r-- | ARMeilleure/CodeGen/X86/X86Instruction.cs | 1 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdLogical.cs | 33 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdLogical32.cs | 7 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdMove32.cs | 9 | ||||
-rw-r--r-- | ARMeilleure/IntermediateRepresentation/Intrinsic.cs | 1 | ||||
-rw-r--r-- | ARMeilleure/Optimizations.cs | 11 | ||||
-rw-r--r-- | ARMeilleure/Translation/PTC/Ptc.cs | 14 |
12 files changed, 226 insertions, 11 deletions
diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj index 1c2135ed5..fa5551154 100644 --- a/ARMeilleure/ARMeilleure.csproj +++ b/ARMeilleure/ARMeilleure.csproj | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | <ItemGroup> | 8 | <ItemGroup> |
9 | <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> | 9 | <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> |
10 | <ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" /> | ||
10 | </ItemGroup> | 11 | </ItemGroup> |
11 | 12 | ||
12 | <ItemGroup> | 13 | <ItemGroup> |
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 2ea4208b3..67736a31f 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs | |||
@@ -1034,7 +1034,13 @@ namespace ARMeilleure.CodeGen.X86 | |||
1034 | 1034 | ||
1035 | Debug.Assert(opCode != BadOp, "Invalid opcode value."); | 1035 | Debug.Assert(opCode != BadOp, "Invalid opcode value."); |
1036 | 1036 | ||
1037 | if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) | 1037 | if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding) |
1038 | { | ||
1039 | WriteEvexInst(dest, src1, src2, type, flags, opCode); | ||
1040 | |||
1041 | opCode &= 0xff; | ||
1042 | } | ||
1043 | else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) | ||
1038 | { | 1044 | { |
1039 | // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. | 1045 | // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. |
1040 | 1046 | ||
@@ -1153,6 +1159,103 @@ namespace ARMeilleure.CodeGen.X86 | |||
1153 | } | 1159 | } |
1154 | } | 1160 | } |
1155 | 1161 | ||
/// <summary>
/// Writes the four-byte EVEX prefix: the 0x62 escape byte followed by the payload bytes P0, P1 and P2.
/// Only the prefix is emitted here; nothing else is written by this method.
/// </summary>
/// <remarks>
/// All three operands are read through <c>GetRegister()</c>, so only the register-register-register
/// form is handled. The register-extension bits (R, X, B, R', vvvv, V') are stored inverted, which is
/// why each of them is computed by testing that the corresponding index bit is clear.
/// </remarks>
/// <param name="dest">Destination register (operand 1).</param>
/// <param name="src1">First source register (operand 2, carried in vvvv/V').</param>
/// <param name="src2">Second source register (operand 3).</param>
/// <param name="type">Operand type; selects the W bit through <c>Is64Bits</c>.</param>
/// <param name="flags">Instruction flags; the bits under <c>PrefixMask</c> select the pp field.</param>
/// <param name="opCode">Full opcode value; the bytes above the low byte select the opcode map (mm).</param>
/// <param name="broadcast">Value of the embedded-broadcast bit.</param>
/// <param name="registerWidth">Vector width in bits: 128, 256 or 512.</param>
/// <param name="maskRegisterIdx">Mask register index, 0 to 7.</param>
/// <param name="zeroElements">Value of the z bit (zeroing instead of merging under the mask).</param>
private void WriteEvexInst(
    Operand dest,
    Operand src1,
    Operand src2,
    OperandType type,
    InstructionFlags flags,
    int opCode,
    bool broadcast = false,
    int registerWidth = 128,
    int maskRegisterIdx = 0,
    bool zeroElements = false)
{
    int op1Idx = dest.GetRegister().Index;
    int op2Idx = src1.GetRegister().Index;
    int op3Idx = src2.GetRegister().Index;

    // EVEX escape byte.
    WriteByte(0x62);

    // P0
    // Extend operand 1 register (index bit 3)
    bool r = (op1Idx & 8) == 0;
    // Extend operand 3 register (index bit 4)
    bool x = (op3Idx & 16) == 0;
    // Extend operand 3 register (index bit 3)
    bool b = (op3Idx & 8) == 0;
    // Extend operand 1 register (index bit 4)
    bool rp = (op1Idx & 16) == 0;
    // Escape code index
    byte mm = 0b00;

    switch ((ushort)(opCode >> 8))
    {
        // A two-byte (0F map) opcode is stored as 0x0fXX, so the shifted value is 0x0f.
        // NOTE(review): 0xf00 is kept from the original code for backward compatibility, but it
        // does not appear reachable with the opcode layout used by the instruction table - confirm.
        case 0x0f:
        case 0xf00: mm = 0b01; break;
        case 0xf38: mm = 0b10; break;
        case 0xf3a: mm = 0b11; break;

        default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
    }

    WriteByte(
        (byte)(
            (r ? 0x80 : 0) |
            (x ? 0x40 : 0) |
            (b ? 0x20 : 0) |
            (rp ? 0x10 : 0) |
            mm));

    // P1
    // Specify 64-bit lane mode
    bool w = Is64Bits(type);
    // Operand 2 register index (low four bits, inverted)
    byte vvvv = (byte)(~op2Idx & 0b1111);
    // Opcode prefix
    byte pp = (flags & InstructionFlags.PrefixMask) switch
    {
        InstructionFlags.Prefix66 => 0b01,
        InstructionFlags.PrefixF3 => 0b10,
        InstructionFlags.PrefixF2 => 0b11,
        _ => 0
    };

    // Bit 2 of P1 is always written as 1.
    WriteByte(
        (byte)(
            (w ? 0x80 : 0) |
            (vvvv << 3) |
            0b100 |
            pp));

    // P2
    // Mask register determines what elements to zero, rather than what elements to merge
    bool z = zeroElements;
    // Specifies register-width
    byte ll = 0b00;
    switch (registerWidth)
    {
        case 128: ll = 0b00; break;
        case 256: ll = 0b01; break;
        case 512: ll = 0b10; break;

        default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
    }
    // Embedded broadcast in the case of a memory operand
    bool bcast = broadcast;
    // Extend operand 2 register (index bit 4)
    bool vp = (op2Idx & 16) == 0;
    // Mask register index; the unsigned comparison also rejects negative values.
    Debug.Assert((uint)maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
    byte aaa = (byte)(maskRegisterIdx & 0b111);

    WriteByte(
        (byte)(
            (z ? 0x80 : 0) |
            (ll << 5) |
            (bcast ? 0x10 : 0) |
            (vp ? 8 : 0) |
            aaa));
}
1258 | |||
1156 | private void WriteCompactInst(Operand operand, int opCode) | 1259 | private void WriteCompactInst(Operand operand, int opCode) |
1157 | { | 1260 | { |
1158 | int regIndex = operand.GetRegister().Index; | 1261 | int regIndex = operand.GetRegister().Index; |
diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs index ecdc029f9..b47b3ecd1 100644 --- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs +++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs | |||
@@ -20,6 +20,7 @@ namespace ARMeilleure.CodeGen.X86 | |||
20 | Reg8Dest = 1 << 2, | 20 | Reg8Dest = 1 << 2, |
21 | RexW = 1 << 3, | 21 | RexW = 1 << 3, |
22 | Vex = 1 << 4, | 22 | Vex = 1 << 4, |
23 | Evex = 1 << 5, | ||
23 | 24 | ||
24 | PrefixBit = 16, | 25 | PrefixBit = 16, |
25 | PrefixMask = 7 << PrefixBit, | 26 | PrefixMask = 7 << PrefixBit, |
@@ -278,6 +279,7 @@ namespace ARMeilleure.CodeGen.X86 | |||
278 | Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); | 279 | Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); |
279 | Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); | 280 | Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); |
280 | Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); | 281 | Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); |
282 | Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66)); | ||
281 | Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); | 283 | Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); |
282 | Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); | 284 | Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); |
283 | Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); | 285 | Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); |
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs index c12a4e28b..63a9e46a2 100644 --- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | |||
@@ -1,10 +1,14 @@ | |||
1 | using Ryujinx.Memory; | ||
1 | using System; | 2 | using System; |
3 | using System.Runtime.InteropServices; | ||
2 | using System.Runtime.Intrinsics.X86; | 4 | using System.Runtime.Intrinsics.X86; |
3 | 5 | ||
4 | namespace ARMeilleure.CodeGen.X86 | 6 | namespace ARMeilleure.CodeGen.X86 |
5 | { | 7 | { |
6 | static class HardwareCapabilities | 8 | static class HardwareCapabilities |
7 | { | 9 | { |
10 | private delegate uint GetXcr0(); | ||
11 | |||
8 | static HardwareCapabilities() | 12 | static HardwareCapabilities() |
9 | { | 13 | { |
10 | if (!X86Base.IsSupported) | 14 | if (!X86Base.IsSupported) |
@@ -24,6 +28,28 @@ namespace ARMeilleure.CodeGen.X86 | |||
24 | FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; | 28 | FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; |
25 | FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; | 29 | FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; |
26 | } | 30 | } |
31 | |||
32 | Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax(); | ||
33 | } | ||
34 | |||
/// <summary>
/// Returns the value produced in EAX by <c>xgetbv</c> with ECX = 0 (the low 32 bits of XCR0),
/// or 0 when the OSXSAVE feature flag is not set.
/// </summary>
/// <remarks>
/// The instruction is executed from a small native stub copied into a temporary memory block
/// that is reprotected as read/execute and released when this method returns.
/// </remarks>
private static uint GetXcr0Eax()
{
    // xgetbv raises an invalid-opcode fault unless the OS has enabled XSAVE, which is what the
    // OSXSAVE CPUID bit reports. Returning 0 leaves every XCR0-dependent capability disabled.
    // NOTE(review): relies on FeatureInfo1Ecx being assigned before this call in the static
    // constructor - confirm against the part of the constructor outside this hunk.
    if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave))
    {
        return 0;
    }

    ReadOnlySpan<byte> asmGetXcr0 = new byte[]
    {
        0x31, 0xc9,       // xor ecx, ecx
        0x0f, 0x01, 0xd0, // xgetbv
        0xc3,             // ret
    };

    using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);

    memGetXcr0.Write(0, asmGetXcr0);

    memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);

    var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);

    return fGetXcr0();
}
28 | 54 | ||
29 | [Flags] | 55 | [Flags] |
@@ -44,6 +70,7 @@ namespace ARMeilleure.CodeGen.X86 | |||
44 | Sse42 = 1 << 20, | 70 | Sse42 = 1 << 20, |
45 | Popcnt = 1 << 23, | 71 | Popcnt = 1 << 23, |
46 | Aes = 1 << 25, | 72 | Aes = 1 << 25, |
73 | Osxsave = 1 << 27, | ||
47 | Avx = 1 << 28, | 74 | Avx = 1 << 28, |
48 | F16c = 1 << 29 | 75 | F16c = 1 << 29 |
49 | } | 76 | } |
@@ -52,7 +79,11 @@ namespace ARMeilleure.CodeGen.X86 | |||
52 | public enum FeatureFlags7Ebx | 79 | public enum FeatureFlags7Ebx |
53 | { | 80 | { |
54 | Avx2 = 1 << 5, | 81 | Avx2 = 1 << 5, |
55 | Sha = 1 << 29 | 82 | Avx512f = 1 << 16, |
83 | Avx512dq = 1 << 17, | ||
84 | Sha = 1 << 29, | ||
85 | Avx512bw = 1 << 30, | ||
86 | Avx512vl = 1 << 31 | ||
56 | } | 87 | } |
57 | 88 | ||
58 | [Flags] | 89 | [Flags] |
@@ -61,10 +92,21 @@ namespace ARMeilleure.CodeGen.X86 | |||
61 | Gfni = 1 << 8, | 92 | Gfni = 1 << 8, |
62 | } | 93 | } |
63 | 94 | ||
/// <summary>
/// Bit masks for the low 32 bits of XCR0 that the capability checks test.
/// </summary>
[Flags]
public enum Xcr0FlagsEax
{
    /// <summary>Bit 1.</summary>
    Sse = 0x02,

    /// <summary>Bit 2.</summary>
    YmmHi128 = 0x04,

    /// <summary>Bit 5.</summary>
    Opmask = 0x20,

    /// <summary>Bit 6.</summary>
    ZmmHi256 = 0x40,

    /// <summary>Bit 7.</summary>
    Hi16Zmm = 0x80
}
104 | |||
64 | public static FeatureFlags1Edx FeatureInfo1Edx { get; } | 105 | public static FeatureFlags1Edx FeatureInfo1Edx { get; } |
65 | public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } | 106 | public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } |
66 | public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; | 107 | public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; |
67 | public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; | 108 | public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; |
109 | public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0; | ||
68 | 110 | ||
69 | public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); | 111 | public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); |
70 | public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); | 112 | public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); |
@@ -76,8 +118,13 @@ namespace ARMeilleure.CodeGen.X86 | |||
76 | public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); | 118 | public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); |
77 | public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); | 119 | public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); |
78 | public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); | 120 | public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); |
79 | public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx); | 121 | public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128); |
80 | public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; | 122 | public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; |
123 | public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave) | ||
124 | && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm); | ||
125 | public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F; | ||
126 | public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F; | ||
127 | public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F; | ||
81 | public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); | 128 | public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); |
82 | public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); | 129 | public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); |
83 | public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); | 130 | public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); |
@@ -85,5 +132,6 @@ namespace ARMeilleure.CodeGen.X86 | |||
85 | public static bool ForceLegacySse { get; set; } | 132 | public static bool ForceLegacySse { get; set; } |
86 | 133 | ||
87 | public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; | 134 | public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; |
135 | public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse; | ||
88 | } | 136 | } |
89 | } \ No newline at end of file | 137 | } \ No newline at end of file |
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index 8c909ac13..c788fa442 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs | |||
@@ -180,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86 | |||
180 | Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma)); | 180 | Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma)); |
181 | Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); | 181 | Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); |
182 | Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); | 182 | Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); |
183 | Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm)); | ||
183 | Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); | 184 | Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); |
184 | Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); | 185 | Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); |
185 | } | 186 | } |
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index b024394e1..ecfc432d7 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs | |||
@@ -219,6 +219,7 @@ namespace ARMeilleure.CodeGen.X86 | |||
219 | Vfnmsub231sd, | 219 | Vfnmsub231sd, |
220 | Vfnmsub231ss, | 220 | Vfnmsub231ss, |
221 | Vpblendvb, | 221 | Vpblendvb, |
222 | Vpternlogd, | ||
222 | Xor, | 223 | Xor, |
223 | Xorpd, | 224 | Xorpd, |
224 | Xorps, | 225 | Xorps, |
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs index 8ca815801..2bf531e6c 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs | |||
@@ -254,7 +254,22 @@ namespace ARMeilleure.Instructions | |||
254 | 254 | ||
255 | public static void Not_V(ArmEmitterContext context) | 255 | public static void Not_V(ArmEmitterContext context) |
256 | { | 256 | { |
257 | if (Optimizations.UseSse2) | 257 | if (Optimizations.UseAvx512Ortho) |
258 | { | ||
259 | OpCodeSimd op = (OpCodeSimd)context.CurrOp; | ||
260 | |||
261 | Operand n = GetVec(op.Rn); | ||
262 | |||
263 | Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010)); | ||
264 | |||
265 | if (op.RegisterSize == RegisterSize.Simd64) | ||
266 | { | ||
267 | res = context.VectorZeroUpper64(res); | ||
268 | } | ||
269 | |||
270 | context.Copy(GetVec(op.Rd), res); | ||
271 | } | ||
272 | else if (Optimizations.UseSse2) | ||
258 | { | 273 | { |
259 | OpCodeSimd op = (OpCodeSimd)context.CurrOp; | 274 | OpCodeSimd op = (OpCodeSimd)context.CurrOp; |
260 | 275 | ||
@@ -283,6 +298,22 @@ namespace ARMeilleure.Instructions | |||
283 | { | 298 | { |
284 | InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); | 299 | InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); |
285 | } | 300 | } |
301 | else if (Optimizations.UseAvx512Ortho) | ||
302 | { | ||
303 | OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; | ||
304 | |||
305 | Operand n = GetVec(op.Rn); | ||
306 | Operand m = GetVec(op.Rm); | ||
307 | |||
308 | Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010)); | ||
309 | |||
310 | if (op.RegisterSize == RegisterSize.Simd64) | ||
311 | { | ||
312 | res = context.VectorZeroUpper64(res); | ||
313 | } | ||
314 | |||
315 | context.Copy(GetVec(op.Rd), res); | ||
316 | } | ||
286 | else if (Optimizations.UseSse2) | 317 | else if (Optimizations.UseSse2) |
287 | { | 318 | { |
288 | OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; | 319 | OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; |
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs index c2a04778b..68ef4ed17 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs | |||
@@ -151,6 +151,13 @@ namespace ARMeilleure.Instructions | |||
151 | { | 151 | { |
152 | InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); | 152 | InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); |
153 | } | 153 | } |
154 | else if (Optimizations.UseAvx512Ortho) | ||
155 | { | ||
156 | EmitVectorBinaryOpSimd32(context, (n, m) => | ||
157 | { | ||
158 | return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010)); | ||
159 | }); | ||
160 | } | ||
154 | else if (Optimizations.UseSse2) | 161 | else if (Optimizations.UseSse2) |
155 | { | 162 | { |
156 | Operand mask = context.VectorOne(); | 163 | Operand mask = context.VectorOne(); |
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index 17100eb9c..b8b91b31d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs | |||
@@ -34,7 +34,14 @@ namespace ARMeilleure.Instructions | |||
34 | 34 | ||
35 | public static void Vmvn_I(ArmEmitterContext context) | 35 | public static void Vmvn_I(ArmEmitterContext context) |
36 | { | 36 | { |
37 | if (Optimizations.UseSse2) | 37 | if (Optimizations.UseAvx512Ortho) |
38 | { | ||
39 | EmitVectorUnaryOpSimd32(context, (op1) => | ||
40 | { | ||
41 | return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101)); | ||
42 | }); | ||
43 | } | ||
44 | else if (Optimizations.UseSse2) | ||
38 | { | 45 | { |
39 | EmitVectorUnaryOpSimd32(context, (op1) => | 46 | EmitVectorUnaryOpSimd32(context, (op1) => |
40 | { | 47 | { |
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs index a665e4b7a..b629345ee 100644 --- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs | |||
@@ -173,6 +173,7 @@ namespace ARMeilleure.IntermediateRepresentation | |||
173 | X86Vfnmadd231ss, | 173 | X86Vfnmadd231ss, |
174 | X86Vfnmsub231sd, | 174 | X86Vfnmsub231sd, |
175 | X86Vfnmsub231ss, | 175 | X86Vfnmsub231ss, |
176 | X86Vpternlogd, | ||
176 | X86Xorpd, | 177 | X86Xorpd, |
177 | X86Xorps, | 178 | X86Xorps, |
178 | 179 | ||
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs index 9044314f6..a84a4dc4f 100644 --- a/ARMeilleure/Optimizations.cs +++ b/ARMeilleure/Optimizations.cs | |||
@@ -23,6 +23,10 @@ namespace ARMeilleure | |||
23 | public static bool UseSse42IfAvailable { get; set; } = true; | 23 | public static bool UseSse42IfAvailable { get; set; } = true; |
24 | public static bool UsePopCntIfAvailable { get; set; } = true; | 24 | public static bool UsePopCntIfAvailable { get; set; } = true; |
25 | public static bool UseAvxIfAvailable { get; set; } = true; | 25 | public static bool UseAvxIfAvailable { get; set; } = true; |
26 | public static bool UseAvx512FIfAvailable { get; set; } = true; | ||
27 | public static bool UseAvx512VlIfAvailable { get; set; } = true; | ||
28 | public static bool UseAvx512BwIfAvailable { get; set; } = true; | ||
29 | public static bool UseAvx512DqIfAvailable { get; set; } = true; | ||
26 | public static bool UseF16cIfAvailable { get; set; } = true; | 30 | public static bool UseF16cIfAvailable { get; set; } = true; |
27 | public static bool UseFmaIfAvailable { get; set; } = true; | 31 | public static bool UseFmaIfAvailable { get; set; } = true; |
28 | public static bool UseAesniIfAvailable { get; set; } = true; | 32 | public static bool UseAesniIfAvailable { get; set; } = true; |
@@ -47,11 +51,18 @@ namespace ARMeilleure | |||
47 | internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42; | 51 | internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42; |
48 | internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt; | 52 | internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt; |
49 | internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse; | 53 | internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse; |
54 | internal static bool UseAvx512F => UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse; | ||
55 | internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse; | ||
56 | internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse; | ||
57 | internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse; | ||
50 | internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c; | 58 | internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c; |
51 | internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma; | 59 | internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma; |
52 | internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni; | 60 | internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni; |
53 | internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq; | 61 | internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq; |
54 | internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha; | 62 | internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha; |
55 | internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni; | 63 | internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni; |
64 | |||
65 | internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl; | ||
66 | internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq; | ||
56 | } | 67 | } |
57 | } | 68 | } |
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 0b23fd043..17f687062 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs | |||
@@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC | |||
30 | private const string OuterHeaderMagicString = "PTCohd\0\0"; | 30 | private const string OuterHeaderMagicString = "PTCohd\0\0"; |
31 | private const string InnerHeaderMagicString = "PTCihd\0\0"; | 31 | private const string InnerHeaderMagicString = "PTCihd\0\0"; |
32 | 32 | ||
33 | private const uint InternalVersion = 4484; //! To be incremented manually for each change to the ARMeilleure project. | 33 | private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project. |
34 | 34 | ||
35 | private const string ActualDir = "0"; | 35 | private const string ActualDir = "0"; |
36 | private const string BackupDir = "1"; | 36 | private const string BackupDir = "1"; |
@@ -969,6 +969,7 @@ namespace ARMeilleure.Translation.PTC | |||
969 | (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, | 969 | (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, |
970 | (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, | 970 | (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, |
971 | (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, | 971 | (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, |
972 | 0, | ||
972 | 0); | 973 | 0); |
973 | } | 974 | } |
974 | else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) | 975 | else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) |
@@ -977,11 +978,12 @@ namespace ARMeilleure.Translation.PTC | |||
977 | (ulong)X86HardwareCapabilities.FeatureInfo1Ecx, | 978 | (ulong)X86HardwareCapabilities.FeatureInfo1Ecx, |
978 | (ulong)X86HardwareCapabilities.FeatureInfo1Edx, | 979 | (ulong)X86HardwareCapabilities.FeatureInfo1Edx, |
979 | (ulong)X86HardwareCapabilities.FeatureInfo7Ebx, | 980 | (ulong)X86HardwareCapabilities.FeatureInfo7Ebx, |
980 | (ulong)X86HardwareCapabilities.FeatureInfo7Ecx); | 981 | (ulong)X86HardwareCapabilities.FeatureInfo7Ecx, |
982 | (ulong)X86HardwareCapabilities.Xcr0InfoEax); | ||
981 | } | 983 | } |
982 | else | 984 | else |
983 | { | 985 | { |
984 | return new FeatureInfo(0, 0, 0, 0); | 986 | return new FeatureInfo(0, 0, 0, 0, 0); |
985 | } | 987 | } |
986 | } | 988 | } |
987 | 989 | ||
@@ -1002,7 +1004,7 @@ namespace ARMeilleure.Translation.PTC | |||
1002 | return osPlatform; | 1004 | return osPlatform; |
1003 | } | 1005 | } |
1004 | 1006 | ||
1005 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 78*/)] | 1007 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)] |
1006 | private struct OuterHeader | 1008 | private struct OuterHeader |
1007 | { | 1009 | { |
1008 | public ulong Magic; | 1010 | public ulong Magic; |
@@ -1034,8 +1036,8 @@ namespace ARMeilleure.Translation.PTC | |||
1034 | } | 1036 | } |
1035 | } | 1037 | } |
1036 | 1038 | ||
1037 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)] | 1039 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)] |
1038 | private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3); | 1040 | private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4); |
1039 | 1041 | ||
1040 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] | 1042 | [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] |
1041 | private struct InnerHeader | 1043 | private struct InnerHeader |