aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs
blob: 13006934e25ff98d2ef1d733054bfc1f563f1c72 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
    internal static class LoopFilter
    {
        // Highest loop filter level. Levels computed in this class are clamped to
        // the range [0, MaxLoopFilter], and UpdateSharpness fills one threshold
        // entry for every level in that range.
        public const int MaxLoopFilter = 63;

        // NOTE(review): not referenced within this file; presumably the number of
        // per-reference-frame loop filter deltas - confirm against the callers.
        public const int MaxRefLfDeltas = 4;
        // Number of mode loop filter deltas; bounds the mode loop in LoopFilterFrameInit.
        public const int MaxModeLfDeltas = 2;

        // 64 bit masks for the left transform size, indexed by TxSize. Each 1
        // represents a position where we should apply a loop filter across the
        // left border of an 8x8 block boundary.
        //
        // In the case of TX_16X16 (low order byte first) we end up with
        // a mask that looks like this:
        //
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //
        // A loop filter should be applied to every other 8x8 block horizontally.
        private static readonly ulong[] Left64X64TxformMask = new ulong[]
        {
            0xffffffffffffffffUL,  // TX_4X4
            0xffffffffffffffffUL,  // TX_8x8
            0x5555555555555555UL,  // TX_16x16
            0x1111111111111111UL,  // TX_32x32
        };

        // 64 bit masks for the above transform size, indexed by TxSize. Each 1
        // represents a position where we should apply a loop filter across the
        // top border of an 8x8 block boundary.
        //
        // In the case of TX_32x32 (low order byte first) we end up with
        // a mask that looks like this:
        //
        //    11111111
        //    00000000
        //    00000000
        //    00000000
        //    11111111
        //    00000000
        //    00000000
        //    00000000
        //
        // A loop filter should be applied to every fourth row vertically.
        private static readonly ulong[] Above64X64TxformMask = new ulong[]
        {
            0xffffffffffffffffUL,  // TX_4X4
            0xffffffffffffffffUL,  // TX_8x8
            0x00ff00ff00ff00ffUL,  // TX_16x16
            0x000000ff000000ffUL,  // TX_32x32
        };

        // 64 bit masks for prediction sizes (left), indexed by BlockSize. Each 1
        // marks a position on the left border of the prediction block, one bit
        // per 8x8 row. These are aligned to the right most appropriate bit, and
        // then shifted into place.
        //
        // In the case of BLOCK_16X32 (low order byte first) we end up with
        // a mask that looks like this:
        //
        //  10000000
        //  10000000
        //  10000000
        //  10000000
        //  00000000
        //  00000000
        //  00000000
        //  00000000
        private static readonly ulong[] LeftPredictionMask = new ulong[]
        {
            0x0000000000000001UL,  // BLOCK_4X4,
            0x0000000000000001UL,  // BLOCK_4X8,
            0x0000000000000001UL,  // BLOCK_8X4,
            0x0000000000000001UL,  // BLOCK_8X8,
            0x0000000000000101UL,  // BLOCK_8X16,
            0x0000000000000001UL,  // BLOCK_16X8,
            0x0000000000000101UL,  // BLOCK_16X16,
            0x0000000001010101UL,  // BLOCK_16X32,
            0x0000000000000101UL,  // BLOCK_32X16,
            0x0000000001010101UL,  // BLOCK_32X32,
            0x0101010101010101UL,  // BLOCK_32X64,
            0x0000000001010101UL,  // BLOCK_64X32,
            0x0101010101010101UL,  // BLOCK_64X64
        };

        // 64 bit masks for prediction sizes (above), indexed by BlockSize. Each 1
        // marks a position on the top border of the prediction block, one bit per
        // 8x8 column. BuildMask shifts the entry into place before OR-ing it into
        // the above mask.
        private static readonly ulong[] AbovePredictionMask = new ulong[]
        {
            0x0000000000000001UL,  // BLOCK_4X4
            0x0000000000000001UL,  // BLOCK_4X8
            0x0000000000000001UL,  // BLOCK_8X4
            0x0000000000000001UL,  // BLOCK_8X8
            0x0000000000000001UL,  // BLOCK_8X16,
            0x0000000000000003UL,  // BLOCK_16X8
            0x0000000000000003UL,  // BLOCK_16X16
            0x0000000000000003UL,  // BLOCK_16X32,
            0x000000000000000fUL,  // BLOCK_32X16,
            0x000000000000000fUL,  // BLOCK_32X32,
            0x000000000000000fUL,  // BLOCK_32X64,
            0x00000000000000ffUL,  // BLOCK_64X32,
            0x00000000000000ffUL,  // BLOCK_64X64
        };

        // 64 bit mask to shift and set for each prediction size, indexed by
        // BlockSize. A bit is set for each 8x8 block covered by a block of the
        // given size when that block sits at the top-left corner of the 64x64
        // block (for example BLOCK_16X16 sets two bits in each of the first two
        // rows).
        private static readonly ulong[] SizeMask = new ulong[]
        {
            0x0000000000000001UL,  // BLOCK_4X4
            0x0000000000000001UL,  // BLOCK_4X8
            0x0000000000000001UL,  // BLOCK_8X4
            0x0000000000000001UL,  // BLOCK_8X8
            0x0000000000000101UL,  // BLOCK_8X16,
            0x0000000000000003UL,  // BLOCK_16X8
            0x0000000000000303UL,  // BLOCK_16X16
            0x0000000003030303UL,  // BLOCK_16X32,
            0x0000000000000f0fUL,  // BLOCK_32X16,
            0x000000000f0f0f0fUL,  // BLOCK_32X32,
            0x0f0f0f0f0f0f0f0fUL,  // BLOCK_32X64,
            0x00000000ffffffffUL,  // BLOCK_64X32,
            0xffffffffffffffffUL,  // BLOCK_64X64
        };

        // Masks for the left and above borders. The values equal the TX_32x32
        // entries of Left64X64TxformMask and Above64X64TxformMask.
        // NOTE(review): neither constant is referenced elsewhere in this file -
        // confirm whether they are still needed.
        private const ulong LeftBorder = 0x1111111111111111UL;
        private const ulong AboveBorder = 0x000000ff000000ffUL;

        // 16 bit masks for the uv left transform size, indexed by TxSize. BuildMask
        // ANDs the entry with SizeMaskUv and shifts it into the left uv mask.
        private static readonly ushort[] Left64X64TxformMaskUv = new ushort[]
        {
            0xffff,  // TX_4X4
            0xffff,  // TX_8x8
            0x5555,  // TX_16x16
            0x1111,  // TX_32x32
        };

        // 16 bit masks for the uv above transform size, indexed by TxSize. BuildMask
        // ANDs the entry with SizeMaskUv and shifts it into the above uv mask.
        private static readonly ushort[] Above64X64TxformMaskUv = new ushort[]
        {
            0xffff,  // TX_4X4
            0xffff,  // TX_8x8
            0x0f0f,  // TX_16x16
            0x000f,  // TX_32x32
        };

        // 16 bit left mask to shift and set for each uv prediction size, indexed
        // by BlockSize.
        private static readonly ushort[] LeftPredictionMaskUv = new ushort[]
        {
            0x0001,  // BLOCK_4X4,
            0x0001,  // BLOCK_4X8,
            0x0001,  // BLOCK_8X4,
            0x0001,  // BLOCK_8X8,
            0x0001,  // BLOCK_8X16,
            0x0001,  // BLOCK_16X8,
            0x0001,  // BLOCK_16X16,
            0x0011,  // BLOCK_16X32,
            0x0001,  // BLOCK_32X16,
            0x0011,  // BLOCK_32X32,
            0x1111,  // BLOCK_32X64
            0x0011,  // BLOCK_64X32,
            0x1111,  // BLOCK_64X64
        };

        // 16 bit above mask to shift and set for each uv prediction size, indexed
        // by BlockSize.
        private static readonly ushort[] AbovePredictionMaskUv = new ushort[]
        {
            0x0001,  // BLOCK_4X4
            0x0001,  // BLOCK_4X8
            0x0001,  // BLOCK_8X4
            0x0001,  // BLOCK_8X8
            0x0001,  // BLOCK_8X16,
            0x0001,  // BLOCK_16X8
            0x0001,  // BLOCK_16X16
            0x0001,  // BLOCK_16X32,
            0x0003,  // BLOCK_32X16,
            0x0003,  // BLOCK_32X32,
            0x0003,  // BLOCK_32X64,
            0x000f,  // BLOCK_64X32,
            0x000f,  // BLOCK_64X64
        };

        // 16 bit mask to shift and set for each uv prediction size, indexed by
        // BlockSize. This is the uv counterpart of SizeMask.
        private static readonly ushort[] SizeMaskUv = new ushort[]
        {
            0x0001,  // BLOCK_4X4
            0x0001,  // BLOCK_4X8
            0x0001,  // BLOCK_8X4
            0x0001,  // BLOCK_8X8
            0x0001,  // BLOCK_8X16,
            0x0001,  // BLOCK_16X8
            0x0001,  // BLOCK_16X16
            0x0011,  // BLOCK_16X32,
            0x0003,  // BLOCK_32X16,
            0x0033,  // BLOCK_32X32,
            0x3333,  // BLOCK_32X64,
            0x00ff,  // BLOCK_64X32,
            0xffff,  // BLOCK_64X64
        };

        // 16 bit uv counterparts of LeftBorder and AboveBorder. The values equal
        // the TX_32x32 entries of the uv transform masks.
        // NOTE(review): neither constant is referenced elsewhere in this file -
        // confirm whether they are still needed.
        private const ushort LeftBorderUv = 0x1111;
        private const ushort AboveBorderUv = 0x000f;

        // Maps a prediction mode (cast to int) to the index GetFilterLevel uses for
        // the last dimension of LoopFilterInfoN.Lvl.
        private static readonly int[] ModeLfLut = new int[]
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
            1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
        };

        // Looks up the loop filter level for a block from its segment id, its
        // first reference frame and the ModeLfLut index of its prediction mode.
        private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
        {
            int modeIndex = ModeLfLut[(int)mi.Mode];

            return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][modeIndex];
        }

        // Returns a reference to the loop filter mask entry that covers the
        // position (miRow, miCol). One entry covers an 8x8 group of positions and
        // entries are laid out row by row with a pitch of lf.LfmStride.
        private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
        {
            int maskRow = miRow >> 3;
            int maskCol = miCol >> 3;

            return ref lf.Lfm[maskCol + (maskRow * lf.LfmStride)];
        }

        // 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
        // or greater area. Indexed as [row in superblock][column in superblock];
        // BuildMask only updates the uv masks for positions whose entry is 1.
        private static readonly byte[][] FirstBlockIn16x16 = new byte[][]
        {
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }
        };

        // Accumulates the loop filter bit masks and filter levels for the block
        // described by mi, located at miRow, miCol inside a 64x64 region.
        //
        // bw and bh are the number of filter level entries to fill per row and
        // the number of rows to fill (one entry per 8x8 position). Nothing is
        // written when the block's filter level is zero.
        public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
        {
            BlockSize bsize = mi.SbType;
            TxSize lumaTx = mi.TxSize;
            ref LoopFilterInfoN filterInfo = ref cm.LfInfo;
            int level = GetFilterLevel(ref filterInfo, ref mi);
            TxSize chromaTx = Luts.UvTxsizeLookup[(int)bsize][(int)lumaTx][1][1];
            ref LoopFilterMask mask = ref GetLfm(ref cm.Lf, miRow, miCol);
            ref ulong leftEdgesY = ref mask.LeftY[(int)lumaTx];
            ref ulong aboveEdgesY = ref mask.AboveY[(int)lumaTx];
            ref ulong inner4x4Y = ref mask.Int4x4Y;
            ref ushort leftEdgesUv = ref mask.LeftUv[(int)chromaTx];
            ref ushort aboveEdgesUv = ref mask.AboveUv[(int)chromaTx];
            ref ushort inner4x4Uv = ref mask.Int4x4Uv;

            // Position of the block inside its 64x64 region and the matching bit
            // offsets in the 64 bit (y) and 16 bit (uv) masks.
            int sbRow = miRow & 7;
            int sbCol = miCol & 7;
            int lumaShift = sbCol + (sbRow << 3);
            int chromaShift = (sbCol >> 1) + ((sbRow >> 1) << 2);
            bool updateUv = FirstBlockIn16x16[sbRow][sbCol] != 0;

            if (level == 0)
            {
                return;
            }

            // Record the filter level for every position the block covers, one
            // row of eight entries at a time.
            for (int row = 0, start = lumaShift; row < bh; row++, start += 8)
            {
                MemoryMarshal.CreateSpan(ref mask.LflY[start], 64 - start).Slice(0, bw).Fill((byte)level);
            }

            int sizeIndex = (int)bsize;

            // Set the prediction block edges. For instance, for a 32x16 block:
            //    above =   1111
            //              0000
            //    left  =   1000
            //              1000
            // NOTE: the low bit is the left most position, so "1000" is stored
            //       as 1, not 8. U and V use the 16 bit scale.
            aboveEdgesY |= AbovePredictionMask[sizeIndex] << lumaShift;
            leftEdgesY |= LeftPredictionMask[sizeIndex] << lumaShift;

            if (updateUv)
            {
                aboveEdgesUv |= (ushort)(AbovePredictionMaskUv[sizeIndex] << chromaShift);
                leftEdgesUv |= (ushort)(LeftPredictionMaskUv[sizeIndex] << chromaShift);
            }

            // A skipped inter block gets no filtering beyond its own block edges.
            if (mi.Skip != 0 && mi.IsInterBlock())
            {
                return;
            }

            // Transform edges: the transform size masks describe a full 64x64
            // block, so restrict them to the area this block covers and then
            // shift them into place.
            ulong coveredY = SizeMask[sizeIndex];

            aboveEdgesY |= (coveredY & Above64X64TxformMask[(int)lumaTx]) << lumaShift;
            leftEdgesY |= (coveredY & Left64X64TxformMask[(int)lumaTx]) << lumaShift;

            // Internal 4x4 boundaries depend only on the transform size, not on
            // the prediction block edges.
            if (lumaTx == TxSize.Tx4x4)
            {
                inner4x4Y |= coveredY << lumaShift;
            }

            if (!updateUv)
            {
                return;
            }

            ushort coveredUv = SizeMaskUv[sizeIndex];

            aboveEdgesUv |= (ushort)((coveredUv & Above64X64TxformMaskUv[(int)chromaTx]) << chromaShift);
            leftEdgesUv |= (ushort)((coveredUv & Left64X64TxformMaskUv[(int)chromaTx]) << chromaShift);

            if (chromaTx == TxSize.Tx4x4)
            {
                inner4x4Uv |= (ushort)((coveredUv & 0xffff) << chromaShift);
            }
        }

        // Overwrites the frame's loop filter masks with default-initialized
        // LoopFilterMask values. Does nothing when the frame filter level is zero.
        public static unsafe void ResetLfm(ref Vp9Common cm)
        {
            if (cm.Lf.FilterLevel == 0)
            {
                return;
            }

            // One row of LfmStride masks for every group of eight mode info rows,
            // rounded up.
            int maskRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;
            int maskCount = maskRows * cm.Lf.LfmStride;

            MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), maskCount);
        }

        // Fills the Lim and Mblim thresholds of every filter level from 0 to
        // MaxLoopFilter for the given sharpness level.
        private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
        {
            // The inside limit is the level shifted right once for any non-zero
            // sharpness and once more for a sharpness above 4.
            int shift = (sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0);

            for (int level = 0; level <= MaxLoopFilter; level++)
            {
                int insideLimit = level >> shift;

                // A non-zero sharpness also caps the limit at 9 - sharpnessLvl.
                if (sharpnessLvl > 0)
                {
                    insideLimit = Math.Min(insideLimit, 9 - sharpnessLvl);
                }

                // The limit is never allowed to drop below 1.
                insideLimit = Math.Max(insideLimit, 1);

                lfi.Lfthr[level].Lim.ToSpan().Fill((byte)insideLimit);
                lfi.Lfthr[level].Mblim.ToSpan().Fill((byte)((2 * (level + 2)) + insideLimit));
            }
        }

        // Prepares the per-frame loop filter state: refreshes the sharpness
        // thresholds when the sharpness level changed, then fills the filter
        // level table for every segment, reference frame and mode delta index.
        public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
        {
            // scale is the multiplier for the loop filter deltas:
            // 1 when defaultFiltLvl is between 0 and 31,
            // 2 when defaultFiltLvl is between 32 and 63.
            int scale = 1 << (defaultFiltLvl >> 5);
            ref LoopFilterInfoN info = ref cm.LfInfo;
            ref Types.LoopFilter filter = ref cm.Lf;
            ref Segmentation segmentation = ref cm.Seg;

            // Update limits if sharpness has changed.
            if (filter.LastSharpnessLevel != filter.SharpnessLevel)
            {
                UpdateSharpness(ref info, filter.SharpnessLevel);
                filter.LastSharpnessLevel = filter.SharpnessLevel;
            }

            for (int segId = 0; segId < Constants.MaxSegments; segId++)
            {
                int segLevel = defaultFiltLvl;

                // A segment may replace the default level (absolute data) or
                // offset it (delta data).
                if (segmentation.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
                {
                    int data = segmentation.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
                    int requested = segmentation.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data;

                    segLevel = Math.Clamp(requested, 0, MaxLoopFilter);
                }

                if (filter.ModeRefDeltaEnabled)
                {
                    // The intra frame entry only gets the reference delta.
                    int intraLevel = segLevel + (filter.RefDeltas[Constants.IntraFrame] * scale);
                    info.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLevel, 0, MaxLoopFilter);

                    for (int refFrame = Constants.LastFrame; refFrame < Constants.MaxRefFrames; ++refFrame)
                    {
                        int refLevel = segLevel + (filter.RefDeltas[refFrame] * scale);

                        for (int mode = 0; mode < MaxModeLfDeltas; ++mode)
                        {
                            int interLevel = refLevel + (filter.ModeDeltas[mode] * scale);
                            info.Lvl[segId][refFrame][mode] = (byte)Math.Clamp(interLevel, 0, MaxLoopFilter);
                        }
                    }
                }
                else
                {
                    // We could get rid of this if we assume that deltas are set to
                    // zero when not in use; encoder always uses deltas.
                    MemoryMarshal.Cast<Array2<byte>, byte>(info.Lvl[segId].ToSpan()).Fill((byte)segLevel);
                }
            }
        }
    }
}