Implement macroblock filtering (still not working: the extra rows in yuv buffer for filtering are missing)

6 years ago · 524da752ad
6 changed files with 473 additions and 80 deletions
--- a/src/ImageSharp/Formats/WebP/LossyUtils.cs
+++ b/src/ImageSharp/Formats/WebP/LossyUtils.cs
@ -279,7 +279,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
            Dst(dst, 2, 0, bc);
            Dst(dst, 3, 2, bc);
            Dst(dst, 3, 0, Avg2(C, D));
-            Dst(dst, 0, 3, Avg3(K, I, J));
+            Dst(dst, 0, 3, Avg3(K, J, I));
            Dst(dst, 0, 2, Avg3(J, I, X));
            byte ixa = Avg3(I, X, A);
            Dst(dst, 0, 1, ixa);
@ -447,14 +447,14 @@ namespace SixLabors.ImageSharp.Formats.WebP
            for (int i = 0; i < 4; ++i)
            {
                // vertical pass
-                int a = src[srcOffset] + src[srcOffset + 8]; // [-4096, 4094]
-                int b = src[srcOffset] - src[srcOffset + 8]; // [-4095, 4095]
-                int c = Mul2(src[srcOffset + 4]) - Mul1(src[srcOffset + 12]); // [-3783, 3783]
-                int d = Mul1(src[srcOffset + 4]) + Mul2(src[srcOffset + 12]); // [-3785, 3781]
-                tmp[tmpOffset] = a + d; // [-7881, 7875]
-                tmp[tmpOffset + 1] = b + c; // [-7878, 7878]
-                tmp[tmpOffset + 2] = b - c; // [-7878, 7878]
-                tmp[tmpOffset + 3] = a - d; // [-7877, 7879]
+                int a = src[srcOffset] + src[srcOffset + 8];
+                int b = src[srcOffset] - src[srcOffset + 8];
+                int c = Mul2(src[srcOffset + 4]) - Mul1(src[srcOffset + 12]);
+                int d = Mul1(src[srcOffset + 4]) + Mul2(src[srcOffset + 12]);
+                tmp[tmpOffset] = a + d;
+                tmp[tmpOffset + 1] = b + c;
+                tmp[tmpOffset + 2] = b - c;
+                tmp[tmpOffset + 3] = a - d;
                tmpOffset += 4;
                srcOffset++;
            }
@ -462,10 +462,8 @@ namespace SixLabors.ImageSharp.Formats.WebP
            // Each pass is expanding the dynamic range by ~3.85 (upper bound).
            // The exact value is (2. + (20091 + 35468) / 65536).
            // After the second pass, maximum interval is [-3794, 3794], assuming
-            // an input in [-2048, 2047] interval. We then need to add a dst value
-            // in the [0, 255] range.
-            // In the worst case scenario, the input to clip_8b() can be as large as
-            // [-60713, 60968].
+            // an input in [-2048, 2047] interval. We then need to add a dst value in the [0, 255] range.
+            // In the worst case scenario, the input to clip_8b() can be as large as [-60713, 60968].
            tmpOffset = 0;
            for (int i = 0; i < 4; ++i)
            {
@ -560,9 +558,105 @@ namespace SixLabors.ImageSharp.Formats.WebP
            }
        }

-        // We process u and v together stashed into 32bit(16bit each).
+        // Simple In-loop filtering (Paragraph 15.2)
+        public static void SimpleVFilter16(byte[] p, int offset, int stride, int thresh)
+        {
+            int thresh2 = (2 * thresh) + 1;
+            for (int i = 0; i < 16; ++i)
+            {
+                if (NeedsFilter(p, offset + i, stride, thresh2))
+                {
+                    DoFilter2(p, offset + i, stride);
+                }
+            }
+        }
+
+        public static void SimpleHFilter16(byte[] p, int offset, int stride, int thresh)
+        {
+            int thresh2 = (2 * thresh) + 1;
+            for (int i = 0; i < 16; ++i)
+            {
+                if (NeedsFilter(p, offset + (i * stride), 1, thresh2))
+                {
+                    DoFilter2(p, offset + (i * stride), 1);
+                }
+            }
+        }
+
+        public static void SimpleVFilter16i(byte[] p, int offset, int stride, int thresh)
+        {
+            for (int k = 3; k > 0; --k)
+            {
+                offset += 4 * stride;
+                SimpleVFilter16(p, offset,  stride, thresh);
+            }
+        }
+
+        // Applies the simple horizontal filter to the three inner vertical edges of a 16-pixel wide
+        // block. 'offset' addresses the first sample of the block, 'stride' is the row pitch of 'p'
+        // and 'thresh' is the (unscaled) filter limit forwarded to SimpleHFilter16.
+        public static void SimpleHFilter16i(byte[] p, int offset, int stride, int thresh)
+        {
+            for (int k = 3; k > 0; --k)
+            {
+                // Inner vertical edges sit 4 columns apart, so move 4 samples to the right
+                // (same stepping as HFilter16i); advancing by 'stride' would move one row down instead.
+                offset += 4;
+                SimpleHFilter16(p, offset, stride, thresh);
+            }
+        }
+
+        public static void VFilter16(byte[] p, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop26(p, offset, stride, 1, 16, thresh, ithresh, hevThresh);
+        }
+
+        public static void HFilter16(byte[] p, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop26(p, offset, 1, stride, 16, thresh, ithresh, hevThresh);
+        }
+
+        public static void VFilter16i(byte[] p, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            for (int k = 3; k > 0; --k)
+            {
+                offset += 4 * stride;
+                FilterLoop24(p, offset, stride, 1, 16, thresh, ithresh, hevThresh);
+            }
+        }
+
+        public static void HFilter16i(byte[] p, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            for (int k = 3; k > 0; --k)
+            {
+                offset += 4;
+                FilterLoop24(p, offset, 1, stride, 16, thresh, ithresh, hevThresh);
+            }
+        }
+
+        // 8-pixels wide variant, for chroma filtering.
+        public static void VFilter8(byte[] u, byte[] v, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop26(u, offset, stride, 1, 8, thresh, ithresh, hevThresh);
+            FilterLoop26(v, offset, stride, 1, 8, thresh, ithresh, hevThresh);
+        }
+
+        public static void HFilter8(byte[] u, byte[] v, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop26(u, offset, 1, stride, 8, thresh, ithresh, hevThresh);
+            FilterLoop26(v, offset, 1, stride, 8, thresh, ithresh, hevThresh);
+        }
+
+        public static void VFilter8i(byte[] u, byte[] v, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop24(u, offset + (4 * stride), stride, 1, 8, thresh, ithresh, hevThresh);
+            FilterLoop24(v, offset + (4 * stride), stride, 1, 8, thresh, ithresh, hevThresh);
+        }
+
+        public static void HFilter8i(byte[] u, byte[] v, int offset, int stride, int thresh, int ithresh, int hevThresh)
+        {
+            FilterLoop24(u, offset + 4, 1, stride, 8, thresh, ithresh, hevThresh);
+            FilterLoop24(v, offset + 4, 1, stride, 8, thresh, ithresh, hevThresh);
+        }
+
        public static uint LoadUv(byte u, byte v)
        {
+            // We process u and v together stashed into 32bit(16bit each).
            return (uint)(u | (v << 16));
        }

@ -571,7 +665,6 @@ namespace SixLabors.ImageSharp.Formats.WebP
            bgr[0] = (byte)YuvToB(y, u);
            bgr[1] = (byte)YuvToG(y, u, v);
            bgr[2] = (byte)YuvToR(y, v);
-            int tmp = 0;
        }

        public static int YuvToR(int y, int v)
@ -589,6 +682,157 @@ namespace SixLabors.ImageSharp.Formats.WebP
            return Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685);
        }

+        // Complex In-loop filtering (Paragraph 15.3)
+        // Walks 'size' positions along an edge, advancing 'offset' by vStride after each one.
+        // At every position accepted by NeedsFilter2 the samples across the edge (spaced hStride apart)
+        // are filtered: DoFilter2 when Hev reports high edge variance, DoFilter4 otherwise.
+        // 'thresh' is doubled and incremented once up front before being handed to NeedsFilter2.
+        private static void FilterLoop24(
+            byte[] p,
+            int offset,
+            int hStride,
+            int vStride,
+            int size,
+            int thresh,
+            int ithresh,
+            int hevThresh)
+        {
+            int thresh2 = (2 * thresh) + 1;
+            while (size-- > 0)
+            {
+                if (NeedsFilter2(p, offset,  hStride, thresh2, ithresh))
+                {
+                    if (Hev(p, offset, hStride, hevThresh))
+                    {
+                        DoFilter2(p, offset, hStride);
+                    }
+                    else
+                    {
+                        DoFilter4(p, offset, hStride);
+                    }
+                }
+
+                // Move to the next position along the edge.
+                offset += vStride;
+            }
+        }
+
+        // Same traversal as FilterLoop24, but positions without high edge variance are filtered
+        // with DoFilter6 (six samples rewritten) instead of DoFilter4.
+        // 'hStride' is the sample step across the edge, 'vStride' the step along it, and 'size'
+        // the number of positions to visit.
+        private static void FilterLoop26(
+            byte[] p,
+            int offset,
+            int hStride,
+            int vStride,
+            int size,
+            int thresh,
+            int ithresh,
+            int hevThresh)
+        {
+            int thresh2 = (2 * thresh) + 1;
+            while (size-- > 0)
+            {
+                if (NeedsFilter2(p, offset, hStride, thresh2, ithresh))
+                {
+                    if (Hev(p, offset, hStride, hevThresh))
+                    {
+                        DoFilter2(p, offset, hStride);
+                    }
+                    else
+                    {
+                        DoFilter6(p, offset, hStride);
+                    }
+                }
+
+                // Move to the next position along the edge.
+                offset += vStride;
+            }
+        }
+
+        private static void DoFilter2(byte[] p, int offset, int step)
+        {
+            // 4 pixels in, 2 pixels out
+            int p1 = p[offset - (2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+            int a = (3 * (q0 - p0)) + Vp8LookupTables.Sclip1(p1 - q1);
+            int a1 = Vp8LookupTables.Sclip2((a + 4) >> 3);
+            int a2 = Vp8LookupTables.Sclip2((a + 3) >> 3);
+            p[offset - step] = Vp8LookupTables.Clip1(p0 + a2);
+            p[offset] = Vp8LookupTables.Clip1(q0 - a1);
+        }
+
+        private static void DoFilter4(byte[] p, int offset, int step)
+        {
+            // 4 pixels in, 4 pixels out
+            int p1 = p[offset - (2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+            int a = 3 * (q0 - p0);
+            int a1 = Vp8LookupTables.Sclip2((a + 4) >> 3);
+            int a2 = Vp8LookupTables.Sclip2((a + 3) >> 3);
+            int a3 = (a1 + 1) >> 1;
+            p[offset - (2 * step)] = Vp8LookupTables.Clip1(p1 + a3);
+            p[offset - step] = Vp8LookupTables.Clip1(p0 + a2);
+            p[offset] = Vp8LookupTables.Clip1(q0 - a1);
+            p[offset + step] = Vp8LookupTables.Clip1(q1 - a3);
+        }
+
+        // Filters across the edge lying between p[offset - step] and p[offset]: reads the three
+        // samples on each side ('step' apart) and rewrites all six of them.
+        private static void DoFilter6(byte[] p, int offset, int step)
+        {
+            // 6 pixels in, 6 pixels out
+            int p2 = p[offset - (3 * step)];
+            int p1 = p[offset - (2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+            int q2 = p[offset + (2 * step)];
+
+            // The filter value is signed, so both clamps must saturate to [-128, 127] (Sclip1).
+            // Clip1 saturates to [0, 255] and its table only spans [-255, 510], whereas
+            // 3 * (q0 - p0) alone can reach +/-765; the sum here stays within Sclip1's [-1020, 1020].
+            int a = Vp8LookupTables.Sclip1((3 * (q0 - p0)) + Vp8LookupTables.Sclip1(p1 - q1));
+
+            // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
+            int a1 = ((27 * a) + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
+            int a2 = ((18 * a) + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+            int a3 = ((9 * a) + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+
+            // The correction is strongest next to the edge and fades with distance from it.
+            p[offset - (3 * step)] = Vp8LookupTables.Clip1(p2 + a3);
+            p[offset - (2 * step)] = Vp8LookupTables.Clip1(p1 + a2);
+            p[offset - step] = Vp8LookupTables.Clip1(p0 + a1);
+            p[offset] = Vp8LookupTables.Clip1(q0 - a1);
+            p[offset + step] = Vp8LookupTables.Clip1(q1 - a2);
+            p[offset + (2 * step)] = Vp8LookupTables.Clip1(q2 - a3);
+        }
+
+        // Limit test of the simple loop filter: returns true when the step across the edge lying
+        // between p[offset - step] and p[offset] is small enough for the edge to be filtered.
+        // 'thresh' is the already scaled limit; the callers pass (2 * thresh) + 1.
+        private static bool NeedsFilter(byte[] p, int offset, int step, int thresh)
+        {
+            int p1 = p[offset - (2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+
+            // Same difference measure NeedsFilter2 checks first. Comparing |p1 - p0| and |q1 - q0|
+            // against the threshold is the high-edge-variance test (Hev), not the filter-limit test.
+            return ((4 * Vp8LookupTables.Abs0(p0 - q0)) + Vp8LookupTables.Abs0(p1 - q1)) <= thresh;
+        }
+
+        // Decides whether the edge lying between p[offset - step] and p[offset] should be filtered.
+        // Reads four samples on each side of the edge ('step' apart). 't' bounds the difference
+        // across the edge, 'it' bounds the difference between neighbouring samples on either side.
+        private static bool NeedsFilter2(byte[] p, int offset, int step, int t, int it)
+        {
+            int p3 = p[offset - (4 * step)];
+            int p2 = p[offset - (3 * step)];
+            int p1 = p[offset - (2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+            int q2 = p[offset + (2 * step)];
+            int q3 = p[offset + (3 * step)];
+
+            // Reject when the weighted difference across the edge exceeds the limit.
+            if (((4 * Vp8LookupTables.Abs0(p0 - q0)) + Vp8LookupTables.Abs0(p1 - q1)) > t)
+            {
+                return false;
+            }
+
+            // Otherwise every neighbouring pair on both sides must differ by at most 'it'.
+            return Vp8LookupTables.Abs0(p3 - p2) <= it && Vp8LookupTables.Abs0(p2 - p1) <= it &&
+                   Vp8LookupTables.Abs0(p1 - p0) <= it && Vp8LookupTables.Abs0(q3 - q2) <= it &&
+                   Vp8LookupTables.Abs0(q2 - q1) <= it && Vp8LookupTables.Abs0(q1 - q0) <= it;
+        }
+
+        // High-edge-variance test: true when, on either side of the edge lying between
+        // p[offset - step] and p[offset], the two samples nearest the edge differ by more than 'thresh'.
+        private static bool Hev(byte[] p, int offset, int step, int thresh)
+        {
+            int p1 = p[offset -(2 * step)];
+            int p0 = p[offset - step];
+            int q0 = p[offset];
+            int q1 = p[offset + step];
+            return (Vp8LookupTables.Abs0(p1 - p0) > thresh) || (Vp8LookupTables.Abs0(q1 - q0) > thresh);
+        }
+
        private static int MultHi(int v, int coeff)
        {
            return (v * coeff) >> 8;
--- a/src/ImageSharp/Formats/WebP/Vp8Decoder.cs
+++ b/src/ImageSharp/Formats/WebP/Vp8Decoder.cs
@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
    /// </summary>
    internal class Vp8Decoder
    {
-        public Vp8Decoder(Vp8FrameHeader frameHeader, Vp8PictureHeader pictureHeader, Vp8SegmentHeader segmentHeader, Vp8Proba probabilities, Vp8Io io)
+        public Vp8Decoder(Vp8FrameHeader frameHeader, Vp8PictureHeader pictureHeader, Vp8SegmentHeader segmentHeader, Vp8Proba probabilities)
        {
            this.FilterHeader = new Vp8FilterHeader();
            this.FrameHeader = frameHeader;
@ -71,7 +71,6 @@ namespace SixLabors.ImageSharp.Formats.WebP
            }

            this.Vp8BitReaders = new Vp8BitReader[WebPConstants.MaxNumPartitions];
-            this.Init(io);
        }

        public Vp8FrameHeader FrameHeader { get; }
@ -219,53 +218,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
            }
        }

-        public void Init(Vp8Io io)
-        {
-            int intraPredModeSize = 4 * this.MbWidth;
-            this.IntraT = new byte[intraPredModeSize];
-
-            int extraPixels = WebPConstants.FilterExtraRows[(int)this.Filter];
-            if (this.Filter is LoopFilter.Complex)
-            {
-                // For complex filter, we need to preserve the dependency chain.
-                this.TopLeftMbX = 0;
-                this.TopLeftMbY = 0;
-            }
-            else
-            {
-                // For simple filter, we can filter only the cropped region. We include 'extraPixels' on
-                // the other side of the boundary, since vertical or horizontal filtering of the previous
-                // macroblock can modify some abutting pixels.
-                this.TopLeftMbX = (io.CropLeft - extraPixels) >> 4;
-                this.TopLeftMbY = (io.CropTop - extraPixels) >> 4;
-                if (this.TopLeftMbX < 0)
-                {
-                    this.TopLeftMbX = 0;
-                }
-
-                if (this.TopLeftMbY < 0)
-                {
-                    this.TopLeftMbY = 0;
-                }
-            }
-
-            // We need some 'extra' pixels on the right/bottom.
-            this.BottomRightMbY = (io.CropBottom + 15 + extraPixels) >> 4;
-            this.BottomRightMbX = (io.CropRight + 15 + extraPixels) >> 4;
-            if (this.BottomRightMbX > this.MbWidth)
-            {
-                this.BottomRightMbX = this.MbWidth;
-            }
-
-            if (this.BottomRightMbY > this.MbHeight)
-            {
-                this.BottomRightMbY = this.MbHeight;
-            }
-
-            this.PrecomputeFilterStrengths();
-        }
-
-        private void PrecomputeFilterStrengths()
+        public void PrecomputeFilterStrengths()
        {
            if (this.Filter is LoopFilter.None)
            {
--- a/src/ImageSharp/Formats/WebP/Vp8FilterInfo.cs
+++ b/src/ImageSharp/Formats/WebP/Vp8FilterInfo.cs
@ -20,8 +20,9 @@ namespace SixLabors.ImageSharp.Formats.WebP

        /// <summary>
        /// Gets or sets a value indicating whether to do inner filtering.
+        /// TODO: can this be a bool?
        /// </summary>
-        public byte InnerFiltering { get; set; }
+        public byte UseInnerFiltering { get; set; }

        /// <summary>
        /// Gets or sets the high edge variance threshold in [0..2].
--- a/src/ImageSharp/Formats/WebP/Vp8LookupTables.cs
+++ b/src/ImageSharp/Formats/WebP/Vp8LookupTables.cs
@ -0,0 +1,64 @@
+// Copyright (c) Six Labors and contributors.
+// Licensed under the Apache License, Version 2.0.
+
+namespace SixLabors.ImageSharp.Formats.WebP
+{
+    // Precomputed absolute-value and clamping tables used by the loop filter code.
+    // Each table is indexed with a bias so that negative inputs map to valid indices; the public
+    // accessors add that bias. Inputs outside a table's documented range index outside the array.
+    internal static class Vp8LookupTables
+    {
+        // abs0[255 + i] = |i| for i in [-255, 255].
+        private static readonly byte[] abs0;
+
+        // clip1[255 + i] = i clamped to [0, 255] for i in [-255, 510].
+        private static readonly byte[] clip1;
+
+        // sclip1[1020 + i] = i clamped to [-128, 127] for i in [-1020, 1020].
+        private static readonly sbyte[] sclip1;
+
+        // sclip2[112 + i] = i clamped to [-16, 15] for i in [-112, 112].
+        private static readonly sbyte[] sclip2;
+
+        // Fills all four tables once, when the type is first used.
+        static Vp8LookupTables()
+        {
+            // TODO: maybe use hashset here
+            abs0 = new byte[511];
+            for (int i = -255; i <= 255; ++i)
+            {
+                abs0[255 + i] = (byte)((i < 0) ? -i : i);
+            }
+
+            clip1 = new byte[766];
+            for (int i = -255; i <= 255 + 255; ++i)
+            {
+                clip1[255 + i] = (byte)((i < 0) ? 0 : (i > 255) ? 255 : i);
+            }
+
+            sclip1 = new sbyte[2041];
+            for (int i = -1020; i <= 1020; ++i)
+            {
+                sclip1[1020 + i] = (sbyte)((i < -128) ? -128 : (i > 127) ? 127 : i);
+            }
+
+            sclip2 = new sbyte[225];
+            for (int i = -112; i <= 112; ++i)
+            {
+                sclip2[112 + i] = (sbyte)((i < -16) ? -16 : (i > 15) ? 15 : i);
+            }
+        }
+
+        // Returns |v|; v must lie in [-255, 255].
+        public static byte Abs0(int v)
+        {
+            return abs0[v + 255];
+        }
+
+        // Returns v clamped to [0, 255]; v must lie in [-255, 510].
+        public static byte Clip1(int v)
+        {
+            return clip1[v + 255];
+        }
+
+        // Returns v clamped to [-128, 127]; v must lie in [-1020, 1020].
+        public static sbyte Sclip1(int v)
+        {
+            return sclip1[v + 1020];
+        }
+
+        // Returns v clamped to [-16, 15]; v must lie in [-112, 112].
+        public static sbyte Sclip2(int v)
+        {
+            return sclip2[v + 112];
+        }
+    }
+}
--- a/src/ImageSharp/Formats/WebP/WebPConstants.cs
+++ b/src/ImageSharp/Formats/WebP/WebPConstants.cs
@ -132,8 +132,8 @@ namespace SixLabors.ImageSharp.Formats.WebP

        /// <summary>
        /// How many extra lines are needed on the MB boundary for caching, given a filtering level.
-        /// Simple filter:  up to 2 luma samples are read and 1 is written.
-        /// Complex filter: up to 4 luma samples are read and 3 are written. Same for U/V, so it's 8 samples total (because of the 2x upsampling).
+        /// Simple filter(1):  up to 2 luma samples are read and 1 is written.
+        /// Complex filter(2): up to 4 luma samples are read and 3 are written. Same for U/V, so it's 8 samples total (because of the 2x upsampling).
        /// </summary>
        public static readonly byte[] FilterExtraRows = { 0, 2, 8 };

--- a/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs
+++ b/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs
@ -57,11 +57,12 @@ namespace SixLabors.ImageSharp.Formats.WebP
            var proba = new Vp8Proba();
            Vp8SegmentHeader vp8SegmentHeader = this.ParseSegmentHeader(proba);

-            Vp8Io io = InitializeVp8Io(pictureHeader);
-            var decoder = new Vp8Decoder(info.Vp8FrameHeader, pictureHeader, vp8SegmentHeader, proba, io);
+            var decoder = new Vp8Decoder(info.Vp8FrameHeader, pictureHeader, vp8SegmentHeader, proba);
+            Vp8Io io = InitializeVp8Io(decoder, pictureHeader);

            // Paragraph 9.4: Parse the filter specs.
            this.ParseFilterHeader(decoder);
+            decoder.PrecomputeFilterStrengths();

            // Paragraph 9.5: Parse partitions.
            this.ParsePartitions(decoder);
@ -94,7 +95,9 @@ namespace SixLabors.ImageSharp.Formats.WebP
                    byte b = pixelData[idx];
                    byte g = pixelData[idx + 1];
                    byte r = pixelData[idx + 2];
-                    color.FromRgba32(new Rgba32(r, g, b, 255));
+
+                    // TODO: use bulk conversion here.
+                    color.FromBgr24(new Bgr24(r, g, b));
                    pixelRow[x] = color;
                }
            }
@ -214,11 +217,16 @@ namespace SixLabors.ImageSharp.Formats.WebP
            bool filterRow = (dec.Filter != LoopFilter.None) &&
                             (dec.MbY >= dec.TopLeftMbY) && (dec.MbY <= dec.BottomRightMbY);

-            this.ReconstructRow(dec, filterRow);
+            this.ReconstructRow(dec);
+            if (filterRow)
+            {
+                this.FilterRow(dec);
+            }
+
            this.FinishRow(dec, io);
        }

-        private void ReconstructRow(Vp8Decoder dec, bool filterRow)
+        private void ReconstructRow(Vp8Decoder dec)
        {
            int mby = dec.MbY;

@ -313,7 +321,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
                if (block.IsI4x4)
                {
                    // uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
-                    Span<uint> topRight = MemoryMarshal.Cast<byte, uint>(yuv.AsSpan(yOff - WebPConstants.Bps + 16));
+                    //Span<uint> topRight = MemoryMarshal.Cast<byte, uint>(yuv.AsSpan(yOff - WebPConstants.Bps + 16));
                    if (mby > 0)
                    {
                        if (mbx >= dec.MbWidth - 1)
@ -457,7 +465,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
                    vDst.Slice(7 * WebPConstants.Bps, 8).CopyTo(topYuv.V);
                }

-                // Transfer reconstructed samples from yuv_b_ cache to final destination.
+                // Transfer reconstructed samples from yuv_buffer cache to final destination.
                int cacheId = 0; // TODO: what should be cacheId, always 0?
                int yOffset = cacheId * 16 * dec.CacheYStride;
                int uvOffset = cacheId * 8 * dec.CacheUvStride;
@ -477,6 +485,82 @@ namespace SixLabors.ImageSharp.Formats.WebP
            }
        }

+        private void FilterRow(Vp8Decoder dec)
+        {
+            int mby = dec.MbY;
+            for (int mbx = dec.TopLeftMbX; mbx < dec.BottomRightMbX; ++mbx)
+            {
+                //this.DoFilter(dec, mbx, mby);
+            }
+        }
+
+        // Applies the in-loop filter to the macroblock at column 'mbx' of row 'mby' in the row cache.
+        // Left/top macroblock edges are only filtered when a neighbour exists (mbx > 0 / mby > 0);
+        // inner edges are filtered when the macroblock's filter info requests inner filtering.
+        // NOTE(review): the vertical filters read rows in front of 'offset' (p[offset - n * stride]);
+        // with offsets starting at mbx * 16 this needs extra rows ahead of the cache start - confirm
+        // the cache layout before enabling the call in FilterRow.
+        private void DoFilter(Vp8Decoder dec, int mbx, int mby)
+        {
+            int yBps = dec.CacheYStride;
+
+            // Use the filter info of the macroblock being filtered, i.e. the 'mbx' argument;
+            // dec.MbX is the decoder's own column cursor and does not follow the filtering loop.
+            Vp8FilterInfo filterInfo = dec.FilterInfo[mbx];
+            int iLevel = filterInfo.InnerLevel;
+            int limit = filterInfo.Limit;
+
+            // A zero limit disables filtering for this macroblock.
+            if (limit is 0)
+            {
+                return;
+            }
+
+            if (dec.Filter is LoopFilter.Simple)
+            {
+                // The simple filter only touches luma.
+                int offset = mbx * 16;
+                if (mbx > 0)
+                {
+                    LossyUtils.SimpleHFilter16(dec.CacheY, offset, yBps, limit + 4);
+                }
+
+                if (filterInfo.UseInnerFiltering > 0)
+                {
+                    LossyUtils.SimpleHFilter16i(dec.CacheY, offset, yBps, limit);
+                }
+
+                if (mby > 0)
+                {
+                    LossyUtils.SimpleVFilter16(dec.CacheY, offset, yBps, limit + 4);
+                }
+
+                if (filterInfo.UseInnerFiltering > 0)
+                {
+                    LossyUtils.SimpleVFilter16i(dec.CacheY, offset, yBps, limit);
+                }
+            }
+            else if (dec.Filter is LoopFilter.Complex)
+            {
+                // The complex filter processes luma (16 wide) and both chroma planes (8 wide).
+                int uvBps = dec.CacheUvStride;
+                int yOffset = mbx * 16;
+                int uvOffset = mbx * 8;
+                int hevThresh = filterInfo.HighEdgeVarianceThreshold;
+                if (mbx > 0)
+                {
+                    LossyUtils.HFilter16(dec.CacheY, yOffset, yBps, limit + 4, iLevel, hevThresh);
+                    LossyUtils.HFilter8(dec.CacheU, dec.CacheV, uvOffset, uvBps, limit + 4, iLevel, hevThresh);
+                }
+
+                if (filterInfo.UseInnerFiltering > 0)
+                {
+                    LossyUtils.HFilter16i(dec.CacheY, yOffset, yBps, limit, iLevel, hevThresh);
+                    LossyUtils.HFilter8i(dec.CacheU, dec.CacheV, uvOffset, uvBps, limit, iLevel, hevThresh);
+                }
+
+                if (mby > 0)
+                {
+                    LossyUtils.VFilter16(dec.CacheY, yOffset, yBps, limit + 4, iLevel, hevThresh);
+                    LossyUtils.VFilter8(dec.CacheU, dec.CacheV, uvOffset, uvBps, limit + 4, iLevel, hevThresh);
+                }
+
+                if (filterInfo.UseInnerFiltering > 0)
+                {
+                    LossyUtils.VFilter16i(dec.CacheY, yOffset, yBps, limit, iLevel, hevThresh);
+                    LossyUtils.VFilter8i(dec.CacheU, dec.CacheV, uvOffset, uvBps, limit, iLevel, hevThresh);
+                }
+            }
+        }
+
        private void FinishRow(Vp8Decoder dec, Vp8Io io)
        {
            int cacheId = 0;
@ -532,10 +616,10 @@ namespace SixLabors.ImageSharp.Formats.WebP
            // Rotate top samples if needed.
            if (!isLastRow)
            {
-                // TODO: double check this.
-                yDst.Slice(16 * dec.CacheYStride, ySize).CopyTo(dec.CacheY);
-                uDst.Slice(8 * dec.CacheUvStride, uvSize).CopyTo(dec.CacheU);
-                vDst.Slice(8 * dec.CacheUvStride, uvSize).CopyTo(dec.CacheV);
+                // TODO: double check this. Cache needs extra rows for filtering!
+                //yDst.Slice(16 * dec.CacheYStride, ySize).CopyTo(dec.CacheY);
+                //uDst.Slice(8 * dec.CacheUvStride, uvSize).CopyTo(dec.CacheU);
+                //vDst.Slice(8 * dec.CacheUvStride, uvSize).CopyTo(dec.CacheV);
            }
        }

@ -744,7 +828,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
            if (dec.Filter != LoopFilter.None)
            {
                dec.FilterInfo[dec.MbX] = dec.FilterStrength[blockData.Segment, blockData.IsI4x4 ? 1 : 0];
-                dec.FilterInfo[dec.MbX].InnerFiltering |= (byte)(skip is 0 ? 1 : 0);
+                dec.FilterInfo[dec.MbX].UseInnerFiltering |= (byte)(skip is 0 ? 1 : 0);
            }
        }

@ -760,6 +844,10 @@ namespace SixLabors.ImageSharp.Formats.WebP
            Vp8BandProbas[] acProba;
            Vp8MacroBlock leftMb = dec.LeftMacroBlock;
            short[] dst = block.Coeffs;
+            for (int i = 0; i < dst.Length; i++)
+            {
+                dst[i] = 0;
+            }

            if (!block.IsI4x4)
            {
@ -1208,7 +1296,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
            }
        }

-        private static Vp8Io InitializeVp8Io(Vp8PictureHeader pictureHeader)
+        private static Vp8Io InitializeVp8Io(Vp8Decoder dec, Vp8PictureHeader pictureHeader)
        {
            var io = default(Vp8Io);
            io.Width = (int)pictureHeader.Width;
@ -1225,6 +1313,48 @@ namespace SixLabors.ImageSharp.Formats.WebP
            io.MbH = io.Height;
            io.YStride = (int)(16 * ((pictureHeader.Width + 15) >> 4));
            io.UvStride = (int)(8 * ((pictureHeader.Width + 15) >> 4));
+
+            int intraPredModeSize = 4 * dec.MbWidth;
+            dec.IntraT = new byte[intraPredModeSize];
+
+            int extraPixels = WebPConstants.FilterExtraRows[(int)dec.Filter];
+            if (dec.Filter is LoopFilter.Complex)
+            {
+                // For complex filter, we need to preserve the dependency chain.
+                dec.TopLeftMbX = 0;
+                dec.TopLeftMbY = 0;
+            }
+            else
+            {
+                // For simple filter, we can filter only the cropped region. We include 'extraPixels' on
+                // the other side of the boundary, since vertical or horizontal filtering of the previous
+                // macroblock can modify some abutting pixels.
+                dec.TopLeftMbX = (io.CropLeft - extraPixels) >> 4;
+                dec.TopLeftMbY = (io.CropTop - extraPixels) >> 4;
+                if (dec.TopLeftMbX < 0)
+                {
+                    dec.TopLeftMbX = 0;
+                }
+
+                if (dec.TopLeftMbY < 0)
+                {
+                    dec.TopLeftMbY = 0;
+                }
+            }
+
+            // We need some 'extra' pixels on the right/bottom.
+            dec.BottomRightMbY = (io.CropBottom + 15 + extraPixels) >> 4;
+            dec.BottomRightMbX = (io.CropRight + 15 + extraPixels) >> 4;
+            if (dec.BottomRightMbX > dec.MbWidth)
+            {
+                dec.BottomRightMbX = dec.MbWidth;
+            }
+
+            if (dec.BottomRightMbY > dec.MbHeight)
+            {
+                dec.BottomRightMbY = dec.MbHeight;
+            }
+
            return io;
        }

@ -1298,6 +1428,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
            return value < 0 ? 0 : value > max ? max : value;
        }

+        // TODO: move to LookupTables
        private void InitializeModesProbabilities()
        {
            // Paragraph 11.5