diff --git a/src/ImageSharp/Formats/WebP/LossyUtils.cs b/src/ImageSharp/Formats/WebP/LossyUtils.cs
new file mode 100644
index 0000000000..a4c488b61b
--- /dev/null
+++ b/src/ImageSharp/Formats/WebP/LossyUtils.cs
@@ -0,0 +1,343 @@
+// Copyright (c) Six Labors and contributors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+
+namespace SixLabors.ImageSharp.Formats.WebP
+{
+    /// <summary>
+    /// Intra prediction and inverse transform helpers for the lossy WebP decoder.
+    /// Destination spans are addressed with a row stride of <see cref="WebPConstants.Bps"/> bytes.
+    /// Predictors that read samples above or to the left of the block take the backing array and the
+    /// block offset as well, because a span cannot be indexed before its start.
+    /// </summary>
+    internal static class LossyUtils
+    {
+        /// <summary>Fills a 16x16 block with a single value.</summary>
+        private static void Put16(int v, Span<byte> dst)
+        {
+            byte value = (byte)v;
+            for (int j = 0; j < 16; ++j)
+            {
+                dst.Slice(j * WebPConstants.Bps, 16).Fill(value);
+            }
+        }
+
+        /// <summary>16x16 DC prediction from the 16 top and the 16 left samples.</summary>
+        public static void DC16_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc = 16;
+            for (int j = 0; j < 16; ++j)
+            {
+                // DC += dst[-1 + j * BPS] + dst[j - BPS];
+                dc += yuv[-1 + (j * WebPConstants.Bps) + offset] + yuv[j - WebPConstants.Bps + offset];
+            }
+
+            Put16(dc >> 5, dst);
+        }
+
+        /// <summary>16x16 TrueMotion prediction. Not implemented yet: the block is left untouched.</summary>
+        public static void TM16_C(Span<byte> dst)
+        {
+            // TODO: needs the top and left samples, which cannot be reached through dst alone.
+        }
+
+        /// <summary>16x16 vertical prediction: every row repeats the 16 samples above the block.</summary>
+        public static void VE16_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            Span<byte> top = yuv.AsSpan(offset - WebPConstants.Bps, 16);
+            for (int j = 0; j < 16; ++j)
+            {
+                // memcpy(dst + j * BPS, dst - BPS, 16);
+                top.CopyTo(dst.Slice(j * WebPConstants.Bps));
+            }
+        }
+
+        /// <summary>16x16 horizontal prediction: every row is filled with the sample to its left.</summary>
+        public static void HE16_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            for (int j = 0; j < 16; ++j)
+            {
+                // memset(dst, dst[-1], 16); dst += BPS;
+                int row = j * WebPConstants.Bps;
+                dst.Slice(row, 16).Fill(yuv[offset - 1 + row]);
+            }
+        }
+
+        /// <summary>16x16 DC prediction from the left samples only (no top row available).</summary>
+        public static void DC16NoTop_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc = 8;
+            for (int j = 0; j < 16; ++j)
+            {
+                // DC += dst[-1 + j * BPS];
+                dc += yuv[-1 + (j * WebPConstants.Bps) + offset];
+            }
+
+            Put16(dc >> 4, dst);
+        }
+
+        /// <summary>16x16 DC prediction from the top samples only (no left column available).</summary>
+        public static void DC16NoLeft_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc = 8;
+            for (int i = 0; i < 16; ++i)
+            {
+                // DC += dst[i - BPS];
+                dc += yuv[i - WebPConstants.Bps + offset];
+            }
+
+            Put16(dc >> 4, dst);
+        }
+
+        /// <summary>16x16 DC prediction with neither top nor left samples: the block is set to 0x80.</summary>
+        public static void DC16NoTopLeft_C(Span<byte> dst)
+        {
+            Put16(0x80, dst);
+        }
+
+        /// <summary>8x8 chroma DC prediction from the 8 top and the 8 left samples.</summary>
+        public static void DC8uv_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc0 = 8;
+            for (int i = 0; i < 8; ++i)
+            {
+                // dc0 += dst[i - BPS] + dst[-1 + i * BPS];
+                dc0 += yuv[offset + i - WebPConstants.Bps] + yuv[offset - 1 + (i * WebPConstants.Bps)];
+            }
+
+            Put8x8uv((byte)(dc0 >> 4), dst);
+        }
+
+        /// <summary>8x8 chroma TrueMotion prediction. Not implemented yet: the block is left untouched.</summary>
+        public static void TM8uv_C(Span<byte> dst)
+        {
+            // TODO: needs the top and left samples, which cannot be reached through dst alone.
+        }
+
+        /// <summary>8x8 chroma vertical prediction: copies <paramref name="src"/> into each of the 8 rows.</summary>
+        public static void VE8uv_C(Span<byte> dst, Span<byte> src)
+        {
+            for (int j = 0; j < 8; ++j)
+            {
+                // memcpy(dst + j * BPS, dst - BPS, 8);
+                src.CopyTo(dst.Slice(j * WebPConstants.Bps));
+            }
+        }
+
+        /// <summary>8x8 chroma horizontal prediction: every row is filled with the sample to its left.</summary>
+        public static void HE8uv_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            for (int j = 0; j < 8; ++j)
+            {
+                // memset(dst, dst[-1], 8); dst += BPS;
+                int row = j * WebPConstants.Bps;
+                dst.Slice(row, 8).Fill(yuv[offset - 1 + row]);
+            }
+        }
+
+        /// <summary>8x8 chroma DC prediction from the left samples only (no top row available).</summary>
+        public static void DC8uvNoTop_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc0 = 4;
+            for (int i = 0; i < 8; ++i)
+            {
+                // dc0 += dst[-1 + i * BPS];
+                dc0 += yuv[offset - 1 + (i * WebPConstants.Bps)];
+            }
+
+            Put8x8uv((byte)(dc0 >> 3), dst);
+        }
+
+        /// <summary>8x8 chroma DC prediction from the top samples only (no left column available).</summary>
+        public static void DC8uvNoLeft_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            int dc0 = 4;
+            for (int i = 0; i < 8; ++i)
+            {
+                // dc0 += dst[i - BPS];
+                dc0 += yuv[offset + i - WebPConstants.Bps];
+            }
+
+            Put8x8uv((byte)(dc0 >> 3), dst);
+        }
+
+        /// <summary>
+        /// 8x8 chroma DC prediction with neither top nor left samples: the block is set to 0x80.
+        /// <paramref name="yuv"/> and <paramref name="offset"/> are not read; they are kept for call-site parity.
+        /// </summary>
+        public static void DC8uvNoTopLeft_C(Span<byte> dst, byte[] yuv, int offset)
+        {
+            Put8x8uv(0x80, dst);
+        }
+
+        /// <summary>
+        /// Inverse transforms 16 coefficients and adds the result to the 4x4 block at <paramref name="dst"/>.
+        /// When <paramref name="doTwo"/> is set, the next 16 coefficients are applied to the block 4 pixels to the right.
+        /// </summary>
+        public static void Transform(Span<short> src, Span<byte> dst, bool doTwo)
+        {
+            TransformOne(src, dst);
+            if (doTwo)
+            {
+                TransformOne(src.Slice(16), dst.Slice(4));
+            }
+        }
+
+        /// <summary>Full 4x4 inverse transform; the clamped result is accumulated into <paramref name="dst"/>.</summary>
+        public static void TransformOne(Span<short> src, Span<byte> dst)
+        {
+            Span<int> tmp = stackalloc int[4 * 4];
+            for (int i = 0; i < 4; ++i)
+            {
+                // vertical pass
+                int a = src[i] + src[i + 8]; // [-4096, 4094]
+                int b = src[i] - src[i + 8]; // [-4095, 4095]
+                int c = Mul2(src[i + 4]) - Mul1(src[i + 12]); // [-3783, 3783]
+                int d = Mul1(src[i + 4]) + Mul2(src[i + 12]); // [-3785, 3781]
+                int t = 4 * i;
+                tmp[t] = a + d; // [-7881, 7875]
+                tmp[t + 1] = b + c; // [-7878, 7878]
+                tmp[t + 2] = b - c; // [-7878, 7878]
+                tmp[t + 3] = a - d; // [-7877, 7879]
+            }
+
+            // Each pass is expanding the dynamic range by ~3.85 (upper bound).
+            // The exact value is (2. + (20091 + 35468) / 65536).
+            // After the second pass, maximum interval is [-3794, 3794], assuming
+            // an input in [-2048, 2047] interval. We then need to add a dst value
+            // in the [0, 255] range.
+            // In the worst case scenario, the input to clip_8b() can be as large as
+            // [-60713, 60968].
+            for (int i = 0; i < 4; ++i)
+            {
+                // horizontal pass, producing row i of the block
+                int dc = tmp[i] + 4;
+                int a = dc + tmp[i + 8];
+                int b = dc - tmp[i + 8];
+                int c = Mul2(tmp[i + 4]) - Mul1(tmp[i + 12]);
+                int d = Mul1(tmp[i + 4]) + Mul2(tmp[i + 12]);
+                Store(dst, 0, i, a + d);
+                Store(dst, 1, i, b + c);
+                Store(dst, 2, i, b - c);
+                Store(dst, 3, i, a - d);
+            }
+        }
+
+        /// <summary>Inverse transform for a block whose only non-zero coefficient is the DC term.</summary>
+        public static void TransformDc(Span<short> src, Span<byte> dst)
+        {
+            int dc = src[0] + 4;
+            for (int j = 0; j < 4; ++j)
+            {
+                for (int i = 0; i < 4; ++i)
+                {
+                    Store(dst, i, j, dc);
+                }
+            }
+        }
+
+        /// <summary>Simplified transform when only in[0], in[1] and in[4] are non-zero.</summary>
+        public static void TransformAc3(Span<short> src, Span<byte> dst)
+        {
+            int a = src[0] + 4;
+            int c4 = Mul2(src[4]);
+            int d4 = Mul1(src[4]);
+            int c1 = Mul2(src[1]);
+            int d1 = Mul1(src[1]);
+            Store2(dst, 0, a + d4, d1, c1);
+            Store2(dst, 1, a + c4, d1, c1);
+            Store2(dst, 2, a - c4, d1, c1);
+            Store2(dst, 3, a - d4, d1, c1);
+        }
+
+        /// <summary>Full inverse transform of the four 4x4 blocks of an 8x8 chroma plane.</summary>
+        public static void TransformUv(Span<short> src, Span<byte> dst)
+        {
+            Transform(src.Slice(0 * 16), dst, true);
+            Transform(src.Slice(2 * 16), dst.Slice(4 * WebPConstants.Bps), true);
+        }
+
+        /// <summary>DC-only inverse transform of the four 4x4 blocks of an 8x8 chroma plane.</summary>
+        public static void TransformDcuv(Span<short> src, Span<byte> dst)
+        {
+            // A DC coefficient may be negative, so test against zero rather than for a positive value.
+            if (src[0 * 16] != 0)
+            {
+                TransformDc(src.Slice(0 * 16), dst);
+            }
+
+            if (src[1 * 16] != 0)
+            {
+                TransformDc(src.Slice(1 * 16), dst.Slice(4));
+            }
+
+            if (src[2 * 16] != 0)
+            {
+                TransformDc(src.Slice(2 * 16), dst.Slice(4 * WebPConstants.Bps));
+            }
+
+            if (src[3 * 16] != 0)
+            {
+                TransformDc(src.Slice(3 * 16), dst.Slice((4 * WebPConstants.Bps) + 4));
+            }
+        }
+
+        /// <summary>Adds <c>v >> 3</c> to the pixel at (x, y) and clamps the sum to a byte.</summary>
+        private static void Store(Span<byte> dst, int x, int y, int v)
+        {
+            int index = x + (y * WebPConstants.Bps);
+            dst[index] = Clip8B(dst[index] + (v >> 3));
+        }
+
+        /// <summary>Stores one row of the AC3 transform: dc + d, dc + c, dc - c, dc - d.</summary>
+        private static void Store2(Span<byte> dst, int y, int dc, int d, int c)
+        {
+            Store(dst, 0, y, dc + d);
+            Store(dst, 1, y, dc + c);
+            Store(dst, 2, y, dc - c);
+            Store(dst, 3, y, dc - d);
+        }
+
+        /// <summary>Multiplies by 20091 / 65536 + 1 in fixed point.</summary>
+        private static int Mul1(int a)
+        {
+            return ((a * 20091) >> 16) + a;
+        }
+
+        /// <summary>Multiplies by 35468 / 65536 in fixed point.</summary>
+        private static int Mul2(int a)
+        {
+            return (a * 35468) >> 16;
+        }
+
+        /// <summary>Clamps <paramref name="v"/> to the [0, 255] range.</summary>
+        private static byte Clip8B(int v)
+        {
+            // No bit set outside the low byte means v is already in range.
+            return (byte)((v & ~0xff) == 0 ? v : (v < 0) ? 0 : 255);
+        }
+
+        /// <summary>Fills an 8x8 chroma block with a single value.</summary>
+        private static void Put8x8uv(byte value, Span<byte> dst)
+        {
+            for (int j = 0; j < 8; ++j)
+            {
+                // memset(dst + j * BPS, value, 8);
+                dst.Slice(j * WebPConstants.Bps, 8).Fill(value);
+            }
+        }
+
+        /// <summary>Rounded average of two samples.</summary>
+        private static byte Avg2(byte a, byte b)
+        {
+            return (byte)((a + b + 1) >> 1);
+        }
+
+        /// <summary>Rounded 1-2-1 weighted average of three samples.</summary>
+        private static byte Avg3(byte a, byte b, byte c)
+        {
+            return (byte)((a + (2 * b) + c + 2) >> 2);
+        }
+    }
+}
diff --git a/src/ImageSharp/Formats/WebP/WebPConstants.cs b/src/ImageSharp/Formats/WebP/WebPConstants.cs
index f3a135ba64..e22ae8d348 100644
--- a/src/ImageSharp/Formats/WebP/WebPConstants.cs
+++ b/src/ImageSharp/Formats/WebP/WebPConstants.cs
@@ -143,6 +143,15 @@ namespace SixLabors.ImageSharp.Formats.WebP
             0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0
         };
 
+        // Offsets of the sixteen 4x4 sub-blocks of a 16x16 block, in raster order, for a row stride of Bps.
+        public static readonly short[] KScan =
+        {
+            0 + 0 * Bps, 4 + 0 * Bps, 8 + 0 * Bps, 12 + 0 * Bps,
+            0 + 4 * Bps, 4 + 4 * Bps, 8 + 4 * Bps, 12 + 4 * Bps,
+            0 + 8 * Bps, 4 + 8 * Bps, 8 + 8 * Bps, 12 + 8 * Bps,
+            0 + 12 * Bps, 4 + 12 * Bps, 8 + 12 * Bps, 12 + 12 * Bps
+        };
+
         // 31 ^ clz(i)
         public static readonly byte[] LogTable8bit =
         {
diff --git a/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs b/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs
index 8c8915e002..725b5857a1 100644
--- a/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs
+++ b/src/ImageSharp/Formats/WebP/WebPLossyDecoder.cs
@@ -199,6 +199,7 @@ namespace SixLabors.ImageSharp.Formats.WebP
                 int uOff = yOff + (WebPConstants.Bps * 16) + WebPConstants.Bps;
                 int vOff = uOff + 16;
 
+                byte[] yuv = dec.YuvBuffer;
                 Span<byte> yDst = dec.YuvBuffer.AsSpan(yOff);
                 Span<byte> uDst = dec.YuvBuffer.AsSpan(uOff);
                 Span<byte> vDst = dec.YuvBuffer.AsSpan(vOff);
@@ -206,19 +207,20 @@ namespace SixLabors.ImageSharp.Formats.WebP
                 // Initialize left-most block.
+                for (int i = 0; i < 16; ++i)
                 {
-                    yDst[(i * WebPConstants.Bps) - 1] = 129;
+                    yuv[(i * WebPConstants.Bps) - 1 + yOff] = 129;
                 }
 
                 for (int i = 0; i < 8; ++i)
                 {
-                    uDst[(i * WebPConstants.Bps) - 1] = 129;
-                    vDst[(i * WebPConstants.Bps) - 1] = 129;
+                    yuv[(i * WebPConstants.Bps) - 1 + uOff] = 129;
+                    yuv[(i * WebPConstants.Bps) - 1 + vOff] = 129;
                 }
 
                 // Init top-left sample on left column too.
                 if (mby > 0)
                 {
-                    yDst[-1 - WebPConstants.Bps] = uDst[-1 - WebPConstants.Bps] = vDst[-1 - WebPConstants.Bps] = 129;
+                    // A span cannot be indexed before its start, so go through the backing array.
+                    yuv[yOff - 1 - WebPConstants.Bps] = yuv[uOff - 1 - WebPConstants.Bps] = yuv[vOff - 1 - WebPConstants.Bps] = 129;
                 }
                 else
                 {
@@ -254,56 +256,201 @@ namespace SixLabors.ImageSharp.Formats.WebP
                 {
                     for (int i = -1; i < 16; ++i)
                     {
-                        // Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
+                        int srcIdx = (i * WebPConstants.Bps) + 12 + yOff;
+                        int dstIdx = (i * WebPConstants.Bps) - 4 + yOff;
+                        yuv.AsSpan(srcIdx, 4).CopyTo(yuv.AsSpan(dstIdx));
                     }
 
                     for (int i = -1; i < 8; ++i)
                     {
-                        // Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
-                        // Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
+                        int srcIdx = (i * WebPConstants.Bps) + 4 + uOff;
+                        int dstIdx = (i * WebPConstants.Bps) - 4 + uOff;
+                        yuv.AsSpan(srcIdx, 4).CopyTo(yuv.AsSpan(dstIdx));
+                        srcIdx = (i * WebPConstants.Bps) + 4 + vOff;
+                        dstIdx = (i * WebPConstants.Bps) - 4 + vOff;
+                        yuv.AsSpan(srcIdx, 4).CopyTo(yuv.AsSpan(dstIdx));
                     }
+                }
 
-                    // Bring top samples into the cache.
-                    Vp8TopSamples topSamples = dec.YuvTopSamples[mbx];
-                    short[] coeffs = block.Coeffs;
-                    uint bits = block.NonZeroY;
+                // Bring top samples into the cache.
+                Vp8TopSamples topYuv = dec.YuvTopSamples[mbx];
+                short[] coeffs = block.Coeffs;
+                uint bits = block.NonZeroY;
+                if (mby > 0)
+                {
+                    topYuv.Y.CopyTo(yuv.AsSpan(yOff - WebPConstants.Bps));
+                    topYuv.U.CopyTo(yuv.AsSpan(uOff - WebPConstants.Bps));
+                    topYuv.V.CopyTo(yuv.AsSpan(vOff - WebPConstants.Bps));
+                }
+
+                // Predict and add residuals.
+                if (block.IsI4x4)
+                {
                     if (mby > 0)
                     {
-                        //memcpy(y_dst - BPS, top_yuv[0].y, 16);
-                        //memcpy(u_dst - BPS, top_yuv[0].u, 8);
-                        //memcpy(v_dst - BPS, top_yuv[0].v, 8);
+                        if (mbx >= dec.MbWidth - 1)
+                        {
+                            // On rightmost border.
+                            //memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
+                        }
+                        else
+                        {
+                            // memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
+                        }
                     }
 
-                    // Predict and add residuals.
-                    if (block.IsI4x4)
+                    // Replicate the top-right pixels below.
+
+
+                    // Predict and add residuals for all 4x4 blocks in turn.
+                    for (int n = 0; n < 16; ++n, bits <<= 2)
                     {
-                        if (mby > 0)
+                        // uint8_t * const dst = y_dst + kScan[n];
+                        byte lumaMode = block.Modes[n];
+                        switch (lumaMode)
                         {
-                            if (mbx >= dec.MbWidth - 1)
-                            {
-                                // On rightmost border.
-                                //memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
-                            }
-                            else
-                            {
-                                // memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
-                            }
+                            case 0:
+                                break;
+                            case 1:
+                                break;
+                            case 2:
+                                break;
+                            case 3:
+                                break;
+                            case 4:
+                                break;
+                            case 5:
+                                break;
+                            case 6:
+                                break;
+                            case 7:
+                                break;
+                            case 8:
+                                break;
+                            case 9:
+                                break;
                         }
 
-                        // Replicate the top-right pixels below.
-
+                        //DoTransform(bits, coeffs + n * 16, dst);
+                    }
+                }
+                else
+                {
+                    // 16x16
+                    int mode = CheckMode(mbx, mby, block.Modes[0]);
+                    switch (mode)
+                    {
+                        case 0:
+                            LossyUtils.DC16_C(yDst, yuv, yOff);
+                            break;
+                        case 1:
+                            LossyUtils.TM16_C(yDst);
+                            break;
+                        case 2:
+                            LossyUtils.VE16_C(yDst, yuv, yOff);
+                            break;
+                        case 3:
+                            LossyUtils.HE16_C(yDst, yuv, yOff);
+                            break;
+                        case 4:
+                            LossyUtils.DC16NoTop_C(yDst, yuv, yOff);
+                            break;
+                        case 5:
+                            LossyUtils.DC16NoLeft_C(yDst, yuv, yOff);
+                            break;
+                        case 6:
+                            LossyUtils.DC16NoTopLeft_C(yDst);
+                            break;
+                    }
 
-                        // Predict and add residuals for all 4x4 blocks in turn.
+                    if (bits != 0)
+                    {
                         for (int n = 0; n < 16; ++n, bits <<= 2)
                         {
-
+                            this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebPConstants.KScan[n]));
                         }
                     }
-                    else
-                    {
-                        // 16x16
+                }
 
-                    }
+                // Chroma
+                uint bitsUv = block.NonZeroUv;
+                int chromaMode = CheckMode(mbx, mby, block.UvMode);
+                switch (chromaMode)
+                {
+                    case 0:
+                        LossyUtils.DC8uv_C(uDst, yuv, uOff);
+                        LossyUtils.DC8uv_C(vDst, yuv, vOff);
+                        break;
+                    case 1:
+                        LossyUtils.TM8uv_C(uDst);
+                        LossyUtils.TM8uv_C(vDst);
+                        break;
+                    case 2:
+                        LossyUtils.VE8uv_C(uDst, yuv.AsSpan(uOff - WebPConstants.Bps, 8));
+                        LossyUtils.VE8uv_C(vDst, yuv.AsSpan(vOff - WebPConstants.Bps, 8));
+                        break;
+                    case 3:
+                        LossyUtils.HE8uv_C(uDst, yuv, uOff);
+                        LossyUtils.HE8uv_C(vDst, yuv, vOff);
+                        break;
+                    case 4:
+                        LossyUtils.DC8uvNoTop_C(uDst, yuv, uOff);
+                        LossyUtils.DC8uvNoTop_C(vDst, yuv, vOff);
+                        break;
+                    case 5:
+                        LossyUtils.DC8uvNoLeft_C(uDst, yuv, uOff);
+                        LossyUtils.DC8uvNoLeft_C(vDst, yuv, vOff);
+                        break;
+                    case 6:
+                        LossyUtils.DC8uvNoTopLeft_C(uDst, yuv, uOff);
+                        LossyUtils.DC8uvNoTopLeft_C(vDst, yuv, vOff);
+                        break;
+                }
+
+                this.DoUVTransform(bitsUv >> 0, coeffs.AsSpan(16 * 16), uDst);
+                this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst);
+            }
+        }
+
+        /// <summary>
+        /// Adds the residual of one 4x4 block to <paramref name="dst"/>. The two top bits of
+        /// <paramref name="bits"/> select the transform: 3 = full, 2 = AC3, 1 = DC only, 0 = nothing to add.
+        /// </summary>
+        private void DoTransform(uint bits, Span<short> src, Span<byte> dst)
+        {
+            switch (bits >> 30)
+            {
+                case 3:
+                    LossyUtils.Transform(src, dst, false);
+                    break;
+                case 2:
+                    LossyUtils.TransformAc3(src, dst);
+                    break;
+                case 1:
+                    LossyUtils.TransformDc(src, dst);
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Adds the residuals of one 8x8 chroma plane to <paramref name="dst"/>. Only the low byte of
+        /// <paramref name="bits"/> is inspected; the 0xaa mask tells whether any block needs the full transform.
+        /// </summary>
+        private void DoUVTransform(uint bits, Span<short> src, Span<byte> dst)
+        {
+            // any non-zero coeff at all?
+            if ((bits & 0xff) > 0)
+            {
+                // any non-zero AC coefficient?
+                if ((bits & 0xaa) > 0)
+                {
+                    LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V.
+                }
+                else
+                {
+                    LossyUtils.TransformDcuv(src, dst);
                 }
             }
         }
@@ -862,6 +1009,30 @@ namespace SixLabors.ImageSharp.Formats.WebP
             return bandsRow;
         }
 
+        /// <summary>
+        /// Maps DC prediction (mode 0) to the variant matching the samples available at the macroblock position:
+        /// no top row when <paramref name="mby"/> is 0, no left column when <paramref name="mbx"/> is 0. Other modes pass through.
+        /// </summary>
+        private static int CheckMode(int mbx, int mby, int mode)
+        {
+            // B_DC_PRED
+            if (mode is 0)
+            {
+                if (mbx is 0)
+                {
+                    return (mby is 0)
+                        ? 6 // B_DC_PRED_NOTOPLEFT
+                        : 5; // B_DC_PRED_NOLEFT
+                }
+
+                return (mby is 0)
+                    ? 4 // B_DC_PRED_NOTOP
+                    : 0; // B_DC_PRED
+            }
+
+            return mode;
+        }
+
         private static int Clip(int value, int max)
         {
             return value < 0 ? 0 : value > max ? max : value;