@@ -83,117 +83,119 @@ namespace SixLabors.ImageSharp.Compression.Zlib
        int length = chunksize;
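
        // Folding CRC computation as described in Intel's white paper "Fast CRC
        // Computation for Generic Polynomials Using PCLMULQDQ Instruction": the message
        // is folded 64 bytes at a time with carry-less multiplies, then reduced to a
        // 32-bit CRC once fewer than 16 bytes of full blocks remain.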

        fixed (byte* bufferPtr = buffer)
        fixed (ulong* k05PolyPtr = K05Poly)
        {
            byte* localBufferPtr = bufferPtr;
            ulong* localK05PolyPtr = k05PolyPtr;

            // There's at least one block of 64.
            Vector128<ulong> x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
            Vector128<ulong> x2 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
            Vector128<ulong> x3 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
            Vector128<ulong> x4 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
            Vector128<ulong> x5;
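
            // XOR the incoming CRC into the low 32 bits of the first block; this is
            // equivalent to prepending the running CRC state to the message.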
            x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());

            // k1, k2
            Vector128<ulong> x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
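            // k1 and k2 are precomputed fold constants: remainders of large powers of x
            // modulo the CRC polynomial (x^(4*128+64) and x^(4*128) mod P(x) in the
            // zlib-style formulation), so one carry-less multiply by them advances an
            // accumulator a whole 64-byte stride through the message.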

            localBufferPtr += 64;
            length -= 64;

            // Parallel fold blocks of 64, if any.
            while (length >= 64)
            {
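                // Selector 0x00 multiplies the two low 64-bit halves and 0x11 the two
                // high halves; the products fold each 128-bit lane 512 bits (64 bytes)
                // forward before the next 64 bytes of input are XORed in below.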
                x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
                Vector128<ulong> x6 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
                Vector128<ulong> x7 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x00);
                Vector128<ulong> x8 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x00);

                x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
                x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x11);
                x3 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x11);
                x4 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x11);

                Vector128<ulong> y5 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
                Vector128<ulong> y6 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
                Vector128<ulong> y7 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
                Vector128<ulong> y8 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));

                x1 = Sse2.Xor(x1, x5);
                x2 = Sse2.Xor(x2, x6);
                x3 = Sse2.Xor(x3, x7);
                x4 = Sse2.Xor(x4, x8);

                x1 = Sse2.Xor(x1, y5);
                x2 = Sse2.Xor(x2, y6);
                x3 = Sse2.Xor(x3, y7);
                x4 = Sse2.Xor(x4, y8);

                localBufferPtr += 64;
                length -= 64;
            }
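
            // Each of the three folds below multiplies the running accumulator by the
            // k3/k4 pair (x^(128+64) and x^128 mod P(x) in the zlib-style formulation)
            // and XORs in the next accumulator (x2, x3, then x4), collapsing 512 bits
            // of state into 128.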
            // Fold into 128-bits.
            // k3, k4
            x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);

            x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
            x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
            x1 = Sse2.Xor(x1, x2);
            x1 = Sse2.Xor(x1, x5);

            x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
            x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
            x1 = Sse2.Xor(x1, x3);
            x1 = Sse2.Xor(x1, x5);

            x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
            x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
            x1 = Sse2.Xor(x1, x4);
            x1 = Sse2.Xor(x1, x5);
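
            // A single 128-bit fold advances the state by exactly 128 bits, so any
            // remaining 16-byte blocks can be absorbed one at a time.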
            // Single fold blocks of 16, if any.
            while (length >= 16)
            {
                x2 = Sse2.LoadVector128((ulong*)localBufferPtr);

                x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
                x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
                x1 = Sse2.Xor(x1, x2);
                x1 = Sse2.Xor(x1, x5);

                localBufferPtr += 16;
                length -= 16;
            }
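
            // The 128-bit accumulator is now reduced in two steps: first folded from
            // 128 to 64 bits, then Barrett-reduced to the final 32-bit CRC.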
            // Fold 128-bits to 64-bits.
            x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
            x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
            x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
            x1 = Sse2.Xor(x1, x2);
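
            // x3 now holds a mask selecting the low 32 bits of each 64-bit lane; it is
            // reused by the reductions below.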

            // k5, k0
            x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);

            x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
            x1 = Sse2.And(x1, x3);
            x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
            x1 = Sse2.Xor(x1, x2);
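
            // Barrett reduction: multiply by a precomputed reciprocal of the polynomial,
            // truncate, multiply back by the polynomial, and XOR; the remainder (the
            // final CRC) lands in bits 32..63 of x1.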
            // Barrett reduce to 32-bits.
            // polynomial
            x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);

            x2 = Sse2.And(x1, x3);
            x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
            x2 = Sse2.And(x2, x3);
            x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
            x1 = Sse2.Xor(x1, x2);

            crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
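            // Bytes beyond the last full chunk, if any, are finished by the scalar
            // implementation, seeded with the CRC computed here.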
            return buffer.Length - chunksize == 0 ? crc : CalculateScalar(crc, buffer.Slice(chunksize));
        }
    }
}

#endif