diff --git a/src/ImageSharp/Common/Tuples/Tuple8.cs b/src/ImageSharp/Common/Tuples/Tuple8.cs
new file mode 100644
index 000000000..c0b9a9002
--- /dev/null
+++ b/src/ImageSharp/Common/Tuples/Tuple8.cs
@@ -0,0 +1,251 @@
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Common.Tuples
+{
+    /// <summary>
+    /// Contains value type tuples of 8 elements, each laid out explicitly with its fields V0..V7 at consecutive offsets.
+    /// TODO: Should T4 this stuff to be DRY
+    /// </summary>
+    internal static class Tuple8
+    {
+        /// <summary>
+        /// Value type tuple of 8 <see cref="float"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(float))]
+        public struct OfSingle
+        {
+            [FieldOffset(0 * sizeof(float))]
+            public float V0;
+
+            [FieldOffset(1 * sizeof(float))]
+            public float V1;
+
+            [FieldOffset(2 * sizeof(float))]
+            public float V2;
+
+            [FieldOffset(3 * sizeof(float))]
+            public float V3;
+
+            [FieldOffset(4 * sizeof(float))]
+            public float V4;
+
+            [FieldOffset(5 * sizeof(float))]
+            public float V5;
+
+            [FieldOffset(6 * sizeof(float))]
+            public float V6;
+
+            [FieldOffset(7 * sizeof(float))]
+            public float V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+        }
+
+        /// <summary>
+        /// Value type tuple of 8 <see cref="int"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(int))]
+        public struct OfInt32
+        {
+            [FieldOffset(0 * sizeof(int))]
+            public int V0;
+
+            [FieldOffset(1 * sizeof(int))]
+            public int V1;
+
+            [FieldOffset(2 * sizeof(int))]
+            public int V2;
+
+            [FieldOffset(3 * sizeof(int))]
+            public int V3;
+
+            [FieldOffset(4 * sizeof(int))]
+            public int V4;
+
+            [FieldOffset(5 * sizeof(int))]
+            public int V5;
+
+            [FieldOffset(6 * sizeof(int))]
+            public int V6;
+
+            [FieldOffset(7 * sizeof(int))]
+            public int V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+        }
+
+        /// <summary>
+        /// Value type tuple of 8 <see cref="uint"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
+        public struct OfUInt32
+        {
+            [FieldOffset(0 * sizeof(uint))]
+            public uint V0;
+
+            [FieldOffset(1 * sizeof(uint))]
+            public uint V1;
+
+            [FieldOffset(2 * sizeof(uint))]
+            public uint V2;
+
+            [FieldOffset(3 * sizeof(uint))]
+            public uint V3;
+
+            [FieldOffset(4 * sizeof(uint))]
+            public uint V4;
+
+            [FieldOffset(5 * sizeof(uint))]
+            public uint V5;
+
+            [FieldOffset(6 * sizeof(uint))]
+            public uint V6;
+
+            [FieldOffset(7 * sizeof(uint))]
+            public uint V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+
+            public void LoadFrom(ref OfUInt16 i) // element-wise copy; each ushort widens implicitly to uint
+            {
+                this.V0 = i.V0;
+                this.V1 = i.V1;
+                this.V2 = i.V2;
+                this.V3 = i.V3;
+                this.V4 = i.V4;
+                this.V5 = i.V5;
+                this.V6 = i.V6;
+                this.V7 = i.V7;
+            }
+        }
+
+        /// <summary>
+        /// Value type tuple of 8 <see cref="ushort"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(ushort))]
+        public struct OfUInt16
+        {
+            [FieldOffset(0 * sizeof(ushort))]
+            public ushort V0;
+
+            [FieldOffset(1 * sizeof(ushort))]
+            public ushort V1;
+
+            [FieldOffset(2 * sizeof(ushort))]
+            public ushort V2;
+
+            [FieldOffset(3 * sizeof(ushort))]
+            public ushort V3;
+
+            [FieldOffset(4 * sizeof(ushort))]
+            public ushort V4;
+
+            [FieldOffset(5 * sizeof(ushort))]
+            public ushort V5;
+
+            [FieldOffset(6 * sizeof(ushort))]
+            public ushort V6;
+
+            [FieldOffset(7 * sizeof(ushort))]
+            public ushort V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+        }
+
+        /// <summary>
+        /// Value type tuple of 8 <see cref="short"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(short))]
+        public struct OfInt16
+        {
+            [FieldOffset(0 * sizeof(short))]
+            public short V0;
+
+            [FieldOffset(1 * sizeof(short))]
+            public short V1;
+
+            [FieldOffset(2 * sizeof(short))]
+            public short V2;
+
+            [FieldOffset(3 * sizeof(short))]
+            public short V3;
+
+            [FieldOffset(4 * sizeof(short))]
+            public short V4;
+
+            [FieldOffset(5 * sizeof(short))]
+            public short V5;
+
+            [FieldOffset(6 * sizeof(short))]
+            public short V6;
+
+            [FieldOffset(7 * sizeof(short))]
+            public short V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+        }
+
+        /// <summary>
+        /// Value type tuple of 8 <see cref="byte"/>-s
+        /// </summary>
+        [StructLayout(LayoutKind.Explicit, Size = 8)]
+        public struct OfByte
+        {
+            [FieldOffset(0)]
+            public byte V0;
+
+            [FieldOffset(1)]
+            public byte V1;
+
+            [FieldOffset(2)]
+            public byte V2;
+
+            [FieldOffset(3)]
+            public byte V3;
+
+            [FieldOffset(4)]
+            public byte V4;
+
+            [FieldOffset(5)]
+            public byte V5;
+
+            [FieldOffset(6)]
+            public byte V6;
+
+            [FieldOffset(7)]
+            public byte V7;
+
+            public override string ToString()
+            {
+                return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+            }
+
+            public void LoadFrom(ref OfUInt32 i) // element-wise copy; each uint is narrowed with an explicit (byte) cast
+            {
+                this.V0 = (byte)i.V0;
+                this.V1 = (byte)i.V1;
+                this.V2 = (byte)i.V2;
+                this.V3 = (byte)i.V3;
+                this.V4 = (byte)i.V4;
+                this.V5 = (byte)i.V5;
+                this.V6 = (byte)i.V6;
+                this.V7 = (byte)i.V7;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Tuples/Vector4Pair.cs b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
new file mode 100644
index 000000000..1be936b30
--- /dev/null
+++ b/src/ImageSharp/Common/Tuples/Vector4Pair.cs
@@ -0,0 +1,73 @@
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp.Common.Tuples
+{
+    /// <summary>
+    /// It's faster to process multiple Vector4-s together, so let's pair them!
+    /// On AVX2 this pair should be convertible to <see cref="Vector{T}"/> of <see cref="float"/>!
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    internal struct Vector4Pair
+    {
+        public Vector4 A;
+
+        public Vector4 B;
+
+        private static readonly Vector4 Scale = new Vector4(1 / 255f);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void MultiplyInplace(float value)
+        {
+            this.A *= value;
+            this.B *= value;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void AddInplace(Vector4 value)
+        {
+            this.A += value;
+            this.B += value;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void AddInplace(ref Vector4Pair other)
+        {
+            this.A += other.A;
+            this.B += other.B;
+        }
+
+        /// <summary>
+        /// Color-conversion specific downscale method: applies FastRound() to A and B, then multiplies both by 1/255.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void RoundAndDownscaleBasic()
+        {
+            ref Vector a = ref Unsafe.As>(ref this.A); // NOTE(review): generic type arguments look stripped from this text; presumably A is reinterpreted as a Vector of float, which is only size-safe when that vector holds 4 floats - confirm against the repository
+            a = a.FastRound();
+
+            ref Vector b = ref Unsafe.As>(ref this.B); // NOTE(review): same reinterpretation for B - confirm callers only use this path on non-AVX2 hardware
+            b = b.FastRound();
+
+            // Downscale by 1/255
+            this.A *= Scale;
+            this.B *= Scale;
+        }
+
+        /// <summary>
+        /// AVX2-only color-conversion specific downscale method: reinterprets the whole pair as one vector, applies FastRound(), multiplies by 1/255 and writes the result back in place.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void RoundAndDownscaleAvx2()
+        {
+            ref Vector self = ref Unsafe.As>(ref this); // NOTE(review): generic type arguments look stripped from this text - confirm against the repository
+            Vector v = self;
+            v = v.FastRound();
+
+            // Downscale by 1/255
+            v *= new Vector(1 / 255f);
+            self = v;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
index 3f4c69c3e..1291f160a 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8.cs
@@ -176,17 +176,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
         }
 
         /// <summary>
-        /// Convert into <see cref="Block8x8F"/>
+        /// Convert to <see cref="Block8x8F"/>
         /// </summary>
         public Block8x8F AsFloatBlock()
         {
-            // TODO: Optimize this
             var result = default(Block8x8F);
-            for (int i = 0; i < Size; i++)
-            {
-                result[i] = this[i];
-            }
-
+            this.CopyToFloatBlock(ref result);
             return result;
         }
 
@@ -302,5 +297,85 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
 
             return result;
         }
+
+        /// <summary>
+        /// Convert values into <see cref="Block8x8F"/> as <see cref="float"/>-s. Assigns elements 0..63 of this block to V0L..V7R (X, Y, Z, W) of <paramref name="dest"/>.
+        /// </summary>
+        public void CopyToFloatBlock(ref Block8x8F dest)
+        {
+            ref short selfRef = ref Unsafe.As(ref this); // NOTE(review): generic type arguments look stripped from this text; presumably views this block as its first short element - confirm against the repository
+
+            dest.V0L.X = Unsafe.Add(ref selfRef, 0);
+            dest.V0L.Y = Unsafe.Add(ref selfRef, 1);
+            dest.V0L.Z = Unsafe.Add(ref selfRef, 2);
+            dest.V0L.W = Unsafe.Add(ref selfRef, 3);
+            dest.V0R.X = Unsafe.Add(ref selfRef, 4);
+            dest.V0R.Y = Unsafe.Add(ref selfRef, 5);
+            dest.V0R.Z = Unsafe.Add(ref selfRef, 6);
+            dest.V0R.W = Unsafe.Add(ref selfRef, 7);
+
+            dest.V1L.X = Unsafe.Add(ref selfRef, 8);
+            dest.V1L.Y = Unsafe.Add(ref selfRef, 9);
+            dest.V1L.Z = Unsafe.Add(ref selfRef, 10);
+            dest.V1L.W = Unsafe.Add(ref selfRef, 11);
+            dest.V1R.X = Unsafe.Add(ref selfRef, 12);
+            dest.V1R.Y = Unsafe.Add(ref selfRef, 13);
+            dest.V1R.Z = Unsafe.Add(ref selfRef, 14);
+            dest.V1R.W = Unsafe.Add(ref selfRef, 15);
+
+            dest.V2L.X = Unsafe.Add(ref selfRef, 16);
+            dest.V2L.Y = Unsafe.Add(ref selfRef, 17);
+            dest.V2L.Z = Unsafe.Add(ref selfRef, 18);
+            dest.V2L.W = Unsafe.Add(ref selfRef, 19);
+            dest.V2R.X = Unsafe.Add(ref selfRef, 20);
+            dest.V2R.Y = Unsafe.Add(ref selfRef, 21);
+            dest.V2R.Z = Unsafe.Add(ref selfRef, 22);
+            dest.V2R.W = Unsafe.Add(ref selfRef, 23);
+
+            dest.V3L.X = Unsafe.Add(ref selfRef, 24);
+            dest.V3L.Y = Unsafe.Add(ref selfRef, 25);
+            dest.V3L.Z = Unsafe.Add(ref selfRef, 26);
+            dest.V3L.W = Unsafe.Add(ref selfRef, 27);
+            dest.V3R.X = Unsafe.Add(ref selfRef, 28);
+            dest.V3R.Y = Unsafe.Add(ref selfRef, 29);
+            dest.V3R.Z = Unsafe.Add(ref selfRef, 30);
+            dest.V3R.W = Unsafe.Add(ref selfRef, 31);
+
+            dest.V4L.X = Unsafe.Add(ref selfRef, 32);
+            dest.V4L.Y = Unsafe.Add(ref selfRef, 33);
+            dest.V4L.Z = Unsafe.Add(ref selfRef, 34);
+            dest.V4L.W = Unsafe.Add(ref selfRef, 35);
+            dest.V4R.X = Unsafe.Add(ref selfRef, 36);
+            dest.V4R.Y = Unsafe.Add(ref selfRef, 37);
+            dest.V4R.Z = Unsafe.Add(ref selfRef, 38);
+            dest.V4R.W = Unsafe.Add(ref selfRef, 39);
+
+            dest.V5L.X = Unsafe.Add(ref selfRef, 40);
+            dest.V5L.Y = Unsafe.Add(ref selfRef, 41);
+            dest.V5L.Z = Unsafe.Add(ref selfRef, 42);
+            dest.V5L.W = Unsafe.Add(ref selfRef, 43);
+            dest.V5R.X = Unsafe.Add(ref selfRef, 44);
+            dest.V5R.Y = Unsafe.Add(ref selfRef, 45);
+            dest.V5R.Z = Unsafe.Add(ref selfRef, 46);
+            dest.V5R.W = Unsafe.Add(ref selfRef, 47);
+
+            dest.V6L.X = Unsafe.Add(ref selfRef, 48);
+            dest.V6L.Y = Unsafe.Add(ref selfRef, 49);
+            dest.V6L.Z = Unsafe.Add(ref selfRef, 50);
+            dest.V6L.W = Unsafe.Add(ref selfRef, 51);
+            dest.V6R.X = Unsafe.Add(ref selfRef, 52);
+            dest.V6R.Y = Unsafe.Add(ref selfRef, 53);
+            dest.V6R.Z = Unsafe.Add(ref selfRef, 54);
+            dest.V6R.W = Unsafe.Add(ref selfRef, 55);
+
+            dest.V7L.X = Unsafe.Add(ref selfRef, 56);
+            dest.V7L.Y = Unsafe.Add(ref selfRef, 57);
+            dest.V7L.Z = Unsafe.Add(ref selfRef, 58);
+            dest.V7L.W = Unsafe.Add(ref selfRef, 59);
+            dest.V7R.X = Unsafe.Add(ref selfRef, 60);
+            dest.V7R.Y = Unsafe.Add(ref selfRef, 61);
+            dest.V7R.Z = Unsafe.Add(ref selfRef, 62);
+            dest.V7R.W = Unsafe.Add(ref selfRef, 63);
+        }
     }
 }
\ No newline at end of file
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
index aecd5c59e..4ffb63480 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
@@ -556,7 +556,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
             }
         }
 
-        private void RoundInplaceSlow()
+        internal void RoundInplaceSlow()
         {
             for (int i = 0; i < Size; i++)
             {
diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
index 7139260ec..2db869de7 100644
--- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
@@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
             ref Block8x8 sourceBlock,
             BufferArea destArea)
         {
-            this.data.SourceBlock = sourceBlock.AsFloatBlock();
+            sourceBlock.CopyToFloatBlock(ref this.data.SourceBlock);
 
             Block8x8F* b = this.pointers.SourceBlock;
 
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index e5f2fd5e7..15a794144 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -8,6 +8,8 @@ namespace SixLabors.ImageSharp.Tests.Common
     using System.Linq;
     using System.Runtime.CompilerServices;
 
+    using SixLabors.ImageSharp.Common.Tuples;
+
     using Xunit.Abstractions;
     using Xunit.Sdk;
 
@@ -243,15 +245,15 @@ namespace SixLabors.ImageSharp.Tests.Common
 
             x = (x * scale) + magick;
 
-            SimdUtils.Octet.OfUInt32 ii = default(SimdUtils.Octet.OfUInt32);
+            Tuple8.OfUInt32 ii = default(Tuple8.OfUInt32);
 
-            ref Vector iiRef = ref Unsafe.As>(ref ii);
+            ref Vector iiRef = ref Unsafe.As>(ref ii);
 
             iiRef = x;
 
-            //SimdUtils.Octet.OfUInt32 ii = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x);
+            //Tuple8.OfUInt32 ii = Unsafe.As, Tuple8.OfUInt32>(ref x);
 
-            ref SimdUtils.Octet.OfByte d = ref dest.NonPortableCast()[0];
+            ref Tuple8.OfByte d = ref dest.NonPortableCast()[0];
             d.LoadFrom(ref ii);
 
             this.Output.WriteLine(ii.ToString());