diff --git a/.gitattributes b/.gitattributes index c0bff6e18..7c648c077 100644 --- a/.gitattributes +++ b/.gitattributes @@ -80,8 +80,11 @@ *.pvr binary *.snk binary *.tga binary +*.tif binary +*.tiff binary *.ttc binary *.ttf binary +*.wbmp binary *.webp binary *.woff binary *.woff2 binary diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs new file mode 100644 index 000000000..4f3d732b4 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -0,0 +1,99 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); + } + + [MethodImpl(InliningOptions.ShortMethod)] + internal static void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + } + + private static void PackFromRgbPlanesScalarBatchedReduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + int count = destination.Length / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); + } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < destination.Length; i++) + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index f37226a1a..6d82cfad0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -148,26 +148,6 @@ namespace SixLabors.ImageSharp } } - [MethodImpl(InliningOptions.ShortMethod)] - internal static void PackFromRgbPlanes( - Configuration configuration, - ReadOnlySpan redChannel, - ReadOnlySpan greenChannel, - ReadOnlySpan blueChannel, - Span destination) - { - } - - [MethodImpl(InliningOptions.ShortMethod)] - internal static void PackFromRgbPlanes( - Configuration configuration, - ReadOnlySpan redChannel, - ReadOnlySpan greenChannel, - ReadOnlySpan blueChannel, - Span destination) - { - } - [MethodImpl(InliningOptions.ColdPath)] private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest) { @@ -241,5 +221,13 @@ namespace SixLabors.ImageSharp nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!"); } + + private struct ByteTuple4 + { + public byte V0; + public byte V1; + public byte V2; + public byte V3; + } } } diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index d2d07af54..66b88489f 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -25,16 +25,16 @@ - + - + - + - - + + diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs new file mode 100644 index 000000000..db66ae941 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -0,0 +1,252 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion +{ + public unsafe class PixelConversion_PackFromRgbPlanes + { + private byte[] rBuf; + private byte[] gBuf; + private byte[] bBuf; + private Rgb24[] rgbBuf; + private Rgba32[] rgbaBuf; + + private float[] rFloat; + private float[] gFloat; + private float[] bFloat; + + private float[] rgbaFloat; + + [Params(512)] + public int Count { get; set; } + + [GlobalSetup] + public void Setup() + { + this.rBuf = new byte[this.Count]; + this.gBuf = new byte[this.Count]; + this.bBuf = new byte[this.Count]; + this.rgbBuf = new Rgb24[this.Count]; + this.rgbaBuf = new Rgba32[this.Count]; + + this.rFloat = new float[this.Count]; + this.gFloat = new float[this.Count]; + this.bFloat = new float[this.Count]; + + this.rgbaFloat = new float[this.Count * 4]; + } + + // [Benchmark(Baseline = true)] + public void Rgb24_Scalar_PerElement_Pinned() + { + fixed (byte* r = &this.rBuf[0]) + fixed (byte* g = &this.gBuf[0]) + fixed (byte* b = &this.bBuf[0]) + fixed (Rgb24* rgb = &this.rgbBuf[0]) + { + for (int i = 0; i < this.Count; i++) + { + Rgb24* d = rgb + i; + d->R = r[i]; + d->G = g[i]; + d->B = b[i]; + } + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Span() + { + Span r = this.rBuf; + Span g = this.rBuf; + Span b = this.rBuf; + Span rgb = this.rgbBuf; + + for (int i = 0; i < rgb.Length; i++) + { + ref Rgb24 d = ref rgb[i]; + d.R = r[i]; + d.G = g[i]; + d.B = b[i]; + } + } + + [Benchmark(Baseline = true)] + public void Rgb24_Scalar_PerElement_Unsafe() + { + ref byte r = ref this.rBuf[0]; + ref byte g = ref this.rBuf[0]; + ref byte b = ref this.rBuf[0]; + ref Rgb24 rgb = ref this.rgbBuf[0]; + + for (int i = 0; i < this.Count; i++) + { + ref Rgb24 d = ref Unsafe.Add(ref rgb, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched8() + { + ref Byte8 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte8 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 8; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 8); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + ref Rgb24 d4 = ref Unsafe.Add(ref d0, 4); + ref Rgb24 d5 = ref Unsafe.Add(ref d0, 5); + ref Rgb24 d6 = ref Unsafe.Add(ref d0, 6); + ref Rgb24 d7 = ref Unsafe.Add(ref d0, 7); + + ref Byte8 rr = ref Unsafe.Add(ref r, i); + ref Byte8 gg = ref Unsafe.Add(ref g, i); + ref Byte8 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + + d4.R = rr.V4; + d4.G = gg.V4; + d4.B = bb.V4; + + d5.R = rr.V5; + d5.G = gg.V5; + d5.B = bb.V5; + + d6.R = rr.V6; + d6.G = gg.V6; + d6.B = bb.V6; + + d7.R = rr.V7; + d7.G = gg.V7; + d7.B = bb.V7; + } + } + + [Benchmark] + public void Rgb24_Scalar_PerElement_Batched4() + { + ref Byte4 r = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 g = ref Unsafe.As(ref this.rBuf[0]); + ref Byte4 b = ref Unsafe.As(ref this.rBuf[0]); + ref Rgb24 rgb = ref this.rgbBuf[0]; + + int count = this.Count / 4; + for (int i = 0; i < count; i++) + { + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + + ref Byte4 rr = ref Unsafe.Add(ref r, i); + ref Byte4 gg = ref Unsafe.Add(ref g, i); + ref Byte4 bb = ref Unsafe.Add(ref b, i); + + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; + + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; + + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; + + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void Rgba32_Vector_Float() + { + ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); + ref Vector256 gBase = ref Unsafe.As>(ref this.gFloat[0]); + ref Vector256 bBase = ref Unsafe.As>(ref this.bFloat[0]); + ref Vector256 resultBase = ref Unsafe.As>(ref this.rgbaFloat[0]); + + int count = this.Count / Vector256.Count; + + ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + var va = Vector256.Create(1F); + + for (int i = 0; i < count; i++) + { + Vector256 r = Unsafe.Add(ref rBase, i); + Vector256 g = Unsafe.Add(ref gBase, i); + Vector256 b = Unsafe.Add(ref bBase, i); + + r = Avx2.PermuteVar8x32(r, vcontrol); + g = Avx2.PermuteVar8x32(g, vcontrol); + b = Avx2.PermuteVar8x32(b, vcontrol); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } + } +#endif + +#pragma warning disable SA1132 + private struct Byte8 + { + public byte V0, V1, V2, V3, V4, V5, V6, V7; + } + + private struct Byte4 + { + public byte V0, V1, V2, V3; + } +#pragma warning restore + } +} \ No newline at end of file