mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
7 changed files with 396 additions and 34 deletions
@ -0,0 +1,120 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using System.Diagnostics; |
||||
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
||||
|
using System.Runtime.CompilerServices; |
||||
|
using System.Runtime.InteropServices; |
||||
|
using System.Runtime.Intrinsics; |
||||
|
using System.Runtime.Intrinsics.X86; |
||||
|
#endif
|
||||
|
using SixLabors.ImageSharp.PixelFormats; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder |
||||
|
{ |
||||
|
/// <summary>
/// Converts a block of 64 <see cref="Rgb24"/> pixels into separate Y, Cb and Cr
/// <see cref="Block8x8F"/> blocks using AVX2 instructions.
/// </summary>
/// <remarks>
/// The formulas applied per pixel are:
/// <code>
/// Y  = (0.299 * R) + (0.587 * G) + (0.114 * B)
/// Cb = 128 + ((-0.168736 * R) - (0.331264 * G) + (0.5 * B))
/// Cr = 128 + ((0.5 * R) - (0.418688 * G) - (0.081312 * B))
/// </code>
/// </remarks>
internal static class RgbToYCbCrConverterVectorized
{
    /// <summary>
    /// Gets a value indicating whether this converter can be used:
    /// <see langword="true"/> only when the code was compiled with
    /// SUPPORTS_RUNTIME_INTRINSICS and the CPU reports AVX2 support.
    /// </summary>
    public static bool IsSupported
    {
        get
        {
#if SUPPORTS_RUNTIME_INTRINSICS
            return Avx2.IsSupported;
#else
            return false;
#endif
        }
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    // Index vector (eight little-endian 32-bit indices: 0, 1, 2, 6 | 3, 4, 5, 7) for a cross-lane
    // 32-bit permute. Applied to a 32-byte load it places source bytes 0..11 in the lower 128-bit
    // lane and source bytes 12..23 in the upper lane; the fourth element of each lane is padding
    // that the subsequent byte shuffle discards.
    private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
    {
        0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
        3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
    };

    // Same idea as above (indices 2, 3, 4, 0 | 5, 6, 7, 1), but for data that sits in the LAST
    // 24 bytes of the 32-byte load: source bytes 8..19 go to the lower lane, 20..31 to the upper.
    private static ReadOnlySpan<byte> MoveLast24BytesToSeparateLanes => new byte[]
    {
        2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0
    };

    // Per-lane byte shuffle control: gathers the 12 interleaved RGB bytes of four pixels into
    // R0 R1 R2 R3, G0 G1 G2 G3, B0 B1 B2 B3. The 0xFF entries have the high bit set, which makes
    // the shuffle write zero to the last four bytes of each lane.
    private static ReadOnlySpan<byte> ExtractRgb => new byte[]
    {
        0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
        0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF
    };
#endif

    /// <summary>
    /// Converts 64 RGB pixels to Y, Cb and Cr, writing eight <see cref="Vector256{T}"/> of
    /// <see cref="float"/> into each destination block.
    /// </summary>
    /// <param name="rgbSpan">
    /// The source pixels. At least 64 pixels (192 bytes) must be present, because the method
    /// reads that many bytes through unchecked references.
    /// </param>
    /// <param name="yBlock">The destination block for the luminance values.</param>
    /// <param name="cbBlock">The destination block for the blue chroma values.</param>
    /// <param name="crBlock">The destination block for the red chroma values.</param>
    /// <remarks>
    /// Callers are expected to check <see cref="IsSupported"/> first. When compiled without
    /// SUPPORTS_RUNTIME_INTRINSICS the method only performs its debug assertions.
    /// </remarks>
    public static void Convert(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
    {
        Debug.Assert(IsSupported, "AVX2 is required to run this converter");

        // The loads below are unchecked; a shorter span would be read out of bounds.
        Debug.Assert(rgbSpan.Length >= 64, "At least 64 pixels are required to fill an 8x8 block");

#if SUPPORTS_RUNTIME_INTRINSICS
        var f0299 = Vector256.Create(0.299f);
        var f0587 = Vector256.Create(0.587f);
        var f0114 = Vector256.Create(0.114f);
        var fn0168736 = Vector256.Create(-0.168736f);
        var fn0331264 = Vector256.Create(-0.331264f);
        var f128 = Vector256.Create(128f);
        var fn0418688 = Vector256.Create(-0.418688f);
        var fn0081312 = Vector256.Create(-0.081312f);
        var f05 = Vector256.Create(0.5f);
        Vector256<byte> zero = Vector256<byte>.Zero;

        ref Vector256<byte> inRef = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
        ref Vector256<float> destYRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref yBlock);
        ref Vector256<float> destCbRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock);
        ref Vector256<float> destCrRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock);

        var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
        var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
        Vector256<byte> rgb, rg, bx;
        Vector256<float> r, g, b;

        // Groups 0..6: each group is 8 pixels = 24 bytes, loaded as 32 bytes starting at 24 * i.
        // The 8 surplus bytes belong to the next group and are dropped by the shuffle.
        for (int i = 0; i < 7; i++)
        {
            // Split the 24 payload bytes into 12 bytes (4 pixels) per 128-bit lane.
            rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();

            // De-interleave each lane into RRRR GGGG BBBB 0000.
            rgb = Avx2.Shuffle(rgb, extractRgbMask);

            // Zero-extend bytes to 16 bit: rg holds R and G, bx holds B (plus zeros).
            rg = Avx2.UnpackLow(rgb, zero);
            bx = Avx2.UnpackHigh(rgb, zero);

            // Zero-extend once more to 32 bit and convert to float.
            r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
            g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
            b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());

            // (0.299F * r) + (0.587F * g) + (0.114F * b);
            Unsafe.Add(ref destYRef, i) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);

            // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
            Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));

            // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
            Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312, b), fn0418688, g), f05, r));
        }

        // Last group (pixels 56..63, bytes 168..191): a 32-byte load starting at 168 would run
        // past byte 191, so load from 160 instead and pick the LAST 24 bytes of that vector.
        extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes));
        rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
        rgb = Avx2.Shuffle(rgb, extractRgbMask);

        rg = Avx2.UnpackLow(rgb, zero);
        bx = Avx2.UnpackHigh(rgb, zero);

        r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
        g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
        b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());

        // (0.299F * r) + (0.587F * g) + (0.114F * b);
        Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);

        // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
        Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));

        // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
        Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312, b), fn0418688, g), f05, r));
#endif
    }
}
||||
|
} |
||||
@ -0,0 +1,56 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using BenchmarkDotNet.Attributes; |
||||
|
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
||||
|
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder; |
||||
|
using SixLabors.ImageSharp.PixelFormats; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder |
||||
|
{ |
||||
|
/// <summary>
/// Compares the lookup-table RGB to YCbCr converter against the vectorized one
/// on a single block of 64 pseudo-random pixels.
/// </summary>
public class YCbCrForwardConverterBenchmark
{
    private RgbToYCbCrConverterLut converter;
    private Rgb24[] data;

    /// <summary>
    /// Creates the LUT converter and fills the input block from a fixed-seed generator
    /// so that every run measures the same data.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.converter = RgbToYCbCrConverterLut.Create();

        var rng = new Random(42);
        var pixels = new Rgb24[64];

        // Three random bytes per pixel, drawn one pixel at a time.
        var channels = new byte[3];
        for (int index = 0; index < pixels.Length; index++)
        {
            rng.NextBytes(channels);
            pixels[index] = new Rgb24(channels[0], channels[1], channels[2]);
        }

        this.data = pixels;
    }

    /// <summary>
    /// Baseline: converts the block with the lookup-table converter.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void ConvertLut()
    {
        Block8x8F luma = default;
        Block8x8F chromaBlue = default;
        Block8x8F chromaRed = default;

        this.converter.Convert(this.data.AsSpan(), ref luma, ref chromaBlue, ref chromaRed);
    }

    /// <summary>
    /// Converts the block with the vectorized converter; does nothing when it is not supported.
    /// </summary>
    [Benchmark]
    public void ConvertVectorized()
    {
        Block8x8F luma = default;
        Block8x8F chromaBlue = default;
        Block8x8F chromaRed = default;

        if (!RgbToYCbCrConverterVectorized.IsSupported)
        {
            return;
        }

        RgbToYCbCrConverterVectorized.Convert(this.data.AsSpan(), ref luma, ref chromaBlue, ref chromaRed);
    }
}
||||
|
} |
||||
@ -0,0 +1,91 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using SixLabors.ImageSharp.ColorSpaces; |
||||
|
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
||||
|
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder; |
||||
|
using SixLabors.ImageSharp.PixelFormats; |
||||
|
using SixLabors.ImageSharp.Tests.Colorspaces.Conversion; |
||||
|
using Xunit; |
||||
|
using Xunit.Abstractions; |
||||
|
|
||||
|
// ReSharper disable InconsistentNaming
|
||||
|
namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
||||
|
{ |
||||
|
/// <summary>
/// Checks the lookup-table and vectorized RGB to YCbCr converters against a
/// straightforward floating point reference implementation.
/// </summary>
public class RgbToYCbCrConverterTests
{
    // Fixed seed: keeps the generated pixels identical between runs so that a
    // failure can always be reproduced with the same input.
    private const int RandomSeed = 42;

    public RgbToYCbCrConverterTests(ITestOutputHelper output)
    {
        this.Output = output;
    }

    private ITestOutputHelper Output { get; }

    /// <summary>
    /// The LUT converter must match the reference within a tolerance of 1.
    /// </summary>
    [Fact]
    public void TestLutConverter()
    {
        Rgb24[] data = CreateTestData();
        var target = RgbToYCbCrConverterLut.Create();

        Block8x8F y = default;
        Block8x8F cb = default;
        Block8x8F cr = default;

        target.Convert(data.AsSpan(), ref y, ref cb, ref cr);

        Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F));
    }

    /// <summary>
    /// The vectorized converter must match the reference within a tolerance of 0.0001.
    /// The test returns early when the converter reports that it is not supported.
    /// </summary>
    [Fact]
    public void TestVectorizedConverter()
    {
        if (!RgbToYCbCrConverterVectorized.IsSupported)
        {
            // IsSupported is gated on AVX2, so that is what the message reports.
            this.Output.WriteLine("No AVX2 present, skipping test!");
            return;
        }

        Rgb24[] data = CreateTestData();

        Block8x8F y = default;
        Block8x8F cb = default;
        Block8x8F cr = default;

        RgbToYCbCrConverterVectorized.Convert(data.AsSpan(), ref y, ref cb, ref cr);

        Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F));
    }

    /// <summary>
    /// Recomputes Y, Cb and Cr for every input pixel with scalar float math and asserts
    /// that the value at the same index of each result block is equal according to
    /// <paramref name="comparer"/>.
    /// </summary>
    private static void Verify(ReadOnlySpan<Rgb24> data, ref Block8x8F yResult, ref Block8x8F cbResult, ref Block8x8F crResult, ApproximateColorSpaceComparer comparer)
    {
        for (int i = 0; i < data.Length; i++)
        {
            int r = data[i].R;
            int g = data[i].G;
            int b = data[i].B;

            float y = (0.299F * r) + (0.587F * g) + (0.114F * b);
            float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
            float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));

            Assert.True(comparer.Equals(new YCbCr(y, cb, cr), new YCbCr(yResult[i], cbResult[i], crResult[i])), $"Pos {i}, Expected {y} == {yResult[i]}, {cb} == {cbResult[i]}, {cr} == {crResult[i]}");
        }
    }

    /// <summary>
    /// Creates 64 pseudo-random pixels from a fixed-seed generator.
    /// </summary>
    private static Rgb24[] CreateTestData()
    {
        var data = new Rgb24[64];
        var r = new Random(RandomSeed);

        var random = new byte[3];
        for (int i = 0; i < data.Length; i++)
        {
            r.NextBytes(random);
            data[i] = new Rgb24(random[0], random[1], random[2]);
        }

        return data;
    }
}
||||
|
} |
||||
Loading…
Reference in new issue