diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index 93bb7be36..5954ad325 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -3,13 +3,14 @@ using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; // namespace SixLabors.ImageSharp.Formats.Jpeg.Components; internal partial struct Block8x8F { - /// + /// /// Level shift by +maximum/2, clip to [0, maximum] /// public void NormalizeColorsInPlace(float maximum) @@ -37,38 +38,66 @@ internal partial struct Block8x8F } /// - /// AVX2-only variant for executing and in one step. + /// version of and . /// + /// The maximum value to normalize to. [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInPlaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector256(float maximum) { - var off = new Vector(MathF.Ceiling(maximum * 0.5F)); - var max = new Vector(maximum); - - ref Vector row0 = ref Unsafe.As>(ref this.V0L); - row0 = NormalizeAndRound(row0, off, max); - - ref Vector row1 = ref Unsafe.As>(ref this.V1L); - row1 = NormalizeAndRound(row1, off, max); - - ref Vector row2 = ref Unsafe.As>(ref this.V2L); - row2 = NormalizeAndRound(row2, off, max); - - ref Vector row3 = ref Unsafe.As>(ref this.V3L); - row3 = NormalizeAndRound(row3, off, max); - - ref Vector row4 = ref Unsafe.As>(ref this.V4L); - row4 = NormalizeAndRound(row4, off, max); - - ref Vector row5 = ref Unsafe.As>(ref this.V5L); - row5 = NormalizeAndRound(row5, off, max); - - ref Vector row6 = ref Unsafe.As>(ref this.V6L); - row6 = NormalizeAndRound(row6, off, max); - - ref Vector row7 = ref Unsafe.As>(ref this.V7L); - row7 = NormalizeAndRound(row7, off, max); - + Vector256 off = Vector256.Create(MathF.Ceiling(maximum * 0.5F)); + Vector256 max = Vector256.Create(maximum); + + ref Vector256 row0 = ref Unsafe.As>(ref this.V0L); + row0 = NormalizeAndRoundVector256(row0, off, max); + + ref Vector256 row1 = ref Unsafe.As>(ref this.V1L); + row1 = NormalizeAndRoundVector256(row1, off, max); + + ref Vector256 row2 = ref Unsafe.As>(ref this.V2L); + row2 = NormalizeAndRoundVector256(row2, off, max); + + ref Vector256 row3 = ref Unsafe.As>(ref this.V3L); + row3 = NormalizeAndRoundVector256(row3, off, max); + + ref Vector256 row4 = ref Unsafe.As>(ref this.V4L); + row4 = NormalizeAndRoundVector256(row4, off, max); + + ref Vector256 row5 = ref Unsafe.As>(ref this.V5L); + row5 = NormalizeAndRoundVector256(row5, off, max); + + ref Vector256 row6 = ref Unsafe.As>(ref this.V6L); + row6 = NormalizeAndRoundVector256(row6, off, max); + + ref Vector256 row7 = ref Unsafe.As>(ref this.V7L); + row7 = NormalizeAndRoundVector256(row7, off, max); + } + + /// + /// version of and . + /// + /// The maximum value to normalize to. + [MethodImpl(InliningOptions.ShortMethod)] + public void NormalizeColorsAndRoundInPlaceVector128(float maximum) + { + Vector128 off = Vector128.Create(MathF.Ceiling(maximum * 0.5F)); + Vector128 max = Vector128.Create(maximum); + + this.V0L = NormalizeAndRoundVector128(this.V0L.AsVector128(), off, max).AsVector4(); + this.V0R = NormalizeAndRoundVector128(this.V0R.AsVector128(), off, max).AsVector4(); + this.V1L = NormalizeAndRoundVector128(this.V1L.AsVector128(), off, max).AsVector4(); + this.V1R = NormalizeAndRoundVector128(this.V1R.AsVector128(), off, max).AsVector4(); + this.V2L = NormalizeAndRoundVector128(this.V2L.AsVector128(), off, max).AsVector4(); + this.V2R = NormalizeAndRoundVector128(this.V2R.AsVector128(), off, max).AsVector4(); + this.V3L = NormalizeAndRoundVector128(this.V3L.AsVector128(), off, max).AsVector4(); + this.V3R = NormalizeAndRoundVector128(this.V3R.AsVector128(), off, max).AsVector4(); + this.V4L = NormalizeAndRoundVector128(this.V4L.AsVector128(), off, max).AsVector4(); + this.V4R = NormalizeAndRoundVector128(this.V4R.AsVector128(), off, max).AsVector4(); + this.V5L = NormalizeAndRoundVector128(this.V5L.AsVector128(), off, max).AsVector4(); + this.V5R = NormalizeAndRoundVector128(this.V5R.AsVector128(), off, max).AsVector4(); + this.V6L = NormalizeAndRoundVector128(this.V6L.AsVector128(), off, max).AsVector4(); + this.V6R = NormalizeAndRoundVector128(this.V6R.AsVector128(), off, max).AsVector4(); + this.V7L = NormalizeAndRoundVector128(this.V7L.AsVector128(), off, max).AsVector4(); + this.V7R = NormalizeAndRoundVector128(this.V7R.AsVector128(), off, max).AsVector4(); } /// @@ -78,76 +107,76 @@ internal partial struct Block8x8F { ref short selfRef = ref Unsafe.As(ref source); - this.V0L.X = Unsafe.Add(ref selfRef, 0); - this.V0L.Y = Unsafe.Add(ref selfRef, 1); - this.V0L.Z = Unsafe.Add(ref selfRef, 2); - this.V0L.W = Unsafe.Add(ref selfRef, 3); - this.V0R.X = Unsafe.Add(ref selfRef, 4); - this.V0R.Y = Unsafe.Add(ref selfRef, 5); - this.V0R.Z = Unsafe.Add(ref selfRef, 6); - this.V0R.W = Unsafe.Add(ref selfRef, 7); - - this.V1L.X = Unsafe.Add(ref selfRef, 8); - this.V1L.Y = Unsafe.Add(ref selfRef, 9); - this.V1L.Z = Unsafe.Add(ref selfRef, 10); - this.V1L.W = Unsafe.Add(ref selfRef, 11); - this.V1R.X = Unsafe.Add(ref selfRef, 12); - this.V1R.Y = Unsafe.Add(ref selfRef, 13); - this.V1R.Z = Unsafe.Add(ref selfRef, 14); - this.V1R.W = Unsafe.Add(ref selfRef, 15); - - this.V2L.X = Unsafe.Add(ref selfRef, 16); - this.V2L.Y = Unsafe.Add(ref selfRef, 17); - this.V2L.Z = Unsafe.Add(ref selfRef, 18); - this.V2L.W = Unsafe.Add(ref selfRef, 19); - this.V2R.X = Unsafe.Add(ref selfRef, 20); - this.V2R.Y = Unsafe.Add(ref selfRef, 21); - this.V2R.Z = Unsafe.Add(ref selfRef, 22); - this.V2R.W = Unsafe.Add(ref selfRef, 23); - - this.V3L.X = Unsafe.Add(ref selfRef, 24); - this.V3L.Y = Unsafe.Add(ref selfRef, 25); - this.V3L.Z = Unsafe.Add(ref selfRef, 26); - this.V3L.W = Unsafe.Add(ref selfRef, 27); - this.V3R.X = Unsafe.Add(ref selfRef, 28); - this.V3R.Y = Unsafe.Add(ref selfRef, 29); - this.V3R.Z = Unsafe.Add(ref selfRef, 30); - this.V3R.W = Unsafe.Add(ref selfRef, 31); - - this.V4L.X = Unsafe.Add(ref selfRef, 32); - this.V4L.Y = Unsafe.Add(ref selfRef, 33); - this.V4L.Z = Unsafe.Add(ref selfRef, 34); - this.V4L.W = Unsafe.Add(ref selfRef, 35); - this.V4R.X = Unsafe.Add(ref selfRef, 36); - this.V4R.Y = Unsafe.Add(ref selfRef, 37); - this.V4R.Z = Unsafe.Add(ref selfRef, 38); - this.V4R.W = Unsafe.Add(ref selfRef, 39); - - this.V5L.X = Unsafe.Add(ref selfRef, 40); - this.V5L.Y = Unsafe.Add(ref selfRef, 41); - this.V5L.Z = Unsafe.Add(ref selfRef, 42); - this.V5L.W = Unsafe.Add(ref selfRef, 43); - this.V5R.X = Unsafe.Add(ref selfRef, 44); - this.V5R.Y = Unsafe.Add(ref selfRef, 45); - this.V5R.Z = Unsafe.Add(ref selfRef, 46); - this.V5R.W = Unsafe.Add(ref selfRef, 47); - - this.V6L.X = Unsafe.Add(ref selfRef, 48); - this.V6L.Y = Unsafe.Add(ref selfRef, 49); - this.V6L.Z = Unsafe.Add(ref selfRef, 50); - this.V6L.W = Unsafe.Add(ref selfRef, 51); - this.V6R.X = Unsafe.Add(ref selfRef, 52); - this.V6R.Y = Unsafe.Add(ref selfRef, 53); - this.V6R.Z = Unsafe.Add(ref selfRef, 54); - this.V6R.W = Unsafe.Add(ref selfRef, 55); - - this.V7L.X = Unsafe.Add(ref selfRef, 56); - this.V7L.Y = Unsafe.Add(ref selfRef, 57); - this.V7L.Z = Unsafe.Add(ref selfRef, 58); - this.V7L.W = Unsafe.Add(ref selfRef, 59); - this.V7R.X = Unsafe.Add(ref selfRef, 60); - this.V7R.Y = Unsafe.Add(ref selfRef, 61); - this.V7R.Z = Unsafe.Add(ref selfRef, 62); - this.V7R.W = Unsafe.Add(ref selfRef, 63); + this.V0L.X = Unsafe.Add(ref selfRef, 0); + this.V0L.Y = Unsafe.Add(ref selfRef, 1); + this.V0L.Z = Unsafe.Add(ref selfRef, 2); + this.V0L.W = Unsafe.Add(ref selfRef, 3); + this.V0R.X = Unsafe.Add(ref selfRef, 4); + this.V0R.Y = Unsafe.Add(ref selfRef, 5); + this.V0R.Z = Unsafe.Add(ref selfRef, 6); + this.V0R.W = Unsafe.Add(ref selfRef, 7); + + this.V1L.X = Unsafe.Add(ref selfRef, 8); + this.V1L.Y = Unsafe.Add(ref selfRef, 9); + this.V1L.Z = Unsafe.Add(ref selfRef, 10); + this.V1L.W = Unsafe.Add(ref selfRef, 11); + this.V1R.X = Unsafe.Add(ref selfRef, 12); + this.V1R.Y = Unsafe.Add(ref selfRef, 13); + this.V1R.Z = Unsafe.Add(ref selfRef, 14); + this.V1R.W = Unsafe.Add(ref selfRef, 15); + + this.V2L.X = Unsafe.Add(ref selfRef, 16); + this.V2L.Y = Unsafe.Add(ref selfRef, 17); + this.V2L.Z = Unsafe.Add(ref selfRef, 18); + this.V2L.W = Unsafe.Add(ref selfRef, 19); + this.V2R.X = Unsafe.Add(ref selfRef, 20); + this.V2R.Y = Unsafe.Add(ref selfRef, 21); + this.V2R.Z = Unsafe.Add(ref selfRef, 22); + this.V2R.W = Unsafe.Add(ref selfRef, 23); + + this.V3L.X = Unsafe.Add(ref selfRef, 24); + this.V3L.Y = Unsafe.Add(ref selfRef, 25); + this.V3L.Z = Unsafe.Add(ref selfRef, 26); + this.V3L.W = Unsafe.Add(ref selfRef, 27); + this.V3R.X = Unsafe.Add(ref selfRef, 28); + this.V3R.Y = Unsafe.Add(ref selfRef, 29); + this.V3R.Z = Unsafe.Add(ref selfRef, 30); + this.V3R.W = Unsafe.Add(ref selfRef, 31); + + this.V4L.X = Unsafe.Add(ref selfRef, 32); + this.V4L.Y = Unsafe.Add(ref selfRef, 33); + this.V4L.Z = Unsafe.Add(ref selfRef, 34); + this.V4L.W = Unsafe.Add(ref selfRef, 35); + this.V4R.X = Unsafe.Add(ref selfRef, 36); + this.V4R.Y = Unsafe.Add(ref selfRef, 37); + this.V4R.Z = Unsafe.Add(ref selfRef, 38); + this.V4R.W = Unsafe.Add(ref selfRef, 39); + + this.V5L.X = Unsafe.Add(ref selfRef, 40); + this.V5L.Y = Unsafe.Add(ref selfRef, 41); + this.V5L.Z = Unsafe.Add(ref selfRef, 42); + this.V5L.W = Unsafe.Add(ref selfRef, 43); + this.V5R.X = Unsafe.Add(ref selfRef, 44); + this.V5R.Y = Unsafe.Add(ref selfRef, 45); + this.V5R.Z = Unsafe.Add(ref selfRef, 46); + this.V5R.W = Unsafe.Add(ref selfRef, 47); + + this.V6L.X = Unsafe.Add(ref selfRef, 48); + this.V6L.Y = Unsafe.Add(ref selfRef, 49); + this.V6L.Z = Unsafe.Add(ref selfRef, 50); + this.V6L.W = Unsafe.Add(ref selfRef, 51); + this.V6R.X = Unsafe.Add(ref selfRef, 52); + this.V6R.Y = Unsafe.Add(ref selfRef, 53); + this.V6R.Z = Unsafe.Add(ref selfRef, 54); + this.V6R.W = Unsafe.Add(ref selfRef, 55); + + this.V7L.X = Unsafe.Add(ref selfRef, 56); + this.V7L.Y = Unsafe.Add(ref selfRef, 57); + this.V7L.Z = Unsafe.Add(ref selfRef, 58); + this.V7L.W = Unsafe.Add(ref selfRef, 59); + this.V7R.X = Unsafe.Add(ref selfRef, 60); + this.V7R.Y = Unsafe.Add(ref selfRef, 61); + this.V7R.Z = Unsafe.Add(ref selfRef, 62); + this.V7R.W = Unsafe.Add(ref selfRef, 63); } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt deleted file mode 100644 index 19b795c23..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ /dev/null @@ -1,103 +0,0 @@ -<# -// Copyright (c) Six Labors. -// Licensed under the Six Labors Split License. -#> -<#@ template debug="false" hostspecific="false" language="C#" #> -<#@ assembly name="System.Core" #> -<#@ import namespace="System.Linq" #> -<#@ import namespace="System.Text" #> -<#@ import namespace="System.Collections.Generic" #> -<#@ output extension=".cs" #> -// Copyright (c) Six Labors. -// Licensed under the Six Labors Split License. - -using System.Numerics; -using System.Runtime.CompilerServices; - -// -<# -char[] coordz = {'X', 'Y', 'Z', 'W'}; -#> -namespace SixLabors.ImageSharp.Formats.Jpeg.Components; - -internal partial struct Block8x8F -{ - /// - /// Level shift by +maximum/2, clip to [0, maximum] - /// - public void NormalizeColorsInPlace(float maximum) - { - var CMin4 = new Vector4(0F); - var CMax4 = new Vector4(maximum); - var COff4 = new Vector4(MathF.Ceiling(maximum * 0.5F)); - - <# - - PushIndent(" "); - - for (int i = 0; i < 8; i++) - { - for (int j = 0; j < 2; j++) - { - char side = j == 0 ? 'L' : 'R'; - Write($"this.V{i}{side} = Numerics.Clamp(this.V{i}{side} + COff4, CMin4, CMax4);\r\n"); - } - } - PopIndent(); - #> - } - - /// - /// AVX2-only variant for executing and in one step. - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInPlaceVector8(float maximum) - { - var off = new Vector(MathF.Ceiling(maximum * 0.5F)); - var max = new Vector(maximum); - <# - - for (int i = 0; i < 8; i++) - { - #> - - ref Vector row<#=i#> = ref Unsafe.As>(ref this.V<#=i#>L); - row<#=i#> = NormalizeAndRound(row<#=i#>, off, max); - <# - } - #> - - } - - /// - /// Fill the block from 'source' doing short -> float conversion. - /// - public void LoadFromInt16Scalar(ref Block8x8 source) - { - ref short selfRef = ref Unsafe.As(ref source); - - <# - PushIndent(" "); - for (int j = 0; j < 8; j++) - { - for (int i = 0; i < 8; i++) - { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; - - if(j > 0 && i == 0){ - WriteLine(""); - } - - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; - - var expression = $"this.V{j}{destSide}.{destCoord} = Unsafe.Add(ref selfRef, {j*8+i});\r\n"; - Write(expression); - - } - } - PopIndent(); - #> - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 018df5f9f..7aa1fb296 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -8,6 +8,8 @@ using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using System.Text; using SixLabors.ImageSharp.Common.Helpers; +using Vector128_ = SixLabors.ImageSharp.Common.Helpers.Vector128Utilities; +using Vector256_ = SixLabors.ImageSharp.Common.Helpers.Vector256Utilities; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Jpeg.Components; @@ -332,22 +334,13 @@ internal partial struct Block8x8F : IEquatable /// The maximum value. public void NormalizeColorsAndRoundInPlace(float maximum) { - if (SimdUtils.HasVector8) + if (Vector256.IsHardwareAccelerated) { - this.NormalizeColorsAndRoundInPlaceVector8(maximum); - } - else - { - this.NormalizeColorsInPlace(maximum); - this.RoundInPlace(); + this.NormalizeColorsAndRoundInPlaceVector256(maximum); } - } - - public void DE_NormalizeColors(float maximum) - { - if (SimdUtils.HasVector8) + else if (Vector128.IsHardwareAccelerated) { - this.NormalizeColorsAndRoundInPlaceVector8(maximum); + this.NormalizeColorsAndRoundInPlaceVector128(maximum); } else { @@ -590,4 +583,22 @@ internal partial struct Block8x8F : IEquatable row = Vector.Min(row, max); return row.FastRound(); } + + [MethodImpl(InliningOptions.ShortMethod)] + private static Vector256 NormalizeAndRoundVector256(Vector256 row, Vector256 off, Vector256 max) + { + row += off; + row = Vector256.Max(row, Vector256.Zero); + row = Vector256.Min(row, max); + return Vector256_.RoundToNearestInteger(row); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static Vector128 NormalizeAndRoundVector128(Vector128 row, Vector128 off, Vector128 max) + { + row += off; + row = Vector128.Max(row, Vector128.Zero); + row = Vector128.Min(row, max); + return Vector128_.RoundToNearestInteger(row); + } } diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 0d36340bf..fde3e94e9 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -56,16 +56,6 @@ True ImageMetadataExtensions.tt - - True - True - Block8x8F.Generated.tt - - - True - True - Block8x8F.Generated.tt - True True @@ -158,14 +148,6 @@ ImageMetadataExtensions.cs TextTemplatingFileGenerator - - TextTemplatingFileGenerator - Block8x8F.Generated.cs - - - TextTemplatingFileGenerator - Block8x8F.Generated.cs - TextTemplatingFileGenerator Abgr32.PixelOperations.Generated.cs diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index cde9e776b..4d804f646 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -3,6 +3,7 @@ // Uncomment this to turn unit tests into benchmarks: // #define BENCHMARKING +using System.Runtime.Intrinsics; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -24,11 +25,22 @@ public partial class Block8x8FTests : JpegFixture { } - private bool SkipOnNonAvx2Runner() + private bool SkipOnNonVector256Runner() { - if (!SimdUtils.HasVector8) + if (!Vector256.IsHardwareAccelerated) { - this.Output.WriteLine("AVX2 not supported, skipping!"); + this.Output.WriteLine("Vector256 not supported, skipping!"); + return true; + } + + return false; + } + + private bool SkipOnNonVector128Runner() + { + if (!Vector128.IsHardwareAccelerated) + { + this.Output.WriteLine("Vector128 not supported, skipping!"); return true; } @@ -172,9 +184,33 @@ public partial class Block8x8FTests : JpegFixture [Theory] [InlineData(1)] [InlineData(2)] - public void NormalizeColorsAndRoundAvx2(int seed) + public void NormalizeColorsAndRoundVector256(int seed) + { + if (this.SkipOnNonVector256Runner()) + { + return; + } + + Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); + + Block8x8F expected = source; + expected.NormalizeColorsInPlace(255); + expected.RoundInPlace(); + + Block8x8F actual = source; + actual.NormalizeColorsAndRoundInPlaceVector256(255); + + this.Output.WriteLine(expected.ToString()); + this.Output.WriteLine(actual.ToString()); + this.CompareBlocks(expected, actual, 0); + } + + [Theory] + [InlineData(1)] + [InlineData(2)] + public void NormalizeColorsAndRoundVector128(int seed) { - if (this.SkipOnNonAvx2Runner()) + if (this.SkipOnNonVector128Runner()) { return; } @@ -186,7 +222,7 @@ public partial class Block8x8FTests : JpegFixture expected.RoundInPlace(); Block8x8F actual = source; - actual.NormalizeColorsAndRoundInPlaceVector8(255); + actual.NormalizeColorsAndRoundInPlaceVector128(255); this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(actual.ToString()); @@ -206,7 +242,7 @@ public partial class Block8x8FTests : JpegFixture Block8x8F source = CreateRandomFloatBlock(-2000, 2000, srcSeed); // Quantization code is used only in jpeg where it's guaranteed that - // qunatization valus are greater than 1 + // quantization values are greater than 1 // Quantize method supports negative numbers by very small numbers can cause troubles Block8x8F quant = CreateRandomFloatBlock(1, 2000, qtSeed); @@ -345,7 +381,7 @@ public partial class Block8x8FTests : JpegFixture [Fact] public void LoadFromUInt16Scalar() { - if (this.SkipOnNonAvx2Runner()) + if (this.SkipOnNonVector256Runner()) { return; } @@ -366,7 +402,7 @@ public partial class Block8x8FTests : JpegFixture [Fact] public void LoadFromUInt16ExtendedAvx2() { - if (this.SkipOnNonAvx2Runner()) + if (this.SkipOnNonVector256Runner()) { return; }