// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.Memory;

namespace SixLabors.ImageSharp.PixelFormats
{
    /// <content>
    /// Provides optimized overrides for bulk operations.
    /// </content>
    public partial struct Rgba32
    {
        /// <summary>
        /// <see cref="PixelOperations{TPixel}"/> implementation optimized for <see cref="Rgba32"/>.
        /// </summary>
        internal partial class PixelOperations : PixelOperations<Rgba32>
        {
            /// <summary>
            /// SIMD optimized bulk conversion of <see cref="Rgba32"/> pixels into <see cref="Vector4"/> values
            /// that works only with `count` divisible by <see cref="Vector{T}.Count"/> of <see cref="uint"/>.
            /// </summary>
            /// <param name="sourceColors">The <see cref="ReadOnlySpan{T}"/> to the source colors.</param>
            /// <param name="destVectors">The <see cref="Span{T}"/> to the destination vectors.</param>
            /// <param name="count">The number of pixels to convert.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when <see cref="Vector.IsHardwareAccelerated"/> is false.
            /// </exception>
            /// <remarks>
            /// Span lengths are not validated here; the spans are accessed through unchecked references,
            /// so callers must ensure both hold at least <paramref name="count"/> elements.
            /// <para>
            /// Implementation adapted from:
            /// <see href="http://stackoverflow.com/a/5362789"/>
            /// </para>
            /// <para>
            /// TODO: We can replace this implementation in the future using new Vector API-s:
            /// <see href="https://github.com/dotnet/corefx/issues/15957"/>
            /// </para>
            /// </remarks>
            internal static void ToVector4SimdAligned(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
            {
                if (!Vector.IsHardwareAccelerated)
                {
                    throw new InvalidOperationException(
                        "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
                }

                DebugGuard.IsTrue(
                    count % Vector<uint>.Count == 0,
                    nameof(count),
                    "Argument 'count' should divisible by Vector<uint>.Count!");

                // Scale factor applied after the bit trick below: (b / 256) * (256 / 255) == b / 255.
                var bVec = new Vector<float>(256.0f / 255.0f);
                var magicFloat = new Vector<float>(32768.0f);

                // Reinterpreted bit pattern of 32768.0f (0x47000000). OR-ing an 8 bit value into the low
                // mantissa bits of this pattern yields the float 32768 + b / 256.
                var magicInt = new Vector<uint>(1191182336);

                // Keeps only the lowest byte of every widened component.
                var mask = new Vector<uint>(255);

                // Every pixel expands into 4 uint/float components.
                int unpackedRawCount = count * 4;

                ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(sourceColors));

                // The destination memory is viewed three ways: as widened pixels (4 x uint per Vector4 slot),
                // as SIMD vectors of uint, and as SIMD vectors of float.
                ref WideRgba destBaseAsWide = ref Unsafe.As<Vector4, WideRgba>(ref MemoryMarshal.GetReference(destVectors));
                ref Vector<uint> destBaseAsUInt = ref Unsafe.As<WideRgba, Vector<uint>>(ref destBaseAsWide);
                ref Vector<float> destBaseAsFloat = ref Unsafe.As<WideRgba, Vector<float>>(ref destBaseAsWide);

                // Pass 1: widen each packed pixel into four uints, written in place into the destination.
                for (int i = 0; i < count; i++)
                {
                    uint sVal = Unsafe.Add(ref sourceBase, i);
                    ref WideRgba dst = ref Unsafe.Add(ref destBaseAsWide, i);

                    // This call is the bottleneck now:
                    dst.Load(sVal);
                }

                int numOfVectors = unpackedRawCount / Vector<uint>.Count;

                // Pass 2: convert the widened uints to normalized floats, one SIMD vector at a time.
                for (int i = 0; i < numOfVectors; i++)
                {
                    Vector<uint> vi = Unsafe.Add(ref destBaseAsUInt, i);

                    // Drop the unrelated upper bits left behind by WideRgba.Load, then splice the byte
                    // into the mantissa of 32768.0f.
                    vi &= mask;
                    vi |= magicInt;

                    var vf = Vector.AsVectorSingle(vi);
                    vf = (vf - magicFloat) * bVec;

                    Unsafe.Add(ref destBaseAsFloat, i) = vf;
                }
            }

            /// <inheritdoc />
            internal override void ToVector4(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destinationVectors, int count)
            {
                Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors));
                Guard.MustBeSizedAtLeast(destinationVectors, count, nameof(destinationVectors));

                if (count < 256 || !Vector.IsHardwareAccelerated)
                {
                    // Not worth bothering with SIMD for small inputs or without hardware acceleration:
                    base.ToVector4(sourceColors, destinationVectors, count);
                    return;
                }

                // Split the work into a SIMD-friendly prefix and a short tail handled by the base implementation.
                int remainder = count % Vector<uint>.Count;
                int alignedCount = count - remainder;

                if (alignedCount > 0)
                {
                    ToVector4SimdAligned(sourceColors, destinationVectors, alignedCount);
                }

                if (remainder > 0)
                {
                    sourceColors = sourceColors.Slice(alignedCount);
                    destinationVectors = destinationVectors.Slice(alignedCount);
                    base.ToVector4(sourceColors, destinationVectors, remainder);
                }
            }

            /// <inheritdoc />
            internal override void PackFromVector4(ReadOnlySpan<Vector4> sourceVectors, Span<Rgba32> destinationColors, int count)
            {
                GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);

                if (!SimdUtils.IsAvx2CompatibleArchitecture)
                {
                    base.PackFromVector4(sourceVectors, destinationColors, count);
                    return;
                }

                // The bulk path only handles an even number of pixels.
                // NOTE(review): presumably because the SimdUtils helper consumes 8 floats (2 pixels) per step;
                // confirm against SimdUtils.
                int remainder = count % 2;
                int alignedCount = count - remainder;

                if (alignedCount > 0)
                {
                    // Both flat views are cut to exactly 'alignedCount' pixels so that the float view and the
                    // byte view have the same number of elements, regardless of how large the caller's
                    // destination span is.
                    ReadOnlySpan<float> flatSrc = MemoryMarshal.Cast<Vector4, float>(sourceVectors.Slice(0, alignedCount));
                    Span<byte> flatDest = MemoryMarshal.Cast<Rgba32, byte>(destinationColors.Slice(0, alignedCount));

                    SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest);
                }

                if (remainder > 0)
                {
                    // actually: remainder == 1
                    int lastIdx = count - 1;
                    destinationColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
                }
            }

            /// <inheritdoc />
            internal override void ToScaledVector4(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destinationVectors, int count)
            {
                // Forwards to the unscaled conversion.
                this.ToVector4(sourceColors, destinationVectors, count);
            }

            /// <inheritdoc />
            internal override void PackFromScaledVector4(ReadOnlySpan<Vector4> sourceVectors, Span<Rgba32> destinationColors, int count)
            {
                // Forwards to the unscaled packing.
                this.PackFromVector4(sourceVectors, destinationColors, count);
            }

            /// <inheritdoc />
            internal override void PackFromRgba32(ReadOnlySpan<Rgba32> source, Span<Rgba32> destPixels, int count)
            {
                GuardSpans(source, nameof(source), destPixels, nameof(destPixels), count);

                // Source and destination share the same pixel type, so a plain copy is sufficient.
                source.Slice(0, count).CopyTo(destPixels);
            }

            /// <inheritdoc />
            internal override void ToRgba32(ReadOnlySpan<Rgba32> sourcePixels, Span<Rgba32> dest, int count)
            {
                GuardSpans(sourcePixels, nameof(sourcePixels), dest, nameof(dest), count);

                // Source and destination share the same pixel type, so a plain copy is sufficient.
                sourcePixels.Slice(0, count).CopyTo(dest);
            }

            /// <summary>
            /// Value type to store <see cref="Rgba32"/>-s widened into multiple <see cref="uint"/>-s.
            /// </summary>
            [StructLayout(LayoutKind.Sequential)]
            private struct WideRgba
            {
                private uint r;

                private uint g;

                private uint b;

                private uint a;

                /// <summary>
                /// Spreads the packed pixel value <paramref name="p"/> over the four fields by shifting
                /// each component down to the lowest byte.
                /// </summary>
                /// <param name="p">The packed pixel value.</param>
                /// <remarks>
                /// The bits above the lowest byte are NOT cleared here; the caller is expected to mask
                /// each field with 255 afterwards (as ToVector4SimdAligned does).
                /// </remarks>
                [MethodImpl(MethodImplOptions.AggressiveInlining)]
                public void Load(uint p)
                {
                    // NOTE(review): 'r' takes the value unshifted, which presumes red occupies the lowest byte.
                    this.r = p;
                    this.g = p >> GreenShift;
                    this.b = p >> BlueShift;
                    this.a = p >> AlphaShift;
                }
            }
        }
    }
}