// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;

namespace SixLabors.ImageSharp
{
    /// <content>
    /// Provides optimized overrides for bulk operations.
    /// </content>
    public partial struct Rgba32
    {
        /// <summary>
        /// Bulk pixel operations implementation optimized for <see cref="Rgba32"/>.
        /// </summary>
        internal partial class PixelOperations : PixelOperations<Rgba32>
        {
            /// <summary>
            /// SIMD optimized bulk conversion of <see cref="Rgba32"/> pixels into <see cref="Vector4"/> values
            /// that works only with <paramref name="count"/> divisible by <c>Vector&lt;uint&gt;.Count</c>.
            /// </summary>
            /// <param name="sourceColors">The <see cref="Span{T}"/> to the source colors.</param>
            /// <param name="destVectors">The <see cref="Span{T}"/> to the destination vectors.</param>
            /// <param name="count">The number of pixels to convert.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when <see cref="Vector.IsHardwareAccelerated"/> is <c>false</c>.
            /// </exception>
            /// <remarks>
            /// Implementation adapted from:
            /// http://stackoverflow.com/a/5362789
            /// <para>
            /// TODO: We can replace this implementation in the future using new Vector API-s:
            /// https://github.com/dotnet/corefx/issues/15957
            /// </para>
            /// </remarks>
            internal static void ToVector4SimdAligned(Span<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
            {
                if (!Vector.IsHardwareAccelerated)
                {
                    throw new InvalidOperationException(
                        "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
                }

                DebugGuard.IsTrue(
                    count % Vector<uint>.Count == 0,
                    nameof(count),
                    "Argument 'count' should divisible by Vector.Count!");

                // OR-ing a byte value b into the low mantissa bits of 32768.0f yields the float
                // 32768 + b / 256. Subtracting 32768 and scaling by 256 / 255 leaves b / 255.
                var bVec = new Vector<float>(256.0f / 255.0f);
                var magicFloat = new Vector<float>(32768.0f);
                var magicInt = new Vector<uint>(1191182336); // reinterpreted bit pattern of 32768.0f
                var mask = new Vector<uint>(255);

                // Every pixel expands to four 32-bit lanes in the destination buffer.
                int unpackedRawCount = count * 4;

                ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref sourceColors.DangerousGetPinnableReference());
                ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As<Vector4, UnpackedRGBA>(ref destVectors.DangerousGetPinnableReference());
                ref Vector<uint> destBaseAsUInt = ref Unsafe.As<UnpackedRGBA, Vector<uint>>(ref destBaseAsUnpacked);
                ref Vector<float> destBaseAsFloat = ref Unsafe.As<UnpackedRGBA, Vector<float>>(ref destBaseAsUnpacked);

                // Pass 1: write the shifted (but still unmasked) channel values as uint-s
                // directly into the destination memory.
                for (int i = 0; i < count; i++)
                {
                    uint sVal = Unsafe.Add(ref sourceBase, i);
                    ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i);

                    // This call is the bottleneck now:
                    dst.Load(sVal);
                }

                int numOfVectors = unpackedRawCount / Vector<uint>.Count;

                // Pass 2: mask each lane down to its low byte and convert it to a float, in place.
                for (int i = 0; i < numOfVectors; i++)
                {
                    Vector<uint> vi = Unsafe.Add(ref destBaseAsUInt, i);

                    vi &= mask;
                    vi |= magicInt;

                    var vf = Vector.AsVectorSingle(vi);
                    vf = (vf - magicFloat) * bVec;

                    Unsafe.Add(ref destBaseAsFloat, i) = vf;
                }
            }

            /// <inheritdoc />
            internal override void ToVector4(Span<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
            {
                Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors));
                Guard.MustBeSizedAtLeast(destVectors, count, nameof(destVectors));

                if (count < 256 || !Vector.IsHardwareAccelerated)
                {
                    // Not worth bothering with SIMD:
                    base.ToVector4(sourceColors, destVectors, count);
                    return;
                }

                // The SIMD path only handles whole multiples of Vector<uint>.Count;
                // the trailing pixels are delegated to the base implementation.
                int remainder = count % Vector<uint>.Count;
                int alignedCount = count - remainder;

                if (alignedCount > 0)
                {
                    ToVector4SimdAligned(sourceColors, destVectors, alignedCount);
                }

                if (remainder > 0)
                {
                    sourceColors = sourceColors.Slice(alignedCount);
                    destVectors = destVectors.Slice(alignedCount);
                    base.ToVector4(sourceColors, destVectors, remainder);
                }
            }

            /// <inheritdoc />
            internal override void PackFromVector4(Span<Vector4> sourceVectors, Span<Rgba32> destColors, int count)
            {
                GuardSpans(sourceVectors, nameof(sourceVectors), destColors, nameof(destColors), count);

                if (!SimdUtils.IsAvx2CompatibleArchitecture)
                {
                    base.PackFromVector4(sourceVectors, destColors, count);
                    return;
                }

                // The bulk conversion is fed an even number of vectors; an odd trailing
                // element is packed individually below.
                int remainder = count % 2;
                int alignedCount = count - remainder;

                if (alignedCount > 0)
                {
                    // NOTE(review): the type arguments are inferred from the span element types
                    // (Vector4 -> float, Rgba32 -> byte) — confirm against NonPortableCast's signature.
                    Span<float> flatSrc = sourceVectors.Slice(0, alignedCount).NonPortableCast<Vector4, float>();
                    Span<byte> flatDest = destColors.NonPortableCast<Rgba32, byte>();

                    SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest);
                }

                if (remainder > 0)
                {
                    // actually: remainder == 1
                    int lastIdx = count - 1;
                    destColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
                }
            }

            /// <inheritdoc />
            internal override void PackFromRgba32(Span<Rgba32> source, Span<Rgba32> destPixels, int count)
            {
                GuardSpans(source, nameof(source), destPixels, nameof(destPixels), count);

                // Source and destination share the same pixel format, so a plain copy is enough.
                SpanHelper.Copy(source, destPixels, count);
            }

            /// <inheritdoc />
            internal override void ToRgba32(Span<Rgba32> sourcePixels, Span<Rgba32> dest, int count)
            {
                GuardSpans(sourcePixels, nameof(sourcePixels), dest, nameof(dest), count);

                // Source and destination share the same pixel format, so a plain copy is enough.
                SpanHelper.Copy(sourcePixels, dest, count);
            }

            /// <summary>
            /// Value type to store <see cref="Rgba32"/>-s unpacked into multiple <see cref="uint"/>-s.
            /// </summary>
            [StructLayout(LayoutKind.Sequential)]
            private struct UnpackedRGBA
            {
                private uint r;
                private uint g;
                private uint b;
                private uint a;

                /// <summary>
                /// Stores the packed value <paramref name="p"/> into the four fields, shifted right by
                /// the corresponding channel offset. The upper bits are NOT masked off here.
                /// </summary>
                /// <param name="p">The packed pixel value.</param>
                [MethodImpl(MethodImplOptions.AggressiveInlining)]
                public void Load(uint p)
                {
                    this.r = p;
                    this.g = p >> GreenShift;
                    this.b = p >> BlueShift;
                    this.a = p >> AlphaShift;
                }
            }
        }
    }
}