// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
/// <content>
/// Provides optimized overrides for bulk operations.
/// </content>
public partial struct Rgba32
{
/// <summary>
/// <see cref="PixelOperations{TPixel}"/> implementation optimized for <see cref="Rgba32"/>.
/// </summary>
internal partial class PixelOperations : PixelOperations
{
/// <summary>
/// SIMD optimized bulk conversion of packed <see cref="Rgba32"/> pixels to <see cref="Vector4"/>
/// that works only when <paramref name="count"/> is divisible by <c>Vector&lt;uint&gt;.Count</c>.
/// </summary>
/// <param name="sourceColors">The <see cref="Span{T}"/> to the source colors.</param>
/// <param name="destVectors">The <see cref="Span{T}"/> to the destination vectors.</param>
/// <param name="count">The number of pixels to convert.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <see cref="Vector.IsHardwareAccelerated"/> is <c>false</c>.
/// </exception>
/// <remarks>
/// <para>
/// The conversion runs in two passes over the destination memory. The first pass spreads every packed
/// pixel into four 32-bit lanes, one per channel. The second pass masks each lane down to its low byte
/// <c>v</c> and ORs in the bit pattern of <c>32768.0f</c>, which produces the float
/// <c>32768 + v / 256</c>; subtracting <c>32768.0f</c> and multiplying by <c>256 / 255</c>
/// leaves <c>v / 255</c>.
/// </para>
/// <para>
/// Implementation adapted from: http://stackoverflow.com/a/5362789
/// </para>
/// <para>
/// TODO: We can replace this implementation in the future using new Vector API-s:
/// https://github.com/dotnet/corefx/issues/15957
/// </para>
/// </remarks>
internal static void ToVector4SimdAligned(Span<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
{
    if (!Vector.IsHardwareAccelerated)
    {
        throw new InvalidOperationException(
            "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
    }

    DebugGuard.IsTrue(
        count % Vector<uint>.Count == 0,
        nameof(count),
        "Argument 'count' should be divisible by Vector<uint>.Count!");

    var bVec = new Vector<float>(256.0f / 255.0f);
    var magicFloat = new Vector<float>(32768.0f);
    var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f (0x47000000)
    var mask = new Vector<uint>(255);

    // Every pixel expands to four 32-bit lanes in the destination.
    int unpackedRawCount = count * 4;

    // No bounds checks from here on: the same destination memory is viewed as
    // UnpackedRGBA (pass 1), then as Vector<uint> / Vector<float> (pass 2).
    ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref sourceColors.DangerousGetPinnableReference());
    ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As<Vector4, UnpackedRGBA>(ref destVectors.DangerousGetPinnableReference());
    ref Vector<uint> destBaseAsUInt = ref Unsafe.As<UnpackedRGBA, Vector<uint>>(ref destBaseAsUnpacked);
    ref Vector<float> destBaseAsFloat = ref Unsafe.As<UnpackedRGBA, Vector<float>>(ref destBaseAsUnpacked);

    // Pass 1: spread each packed pixel into four (still unmasked) 32-bit lanes.
    for (int i = 0; i < count; i++)
    {
        uint sVal = Unsafe.Add(ref sourceBase, i);
        ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i);

        // This call is the bottleneck now:
        dst.Load(sVal);
    }

    int numOfVectors = unpackedRawCount / Vector<uint>.Count;

    // Pass 2: turn the integer lanes into normalized floats, in place.
    for (int i = 0; i < numOfVectors; i++)
    {
        Vector<uint> vi = Unsafe.Add(ref destBaseAsUInt, i);

        vi &= mask;     // keep only the channel byte of each lane
        vi |= magicInt; // place it in the low mantissa bits of 32768.0f

        var vf = Vector.AsVectorSingle(vi);
        vf = (vf - magicFloat) * bVec;

        Unsafe.Add(ref destBaseAsFloat, i) = vf;
    }
}
/// <inheritdoc />
internal override void ToVector4(Span<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
{
    Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors));
    Guard.MustBeSizedAtLeast(destVectors, count, nameof(destVectors));

    if (count < 256 || !Vector.IsHardwareAccelerated)
    {
        // Not worth bothering with SIMD for short inputs or without hardware acceleration:
        base.ToVector4(sourceColors, destVectors, count);
        return;
    }

    // The SIMD routine only accepts a pixel count divisible by Vector<uint>.Count,
    // so split the input into an aligned head and a short tail.
    int remainder = count % Vector<uint>.Count;
    int alignedCount = count - remainder;

    if (alignedCount > 0)
    {
        ToVector4SimdAligned(sourceColors, destVectors, alignedCount);
    }

    if (remainder > 0)
    {
        // Hand the leftover pixels to the base implementation.
        sourceColors = sourceColors.Slice(alignedCount);
        destVectors = destVectors.Slice(alignedCount);

        base.ToVector4(sourceColors, destVectors, remainder);
    }
}
/// <inheritdoc />
internal override void PackFromVector4(Span<Vector4> sourceVectors, Span<Rgba32> destColors, int count)
{
    GuardSpans(sourceVectors, nameof(sourceVectors), destColors, nameof(destColors), count);

    if (!SimdUtils.IsAvx2CompatibleArchitecture)
    {
        base.PackFromVector4(sourceVectors, destColors, count);
        return;
    }

    // The bulk path is fed an even number of pixels, i.e. a multiple of 8 floats.
    // NOTE(review): presumably the AVX2 helper consumes 8 floats per step - confirm against SimdUtils.
    int remainder = count % 2;
    int alignedCount = count - remainder;

    if (alignedCount > 0)
    {
        // View the vectors as raw floats and the pixels as raw bytes.
        // NOTE(review): the destination is cast without slicing to alignedCount; this assumes
        // the helper sizes its work from the source span - confirm.
        Span<float> flatSrc = sourceVectors.Slice(0, alignedCount).NonPortableCast<Vector4, float>();
        Span<byte> flatDest = destColors.NonPortableCast<Rgba32, byte>();

        SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest);
    }

    if (remainder > 0)
    {
        // actually: remainder == 1
        int lastIdx = count - 1;
        destColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
    }
}
/// <inheritdoc />
internal override void PackFromRgba32(Span<Rgba32> source, Span<Rgba32> destPixels, int count)
{
    GuardSpans(source, nameof(source), destPixels, nameof(destPixels), count);

    // Source and destination share the same pixel type, so no conversion is needed.
    SpanHelper.Copy(source, destPixels, count);
}
/// <inheritdoc />
internal override void ToRgba32(Span<Rgba32> sourcePixels, Span<Rgba32> dest, int count)
{
    GuardSpans(sourcePixels, nameof(sourcePixels), dest, nameof(dest), count);

    // Source and destination share the same pixel type, so no conversion is needed.
    SpanHelper.Copy(sourcePixels, dest, count);
}
/// <summary>
/// Value type to store <see cref="Rgba32"/>-s unpacked into multiple <see cref="uint"/>-s.
/// </summary>
/// <remarks>
/// Consists of four sequentially laid out 32-bit fields, one slot per channel. The field order
/// is significant: ToVector4SimdAligned reinterprets the same memory as vectors.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
private struct UnpackedRGBA
{
    private uint r;
    private uint g;
    private uint b;
    private uint a;

    /// <summary>
    /// Spreads a packed pixel value across the four channel slots.
    /// </summary>
    /// <param name="p">The packed pixel value.</param>
    /// <remarks>
    /// The shifted values are stored unmasked, so each slot may still contain bits above its
    /// low byte; ToVector4SimdAligned masks every lane with 255 afterwards.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Load(uint p)
    {
        // Stored without a shift.
        this.r = p;
        this.g = p >> GreenShift;
        this.b = p >> BlueShift;
        this.a = p >> AlphaShift;
    }
}
}
}
}