diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
index 3ee1ca989..a6ff21bdc 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@@ -47,6 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
};
#endif
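+ /// <summary>
+ /// Converts an 8x8 Rgb24 pixel matrix to YCbCr pixel matrices.
+ /// </summary>
+ /// <remarks>Total size of the rgb span must be 200 bytes.</remarks>
+ /// <param name="rgbSpan">Span of rgb pixels holding 64 pixels (one 8x8 block).</param>
+ /// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data.</param>
+ /// <param name="cbBlock">8x8 destination matrix of Chrominance(Cb) converted data.</param>
+ /// <param name="crBlock">8x8 destination matrix of Chrominance(Cr) converted data.</param>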
public static void Convert(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
Debug.Assert(IsSupported, "AVX2 is required to run this converter");
@@ -63,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
var f05 = Vector256.Create(0.5f);
var zero = Vector256.Create(0).AsByte();
- ref Vector256 inRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan));
+ ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan));
ref Vector256 destYRef = ref yBlock.V0;
ref Vector256 destCbRef = ref cbBlock.V0;
ref Vector256 destCrRef = ref crBlock.V0;
@@ -72,9 +80,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb));
Vector256 rgb, rg, bx;
Vector256 r, g, b;
+
+ // TODO: probably remove this after the draft
+ // rgbByteSpan contains 8 strides by 8 pixels each, thus 64 pixels total
+ // Strides are stored sequentially - one big span of 64 * 3 = 192 bytes
+ // Each stride has exactly 3 * 8 = 24 bytes or 3 * 8 * 8 = 192 bits
+ // Avx registers are 256 bits wide, so each stride is loaded together with an extra 64 bits from the next stride:
+ // stride 0 0 - 192 -(+64bits)-> 256
+ // stride 1 192 - 384 -(+64bits)-> 448
+ // stride 2 384 - 576 -(+64bits)-> 640
+ // stride 3 576 - 768 -(+64bits)-> 832
+ // stride 4 768 - 960 -(+64bits)-> 1024
+ // stride 5 960 - 1152 -(+64bits)-> 1216
+ // stride 6 1152 - 1344 -(+64bits)-> 1408
+ // stride 7 1344 - 1536 -(+64bits)-> 1600 <-- READ ACCESS VIOLATION
+ //
+ // Total size of the 64 pixel rgb span is 64 * 3 * 8 = 1536 bits, but a full avx load of the last stride would touch 1600 bits
+ // This is not permitted - we would be reading foreign memory
+ // That's why the last stride is calculated outside of the for-loop: it is loaded from byte offset 160 (so the read ends exactly at byte 192) with a special extract mask (MoveLast24BytesToSeparateLanes) involved
+ //
+ // The extra mask and the separate stride 7 calculation can be eliminated by simply providing an rgb pixel span that is slightly bigger than the pixel data needs:
+ // Total pixel data size is 192 bytes, but avx registers need it to be 200 bytes
+ const int bytesPerRgbStride = 24;
for (int i = 0; i < 7; i++)
{
- rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();
+ rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.Shuffle(rgb, extractRgbMask);
@@ -96,7 +126,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes));
- rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
+ rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.Shuffle(rgb, extractRgbMask);
rg = Avx2.UnpackLow(rgb, zero);