Browse Source

Merge branch 'master' into af/UniformUnmanagedMemoryPoolMemoryAllocator-02

# Conflicts:
#	src/ImageSharp/Memory/Allocators/IManagedByteBuffer.cs
#	src/ImageSharp/Memory/Allocators/Internals/BasicByteBuffer.cs
#	src/ImageSharp/Memory/Allocators/Internals/ManagedBufferBase.cs
#	tests/ImageSharp.Tests.ProfilingSandbox/LoadResizeSaveParallelMemoryStress.cs
#	tests/ImageSharp.Tests/Formats/Tiff/Compression/PackBitsTiffCompressionTests.cs
#	tests/ImageSharp.Tests/Image/ImageFrameTests.cs
#	tests/ImageSharp.Tests/Image/ImageTests.cs
#	tests/ImageSharp.Tests/Memory/Allocators/ArrayPoolMemoryAllocatorTests.cs
#	tests/ImageSharp.Tests/Memory/Allocators/SimpleGcMemoryAllocatorTests.cs
#	tests/ImageSharp.Tests/TestUtilities/ImageProviders/TestImageProvider.cs
#	tests/ImageSharp.Tests/TestUtilities/TestImageExtensions.cs
af/UniformUnmanagedMemoryPoolMemoryAllocator-02-MemoryGuards
Anton Firszov 4 years ago
parent
commit
017ee4049c
  1. 141
      .github/workflows/build-and-test.yml
  2. 4
      .gitignore
  3. 5
      Directory.Build.props
  4. 1
      ImageSharp.sln
  5. 2
      shared-infrastructure
  6. 1
      src/ImageSharp/Advanced/ParallelExecutionSettings.cs
  7. 3
      src/ImageSharp/Color/Color.Conversions.cs
  8. 2
      src/ImageSharp/Color/Color.WebSafePalette.cs
  9. 1
      src/ImageSharp/Color/Color.cs
  10. 2
      src/ImageSharp/ColorSpaces/CieLab.cs
  11. 2
      src/ImageSharp/ColorSpaces/CieLch.cs
  12. 2
      src/ImageSharp/ColorSpaces/CieLchuv.cs
  13. 2
      src/ImageSharp/ColorSpaces/CieLuv.cs
  14. 2
      src/ImageSharp/ColorSpaces/CieXyy.cs
  15. 2
      src/ImageSharp/ColorSpaces/CieXyz.cs
  16. 2
      src/ImageSharp/ColorSpaces/Cmyk.cs
  17. 2
      src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs
  18. 2
      src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs
  19. 2
      src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs
  20. 2
      src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs
  21. 4
      src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs
  22. 2
      src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs
  23. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs
  24. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs
  25. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs
  26. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs
  27. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs
  28. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs
  29. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs
  30. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs
  31. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs
  32. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs
  33. 2
      src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs
  34. 2
      src/ImageSharp/ColorSpaces/Hsl.cs
  35. 2
      src/ImageSharp/ColorSpaces/Hsv.cs
  36. 2
      src/ImageSharp/ColorSpaces/HunterLab.cs
  37. 2
      src/ImageSharp/ColorSpaces/Illuminants.cs
  38. 2
      src/ImageSharp/ColorSpaces/Lms.cs
  39. 2
      src/ImageSharp/Common/Constants.cs
  40. 4
      src/ImageSharp/Common/Helpers/InliningOptions.cs
  41. 145
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  42. 4
      src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
  43. 170
      src/ImageSharp/Compression/Zlib/Adler32.cs
  44. 180
      src/ImageSharp/Compression/Zlib/Crc32.cs
  45. 2
      src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs
  46. 2
      src/ImageSharp/Formats/Bmp/BmpConstants.cs
  47. 2
      src/ImageSharp/Formats/Bmp/BmpFormat.cs
  48. 2
      src/ImageSharp/Formats/Bmp/BmpMetadata.cs
  49. 2
      src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs
  50. 2
      src/ImageSharp/Formats/Gif/GifConfigurationModule.cs
  51. 1
      src/ImageSharp/Formats/Gif/GifDecoderCore.cs
  52. 2
      src/ImageSharp/Formats/Gif/GifDisposalMethod.cs
  53. 2
      src/ImageSharp/Formats/Gif/GifFormat.cs
  54. 2
      src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs
  55. 1
      src/ImageSharp/Formats/Gif/LzwEncoder.cs
  56. 2
      src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs
  57. 2
      src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs
  58. 2
      src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs
  59. 2
      src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs
  60. 2
      src/ImageSharp/Formats/IImageFormat.cs
  61. 193
      src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
  62. 149
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
  63. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
  64. 439
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  65. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs
  66. 47
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
  67. 45
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
  68. 34
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
  69. 32
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
  70. 38
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
  71. 40
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
  72. 33
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
  73. 40
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
  74. 50
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs
  75. 37
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs
  76. 52
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs
  77. 47
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs
  78. 64
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs
  79. 43
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
  80. 60
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs
  81. 15
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs
  82. 156
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  83. 6
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs
  84. 27
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
  85. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/IJpegComponent.cs
  86. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
  87. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JFifMarker.cs
  88. 26
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  89. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegColorSpace.cs
  90. 1
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponent.cs
  91. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegFileMarker.cs
  92. 10
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs
  93. 72
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs
  94. 2
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffIndex.cs
  95. 21
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
  96. 561
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  97. 10
      src/ImageSharp/Formats/Jpeg/Components/Encoder/QuantIndex.cs
  98. 114
      src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
  99. 8
      src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs
  100. 8
      src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs

141
.github/workflows/build-and-test.yml

@ -1,19 +1,37 @@
name: Build
on:
push:
branches:
- master
tags:
- "v*"
pull_request:
branches:
- master
push:
branches:
- master
tags:
- "v*"
pull_request:
branches:
- master
jobs:
Build:
strategy:
matrix:
options:
- os: ubuntu-latest
framework: net6.0
sdk: 6.0.x
sdk-preview: true
runtime: -x64
codecov: false
- os: macos-latest
framework: net6.0
sdk: 6.0.x
sdk-preview: true
runtime: -x64
codecov: false
- os: windows-latest
framework: net6.0
sdk: 6.0.x
sdk-preview: true
runtime: -x64
codecov: false
- os: ubuntu-latest
framework: net5.0
runtime: -x64
@ -52,37 +70,38 @@ jobs:
codecov: false
runs-on: ${{matrix.options.os}}
if: "!contains(github.event.head_commit.message, '[skip ci]')"
steps:
- uses: actions/checkout@v2
- name: Git Config
shell: bash
run: |
git config --global core.autocrlf false
git config --global core.longpaths true
- name: Git Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
submodules: recursive
# See https://github.com/actions/checkout/issues/165#issuecomment-657673315
- name: Create LFS file list
- name: Git Create LFS FileList
run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id
- name: Restore LFS cache
- name: Git Setup LFS Cache
uses: actions/cache@v2
id: lfs-cache
with:
path: .git/lfs
key: ${{ runner.os }}-lfs-${{ hashFiles('.lfs-assets-id') }}-v1
- name: Git LFS Pull
- name: Git Pull LFS
run: git lfs pull
- name: Install NuGet
- name: NuGet Install
uses: NuGet/setup-nuget@v1
- name: Setup Git
shell: bash
run: |
git config --global core.autocrlf false
git config --global core.longpaths true
git fetch --prune --unshallow
git submodule -q update --init --recursive
- name: Setup NuGet Cache
- name: NuGet Setup Cache
uses: actions/cache@v2
id: nuget-cache
with:
@ -90,60 +109,94 @@ jobs:
key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj', '**/*.props', '**/*.targets') }}
restore-keys: ${{ runner.os }}-nuget-
- name: Build
- name: DotNet Setup Preview
if: ${{ matrix.options.sdk-preview == true }}
uses: actions/setup-dotnet@v1
with:
dotnet-version: ${{ matrix.options.sdk }}
include-prerelease: true
- name: DotNet Build
if: ${{ matrix.options.sdk-preview != true }}
shell: pwsh
run: ./ci-build.ps1 "${{matrix.options.framework}}"
env:
SIXLABORS_TESTING: True
- name: Test
- name: DotNet Build Preview
if: ${{ matrix.options.sdk-preview == true }}
shell: pwsh
run: ./ci-build.ps1 "${{matrix.options.framework}}"
env:
SIXLABORS_TESTING_PREVIEW: True
- name: DotNet Test
if: ${{ matrix.options.sdk-preview != true }}
shell: pwsh
run: ./ci-test.ps1 "${{matrix.options.os}}" "${{matrix.options.framework}}" "${{matrix.options.runtime}}" "${{matrix.options.codecov}}"
env:
SIXLABORS_TESTING: True
XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit
SIXLABORS_TESTING: True
XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit
- name: DotNet Test Preview
if: ${{ matrix.options.sdk-preview == true }}
shell: pwsh
run: ./ci-test.ps1 "${{matrix.options.os}}" "${{matrix.options.framework}}" "${{matrix.options.runtime}}" "${{matrix.options.codecov}}"
env:
SIXLABORS_TESTING_PREVIEW: True
XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit
- name: Export Failed Output
uses: actions/upload-artifact@v2
if: failure()
with:
name: actual_output_${{ runner.os }}_${{ matrix.options.framework }}${{ matrix.options.runtime }}.zip
path: tests/Images/ActualOutput/
name: actual_output_${{ runner.os }}_${{ matrix.options.framework }}${{ matrix.options.runtime }}.zip
path: tests/Images/ActualOutput/
- name: Update Codecov
- name: Codecov Update
uses: codecov/codecov-action@v1
if: matrix.options.codecov == true && startsWith(github.repository, 'SixLabors')
with:
flags: unittests
flags: unittests
Publish:
needs: [Build]
runs-on: windows-latest
runs-on: ubuntu-latest
if: (github.event_name == 'push')
steps:
- uses: actions/checkout@v2
- name: Install NuGet
uses: NuGet/setup-nuget@v1
- name: Setup Git
- name: Git Config
shell: bash
run: |
git config --global core.autocrlf false
git config --global core.longpaths true
git fetch --prune --unshallow
git submodule -q update --init --recursive
- name: Pack
- name: Git Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
submodules: recursive
- name: NuGet Install
uses: NuGet/setup-nuget@v1
- name: NuGet Setup Cache
uses: actions/cache@v2
id: nuget-cache
with:
path: ~/.nuget
key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj', '**/*.props', '**/*.targets') }}
restore-keys: ${{ runner.os }}-nuget-
- name: DotNet Pack
shell: pwsh
run: ./ci-pack.ps1
- name: Publish to MyGet
- name: MyGet Publish
shell: pwsh
run: |
nuget.exe push .\artifacts\*.nupkg ${{secrets.MYGET_TOKEN}} -Source https://www.myget.org/F/sixlabors/api/v2/package
nuget.exe push .\artifacts\*.snupkg ${{secrets.MYGET_TOKEN}} -Source https://www.myget.org/F/sixlabors/api/v3/index.json
dotnet nuget push .\artifacts\*.nupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v2/package
dotnet nuget push .\artifacts\*.snupkg -k ${{secrets.MYGET_TOKEN}} -s https://www.myget.org/F/sixlabors/api/v3/index.json
# TODO: If github.ref starts with 'refs/tags' then it was a tag push and we can optionally push the package out to nuget.org

4
.gitignore

@ -223,3 +223,7 @@ artifacts/
**/Images/ReferenceOutput
**/Images/Input/MemoryStress
.DS_Store
#lfs
hooks/**
lfs/**

5
Directory.Build.props

@ -18,6 +18,11 @@
<!-- Import the shared global .props file -->
<Import Project="$(MSBuildThisFileDirectory)shared-infrastructure\msbuild\props\SixLabors.Global.props" />
<PropertyGroup Condition="$(SIXLABORS_TESTING_PREVIEW) == true">
<!-- Work around various issues bound to implicit language features. -->
<LangVersion>preview</LangVersion>
</PropertyGroup>
<!--
Ensure all custom build configurations based upon "Release" are optimized.
This is easier than setting each project individually.

1
ImageSharp.sln

@ -403,6 +403,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "issues", "issues", "{670DD4
tests\Images\Input\Png\issues\Issue_1127.png = tests\Images\Input\Png\issues\Issue_1127.png
tests\Images\Input\Png\issues\Issue_1177_1.png = tests\Images\Input\Png\issues\Issue_1177_1.png
tests\Images\Input\Png\issues\Issue_1177_2.png = tests\Images\Input\Png\issues\Issue_1177_2.png
tests\Images\Input\Png\issues\Issue_1765_Net6DeflateStreamRead.png = tests\Images\Input\Png\issues\Issue_1765_Net6DeflateStreamRead.png
tests\Images\Input\Png\issues\Issue_410.png = tests\Images\Input\Png\issues\Issue_410.png
tests\Images\Input\Png\issues\Issue_935.png = tests\Images\Input\Png\issues\Issue_935.png
EndProjectSection

2
shared-infrastructure

@ -1 +1 @@
Subproject commit 9b94ebc4be9b7a8d7620c257e6ee485455973332
Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3

1
src/ImageSharp/Advanced/ParallelExecutionSettings.cs

@ -3,7 +3,6 @@
using System;
using System.Threading.Tasks;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Advanced

3
src/ImageSharp/Color/Color.Conversions.cs

@ -3,7 +3,6 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
@ -95,4 +94,4 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
internal Vector4 ToVector4() => this.data.ToVector4();
}
}
}

2
src/ImageSharp/Color/Color.WebSafePalette.cs

@ -163,4 +163,4 @@ namespace SixLabors.ImageSharp
YellowGreen
};
}
}
}

1
src/ImageSharp/Color/Color.cs

@ -5,7 +5,6 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp

2
src/ImageSharp/ColorSpaces/CieLab.cs

@ -136,4 +136,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/CieLch.cs

@ -162,4 +162,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
return result;
}
}
}
}

2
src/ImageSharp/ColorSpaces/CieLchuv.cs

@ -157,4 +157,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
return result;
}
}
}
}

2
src/ImageSharp/ColorSpaces/CieLuv.cs

@ -137,4 +137,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/CieXyy.cs

@ -100,4 +100,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.Yl.Equals(other.Yl);
}
}
}
}

2
src/ImageSharp/ColorSpaces/CieXyz.cs

@ -103,4 +103,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.Z.Equals(other.Z);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Cmyk.cs

@ -108,4 +108,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.K.Equals(other.K);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Companding/GammaCompanding.cs

@ -33,4 +33,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
[MethodImpl(InliningOptions.ShortMethod)]
public static float Compress(float channel, float gamma) => MathF.Pow(channel, 1 / gamma);
}
}
}

2
src/ImageSharp/ColorSpaces/Companding/Rec2020Companding.cs

@ -38,4 +38,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
public static float Compress(float channel)
=> channel < Beta ? 4.5F * channel : (Alpha * MathF.Pow(channel, 0.45F)) - AlphaMinusOne;
}
}
}

2
src/ImageSharp/ColorSpaces/Companding/Rec709Companding.cs

@ -34,4 +34,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
public static float Compress(float channel)
=> channel < 0.018F ? 4.5F * channel : (1.099F * MathF.Pow(channel, 0.45F)) - 0.099F;
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/CieConstants.cs

@ -19,4 +19,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
/// </summary>
public const float Kappa = 903.2963F;
}
}
}

4
src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.HunterLab.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@ -429,4 +429,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return this.ToHunterLab(xyzColor);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/ColorSpaceConverter.Lms.cs

@ -424,4 +424,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return this.ToLms(xyzColor);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CIeLchToCieLabConverter.cs

@ -30,4 +30,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieLab(l, a, b, input.WhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndCieXyyConverter.cs

@ -51,4 +51,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieXyz(x, y, z);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndHunterLabConverterBase.cs

@ -42,4 +42,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return 100F * (70F / 218.11F) * (whitePoint.Y + whitePoint.Z);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzAndLmsConverter.cs

@ -67,4 +67,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieXyz(vector);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLabConverter.cs

@ -54,4 +54,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new CieLab(l, a, b, this.LabWhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToCieLuvConverter.cs

@ -85,4 +85,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
private static float ComputeVp(in CieXyz input)
=> (9 * input.Y) / (input.X + (15 * input.Y) + (3 * input.Z));
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToHunterLabConverter.cs

@ -64,4 +64,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new HunterLab(l, a, b, this.HunterLabWhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieXyzToLinearRgbConverter.cs

@ -53,4 +53,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new LinearRgb(vector, this.TargetWorkingSpace);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CmykAndRgbConverter.cs

@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new Cmyk(cmy.X, cmy.Y, cmy.Z, k.X);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/YCbCrAndRgbConverter.cs

@ -54,4 +54,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
return new YCbCr(y, cb, cr);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Conversion/Implementation/IChromaticAdaptation.cs

@ -36,4 +36,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
CieXyz sourceWhitePoint,
in CieXyz destinationWhitePoint);
}
}
}

2
src/ImageSharp/ColorSpaces/Hsl.cs

@ -101,4 +101,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.L.Equals(other.L);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Hsv.cs

@ -99,4 +99,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.V.Equals(other.V);
}
}
}
}

2
src/ImageSharp/ColorSpaces/HunterLab.cs

@ -135,4 +135,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.WhitePoint.Equals(other.WhitePoint);
}
}
}
}

2
src/ImageSharp/ColorSpaces/Illuminants.cs

@ -69,4 +69,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
/// </summary>
public static readonly CieXyz F11 = new CieXyz(1.00962F, 1F, 0.64350F);
}
}
}

2
src/ImageSharp/ColorSpaces/Lms.cs

@ -104,4 +104,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
&& this.S.Equals(other.S);
}
}
}
}

2
src/ImageSharp/Common/Constants.cs

@ -18,4 +18,4 @@ namespace SixLabors.ImageSharp
/// </summary>
public static readonly float EpsilonSquared = Epsilon * Epsilon;
}
}
}

4
src/ImageSharp/Common/Helpers/InliningOptions.cs

@ -12,6 +12,10 @@ namespace SixLabors.ImageSharp
/// </summary>
internal static class InliningOptions
{
/// <summary>
/// <see cref="MethodImplOptions.AggressiveInlining"/> regardless of the build conditions.
/// </summary>
public const MethodImplOptions AlwaysInline = MethodImplOptions.AggressiveInlining;
#if PROFILING
public const MethodImplOptions HotPath = MethodImplOptions.NoInlining;
public const MethodImplOptions ShortMethod = MethodImplOptions.NoInlining;

145
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -537,7 +537,7 @@ namespace SixLabors.ImageSharp
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(InliningOptions.AlwaysInline)]
public static Vector256<float> MultiplyAdd(
in Vector256<float> va,
in Vector256<float> vm0,
@ -622,90 +622,89 @@ namespace SixLabors.ImageSharp
ReadOnlySpan<byte> source,
Span<float> dest)
{
if (Avx2.IsSupported)
fixed (byte* sourceBase = source)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
int n = dest.Length / Vector256<byte>.Count;
if (Avx2.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
int n = dest.Length / Vector256<byte>.Count;
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
var scale = Vector256.Create(1 / (float)byte.MaxValue);
var scale = Vector256.Create(1 / (float)byte.MaxValue);
for (int i = 0; i < n; i++)
{
int si = Vector256<byte>.Count * i;
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
for (int i = 0; i < n; i++)
{
int si = Vector256<byte>.Count * i;
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
}
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
int n = dest.Length / Vector128<byte>.Count;
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
int n = dest.Length / Vector128<byte>.Count;
var scale = Vector128.Create(1 / (float)byte.MaxValue);
Vector128<byte> zero = Vector128<byte>.Zero;
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++)
{
int si = Vector128<byte>.Count * i;
var scale = Vector128.Create(1 / (float)byte.MaxValue);
Vector128<byte> zero = Vector128<byte>.Zero;
Vector128<int> i0, i1, i2, i3;
if (Sse41.IsSupported)
{
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
}
else
for (int i = 0; i < n; i++)
{
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
int si = Vector128<byte>.Count * i;
Vector128<int> i0, i1, i2, i3;
if (Sse41.IsSupported)
{
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
}
else
{
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
}
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
}

4
src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs

@ -5,9 +5,7 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
@ -203,4 +201,4 @@ namespace SixLabors.ImageSharp
}
}
}
}
}

170
src/ImageSharp/Compression/Zlib/Adler32.cs

@ -91,115 +91,117 @@ namespace SixLabors.ImageSharp.Compression.Zlib
int index = 0;
fixed (byte* bufferPtr = buffer)
fixed (byte* tapPtr = Tap1Tap2)
{
index += (int)blocks * BLOCK_SIZE;
var localBufferPtr = bufferPtr;
// _mm_setr_epi8 on x86
Vector128<sbyte> tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
Vector128<sbyte> tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
Vector128<byte> zero = Vector128<byte>.Zero;
var ones = Vector128.Create((short)1);
while (blocks > 0)
fixed (byte* tapPtr = Tap1Tap2)
{
uint n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
if (n > blocks)
{
n = blocks;
}
index += (int)blocks * BLOCK_SIZE;
var localBufferPtr = bufferPtr;
blocks -= n;
// _mm_setr_epi8 on x86
Vector128<sbyte> tap1 = Sse2.LoadVector128((sbyte*)tapPtr);
Vector128<sbyte> tap2 = Sse2.LoadVector128((sbyte*)(tapPtr + 0x10));
Vector128<byte> zero = Vector128<byte>.Zero;
var ones = Vector128.Create((short)1);
// Process n blocks of data. At most NMAX data bytes can be
// processed before s2 must be reduced modulo BASE.
Vector128<uint> v_ps = Vector128.CreateScalar(s1 * n);
Vector128<uint> v_s2 = Vector128.CreateScalar(s2);
Vector128<uint> v_s1 = Vector128<uint>.Zero;
do
while (blocks > 0)
{
// Load 32 input bytes.
Vector128<byte> bytes1 = Sse3.LoadDquVector128(localBufferPtr);
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
uint n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
if (n > blocks)
{
n = blocks;
}
// Add previous block byte sum to v_ps.
v_ps = Sse2.Add(v_ps, v_s1);
blocks -= n;
// Horizontally add the bytes for s1, and multiply-add the
// bytes by [ 32, 31, 30, ... ] for s2.
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
Vector128<short> mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
// Process n blocks of data. At most NMAX data bytes can be
// processed before s2 must be reduced modulo BASE.
Vector128<uint> v_ps = Vector128.CreateScalar(s1 * n);
Vector128<uint> v_s2 = Vector128.CreateScalar(s2);
Vector128<uint> v_s1 = Vector128<uint>.Zero;
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
Vector128<short> mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
do
{
// Load 32 input bytes.
Vector128<byte> bytes1 = Sse3.LoadDquVector128(localBufferPtr);
Vector128<byte> bytes2 = Sse3.LoadDquVector128(localBufferPtr + 0x10);
localBufferPtr += BLOCK_SIZE;
}
while (--n > 0);
// Add previous block byte sum to v_ps.
v_ps = Sse2.Add(v_ps, v_s1);
v_s2 = Sse2.Add(v_s2, Sse2.ShiftLeftLogical(v_ps, 5));
// Horizontally add the bytes for s1, multiply-adds the
// bytes by [ 32, 31, 30, ... ] for s2.
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes1, zero).AsUInt32());
Vector128<short> mad1 = Ssse3.MultiplyAddAdjacent(bytes1, tap1);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad1, ones).AsUInt32());
// Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
const byte S2301 = 0b1011_0001; // A B C D -> B A D C
const byte S1032 = 0b0100_1110; // A B C D -> C D A B
v_s1 = Sse2.Add(v_s1, Sse2.SumAbsoluteDifferences(bytes2, zero).AsUInt32());
Vector128<short> mad2 = Ssse3.MultiplyAddAdjacent(bytes2, tap2);
v_s2 = Sse2.Add(v_s2, Sse2.MultiplyAddAdjacent(mad2, ones).AsUInt32());
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
localBufferPtr += BLOCK_SIZE;
}
while (--n > 0);
s1 += v_s1.ToScalar();
v_s2 = Sse2.Add(v_s2, Sse2.ShiftLeftLogical(v_ps, 5));
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
// Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
const byte S2301 = 0b1011_0001; // A B C D -> B A D C
const byte S1032 = 0b0100_1110; // A B C D -> C D A B
s2 = v_s2.ToScalar();
v_s1 = Sse2.Add(v_s1, Sse2.Shuffle(v_s1, S1032));
// Reduce.
s1 %= BASE;
s2 %= BASE;
}
s1 += v_s1.ToScalar();
if (length > 0)
{
if (length >= 16)
{
s2 += s1 += localBufferPtr[0];
s2 += s1 += localBufferPtr[1];
s2 += s1 += localBufferPtr[2];
s2 += s1 += localBufferPtr[3];
s2 += s1 += localBufferPtr[4];
s2 += s1 += localBufferPtr[5];
s2 += s1 += localBufferPtr[6];
s2 += s1 += localBufferPtr[7];
s2 += s1 += localBufferPtr[8];
s2 += s1 += localBufferPtr[9];
s2 += s1 += localBufferPtr[10];
s2 += s1 += localBufferPtr[11];
s2 += s1 += localBufferPtr[12];
s2 += s1 += localBufferPtr[13];
s2 += s1 += localBufferPtr[14];
s2 += s1 += localBufferPtr[15];
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S2301));
v_s2 = Sse2.Add(v_s2, Sse2.Shuffle(v_s2, S1032));
localBufferPtr += 16;
length -= 16;
}
s2 = v_s2.ToScalar();
while (length-- > 0)
{
s2 += s1 += *localBufferPtr++;
// Reduce.
s1 %= BASE;
s2 %= BASE;
}
if (s1 >= BASE)
if (length > 0)
{
s1 -= BASE;
if (length >= 16)
{
s2 += s1 += localBufferPtr[0];
s2 += s1 += localBufferPtr[1];
s2 += s1 += localBufferPtr[2];
s2 += s1 += localBufferPtr[3];
s2 += s1 += localBufferPtr[4];
s2 += s1 += localBufferPtr[5];
s2 += s1 += localBufferPtr[6];
s2 += s1 += localBufferPtr[7];
s2 += s1 += localBufferPtr[8];
s2 += s1 += localBufferPtr[9];
s2 += s1 += localBufferPtr[10];
s2 += s1 += localBufferPtr[11];
s2 += s1 += localBufferPtr[12];
s2 += s1 += localBufferPtr[13];
s2 += s1 += localBufferPtr[14];
s2 += s1 += localBufferPtr[15];
localBufferPtr += 16;
length -= 16;
}
while (length-- > 0)
{
s2 += s1 += *localBufferPtr++;
}
if (s1 >= BASE)
{
s1 -= BASE;
}
s2 %= BASE;
}
s2 %= BASE;
return s1 | (s2 << 16);
}
return s1 | (s2 << 16);
}
}
#endif

180
src/ImageSharp/Compression/Zlib/Crc32.cs

@ -83,117 +83,119 @@ namespace SixLabors.ImageSharp.Compression.Zlib
int length = chunksize;
fixed (byte* bufferPtr = buffer)
fixed (ulong* k05PolyPtr = K05Poly)
{
byte* localBufferPtr = bufferPtr;
ulong* localK05PolyPtr = k05PolyPtr;
// There's at least one block of 64.
Vector128<ulong> x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
Vector128<ulong> x2 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
Vector128<ulong> x3 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
Vector128<ulong> x4 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
Vector128<ulong> x5;
x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());
// k1, k2
Vector128<ulong> x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
localBufferPtr += 64;
length -= 64;
// Parallel fold blocks of 64, if any.
while (length >= 64)
fixed (ulong* k05PolyPtr = K05Poly)
{
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
Vector128<ulong> x6 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
Vector128<ulong> x7 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x00);
Vector128<ulong> x8 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x11);
x3 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x11);
x4 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x11);
byte* localBufferPtr = bufferPtr;
ulong* localK05PolyPtr = k05PolyPtr;
Vector128<ulong> y5 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
Vector128<ulong> y6 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
Vector128<ulong> y7 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
Vector128<ulong> y8 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
// There's at least one block of 64.
Vector128<ulong> x1 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
Vector128<ulong> x2 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
Vector128<ulong> x3 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
Vector128<ulong> x4 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
Vector128<ulong> x5;
x1 = Sse2.Xor(x1, x5);
x2 = Sse2.Xor(x2, x6);
x3 = Sse2.Xor(x3, x7);
x4 = Sse2.Xor(x4, x8);
x1 = Sse2.Xor(x1, Sse2.ConvertScalarToVector128UInt32(crc).AsUInt64());
x1 = Sse2.Xor(x1, y5);
x2 = Sse2.Xor(x2, y6);
x3 = Sse2.Xor(x3, y7);
x4 = Sse2.Xor(x4, y8);
// k1, k2
Vector128<ulong> x0 = Sse2.LoadVector128(localK05PolyPtr + 0x0);
localBufferPtr += 64;
length -= 64;
}
// Fold into 128-bits.
// k3, k4
x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x2);
x1 = Sse2.Xor(x1, x5);
// Parallel fold blocks of 64, if any.
while (length >= 64)
{
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
Vector128<ulong> x6 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
Vector128<ulong> x7 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x00);
Vector128<ulong> x8 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x11);
x3 = Pclmulqdq.CarrylessMultiply(x3, x0, 0x11);
x4 = Pclmulqdq.CarrylessMultiply(x4, x0, 0x11);
Vector128<ulong> y5 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x00));
Vector128<ulong> y6 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x10));
Vector128<ulong> y7 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x20));
Vector128<ulong> y8 = Sse2.LoadVector128((ulong*)(localBufferPtr + 0x30));
x1 = Sse2.Xor(x1, x5);
x2 = Sse2.Xor(x2, x6);
x3 = Sse2.Xor(x3, x7);
x4 = Sse2.Xor(x4, x8);
x1 = Sse2.Xor(x1, y5);
x2 = Sse2.Xor(x2, y6);
x3 = Sse2.Xor(x3, y7);
x4 = Sse2.Xor(x4, y8);
localBufferPtr += 64;
length -= 64;
}
// Fold into 128-bits.
// k3, k4
x0 = Sse2.LoadVector128(k05PolyPtr + 0x2);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x3);
x1 = Sse2.Xor(x1, x5);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x4);
x1 = Sse2.Xor(x1, x5);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x2);
x1 = Sse2.Xor(x1, x5);
// Single fold blocks of 16, if any.
while (length >= 16)
{
x2 = Sse2.LoadVector128((ulong*)localBufferPtr);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x3);
x1 = Sse2.Xor(x1, x5);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x2);
x1 = Sse2.Xor(x1, x4);
x1 = Sse2.Xor(x1, x5);
localBufferPtr += 16;
length -= 16;
}
// Single fold blocks of 16, if any.
while (length >= 16)
{
x2 = Sse2.LoadVector128((ulong*)localBufferPtr);
x5 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x11);
x1 = Sse2.Xor(x1, x2);
x1 = Sse2.Xor(x1, x5);
// Fold 128 - bits to 64 - bits.
x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
x1 = Sse2.Xor(x1, x2);
localBufferPtr += 16;
length -= 16;
}
// k5, k0
x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);
// Fold 128 - bits to 64 - bits.
x2 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x10);
x3 = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); // _mm_setr_epi32 on x86
x1 = Sse2.ShiftRightLogical128BitLane(x1, 8);
x1 = Sse2.Xor(x1, x2);
x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
x1 = Sse2.And(x1, x3);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Sse2.Xor(x1, x2);
// k5, k0
x0 = Sse2.LoadScalarVector128(localK05PolyPtr + 0x4);
// Barrett reduce to 32-bits.
// polynomial
x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);
x2 = Sse2.ShiftRightLogical128BitLane(x1, 4);
x1 = Sse2.And(x1, x3);
x1 = Pclmulqdq.CarrylessMultiply(x1, x0, 0x00);
x1 = Sse2.Xor(x1, x2);
x2 = Sse2.And(x1, x3);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
x2 = Sse2.And(x2, x3);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
x1 = Sse2.Xor(x1, x2);
// Barrett reduce to 32-bits.
// polynomial
x0 = Sse2.LoadVector128(localK05PolyPtr + 0x6);
crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
return buffer.Length - chunksize == 0 ? crc : CalculateScalar(crc, buffer.Slice(chunksize));
x2 = Sse2.And(x1, x3);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x10);
x2 = Sse2.And(x2, x3);
x2 = Pclmulqdq.CarrylessMultiply(x2, x0, 0x00);
x1 = Sse2.Xor(x1, x2);
crc = (uint)Sse41.Extract(x1.AsInt32(), 1);
return buffer.Length - chunksize == 0 ? crc : CalculateScalar(crc, buffer.Slice(chunksize));
}
}
}
#endif

2
src/ImageSharp/Formats/Bmp/BmpConfigurationModule.cs

@ -16,4 +16,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
configuration.ImageFormatsManager.AddImageFormatDetector(new BmpImageFormatDetector());
}
}
}
}

2
src/ImageSharp/Formats/Bmp/BmpConstants.cs

@ -56,4 +56,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
public const int Pointer = 0x5450;
}
}
}
}

2
src/ImageSharp/Formats/Bmp/BmpFormat.cs

@ -34,4 +34,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
/// <inheritdoc/>
public BmpMetadata CreateDefaultFormatMetadata() => new BmpMetadata();
}
}
}

2
src/ImageSharp/Formats/Bmp/BmpMetadata.cs

@ -40,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
// TODO: Colors used once we support encoding palette bmps.
}
}
}

2
src/ImageSharp/Formats/Bmp/IBmpDecoderOptions.cs

@ -13,4 +13,4 @@ namespace SixLabors.ImageSharp.Formats.Bmp
/// </summary>
RleSkippedPixelHandling RleSkippedPixelHandling { get; }
}
}
}

2
src/ImageSharp/Formats/Gif/GifConfigurationModule.cs

@ -16,4 +16,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
configuration.ImageFormatsManager.AddImageFormatDetector(new GifImageFormatDetector());
}
}
}
}

1
src/ImageSharp/Formats/Gif/GifDecoderCore.cs

@ -8,7 +8,6 @@ using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using SixLabors.ImageSharp.IO;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Metadata;

2
src/ImageSharp/Formats/Gif/GifDisposalMethod.cs

@ -35,4 +35,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
/// </summary>
RestoreToPrevious = 3
}
}
}

2
src/ImageSharp/Formats/Gif/GifFormat.cs

@ -37,4 +37,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
/// <inheritdoc/>
public GifFrameMetadata CreateDefaultFormatFrameMetadata() => new GifFrameMetadata();
}
}
}

2
src/ImageSharp/Formats/Gif/GifImageFormatDetector.cs

@ -30,4 +30,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
header[5] == 0x61; // a
}
}
}
}

1
src/ImageSharp/Formats/Gif/LzwEncoder.cs

@ -6,7 +6,6 @@ using System.Buffers;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Gif

2
src/ImageSharp/Formats/Gif/Sections/GifGraphicControlExtension.cs

@ -103,4 +103,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
}
}

2
src/ImageSharp/Formats/Gif/Sections/GifImageDescriptor.cs

@ -113,4 +113,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
}
}

2
src/ImageSharp/Formats/Gif/Sections/GifLogicalScreenDescriptor.cs

@ -130,4 +130,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
return value;
}
}
}
}

2
src/ImageSharp/Formats/Gif/Sections/IGifExtension.cs

@ -22,4 +22,4 @@ namespace SixLabors.ImageSharp.Formats.Gif
/// <returns>The number of bytes written to the buffer.</returns>
int WriteTo(Span<byte> buffer);
}
}
}

2
src/ImageSharp/Formats/IImageFormat.cs

@ -60,4 +60,4 @@ namespace SixLabors.ImageSharp.Formats
/// <returns>The <typeparamref name="TFormatFrameMetadata"/>.</returns>
TFormatFrameMetadata CreateDefaultFormatFrameMetadata();
}
}
}

193
src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs

@ -2,17 +2,22 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Represents a Jpeg block with <see cref="short"/> coefficients.
/// 8x8 matrix of <see cref="short"/> coefficients.
/// </summary>
// ReSharper disable once InconsistentNaming
[StructLayout(LayoutKind.Explicit)]
internal unsafe struct Block8x8 : IEquatable<Block8x8>
{
/// <summary>
@ -20,24 +25,44 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
public const int Size = 64;
#pragma warning disable IDE0051 // Remove unused private member
/// <summary>
/// A fixed size buffer holding the values.
/// See: <see>
/// <cref>https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/unsafe-code-pointers/fixed-size-buffers</cref>
/// </see>
/// A placeholder buffer so the actual struct occupies exactly 64 * 2 bytes.
/// </summary>
/// <remarks>
/// This is not used directly in the code.
/// </remarks>
[FieldOffset(0)]
private fixed short data[Size];
/// <summary>
/// Initializes a new instance of the <see cref="Block8x8"/> struct.
/// </summary>
/// <param name="coefficients">A <see cref="Span{T}"/> of coefficients</param>
public Block8x8(Span<short> coefficients)
{
ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(coefficients));
Unsafe.CopyBlock(ref selfRef, ref sourceRef, Size * sizeof(short));
}
#pragma warning restore IDE0051
#if SUPPORTS_RUNTIME_INTRINSICS
[FieldOffset(0)]
public Vector128<short> V0;
[FieldOffset(16)]
public Vector128<short> V1;
[FieldOffset(32)]
public Vector128<short> V2;
[FieldOffset(48)]
public Vector128<short> V3;
[FieldOffset(64)]
public Vector128<short> V4;
[FieldOffset(80)]
public Vector128<short> V5;
[FieldOffset(96)]
public Vector128<short> V6;
[FieldOffset(112)]
public Vector128<short> V7;
[FieldOffset(0)]
public Vector256<short> V01;
[FieldOffset(32)]
public Vector256<short> V23;
[FieldOffset(64)]
public Vector256<short> V45;
[FieldOffset(96)]
public Vector256<short> V67;
#endif
/// <summary>
/// Gets or sets a <see cref="short"/> value at the given index
@ -49,7 +74,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
return Unsafe.Add(ref selfRef, idx);
}
@ -57,7 +83,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
Unsafe.Add(ref selfRef, idx) = value;
}
@ -75,15 +102,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
set => this[(y * 8) + x] = value;
}
public static bool operator ==(Block8x8 left, Block8x8 right)
{
return left.Equals(right);
}
public static bool operator ==(Block8x8 left, Block8x8 right) => left.Equals(right);
public static bool operator !=(Block8x8 left, Block8x8 right)
{
return !left.Equals(right);
}
public static bool operator !=(Block8x8 left, Block8x8 right) => !left.Equals(right);
/// <summary>
/// Multiply all elements by a given <see cref="int"/>
@ -149,34 +170,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
/// <summary>
/// Pointer-based "Indexer" (getter part)
/// </summary>
/// <param name="blockPtr">Block pointer</param>
/// <param name="idx">Index</param>
/// <returns>The scaleVec value at the specified index</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short GetScalarAt(Block8x8* blockPtr, int idx)
{
GuardBlockIndex(idx);
short* fp = blockPtr->data;
return fp[idx];
}
/// <summary>
/// Pointer-based "Indexer" (setter part)
/// </summary>
/// <param name="blockPtr">Block pointer</param>
/// <param name="idx">Index</param>
/// <param name="value">Value</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void SetScalarAt(Block8x8* blockPtr, int idx, short value)
public static Block8x8 Load(Span<short> data)
{
GuardBlockIndex(idx);
short* fp = blockPtr->data;
fp[idx] = value;
Unsafe.SkipInit(out Block8x8 result);
result.LoadFrom(data);
return result;
}
/// <summary>
@ -194,7 +192,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
public short[] ToArray()
{
var result = new short[Size];
short[] result = new short[Size];
this.CopyTo(result);
return result;
}
@ -206,7 +204,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
ref byte destRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<short, byte>(destination));
Unsafe.CopyBlock(ref destRef, ref selfRef, Size * sizeof(short));
Unsafe.CopyBlockUnaligned(ref destRef, ref selfRef, Size * sizeof(short));
}
/// <summary>
@ -220,6 +218,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
/// <summary>
/// Copies raw 16bit integers from the start of <paramref name="source"/> into this block.
/// </summary>
/// <remarks>
/// Exactly <see cref="Size"/> * sizeof(short) bytes are copied; the span length is not checked here.
/// </remarks>
/// <param name="source">The span to read the coefficients from.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void LoadFrom(Span<short> source)
{
    // Reinterpret both ends as raw bytes so the whole block moves in a single unaligned copy.
    ref short firstCoefficient = ref MemoryMarshal.GetReference(source);
    ref byte blockBytes = ref Unsafe.As<Block8x8, byte>(ref this);
    ref byte sourceBytes = ref Unsafe.As<short, byte>(ref firstCoefficient);

    Unsafe.CopyBlockUnaligned(ref blockBytes, ref sourceBytes, Size * sizeof(short));
}
/// <summary>
/// Cast and copy <see cref="Size"/> <see cref="int"/>-s from the beginning of 'source' span.
/// </summary>
@ -231,13 +242,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <inheritdoc />
public override string ToString()
{
@ -271,15 +275,66 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <inheritdoc />
public override bool Equals(object obj)
{
return obj is Block8x8 other && this.Equals(other);
}
public override bool Equals(object obj) => obj is Block8x8 other && this.Equals(other);
/// <inheritdoc />
public override int GetHashCode()
public override int GetHashCode() => (this[0] * 31) + this[1];
/// <summary>
/// Returns index of the last non-zero element in given matrix.
/// </summary>
/// <returns>
/// Index of the last non-zero element. Returns -1 if all elements are equal to zero.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
public nint GetLastNonZeroIndex()
{
return (this[0] * 31) + this[1];
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
Vector256<short> zero16 = Vector256<short>.Zero;
ref Vector256<short> mcuStride = ref Unsafe.As<Block8x8, Vector256<short>>(ref this);
for (nint i = 3; i >= 0; i--)
{
int areEqual = Avx2.MoveMask(Avx2.CompareEqual(Unsafe.Add(ref mcuStride, i), zero16).AsByte());
if (areEqual != equalityMask)
{
// Each 2 bits represents comparison operation for each 2-byte element in input vectors
// LSB represents first element in the stride
// MSB represents last element in the stride
// lzcnt operation would calculate number of zero numbers at the end
// Given mask is not actually suitable for lzcnt as 1's represent zero elements and 0's represent non-zero elements
// So we need to invert it
int lzcnt = BitOperations.LeadingZeroCount(~(uint)areEqual);
// As input number is represented by 2 bits in the mask, we need to divide lzcnt result by 2
// to get the exact number of zero elements in the stride
int strideRelativeIndex = 15 - (lzcnt / 2);
return (i * 16) + strideRelativeIndex;
}
}
return -1;
}
else
#endif
{
nint index = Size - 1;
ref short elemRef = ref Unsafe.As<Block8x8, short>(ref this);
while (index >= 0 && Unsafe.Add(ref elemRef, index) == 0)
{
index--;
}
return index;
}
}
/// <summary>

149
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs

@ -0,0 +1,149 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
/// <summary>
/// Multiplies two blocks element-wise and stores the products in <paramref name="dest"/> as 16bit integers.
/// </summary>
/// <remarks>
/// Each product is converted to a 32bit integer and then packed to 16bit with signed saturation.
/// </remarks>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed products.</param>
private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    ref Vector256<float> leftRows = ref a.V0;
    ref Vector256<float> rightRows = ref b.V0;
    ref Vector256<short> packedRows = ref dest.V01;

    // Two source rows of 8 floats fill one destination vector of 16 shorts.
    for (nint rowIndex = 0; rowIndex < 8; rowIndex += 2)
    {
        Vector256<float> firstProduct = Avx.Multiply(
            Unsafe.Add(ref leftRows, rowIndex + 0),
            Unsafe.Add(ref rightRows, rowIndex + 0));
        Vector256<float> secondProduct = Avx.Multiply(
            Unsafe.Add(ref leftRows, rowIndex + 1),
            Unsafe.Add(ref rightRows, rowIndex + 1));

        Vector256<short> packed = Avx2.PackSignedSaturate(
            Avx.ConvertToVector256Int32(firstProduct),
            Avx.ConvertToVector256Int32(secondProduct));

        // The 256bit pack works per 128bit lane, so the two rows come out interleaved;
        // the permute puts the 32bit chunks back into row order.
        Vector256<short> ordered = Avx2.PermuteVar8x32(packed.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();

        Unsafe.Add(ref packedRows, (IntPtr)((uint)rowIndex / 2)) = ordered;
    }
}
/// <summary>
/// Multiplies two blocks element-wise and stores the products in <paramref name="dest"/> as 16bit integers,
/// working on 128bit vectors.
/// </summary>
/// <remarks>
/// Each product is converted to a 32bit integer and then packed to 16bit with signed saturation.
/// </remarks>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed products.</param>
private static void MultiplyIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");

    // View every block as a flat sequence of 128bit vectors.
    ref Vector128<float> leftHalves = ref Unsafe.As<Block8x8F, Vector128<float>>(ref a);
    ref Vector128<float> rightHalves = ref Unsafe.As<Block8x8F, Vector128<float>>(ref b);
    ref Vector128<short> packedRows = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);

    // Two vectors of 4 floats (one row) fill one destination vector of 8 shorts.
    for (int halfIndex = 0; halfIndex < 16; halfIndex += 2)
    {
        Vector128<float> lowProduct = Sse.Multiply(
            Unsafe.Add(ref leftHalves, halfIndex + 0),
            Unsafe.Add(ref rightHalves, halfIndex + 0));
        Vector128<float> highProduct = Sse.Multiply(
            Unsafe.Add(ref leftHalves, halfIndex + 1),
            Unsafe.Add(ref rightHalves, halfIndex + 1));

        Vector128<int> lowInts = Sse2.ConvertToVector128Int32(lowProduct);
        Vector128<int> highInts = Sse2.ConvertToVector128Int32(highProduct);

        Unsafe.Add(ref packedRows, (IntPtr)((uint)halfIndex / 2)) = Sse2.PackSignedSaturate(lowInts, highInts);
    }
}
/// <summary>
/// Transposes this block in place using AVX instructions.
/// </summary>
private void TransposeInplace_Avx()
{
    // Approach taken from:
    // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536

    // Gather phase: every source row is read into locals before any field is overwritten.
    // left0..left3 hold the left halves of rows 0..3 (lower lane) and rows 4..7 (upper lane).
    Vector256<float> left0 = Avx.InsertVector128(this.V0, Unsafe.As<Vector4, Vector128<float>>(ref this.V4L), 1);
    Vector256<float> left1 = Avx.InsertVector128(this.V1, Unsafe.As<Vector4, Vector128<float>>(ref this.V5L), 1);
    Vector256<float> left2 = Avx.InsertVector128(this.V2, Unsafe.As<Vector4, Vector128<float>>(ref this.V6L), 1);
    Vector256<float> left3 = Avx.InsertVector128(this.V3, Unsafe.As<Vector4, Vector128<float>>(ref this.V7L), 1);

    // right0..right3 hold the right halves of rows 0..3 (lower lane) and rows 4..7 (upper lane).
    Vector256<float> right0 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
        1);
    Vector256<float> right1 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
        1);
    Vector256<float> right2 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
        1);
    Vector256<float> right3 = Avx.InsertVector128(
        Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
        Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
        1);

    // Interleave neighbouring rows.
    Vector256<float> leftLow01 = Avx.UnpackLow(left0, left1);
    Vector256<float> leftHigh01 = Avx.UnpackHigh(left0, left1);
    Vector256<float> leftLow23 = Avx.UnpackLow(left2, left3);
    Vector256<float> leftHigh23 = Avx.UnpackHigh(left2, left3);
    Vector256<float> rightLow01 = Avx.UnpackLow(right0, right1);
    Vector256<float> rightHigh01 = Avx.UnpackHigh(right0, right1);
    Vector256<float> rightLow23 = Avx.UnpackLow(right2, right3);
    Vector256<float> rightHigh23 = Avx.UnpackHigh(right2, right3);

    // Combine the interleaved pairs into the final rows: one shuffle feeds two blends.
    Vector256<float> mixed01 = Avx.Shuffle(leftLow01, leftLow23, 0x4E);
    this.V0 = Avx.Blend(leftLow01, mixed01, 0xCC);
    this.V1 = Avx.Blend(leftLow23, mixed01, 0x33);

    Vector256<float> mixed23 = Avx.Shuffle(leftHigh01, leftHigh23, 0x4E);
    this.V2 = Avx.Blend(leftHigh01, mixed23, 0xCC);
    this.V3 = Avx.Blend(leftHigh23, mixed23, 0x33);

    Vector256<float> mixed45 = Avx.Shuffle(rightLow01, rightLow23, 0x4E);
    this.V4 = Avx.Blend(rightLow01, mixed45, 0xCC);
    this.V5 = Avx.Blend(rightLow23, mixed45, 0x33);

    Vector256<float> mixed67 = Avx.Shuffle(rightHigh01, rightHigh23, 0x4E);
    this.V6 = Avx.Blend(rightHigh01, mixed67, 0xCC);
    this.V7 = Avx.Blend(rightHigh23, mixed67, 0x33);
}
}
}
#endif

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;

439
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -16,7 +16,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Represents a Jpeg block with <see cref="float"/> coefficients.
/// 8x8 matrix of <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable<Block8x8F>
@ -66,30 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public Vector4 V7L;
[FieldOffset(240)]
public Vector4 V7R;
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
/// <summary>
@ -102,17 +78,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref float selfRef = ref Unsafe.As<Block8x8F, float>(ref this);
return Unsafe.Add(ref selfRef, idx);
return Unsafe.Add(ref selfRef, (nint)(uint)idx);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref float selfRef = ref Unsafe.As<Block8x8F, float>(ref this);
Unsafe.Add(ref selfRef, idx) = value;
Unsafe.Add(ref selfRef, (nint)(uint)idx) = value;
}
}
@ -188,13 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
/// <summary>
/// Fill the block with defaults (zeroes).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void Clear()
=> this = default; // The cheapest way to do this in C#:
/// <summary>
/// Load raw 32bit floating point data from source.
/// </summary>
@ -302,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray()
{
var result = new float[Size];
float[] result = new float[Size];
this.ScaledCopyTo(result);
return result;
}
@ -434,102 +403,37 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// Quantize the block.
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
/// </summary>
/// <param name="blockPtr">The block pointer.</param>
/// <param name="qtPtr">The qt pointer.</param>
/// <param name="unzigPtr">Unzig pointer</param>
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
{
float* b = (float*)blockPtr;
float* qtp = (float*)qtPtr;
for (int qtIndex = 0; qtIndex < Size; qtIndex++)
{
byte blockIndex = unzigPtr[qtIndex];
float* unzigPos = b + blockIndex;
float val = *unzigPos;
val *= qtp[qtIndex];
*unzigPos = val;
}
}
/// <summary>
/// Quantize 'block' into 'dest' using the 'qt' quantization table:
/// Unzig the elements of block into dest, while dividing them by elements of qt and "pre-rounding" the values.
/// To finish the rounding it's enough to (int)-cast these values.
/// </summary>
/// <param name="block">Source block</param>
/// <param name="dest">Destination block</param>
/// <param name="qt">The quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
public static unsafe void Quantize(
ref Block8x8F block,
ref Block8x8F dest,
ref Block8x8F qt,
ref ZigZag unZig)
/// <param name="block">Source block.</param>
/// <param name="dest">Destination block.</param>
/// <param name="qt">The quantization table.</param>
public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8F qt)
{
for (int zig = 0; zig < Size; zig++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
dest[zig] = block[unZig[zig]];
MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingAvx2(ref dest);
}
DivideRoundAll(ref dest, ref qt);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
else if (Ssse3.IsSupported)
{
var vnegOne = Vector256.Create(-1f);
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);
for (int i = 0; i < RowCount; i++)
{
ref Vector256<float> aRow = ref Unsafe.Add(ref a.V0, i);
ref Vector256<float> bRow = ref Unsafe.Add(ref b.V0, i);
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
}
MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingSsse3(ref dest);
}
else
#endif
{
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
for (int i = 0; i < Size; i++)
{
int idx = ZigZag.ZigZagOrder[i];
float quantizedVal = block[idx] * qt[idx];
quantizedVal += quantizedVal < 0 ? -0.5f : 0.5f;
dest[i] = (short)quantizedVal;
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
var neg = new Vector4(-1);
var add = new Vector4(.5F);
// sign(dividend) = max(min(dividend, 1), -1)
Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
return (dividend / divisor) + (sign * add);
}
public void RoundInto(ref Block8x8 dest)
{
for (int i = 0; i < Size; i++)
@ -627,6 +531,47 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Unsafe.Add(ref dRef, 7) = bottom;
}
/// <summary>
/// Compares entire 8x8 block to a single scalar value.
/// </summary>
/// <remarks>
/// Every element is converted to <see cref="int"/> (with truncation) before it is compared,
/// so the check is performed on integer values rather than on the raw floats.
/// </remarks>
/// <param name="value">Value to compare to.</param>
/// <returns>
/// <see langword="true"/> if every converted element equals <paramref name="value"/>;
/// otherwise <see langword="false"/>.
/// </returns>
public bool EqualsToScalar(int value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // All 32 mask bits set: every byte of the 256-bit comparison result signalled equality.
        const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);

        var targetVector = Vector256.Create(value);

        for (int i = 0; i < RowCount; i++)
        {
            Vector256<int> areEqual = Avx2.CompareEqual(Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref this.V0, i)), targetVector);
            if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask)
            {
                return false;
            }
        }

        return true;
    }
#endif
    {
        // Scalar fallback: view the block as a flat run of floats and compare one by one.
        ref float scalars = ref Unsafe.As<Block8x8F, float>(ref this);

        for (int i = 0; i < Size; i++)
        {
            if ((int)Unsafe.Add(ref scalars, i) != value)
            {
                return false;
            }
        }

        return true;
    }
}
/// <inheritdoc />
public bool Equals(Block8x8F other)
=> this.V0L == other.V0L
@ -663,213 +608,89 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return sb.ToString();
}
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="row"/>, clamps the sum to the range
/// [0, <paramref name="max"/>] and rounds the result via <c>FastRound()</c>.
/// </summary>
/// <param name="row">The values to normalize.</param>
/// <param name="off">The offset added to every element.</param>
/// <param name="max">The upper bound applied to every element.</param>
/// <returns>The offset, clamped and rounded values.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
    Vector<float> shifted = row + off;

    // Lower bound first, then upper bound - same order as the clamp is defined above.
    Vector<float> clamped = Vector.Min(Vector.Max(shifted, Vector<float>.Zero), max);

    return clamped.FastRound();
}
/// <summary>
/// Debug-only check that <paramref name="idx"/> lies in the range [0, <c>Size</c>).
/// Calls to this method are compiled only when the DEBUG symbol is defined.
/// </summary>
/// <param name="idx">The element index to validate.</param>
[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
    // The two bounds are independent; at most one of them can fail for a given index.
    DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
    DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
}
/// <summary>
/// Transpose the block into the destination block.
/// Transpose the block inplace.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeInplace()
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
1);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
1);
Vector256<float> r4 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
1);
Vector256<float> r5 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
1);
Vector256<float> r6 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
1);
Vector256<float> r7 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
1);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
d.V0 = Avx.Blend(t0, v, 0xCC);
d.V1 = Avx.Blend(t2, v, 0x33);
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
d.V4 = Avx.Blend(t4, v, 0xCC);
d.V5 = Avx.Blend(t6, v, 0x33);
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
d.V2 = Avx.Blend(t1, v, 0xCC);
d.V3 = Avx.Blend(t3, v, 0x33);
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
d.V6 = Avx.Blend(t5, v, 0xCC);
d.V7 = Avx.Blend(t7, v, 0x33);
this.TransposeInplace_Avx();
}
else
#endif
{
d.V0L.X = this.V0L.X;
d.V1L.X = this.V0L.Y;
d.V2L.X = this.V0L.Z;
d.V3L.X = this.V0L.W;
d.V4L.X = this.V0R.X;
d.V5L.X = this.V0R.Y;
d.V6L.X = this.V0R.Z;
d.V7L.X = this.V0R.W;
d.V0L.Y = this.V1L.X;
d.V1L.Y = this.V1L.Y;
d.V2L.Y = this.V1L.Z;
d.V3L.Y = this.V1L.W;
d.V4L.Y = this.V1R.X;
d.V5L.Y = this.V1R.Y;
d.V6L.Y = this.V1R.Z;
d.V7L.Y = this.V1R.W;
d.V0L.Z = this.V2L.X;
d.V1L.Z = this.V2L.Y;
d.V2L.Z = this.V2L.Z;
d.V3L.Z = this.V2L.W;
d.V4L.Z = this.V2R.X;
d.V5L.Z = this.V2R.Y;
d.V6L.Z = this.V2R.Z;
d.V7L.Z = this.V2R.W;
d.V0L.W = this.V3L.X;
d.V1L.W = this.V3L.Y;
d.V2L.W = this.V3L.Z;
d.V3L.W = this.V3L.W;
d.V4L.W = this.V3R.X;
d.V5L.W = this.V3R.Y;
d.V6L.W = this.V3R.Z;
d.V7L.W = this.V3R.W;
d.V0R.X = this.V4L.X;
d.V1R.X = this.V4L.Y;
d.V2R.X = this.V4L.Z;
d.V3R.X = this.V4L.W;
d.V4R.X = this.V4R.X;
d.V5R.X = this.V4R.Y;
d.V6R.X = this.V4R.Z;
d.V7R.X = this.V4R.W;
d.V0R.Y = this.V5L.X;
d.V1R.Y = this.V5L.Y;
d.V2R.Y = this.V5L.Z;
d.V3R.Y = this.V5L.W;
d.V4R.Y = this.V5R.X;
d.V5R.Y = this.V5R.Y;
d.V6R.Y = this.V5R.Z;
d.V7R.Y = this.V5R.W;
d.V0R.Z = this.V6L.X;
d.V1R.Z = this.V6L.Y;
d.V2R.Z = this.V6L.Z;
d.V3R.Z = this.V6L.W;
d.V4R.Z = this.V6R.X;
d.V5R.Z = this.V6R.Y;
d.V6R.Z = this.V6R.Z;
d.V7R.Z = this.V6R.W;
d.V0R.W = this.V7L.X;
d.V1R.W = this.V7L.Y;
d.V2R.W = this.V7L.Z;
d.V3R.W = this.V7L.W;
d.V4R.W = this.V7R.X;
d.V5R.W = this.V7R.Y;
d.V6R.W = this.V7R.Z;
d.V7R.W = this.V7R.W;
this.TransposeInplace_Scalar();
}
}
/// <summary>
/// Compares entire 8x8 block to a single scalar value.
/// Scalar inplace transpose implementation for <see cref="TransposeInplace"/>
/// </summary>
/// <param name="value">Value to compare to.</param>
public bool EqualsToScalar(int value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
[MethodImpl(InliningOptions.ShortMethod)]
private void TransposeInplace_Scalar()
{
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref this);
// row #0
Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8));
Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16));
Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24));
Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32));
Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40));
Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48));
Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56));
// row #1
Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17));
Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25));
Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33));
Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41));
Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49));
Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57));
// row #2
Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26));
Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34));
Swap(ref Unsafe.Add(ref elemRef, 21), ref Unsafe.Add(ref elemRef, 42));
Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50));
Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58));
// row #3
Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35));
Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43));
Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51));
Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59));
// row #4
Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44));
Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52));
Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60));
// row #5
Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53));
Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61));
// row #6
Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62));
static void Swap(ref float a, ref float b)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
var targetVector = Vector256.Create(value);
ref Vector256<float> blockStride = ref this.V0;
for (int i = 0; i < RowCount; i++)
{
Vector256<int> areEqual = Avx2.CompareEqual(Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref this.V0, i)), targetVector);
if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask)
{
return false;
}
}
return true;
float tmp = a;
a = b;
b = tmp;
}
#endif
{
ref float scalars = ref Unsafe.As<Block8x8F, float>(ref this);
for (int i = 0; i < Size; i++)
{
if ((int)Unsafe.Add(ref scalars, i) != value)
{
return false;
}
}
}
return true;
}
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="row"/>, clamps the sum to the range
/// [0, <paramref name="max"/>] and rounds the result via <c>FastRound()</c>.
/// </summary>
/// <param name="row">The values to normalize.</param>
/// <param name="off">The offset added to every element.</param>
/// <param name="max">The upper bound applied to every element.</param>
/// <returns>The offset, clamped and rounded values.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
    Vector<float> shifted = row + off;

    // Lower bound first, then upper bound - same order as the clamp is defined above.
    Vector<float> clamped = Vector.Min(Vector.Max(shifted, Vector<float>.Zero), max);

    return clamped.FastRound();
}
}
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/AdobeMarker.cs

@ -107,4 +107,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.ColorTransform);
}
}
}
}

47
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs

@ -22,60 +22,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> cBase =
ref Vector256<float> c0Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> mBase =
ref Vector256<float> c1Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> yBase =
ref Vector256<float> c2Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Vector256<float> c3Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
ref Vector256<float> c = ref Unsafe.Add(ref c0Base, i);
ref Vector256<float> m = ref Unsafe.Add(ref c1Base, i);
ref Vector256<float> y = ref Unsafe.Add(ref c2Base, i);
Vector256<float> k = Unsafe.Add(ref c3Base, i);
k = Avx.Multiply(k, scale);
c = Avx.Multiply(Avx.Multiply(c, k), scale);
m = Avx.Multiply(Avx.Multiply(m, k), scale);
y = Avx.Multiply(Avx.Multiply(y, k), scale);
Vector256<float> cmLo = Avx.UnpackLow(c, m);
Vector256<float> yoLo = Avx.UnpackLow(y, one);
Vector256<float> cmHi = Avx.UnpackHigh(c, m);
Vector256<float> yoHi = Avx.UnpackHigh(y, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromCmykBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}

45
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs

@ -15,38 +15,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
ConvertCore(values, result, this.MaximumValue);
}
public override void ConvertToRgbInplace(in ComponentValues values) =>
ConvertCoreInplace(values, this.MaximumValue);
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
internal static void ConvertCoreInplace(in ComponentValues values, float maxValue)
{
ReadOnlySpan<float> cVals = values.Component0;
ReadOnlySpan<float> mVals = values.Component1;
ReadOnlySpan<float> yVals = values.Component2;
ReadOnlySpan<float> kVals = values.Component3;
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
Span<float> c0 = values.Component0;
Span<float> c1 = values.Component1;
Span<float> c2 = values.Component2;
Span<float> c3 = values.Component3;
for (int i = 0; i < result.Length; i++)
float scale = 1 / maxValue;
for (int i = 0; i < c0.Length; i++)
{
float c = cVals[i];
float m = mVals[i];
float y = yVals[i];
float k = kVals[i] / maxValue;
v.X = c * k;
v.Y = m * k;
v.Z = y * k;
v.W = 1F;
v *= scale;
result[i] = v;
float c = c0[i];
float m = c1[i];
float y = c2[i];
float k = c3[i] / maxValue;
c0[i] = c * k * scale;
c1[i] = m * k * scale;
c2[i] = y * k * scale;
}
}
}

34
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs

@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector<float> cBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
@ -29,43 +29,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair cc = default;
Vector4Pair mm = default;
Vector4Pair yy = default;
ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);
var scale = new Vector<float>(1 / this.MaximumValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
Vector<float> c = Unsafe.Add(ref cBase, i);
Vector<float> m = Unsafe.Add(ref mBase, i);
Vector<float> y = Unsafe.Add(ref yBase, i);
ref Vector<float> c = ref Unsafe.Add(ref cBase, i);
ref Vector<float> m = ref Unsafe.Add(ref mBase, i);
ref Vector<float> y = ref Unsafe.Add(ref yBase, i);
Vector<float> k = Unsafe.Add(ref kBase, i) * scale;
c = (c * k) * scale;
m = (m * k) * scale;
y = (y * k) * scale;
ccRefAsVector = c;
mmRefAsVector = m;
yyRefAsVector = y;
// Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref cc, ref mm, ref yy);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromCmykBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}

32
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs

@ -22,42 +22,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> gBase =
ref Vector256<float> c0Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
Vector256<float> g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);
g = Avx2.PermuteVar8x32(g, vcontrol);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
ref Vector256<float> c0 = ref Unsafe.Add(ref c0Base, i);
c0 = Avx.Multiply(c0, scale);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromGrayscaleBasic.ScaleValues(values.Component0, this.MaximumValue);
}
}
}

38
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs

@ -17,25 +17,35 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
ConvertCore(values, result, this.MaximumValue);
}
public override void ConvertToRgbInplace(in ComponentValues values) =>
ScaleValues(values.Component0, this.MaximumValue);
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
internal static void ScaleValues(Span<float> values, float maxValue)
{
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
Span<Vector4> vecValues = MemoryMarshal.Cast<float, Vector4>(values);
ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
ref Vector4 dBase = ref MemoryMarshal.GetReference(result);
var scaleVector = new Vector4(1 / maxValue);
for (int i = 0; i < result.Length; i++)
for (int i = 0; i < vecValues.Length; i++)
{
var v = new Vector4(Unsafe.Add(ref sBase, i));
v.W = 1f;
v *= scale;
Unsafe.Add(ref dBase, i) = v;
vecValues[i] *= scaleVector;
}
values = values.Slice(vecValues.Length * 4);
if (!values.IsEmpty)
{
float scaleValue = 1f / maxValue;
values[0] *= scaleValue;
if ((uint)values.Length > 1)
{
values[1] *= scaleValue;
if ((uint)values.Length > 2)
{
values[2] *= scaleValue;
}
}
}
}
}

40
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> rBase =
@ -32,41 +32,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector256<float> bBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);
Vector256<float> rgLo = Avx.UnpackLow(r, g);
Vector256<float> boLo = Avx.UnpackLow(b, one);
Vector256<float> rgHi = Avx.UnpackHigh(r, g);
Vector256<float> boHi = Avx.UnpackHigh(b, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
ref Vector256<float> r = ref Unsafe.Add(ref rBase, i);
ref Vector256<float> g = ref Unsafe.Add(ref gBase, i);
ref Vector256<float> b = ref Unsafe.Add(ref bBase, i);
r = Avx.Multiply(r, scale);
g = Avx.Multiply(g, scale);
b = Avx.Multiply(b, scale);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromRgbBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}

33
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs

@ -3,6 +3,7 @@
using System;
using System.Numerics;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
@ -15,36 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
public override void ConvertToRgbInplace(in ComponentValues values)
{
ConvertCore(values, result, this.MaximumValue);
ConvertCoreInplace(values, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
internal static void ConvertCoreInplace(ComponentValues values, float maxValue)
{
ReadOnlySpan<float> rVals = values.Component0;
ReadOnlySpan<float> gVals = values.Component1;
ReadOnlySpan<float> bVals = values.Component2;
var v = new Vector4(0, 0, 0, 1);
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
{
float r = rVals[i];
float g = gVals[i];
float b = bVals[i];
v.X = r;
v.Y = g;
v.Z = b;
v *= scale;
result[i] = v;
}
FromGrayscaleBasic.ScaleValues(values.Component0, maxValue);
FromGrayscaleBasic.ScaleValues(values.Component1, maxValue);
FromGrayscaleBasic.ScaleValues(values.Component2, maxValue);
}
}
}

40
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs

@ -18,50 +18,32 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector<float> rBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> gBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> bBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
var scale = new Vector<float>(1 / this.MaximumValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
Vector<float> r = Unsafe.Add(ref rBase, i);
Vector<float> g = Unsafe.Add(ref gBase, i);
Vector<float> b = Unsafe.Add(ref bBase, i);
ref Vector<float> r = ref Unsafe.Add(ref rBase, i);
ref Vector<float> g = ref Unsafe.Add(ref gBase, i);
ref Vector<float> b = ref Unsafe.Add(ref bBase, i);
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromRgbBasic.ConvertCoreInplace(values, this.MaximumValue);
}
}
}

50
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs

@ -23,19 +23,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> c0Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Vector256<float> c1Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Vector256<float> c2Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-this.HalfValue);
var scale = Vector256.Create(1 / this.MaximumValue);
@ -50,19 +47,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
ref Vector256<float> c0 = ref Unsafe.Add(ref c0Base, i);
ref Vector256<float> c1 = ref Unsafe.Add(ref c1Base, i);
ref Vector256<float> c2 = ref Unsafe.Add(ref c2Base, i);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
Vector256<float> y = c0;
Vector256<float> cb = Avx.Add(c1, chromaOffset);
Vector256<float> cr = Avx.Add(c2, chromaOffset);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
@ -72,30 +69,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// TODO: We should be saving to RGBA not Vector4
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
c0 = r;
c1 = g;
c2 = b;
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
}
}

37
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs

@ -15,35 +15,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
public override void ConvertToRgbInplace(in ComponentValues values)
=> ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
internal static void ConvertCoreInplace(in ComponentValues values, float maxValue, float halfValue)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ReadOnlySpan<float> yVals = values.Component0;
ReadOnlySpan<float> cbVals = values.Component1;
ReadOnlySpan<float> crVals = values.Component2;
var v = new Vector4(0, 0, 0, 1);
Span<float> c0 = values.Component0;
Span<float> c1 = values.Component1;
Span<float> c2 = values.Component2;
var scale = new Vector4(1 / maxValue, 1 / maxValue, 1 / maxValue, 1F);
var scale = 1 / maxValue;
for (int i = 0; i < result.Length; i++)
for (int i = 0; i < c0.Length; i++)
{
float y = yVals[i];
float cb = cbVals[i] - halfValue;
float cr = crVals[i] - halfValue;
v.X = MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero);
v.Y = MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero);
v.Z = MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero);
v *= scale;
float y = c0[i];
float cb = c1[i] - halfValue;
float cr = c2[i] - halfValue;
result[i] = v;
c0[i] = MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero) * scale;
c1[i] = MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero) * scale;
c2[i] = MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero) * scale;
}
}
}

52
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs

@ -20,58 +20,54 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
protected override bool IsAvailable => SimdUtils.HasVector4;
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
// TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!");
DebugGuard.IsTrue(values.Component0.Length % 8 == 0, nameof(values), "Length should be divisible by 8!");
ref Vector4Pair yBase =
ref Vector4Pair c0Base =
ref Unsafe.As<float, Vector4Pair>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector4Pair cbBase =
ref Vector4Pair c1Base =
ref Unsafe.As<float, Vector4Pair>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector4Pair crBase =
ref Vector4Pair c2Base =
ref Unsafe.As<float, Vector4Pair>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector4(-this.HalfValue);
var maxValue = this.MaximumValue;
// Walking 8 elements at one step:
int n = result.Length / 8;
nint n = values.Component0.Length / 8;
for (int i = 0; i < n; i++)
for (nint i = 0; i < n; i++)
{
// y = yVals[i];
Vector4Pair y = Unsafe.Add(ref yBase, i);
ref Vector4Pair c0 = ref Unsafe.Add(ref c0Base, i);
// cb = cbVals[i] - halfValue);
Vector4Pair cb = Unsafe.Add(ref cbBase, i);
cb.AddInplace(chromaOffset);
ref Vector4Pair c1 = ref Unsafe.Add(ref c1Base, i);
c1.AddInplace(chromaOffset);
// cr = crVals[i] - halfValue;
Vector4Pair cr = Unsafe.Add(ref crBase, i);
cr.AddInplace(chromaOffset);
ref Vector4Pair c2 = ref Unsafe.Add(ref c2Base, i);
c2.AddInplace(chromaOffset);
// r = y + (1.402F * cr);
Vector4Pair r = y;
Vector4Pair tmp = cr;
Vector4Pair r = c0;
Vector4Pair tmp = c2;
tmp.MultiplyInplace(1.402F);
r.AddInplace(ref tmp);
// g = y - (0.344136F * cb) - (0.714136F * cr);
Vector4Pair g = y;
tmp = cb;
Vector4Pair g = c0;
tmp = c1;
tmp.MultiplyInplace(-0.344136F);
g.AddInplace(ref tmp);
tmp = cr;
tmp = c2;
tmp.MultiplyInplace(-0.714136F);
g.AddInplace(ref tmp);
// b = y + (1.772F * cb);
Vector4Pair b = y;
tmp = cb;
Vector4Pair b = c0;
tmp = c1;
tmp.MultiplyInplace(1.772F);
b.AddInplace(ref tmp);
@ -79,14 +75,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
g.RoundAndDownscalePreVector8(maxValue);
b.RoundAndDownscalePreVector8(maxValue);
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref r, ref g, ref b);
c0 = r;
c1 = g;
c2 = b;
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
protected override void ConvertCoreInplace(in ComponentValues values)
=> FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
}
}

47
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs

@ -19,41 +19,32 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector<float> yBase =
ref Vector<float> c0Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Vector<float> c1Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Vector<float> c2Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-this.HalfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
nint n = values.Component0.Length / 8;
var scale = new Vector<float>(1 / this.MaximumValue);
for (int i = 0; i < n; i++)
for (nint i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
ref Vector<float> c0 = ref Unsafe.Add(ref c0Base, i);
ref Vector<float> c1 = ref Unsafe.Add(ref c1Base, i);
ref Vector<float> c2 = ref Unsafe.Add(ref c2Base, i);
Vector<float> y = Unsafe.Add(ref c0Base, i);
Vector<float> cb = Unsafe.Add(ref c1Base, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref c2Base, i) + chromaOffset;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
@ -70,18 +61,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
c0 = r;
c1 = g;
c2 = b;
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
}
}

64
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs

@ -22,52 +22,42 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Vector256<float> c0Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Vector256<float> c1Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Vector256<float> c2Base =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-this.HalfValue);
var scale = Vector256.Create(1 / this.MaximumValue);
var scale = Vector256.Create(1 / (this.MaximumValue * this.MaximumValue));
var max = Vector256.Create(this.MaximumValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
var va = Vector256.Create(1F);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
nint n = values.Component0.Length / 8;
for (nint i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
k = Avx2.PermuteVar8x32(k, vcontrol);
ref Vector256<float> c0 = ref Unsafe.Add(ref c0Base, i);
ref Vector256<float> c1 = ref Unsafe.Add(ref c1Base, i);
ref Vector256<float> c2 = ref Unsafe.Add(ref c2Base, i);
Vector256<float> y = c0;
Vector256<float> cb = Avx.Add(c1, chromaOffset);
Vector256<float> cr = Avx.Add(c2, chromaOffset);
Vector256<float> scaledK = Avx.Multiply(Unsafe.Add(ref kBase, i), scale);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
@ -82,29 +72,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));
r = Avx.Multiply(Avx.Multiply(r, k), scale);
g = Avx.Multiply(Avx.Multiply(g, k), scale);
b = Avx.Multiply(Avx.Multiply(b, k), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
r = Avx.Multiply(r, scaledK);
g = Avx.Multiply(g, scaledK);
b = Avx.Multiply(b, scaledK);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
c0 = r;
c1 = g;
c2 = b;
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromYccKBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
}
}

43
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs

@ -15,39 +15,30 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
/// <inheritdoc/>
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
    => ConvertCore(values, result, this.MaximumValue, this.HalfValue);
/// <inheritdoc/>
public override void ConvertToRgbInplace(in ComponentValues values)
{
    // Delegate to the shared static routine using this converter's value range.
    ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
// Scalar YccK -> RGB conversion over one run of component values.
// NOTE(review): two variants of the routine appear back to back in this span:
// ConvertCore writes packed Vector4 values into 'result', while ConvertCoreInplace
// overwrites Component0/1/2 with the converted values and leaves Component3 untouched.
//   values:    the Y (Component0), Cb (Component1), Cr (Component2) and K (Component3) samples.
//   maxValue:  each rounded channel is subtracted from this value; it also drives the scaling.
//   halfValue: offset subtracted from every Cb and Cr sample before conversion.
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
internal static void ConvertCoreInplace(in ComponentValues values, float maxValue, float halfValue)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ReadOnlySpan<float> yVals = values.Component0;
ReadOnlySpan<float> cbVals = values.Component1;
ReadOnlySpan<float> crVals = values.Component2;
ReadOnlySpan<float> kVals = values.Component3;
// Writable views: the in-place variant stores its results back into c0, c1 and c2.
Span<float> c0 = values.Component0;
Span<float> c1 = values.Component1;
Span<float> c2 = values.Component2;
Span<float> c3 = values.Component3;
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
// The scalar form folds both divisions by maxValue (k / maxValue and the final
// 1 / maxValue scale) into a single factor that is applied to k.
var scale = 1 / (maxValue * maxValue);
for (int i = 0; i < result.Length; i++)
for (int i = 0; i < values.Component0.Length; i++)
{
float y = yVals[i];
float cb = cbVals[i] - halfValue;
float cr = crVals[i] - halfValue;
float k = kVals[i] / maxValue;
// Each rounded channel is subtracted from maxValue before being multiplied by k.
v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
// W is reset to 1 before the multiply; scale.W is 1, so W stays 1.
v.W = 1F;
v *= scale;
result[i] = v;
// All inputs are read into locals before any of the component spans is overwritten.
float y = c0[i];
float cb = c1[i] - halfValue;
float cr = c2[i] - halfValue;
float scaledK = c3[i] * scale;
c0[i] = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * scaledK;
c1[i] = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * scaledK;
c2[i] = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * scaledK;
}
}
}

60
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs

@ -18,46 +18,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
protected override void ConvertCoreVectorizedInplace(in ComponentValues values)
{
ref Vector<float> yBase =
ref Vector<float> c0Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Vector<float> c1Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Vector<float> c2Base =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-this.HalfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
nint n = values.Component0.Length / 8;
var scale = new Vector<float>(1 / this.MaximumValue);
var max = new Vector<float>(this.MaximumValue);
var scale = new Vector<float>(1f) / (max * max);
for (int i = 0; i < n; i++)
for (nint i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
Vector<float> k = Unsafe.Add(ref kBase, i) / max;
ref Vector<float> c0 = ref Unsafe.Add(ref c0Base, i);
ref Vector<float> c1 = ref Unsafe.Add(ref c1Base, i);
ref Vector<float> c2 = ref Unsafe.Add(ref c2Base, i);
Vector<float> y = c0;
Vector<float> cb = c1 + chromaOffset;
Vector<float> cr = c2 + chromaOffset;
Vector<float> scaledK = Unsafe.Add(ref kBase, i) * scale;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
@ -67,25 +60,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
r = (max - r.FastRound()) * k;
g = (max - g.FastRound()) * k;
b = (max - b.FastRound()) * k;
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
r = (max - r.FastRound()) * scaledK;
g = (max - g.FastRound()) * scaledK;
b = (max - b.FastRound()) * scaledK;
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
c0 = r;
c1 = g;
c2 = b;
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
protected override void ConvertCoreInplace(in ComponentValues values) =>
FromYccKBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue);
}
}
}

15
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs

@ -18,10 +18,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.vectorSize = vectorSize;
}
public sealed override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
public override void ConvertToRgbInplace(in ComponentValues values)
{
int remainder = result.Length % this.vectorSize;
int simdCount = result.Length - remainder;
int length = values.Component0.Length;
int remainder = values.Component0.Length % this.vectorSize;
int simdCount = length - remainder;
if (simdCount > 0)
{
// This implementation is actually AVX specific.
@ -32,15 +33,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
"This converter can be used only on architecture having 256 byte floating point SIMD registers!");
}
this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount));
this.ConvertCoreVectorizedInplace(values.Slice(0, simdCount));
}
this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder));
this.ConvertCoreInplace(values.Slice(simdCount, remainder));
}
protected abstract void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result);
protected virtual void ConvertCoreVectorizedInplace(in ComponentValues values) => throw new NotImplementedException();
protected abstract void ConvertCore(in ComponentValues values, Span<Vector4> result);
protected virtual void ConvertCoreInplace(in ComponentValues values) => throw new NotImplementedException();
}
}
}

156
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -76,11 +76,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
/// <summary>
/// The implementation of the conversion.
/// Converts planar jpeg component values in <paramref name="values"/> to RGB color space inplace.
/// </summary>
/// <param name="values">The input as a stack-only <see cref="ComponentValues"/> struct</param>
/// <param name="result">The destination buffer of <see cref="Vector4"/> values</param>
public abstract void ConvertToRgba(in ComponentValues values, Span<Vector4> result);
/// <param name="values">The input/output as a stack-only <see cref="ComponentValues"/> struct</param>
public abstract void ConvertToRgbInplace(in ComponentValues values);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for all supported colorspaces and precisions.
@ -181,22 +180,22 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
/// <summary>
/// The component 0 (eg. Y)
/// </summary>
public readonly ReadOnlySpan<float> Component0;
public readonly Span<float> Component0;
/// <summary>
/// The component 1 (eg. Cb)
/// The component 1 (eg. Cb). In case of grayscale, it points to <see cref="Component0"/>.
/// </summary>
public readonly ReadOnlySpan<float> Component1;
public readonly Span<float> Component1;
/// <summary>
/// The component 2 (eg. Cr)
/// The component 2 (eg. Cr). In case of grayscale, it points to <see cref="Component0"/>.
/// </summary>
public readonly ReadOnlySpan<float> Component2;
public readonly Span<float> Component2;
/// <summary>
/// The component 3 (eg. K)
/// </summary>
public readonly ReadOnlySpan<float> Component3;
public readonly Span<float> Component3;
/// <summary>
/// Initializes a new instance of the <see cref="ComponentValues"/> struct.
@ -208,30 +207,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.ComponentCount = componentBuffers.Count;
this.Component0 = componentBuffers[0].GetRowSpan(row);
this.Component1 = Span<float>.Empty;
this.Component2 = Span<float>.Empty;
this.Component3 = Span<float>.Empty;
if (this.ComponentCount > 1)
{
this.Component1 = componentBuffers[1].GetRowSpan(row);
if (this.ComponentCount > 2)
{
this.Component2 = componentBuffers[2].GetRowSpan(row);
if (this.ComponentCount > 3)
{
this.Component3 = componentBuffers[3].GetRowSpan(row);
}
}
}
// In case of grayscale, Component1 and Component2 point to Component0 memory area
this.Component1 = this.ComponentCount > 1 ? componentBuffers[1].GetRowSpan(row) : this.Component0;
this.Component2 = this.ComponentCount > 2 ? componentBuffers[2].GetRowSpan(row) : this.Component0;
this.Component3 = this.ComponentCount > 3 ? componentBuffers[3].GetRowSpan(row) : Span<float>.Empty;
}
private ComponentValues(
internal ComponentValues(
int componentCount,
ReadOnlySpan<float> c0,
ReadOnlySpan<float> c1,
ReadOnlySpan<float> c2,
ReadOnlySpan<float> c3)
Span<float> c0,
Span<float> c1,
Span<float> c2,
Span<float> c3)
{
this.ComponentCount = componentCount;
this.Component0 = c0;
@ -242,111 +230,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
// Returns a ComponentValues whose component spans are restricted to the window
// [start, start + length); ComponentCount is carried over unchanged.
// NOTE(review): two variants of the local declarations appear back to back in this span
// (ReadOnlySpan-based and Span-based).
public ComponentValues Slice(int start, int length)
{
ReadOnlySpan<float> c0 = this.Component0.Slice(start, length);
// This variant decides by component count: components that are not present stay empty.
ReadOnlySpan<float> c1 = this.ComponentCount > 1 ? this.Component1.Slice(start, length) : ReadOnlySpan<float>.Empty;
ReadOnlySpan<float> c2 = this.ComponentCount > 2 ? this.Component2.Slice(start, length) : ReadOnlySpan<float>.Empty;
ReadOnlySpan<float> c3 = this.ComponentCount > 3 ? this.Component3.Slice(start, length) : ReadOnlySpan<float>.Empty;
Span<float> c0 = this.Component0.Slice(start, length);
// This variant decides by span length instead, so any non-empty component span is sliced
// regardless of ComponentCount; only empty spans are passed through as empty.
Span<float> c1 = this.Component1.Length > 0 ? this.Component1.Slice(start, length) : Span<float>.Empty;
Span<float> c2 = this.Component2.Length > 0 ? this.Component2.Slice(start, length) : Span<float>.Empty;
Span<float> c3 = this.Component3.Length > 0 ? this.Component3.Slice(start, length) : Span<float>.Empty;
return new ComponentValues(this.ComponentCount, c0, c1, c2, c3);
}
}
/// <summary>
/// Eight consecutive <see cref="Vector4"/> values, used as the destination when interleaving
/// planar channel data into packed pixels.
/// </summary>
internal struct Vector4Octet
{
#pragma warning disable SA1132 // Do not combine fields
    public Vector4 V0, V1, V2, V3, V4, V5, V6, V7;

    /// <summary>
    /// Interleaves the planar (r0..r7), (g0..g7), (b0..b7) values into
    /// (r0,g0,b0,1), (r1,g1,b1,1) ... with the W component fixed at 1.
    /// </summary>
    public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
    {
        // Elements 0-3 come from the A half of each pair.
        this.V0 = new Vector4(r.A.X, g.A.X, b.A.X, 1f);
        this.V1 = new Vector4(r.A.Y, g.A.Y, b.A.Y, 1f);
        this.V2 = new Vector4(r.A.Z, g.A.Z, b.A.Z, 1f);
        this.V3 = new Vector4(r.A.W, g.A.W, b.A.W, 1f);

        // Elements 4-7 come from the B half of each pair.
        this.V4 = new Vector4(r.B.X, g.B.X, b.B.X, 1f);
        this.V5 = new Vector4(r.B.Y, g.B.Y, b.B.Y, 1f);
        this.V6 = new Vector4(r.B.Z, g.B.Z, b.B.Z, 1f);
        this.V7 = new Vector4(r.B.W, g.B.W, b.B.W, 1f);
    }

    /// <summary>
    /// Broadcasts the planar (g0..g7) values into (g0,g0,g0,1), (g1,g1,g1,1) ...
    /// with the W component fixed at 1.
    /// </summary>
    public void Pack(ref Vector4Pair g)
    {
        Vector4 lo = g.A;
        Vector4 hi = g.B;

        this.V0 = new Vector4(lo.X, lo.X, lo.X, 1f);
        this.V1 = new Vector4(lo.Y, lo.Y, lo.Y, 1f);
        this.V2 = new Vector4(lo.Z, lo.Z, lo.Z, 1f);
        this.V3 = new Vector4(lo.W, lo.W, lo.W, 1f);

        this.V4 = new Vector4(hi.X, hi.X, hi.X, 1f);
        this.V5 = new Vector4(hi.Y, hi.Y, hi.Y, 1f);
        this.V6 = new Vector4(hi.Z, hi.Z, hi.Z, 1f);
        this.V7 = new Vector4(hi.W, hi.W, hi.W, 1f);
    }
}
}
}

6
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs

@ -80,7 +80,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
[MethodImpl(InliningOptions.ShortMethod)]
public bool HasBadMarker()
{
    // The marker is only "bad" when it is neither Markers.XFF nor a restart marker.
    if (this.Marker == JpegConstants.Markers.XFF)
    {
        return false;
    }

    return !this.HasRestartMarker();
}
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(InliningOptions.AlwaysInline)]
public void FillBuffer()
{
// Attempt to load at least the minimum number of required bits into the buffer.
@ -130,7 +130,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
[MethodImpl(InliningOptions.ShortMethod)]
public int PeekBits(int nbits)
{
    // The requested bits are the top 'nbits' of the 'remainingBits' still buffered in 'data'.
    int shift = this.remainingBits - nbits;
    return (int)ExtractBits(this.data, shift, nbits);
}
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(InliningOptions.AlwaysInline)]
// Returns the 'size'-bit field of 'value' whose least significant bit is at bit 'offset'.
// The mask is built from a 64-bit one: with a 32-bit `1 << size` the shift count wraps
// at 32, so a request for 32 or more bits would produce a zero (or wrong) mask.
// Valid for size in [0, 63]; results for size in [0, 31] are unchanged.
private static ulong ExtractBits(ulong value, int offset, int size) => (value >> offset) & ((1UL << size) - 1UL);
[MethodImpl(InliningOptions.ShortMethod)]
@ -207,7 +207,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
}
[MethodImpl(InliningOptions.ShortMethod)]
[MethodImpl(InliningOptions.AlwaysInline)]
private int ReadStream()
{
int value = this.badData ? 0 : this.stream.ReadByte();

27
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs

@ -38,10 +38,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
private int restartInterval;
// How many mcu's are left to do.
/// <summary>
/// How many mcu's are left to do.
/// </summary>
private int todo;
// The End-Of-Block countdown for ending the sequence prematurely when the remaining coefficients are zero.
/// <summary>
/// The End-Of-Block countdown for ending the sequence prematurely when the remaining coefficients are zero.
/// </summary>
private int eobrun;
/// <summary>
@ -54,14 +58,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
private readonly HuffmanTable[] acHuffmanTables;
// The unzig data.
private ZigZag dctZigZag;
private HuffmanScanBuffer scanBuffer;
private readonly SpectralConverter spectralConverter;
private CancellationToken cancellationToken;
private readonly CancellationToken cancellationToken;
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanScanDecoder"/> class.
@ -74,7 +75,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
SpectralConverter converter,
CancellationToken cancellationToken)
{
this.dctZigZag = ZigZag.CreateUnzigTable();
this.stream = stream;
this.spectralConverter = converter;
this.cancellationToken = cancellationToken;
@ -477,7 +477,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
ref short blockDataRef = ref Unsafe.As<Block8x8, short>(ref block);
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
// DC
int t = buffer.DecodeHuffman(ref dcTable);
@ -502,7 +501,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
i += r;
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i++]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s;
}
else
{
@ -556,7 +555,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
int low = this.SuccessiveLow;
@ -572,7 +570,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0)
{
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i]) = (short)(s << low);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low);
}
else
{
@ -602,7 +600,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
// Refinement scan for these AC coefficients
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
@ -649,7 +646,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{
buffer.CheckBits();
@ -675,7 +672,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64))
{
Unsafe.Add(ref blockDataRef, zigzag[k]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s;
}
}
}
@ -684,7 +681,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
for (; k <= end; k++)
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/IJpegComponent.cs

@ -45,4 +45,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
Buffer2D<Block8x8> SpectralBlocks { get; }
}
}
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
IJpegComponent[] Components { get; }
/// <summary>
/// Gets the quantization tables, in zigzag order.
/// Gets the quantization tables, in natural order.
/// </summary>
Block8x8F[] QuantizationTables { get; }
}

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/JFifMarker.cs

@ -125,4 +125,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.YDensity);
}
}
}
}

26
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -19,14 +19,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public Block8x8F SourceBlock;
/// <summary>
/// Temporal block 1 to store intermediate and/or final computation results.
/// Temporal block to store intermediate computation results.
/// </summary>
public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporal block 2 to store intermediate and/or final computation results.
/// </summary>
public Block8x8F WorkspaceBlock2;
public Block8x8F WorkspaceBlock;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>.
@ -46,12 +41,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
// Sets up the post processor for one component: picks the component's quantization table,
// stores its subsampling divisors and clears the working blocks.
// NOTE(review): two variants of the initialization appear back to back in this span.
public JpegBlockPostProcessor(IRawJpegData decoder, IJpegComponent component)
{
int qtIndex = component.QuantizationTableIndex;
// Variant 1: derive the table through ZigZag.CreateDequantizationTable.
this.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
// Variant 2: take the decoder's table as is, with no ZigZag conversion step.
this.DequantiazationTable = decoder.QuantizationTables[qtIndex];
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;
this.WorkspaceBlock1 = default;
this.WorkspaceBlock2 = default;
this.WorkspaceBlock = default;
}
/// <summary>
@ -71,20 +65,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
int destAreaStride,
float maximumValue)
{
ref Block8x8F b = ref this.SourceBlock;
b.LoadFrom(ref sourceBlock);
ref Block8x8F block = ref this.SourceBlock;
block.LoadFrom(ref sourceBlock);
// Dequantize:
b.MultiplyInPlace(ref this.DequantiazationTable);
block.MultiplyInPlace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
// To conform better to libjpeg we actually NEED TO lose precision here.
// This is because they store blocks as Int16 between all the operations.
// To be "more accurate", we need to emulate this by rounding!
this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue);
block.NormalizeColorsAndRoundInPlace(maximumValue);
this.WorkspaceBlock1.ScaledCopyTo(
block.ScaledCopyTo(
ref destAreaOrigin,
destAreaStride,
this.subSamplingDivisors.Width,

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegColorSpace.cs

@ -20,4 +20,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
YCbCr
}
}
}

1
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponent.cs

@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegFileMarker.cs

@ -66,4 +66,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
return this.Marker.ToString("X");
}
}
}
}

10
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
/// <summary>
@ -30,5 +32,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// Actual stride height depends on the subsampling factor of the given component.
/// </remarks>
public abstract void ConvertStrideBaseline();
/// <summary>
/// Resolves the <see cref="JpegColorConverter"/> matching the color space of the raw JPEG data
/// and the precision of the frame. Virtual, so derived converters can substitute their own.
/// </summary>
/// <param name="frame">The jpeg frame; supplies the precision.</param>
/// <param name="jpegData">The raw JPEG data; supplies the color space.</param>
/// <returns>The color converter.</returns>
protected virtual JpegColorConverter GetColorConverter(JpegFrame frame, IRawJpegData jpegData)
{
    return JpegColorConverter.GetConverter(jpegData.ColorSpace, frame.Precision);
}
}
}

72
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs

@ -11,18 +11,21 @@ using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
internal sealed class SpectralConverter<TPixel> : SpectralConverter, IDisposable
internal class SpectralConverter<TPixel> : SpectralConverter, IDisposable
where TPixel : unmanaged, IPixel<TPixel>
{
private readonly Configuration configuration;
private CancellationToken cancellationToken;
private readonly CancellationToken cancellationToken;
private JpegComponentPostProcessor[] componentProcessors;
private JpegColorConverter colorConverter;
private IMemoryOwner<Vector4> rgbaBuffer;
// private IMemoryOwner<Vector4> rgbaBuffer;
private IMemoryOwner<byte> rgbBuffer;
private IMemoryOwner<TPixel> paddedProxyPixelRow;
private Buffer2D<TPixel> pixelBuffer;
@ -40,25 +43,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
private bool Converted => this.pixelRowCounter >= this.pixelBuffer.Height;
public Buffer2D<TPixel> PixelBuffer
public Buffer2D<TPixel> GetPixelBuffer()
{
get
if (!this.Converted)
{
if (!this.Converted)
{
int steps = (int)Math.Ceiling(this.pixelBuffer.Height / (float)this.pixelRowsPerStep);
int steps = (int)Math.Ceiling(this.pixelBuffer.Height / (float)this.pixelRowsPerStep);
for (int step = 0; step < steps; step++)
{
this.cancellationToken.ThrowIfCancellationRequested();
this.ConvertNextStride(step);
}
for (int step = 0; step < steps; step++)
{
this.cancellationToken.ThrowIfCancellationRequested();
this.ConvertNextStride(step);
}
return this.pixelBuffer;
}
return this.pixelBuffer;
}
/// <inheritdoc/>
public override void InjectFrameData(JpegFrame frame, IRawJpegData jpegData)
{
MemoryAllocator allocator = this.configuration.MemoryAllocator;
@ -71,7 +72,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.pixelRowsPerStep = this.blockRowsPerStep * blockPixelHeight;
// pixel buffer for resulting image
this.pixelBuffer = allocator.Allocate2D<TPixel>(frame.PixelWidth, frame.PixelHeight, AllocationOptions.Clean);
this.pixelBuffer = allocator.Allocate2D<TPixel>(frame.PixelWidth, frame.PixelHeight);
this.paddedProxyPixelRow = allocator.Allocate<TPixel>(frame.PixelWidth + 3);
// component processors from spectral to Rgba32
var postProcessorBufferSize = new Size(c0.SizeInBlocks.Width * 8, this.pixelRowsPerStep);
@ -82,12 +84,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
// single 'stride' rgba32 buffer for conversion between spectral and TPixel
this.rgbaBuffer = allocator.Allocate<Vector4>(frame.PixelWidth);
// this.rgbaBuffer = allocator.Allocate<Vector4>(frame.PixelWidth);
this.rgbBuffer = allocator.Allocate<byte>(frame.PixelWidth * 3);
// color converter from Rgba32 to TPixel
this.colorConverter = JpegColorConverter.GetConverter(jpegData.ColorSpace, frame.Precision);
this.colorConverter = this.GetColorConverter(frame, jpegData);
}
/// <inheritdoc/>
public override void ConvertStrideBaseline()
{
// Convert next pixel stride using single spectral `stride'
@ -113,7 +117,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
}
this.rgbaBuffer?.Dispose();
this.rgbBuffer?.Dispose();
this.paddedProxyPixelRow?.Dispose();
}
private void ConvertNextStride(int spectralStep)
@ -127,17 +132,38 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
buffers[i] = this.componentProcessors[i].ColorBuffer;
}
int width = this.pixelBuffer.Width;
for (int yy = this.pixelRowCounter; yy < maxY; yy++)
{
int y = yy - this.pixelRowCounter;
var values = new JpegColorConverter.ComponentValues(buffers, y);
this.colorConverter.ConvertToRgba(values, this.rgbaBuffer.GetSpan());
Span<TPixel> destRow = this.pixelBuffer.GetRowSpan(yy);
this.colorConverter.ConvertToRgbInplace(values);
values = values.Slice(0, width); // slice away Jpeg padding
// TODO: Investigate if slicing is actually necessary
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, this.rgbaBuffer.GetSpan().Slice(0, destRow.Length), destRow);
Span<byte> r = this.rgbBuffer.Slice(0, width);
Span<byte> g = this.rgbBuffer.Slice(width, width);
Span<byte> b = this.rgbBuffer.Slice(width * 2, width);
SimdUtils.NormalizedFloatToByteSaturate(values.Component0, r);
SimdUtils.NormalizedFloatToByteSaturate(values.Component1, g);
SimdUtils.NormalizedFloatToByteSaturate(values.Component2, b);
// PackFromRgbPlanes expects the destination to be padded, so try to get padded span containing extra elements from the next row.
// If we can't get such a padded row because we are on a MemoryGroup boundary or at the last row,
// pack pixels to a temporary, padded proxy buffer, then copy the relevant values to the destination row.
if (this.pixelBuffer.TryGetPaddedRowSpan(yy, 3, out Span<TPixel> destRow))
{
PixelOperations<TPixel>.Instance.PackFromRgbPlanes(this.configuration, r, g, b, destRow);
}
else
{
Span<TPixel> proxyRow = this.paddedProxyPixelRow.GetSpan();
PixelOperations<TPixel>.Instance.PackFromRgbPlanes(this.configuration, r, g, b, proxyRow);
proxyRow.Slice(0, width).CopyTo(this.pixelBuffer.GetRowSpan(yy));
}
}
this.pixelRowCounter += this.pixelRowsPerStep;

2
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffIndex.cs

@ -32,4 +32,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
// ReSharper restore UnusedMember.Local
}
}
}

21
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs

@ -5,10 +5,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// A compiled look-up table representation of a huffmanSpec.
/// Each value maps to a int32 of which the 24 most significant bits hold the
/// codeword in bits and the 8 least significant bits hold the codeword size.
/// The maximum codeword size is 16 bits.
/// </summary>
/// <remarks>
/// <para>
/// Each value maps to a int32 of which the 24 most significant bits hold the
/// codeword in bits and the 8 least significant bits hold the codeword size.
/// </para>
/// <para>
/// Code value occupies 24 most significant bits as integer value.
/// This value is shifted to the MSB position for performance reasons.
/// For example, decimal value 10 is stored like this:
/// <code>
/// MSB LSB
/// 1010 0000 00000000 00000000 | 00000100
/// </code>
/// This was done to eliminate extra binary shifts in the encoder.
/// The code length is stored as an 8-bit integer value in the 8 least significant bits.
/// </para>
/// </remarks>
internal readonly struct HuffmanLut
{
/// <summary>
@ -54,7 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int len = i + 1;
for (int j = 0; j < spec.Count[i]; j++)
{
this.Values[spec.Values[k]] = len | (code << 8);
this.Values[spec.Values[k]] = len | (code << (32 - len));
code++;
k++;
}

561
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -1,12 +1,11 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Runtime.InteropServices;
using System.Threading;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -16,67 +15,134 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
internal class HuffmanScanEncoder
{
/// <summary>
/// Compiled huffman tree to encode given values.
/// Maximum number of bytes encoded jpeg 8x8 block can occupy.
/// It's highly unlikely for block to occupy this much space - it's a theoretical limit.
/// </summary>
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <remarks>
/// Where 16 is maximum huffman code binary length according to itu
/// specs. 10 is maximum value binary length, value comes from discrete
/// cosine transform with value range: [-1024..1023]. Block stores
/// 8x8 = 64 values thus multiplication by 64. Then divided by 8 to get
/// the number of bytes. This value is then multiplied by
/// <see cref="MaxBytesPerBlockMultiplier"/> for performance reasons.
/// </remarks>
private const int MaxBytesPerBlock = (16 + 10) * 64 / 8 * MaxBytesPerBlockMultiplier;
/// <summary>
/// Multiplier used within cache buffers size calculation.
/// </summary>
/// <remarks>
/// <para>
/// Theoretically, <see cref="MaxBytesPerBlock"/> bytes buffer can fit
/// exactly one minimal coding unit. In reality, coding blocks occupy much
/// less space than the theoretical maximum - this can be exploited.
/// If temporal buffer size is multiplied by at least 2, second half of
/// the resulting buffer will be used as an overflow 'guard' if next
/// block would occupy maximum number of bytes. While first half may fit
/// many blocks before needing to flush.
/// </para>
/// <para>
/// This is subject to change. This can be equal to 1 but the recommended
/// value is 2 or even greater - further benchmarking is needed.
/// </para>
/// </remarks>
private const int MaxBytesPerBlockMultiplier = 2;
/// <summary>
/// Number of bytes cached before being written to target stream via Stream.Write(byte[], offest, count).
/// <see cref="streamWriteBuffer"/> size multiplier.
/// </summary>
/// <remarks>
/// This is subject to change, 1024 seems to be the best value in terms of performance.
/// <see cref="Emit(int, int)"/> expects it to be at least 8 (see comments in method body).
/// The Jpeg specification requires inserting a 'stuff' byte after each
/// 0xff byte value. The worst case scenario is when all bytes are 0xff.
/// While it's highly unlikely (if not impossible) to get such
/// combination, it's theoretically possible so buffer size must be guarded.
/// </remarks>
private const int EmitBufferSizeInBytes = 1024;
private const int OutputBufferLengthMultiplier = 2;
/// <summary>
/// A buffer for reducing the number of stream writes when emitting Huffman tables.
/// Compiled huffman tree to encode given values.
/// </summary>
private readonly byte[] emitBuffer = new byte[EmitBufferSizeInBytes];
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <summary>
/// Number of filled bytes in <see cref="emitBuffer"/> buffer
/// Emitted bits 'micro buffer' before being transferred to the <see cref="emitBuffer"/>.
/// </summary>
private int emitLen = 0;
private uint accumulatedBits;
/// <summary>
/// Emmited bits 'micro buffer' before being transfered to the <see cref="emitBuffer"/>.
/// Buffer for temporal storage of huffman rle encoding bit data.
/// </summary>
private int accumulatedBits;
/// <remarks>
/// Encoding bits are assembled to 4 byte unsigned integers and then copied to this buffer.
/// This process does NOT include inserting stuff bytes.
/// </remarks>
private readonly uint[] emitBuffer;
/// <summary>
/// Buffer for temporal storage which is then written to the output stream.
/// </summary>
/// <remarks>
/// Encoding bits from <see cref="emitBuffer"/> are copied to this byte buffer including stuff bytes.
/// </remarks>
private readonly byte[] streamWriteBuffer;
/// <summary>
/// Number of jagged bits stored in <see cref="accumulatedBits"/>
/// </summary>
private int bitCount;
private Block8x8F temporalBlock1;
private Block8x8F temporalBlock2;
private int emitWriteIndex;
private Block8x8 tempBlock;
/// <summary>
/// The output stream. All attempted writes after the first error become no-ops.
/// </summary>
private readonly Stream target;
public HuffmanScanEncoder(Stream outputStream)
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanScanEncoder"/> class.
/// </summary>
/// <param name="blocksPerCodingUnit">Number of encoded 8x8 blocks per single jpeg macroblock.</param>
/// <param name="outputStream">Output stream the encoded data is written to.</param>
public HuffmanScanEncoder(int blocksPerCodingUnit, Stream outputStream)
{
    this.target = outputStream;

    // Both buffers are sized from the per-block byte limit times the block count.
    int codingUnitByteCapacity = blocksPerCodingUnit * MaxBytesPerBlock;

    // The emit buffer stores 32-bit words, so its element count is the byte capacity
    // divided by the word size. The write index starts at the end of the buffer.
    uint[] wordBuffer = new uint[codingUnitByteCapacity / sizeof(uint)];
    this.emitBuffer = wordBuffer;
    this.emitWriteIndex = wordBuffer.Length;

    // The stream write buffer is larger by OutputBufferLengthMultiplier.
    this.streamWriteBuffer = new byte[OutputBufferLengthMultiplier * codingUnitByteCapacity];
}
/// <summary>
/// Gets a value indicating whether the write position in <see cref="emitBuffer"/>
/// has dropped below half of the buffer length, in which case the buffer
/// must be flushed using <see cref="FlushToStream()"/>
/// before encoding the next 8x8 coding block.
/// </summary>
private bool IsStreamFlushNeeded
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        uint halfLength = (uint)this.emitBuffer.Length / 2;
        return this.emitWriteIndex < halfLength;
    }
}
/// <summary>
/// Encodes the image with no subsampling.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the callee</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the caller.</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -100,26 +166,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -128,15 +196,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the callee</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the caller.</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -161,34 +230,35 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YLeft,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YRight,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
}
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -196,14 +266,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the caller.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0;
@ -226,12 +296,76 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
/// Encodes the image with no subsampling, emitting separate R, G and B
/// blocks for every 8x8 pixel area of the root frame.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The image providing access to the pixels to encode.</param>
/// <param name="quantTable">Quantization table provided by the caller; the same table is used for all three channels.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void EncodeRgb<TPixel>(Image<TPixel> pixels, ref Block8x8F quantTable, CancellationToken cancellationToken)
    where TPixel : unmanaged, IPixel<TPixel>
{
    FastFloatingPointDCT.AdjustToFDCT(ref quantTable);
    this.huffmanTables = HuffmanLut.TheHuffmanLut;

    ImageFrame<TPixel> rootFrame = pixels.Frames.RootFrame;
    Buffer2D<TPixel> framePixels = rootFrame.PixelBuffer;
    var converter = new RgbForwardConverter<TPixel>(rootFrame);
    RowOctet<TPixel> rowOctet = default;

    // Each channel tracks its own previous DC value, as returned by WriteBlock.
    // ReSharper disable once InconsistentNaming
    int lastDCRed = 0;
    int lastDCGreen = 0;
    int lastDCBlue = 0;

    for (int top = 0; top < pixels.Height; top += 8)
    {
        // Cancellation is checked once per band of 8 rows.
        cancellationToken.ThrowIfCancellationRequested();
        rowOctet.Update(framePixels, top);

        for (int left = 0; left < pixels.Width; left += 8)
        {
            converter.Convert(left, top, ref rowOctet);

            // All three channels are written with the QuantIndex.Luminance index.
            lastDCRed = this.WriteBlock(QuantIndex.Luminance, lastDCRed, ref converter.R, ref quantTable);
            lastDCGreen = this.WriteBlock(QuantIndex.Luminance, lastDCGreen, ref converter.G, ref quantTable);
            lastDCBlue = this.WriteBlock(QuantIndex.Luminance, lastDCBlue, ref converter.B, ref quantTable);

            if (this.IsStreamFlushNeeded)
            {
                this.FlushToStream();
            }
        }
    }

    this.FlushRemainingBytes();
}
/// <summary>
@ -241,47 +375,53 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// </summary>
/// <param name="index">The quantization table index.</param>
/// <param name="prevDC">The previous DC value.</param>
/// <param name="src">Source block</param>
/// <param name="quant">Quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
/// <param name="block">Source block.</param>
/// <param name="quant">Quantization table.</param>
/// <returns>The <see cref="int"/>.</returns>
private int WriteBlock(
QuantIndex index,
int prevDC,
ref Block8x8F src,
ref Block8x8F quant,
ref ZigZag unZig)
ref Block8x8F block,
ref Block8x8F quant)
{
ref Block8x8F refTemp1 = ref this.temporalBlock1;
ref Block8x8F refTemp2 = ref this.temporalBlock2;
ref Block8x8 spectralBlock = ref this.tempBlock;
FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);
// Shifting level from 0..255 to -128..127
block.AddInPlace(-128f);
Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
// Discrete cosine transform
FastFloatingPointDCT.TransformFDCT(ref block);
// Quantization
Block8x8F.Quantize(ref block, ref spectralBlock, ref quant);
// Emit the DC delta.
int dc = (int)refTemp2[0];
this.EmitDirectCurrentTerm(this.huffmanTables[2 * (int)index].Values, dc - prevDC);
int dc = spectralBlock[0];
this.EmitHuffRLE(this.huffmanTables[2 * (int)index].Values, 0, dc - prevDC);
// Emit the AC components.
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
nint lastValuableIndex = spectralBlock.GetLastNonZeroIndex();
int runLength = 0;
int lastValuableIndex = GetLastValuableElementIndex(ref refTemp2);
for (int zig = 1; zig <= lastValuableIndex; zig++)
ref short blockRef = ref Unsafe.As<Block8x8, short>(ref spectralBlock);
for (nint zig = 1; zig <= lastValuableIndex; zig++)
{
int ac = (int)refTemp2[zig];
const int zeroRun1 = 1 << 4;
const int zeroRun16 = 16 << 4;
int ac = Unsafe.Add(ref blockRef, zig);
if (ac == 0)
{
runLength++;
runLength += zeroRun1;
}
else
{
while (runLength > 15)
while (runLength >= zeroRun16)
{
this.EmitHuff(acHuffTable, 0xf0);
runLength -= 16;
runLength -= zeroRun16;
}
this.EmitHuffRLE(acHuffTable, runLength, ac);
@ -301,100 +441,89 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
/// <summary>
/// Emits the least significant count of bits to the stream write buffer.
/// The precondition is bits
/// <example>
/// &lt; 1&lt;&lt;nBits &amp;&amp; nBits &lt;= 16
/// </example>
/// .
/// Emits the most significant count of bits to the buffer.
/// </summary>
/// <param name="bits">The packed bits.</param>
/// <param name="count">The number of bits</param>
/// <remarks>
/// <para>
/// Supports up to 32 count of bits but, generally speaking, jpeg
/// standard assures that there won't be more than 16 bits per single
/// value.
/// </para>
/// <para>
/// Emitting algorithm uses 3 intermediate buffers for caching before
/// writing to the stream:
/// <list type="number">
/// <item>
/// <term>uint32</term>
/// <description>
/// Bit buffer. Encoded spectral values can occupy up to 16 bits, bits
/// are assembled to whole bytes via this intermediate buffer.
/// </description>
/// </item>
/// <item>
/// <term>uint32[]</term>
/// <description>
/// Assembled bytes from uint32 buffer are saved into this buffer.
/// uint32 buffer values are saved using indices from the last to the first.
/// As bytes are saved to the memory as 4-byte packages endianness matters:
/// Jpeg stream is big-endian, indexing buffer bytes from the last index to the
/// first eliminates all operations to extract separate bytes. This only works for
/// little-endian machines (there are no known examples of big-endian users atm).
/// For big-endians this approach is slower due to the separate byte extraction.
/// </description>
/// </item>
/// <item>
/// <term>byte[]</term>
/// <description>
/// Byte buffer used only during <see cref="FlushToStream(int)"/> method.
/// </description>
/// </item>
/// </list>
/// </para>
/// </remarks>
/// <param name="bits">Bits to emit, must be shifted to the left.</param>
/// <param name="count">Bits count stored in the bits parameter.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void Emit(int bits, int count)
private void Emit(uint bits, int count)
{
this.accumulatedBits |= bits >> this.bitCount;
count += this.bitCount;
bits <<= 32 - count;
bits |= this.accumulatedBits;
// Only write if more than 8 bits.
if (count >= 8)
if (count >= 32)
{
// Track length
while (count >= 8)
{
byte b = (byte)(bits >> 24);
this.emitBuffer[this.emitLen++] = b;
// Adding stuff byte
// This is because by JPEG standard scan data can contain JPEG markers (indicated by the 0xFF byte, followed by a non-zero byte)
// Considering this every 0xFF byte must be followed by 0x00 padding byte to signal that this is not a marker
if (b == byte.MaxValue)
{
this.emitBuffer[this.emitLen++] = byte.MinValue;
}
bits <<= 8;
count -= 8;
}
this.emitBuffer[--this.emitWriteIndex] = this.accumulatedBits;
this.accumulatedBits = bits << (32 - this.bitCount);
// This can emit 4 times of:
// 1 byte guaranteed
// 1 extra byte.MinValue byte if previous one was byte.MaxValue
// Thus writing (1 + 1) * 4 = 8 bytes max
// So we must check if emit buffer has extra 8 bytes, if not - call stream.Write
if (this.emitLen > EmitBufferSizeInBytes - 8)
{
this.target.Write(this.emitBuffer, 0, this.emitLen);
this.emitLen = 0;
}
count -= 32;
}
this.accumulatedBits = bits;
this.bitCount = count;
}
/// <summary>
/// Emits the given value with the given Huffman encoder.
/// Emits the given value with the given Huffman table.
/// </summary>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="value">The value to encode.</param>
/// <param name="table">Huffman table.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuff(int[] table, int value)
{
int x = table[value];
this.Emit(x >> 8, x & 0xff);
}
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitDirectCurrentTerm(int[] table, int value)
{
int a = value;
int b = value;
if (a < 0)
{
a = -value;
b = value - 1;
}
int bt = GetHuffmanEncodingLength((uint)a);
this.EmitHuff(table, bt);
if (bt > 0)
{
this.Emit(b & ((1 << bt) - 1), bt);
}
this.Emit((uint)x & 0xffff_ff00u, x & 0xff);
}
/// <summary>
/// Emits a run of runLength copies of value encoded with the given Huffman encoder.
/// Emits given value via huffman rle encoding.
/// </summary>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="runLength">The number of copies to encode.</param>
/// <param name="value">The value to encode.</param>
/// <param name="table">Huffman table.</param>
/// <param name="runLength">The number of preceding zeroes, preshifted by 4 to the left.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuffRLE(int[] table, int runLength, int value)
{
DebugGuard.IsTrue((runLength & 0xf) == 0, $"{nameof(runLength)} parameter must be shifted to the left by 4 bits");
int a = value;
int b = value;
if (a < 0)
@ -403,25 +532,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
b = value - 1;
}
int bt = GetHuffmanEncodingLength((uint)a);
int valueLen = GetHuffmanEncodingLength((uint)a);
this.EmitHuff(table, (runLength << 4) | bt);
this.Emit(b & ((1 << bt) - 1), bt);
}
// Huffman prefix code
int huffPackage = table[runLength | valueLen];
int prefixLen = huffPackage & 0xff;
uint prefix = (uint)huffPackage & 0xffff_0000u;
/// <summary>
/// Writes remaining bytes from internal buffer to the target stream.
/// </summary>
/// <remarks>Pads last byte with 1's if necessary</remarks>
private void FlushInternalBuffer()
{
// pad last byte with 1's
int padBitsCount = 8 - (this.bitCount % 8);
if (padBitsCount != 0)
{
this.Emit((1 << padBitsCount) - 1, padBitsCount);
this.target.Write(this.emitBuffer, 0, this.emitLen);
}
// Actual encoded value
uint encodedValue = (uint)b << (32 - valueLen);
// Doing two binary shifts to get rid of leading 1's in negative value case
this.Emit(prefix | (encodedValue >> prefixLen), prefixLen + valueLen);
}
/// <summary>
@ -437,19 +559,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
DebugGuard.IsTrue(value <= (1 << 16), "Huffman encoder is supposed to encode a value of 16bit size max");
#if SUPPORTS_BITOPERATIONS
// This should have been implemented as (BitOperations.Log2(value) + 1) as in non-intrinsic implementation
// But internal log2 is implementated like this: (31 - (int)Lzcnt.LeadingZeroCount(value))
// But internal log2 is implemented like this: (31 - (int)Lzcnt.LeadingZeroCount(value))
// BitOperations.Log2 implementation also checks if input value is zero for the convention 0->0
// Lzcnt would return 32 for input value of 0 - no need to check that with branching
// Fallback code if Lzcnt is not supported still use if-check
// But most modern CPUs support this instruction so this should not be a problem
return 32 - System.Numerics.BitOperations.LeadingZeroCount(value);
return 32 - BitOperations.LeadingZeroCount(value);
#else
// Ideally:
// if 0 - return 0 in this case
// else - return log2(value) + 1
//
// Hack based on input value constaint:
// Hack based on input value constraint:
// We know that input values are guaranteed to be maximum 16 bit large for huffman encoding
// We can safely shift input value for one bit -> log2(value << 1)
// Because of the 16 bit value constraint it won't overflow
@ -460,65 +582,108 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
/// <summary>
/// Returns index of the last non-zero element in given mcu block.
/// If all values of the mcu block are zero, this method might return different results depending on the runtime and hardware support.
/// This is jpeg mcu specific code, mcu[0] stores a dc value which will be encoded outside of the loop.
/// This method is guaranteed to return either -1 or 0 if all elements are zero.
/// General method for flushing cached spectral data bytes to
/// the output stream respecting stuff bytes.
/// </summary>
/// <remarks>
/// This is an internal operation supposed to be used only in <see cref="HuffmanScanEncoder"/> class for jpeg encoding.
/// Bytes cached via <see cref="Emit"/> are stored in 4-bytes blocks
/// which makes this method endianness dependent.
/// </remarks>
/// <param name="mcu">Mcu block.</param>
/// <returns>Index of the last non-zero element.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
internal static int GetLastValuableElementIndex(ref Block8x8F mcu)
private void FlushToStream(int endIndex)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
Span<byte> emitBytes = MemoryMarshal.AsBytes(this.emitBuffer.AsSpan());
Vector256<int> zero8 = Vector256<int>.Zero;
int writeIdx = 0;
int startIndex = emitBytes.Length - 1;
ref Vector256<float> mcuStride = ref mcu.V0;
for (int i = 7; i >= 0; i--)
// Some platforms may fail to eliminate this if-else branching
// Even if it happens - buffer is flushed in big packs,
// branching overhead shouldn't be noticeable
if (BitConverter.IsLittleEndian)
{
// For little endian case bytes are ordered and can be
// safely written to the stream with stuff bytes
// First byte is cached on the most significant index
// so we are going from the end of the array to its beginning:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx3|idx2|idx1|idx0] [idx3|idx2|idx1|idx0]
for (int i = startIndex; i >= endIndex; i--)
{
int areEqual = Avx2.MoveMask(Avx2.CompareEqual(Avx.ConvertToVector256Int32(Unsafe.Add(ref mcuStride, i)), zero8).AsByte());
byte value = emitBytes[i];
this.streamWriteBuffer[writeIdx++] = value;
// we do not know for sure if this stride contain all non-zero elements or if it has some trailing zeros
if (areEqual != equalityMask)
// Inserting stuff byte
if (value == 0xff)
{
// last index in the stride, we go from the end to the start of the stride
int startIndex = i * 8;
int index = startIndex + 7;
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);
while (index >= startIndex && (int)Unsafe.Add(ref elemRef, index) == 0)
{
index--;
}
// this implementation will return -1 if all ac components are zero and dc are zero
return index;
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
return -1;
}
else
#endif
{
int index = Block8x8F.Size - 1;
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);
while (index > 0 && (int)Unsafe.Add(ref elemRef, index) == 0)
// For the big endian case bytes are grouped in 4-byte packs.
// The packs are ordered like the bytes in the little endian case, but the bytes inside each pack are reversed:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx0|idx1|idx2|idx3] [idx0|idx1|idx2|idx3]
// So we must write each 4-byte pack in 'natural order'
for (int i = startIndex; i >= endIndex; i -= 4)
{
index--;
}
// This loop is required by the nature of the underlying byte buffer
// implementation and indeed costs roughly 5% of performance
// compared to the little endian scenario.
// Even with this performance drop this cached buffer implementation
// is faster than individually writing bytes using binary shifts and binary and(s)
for (int j = i - 3; j <= i; j++)
{
byte value = emitBytes[j];
this.streamWriteBuffer[writeIdx++] = value;
// this implementation will return 0 if all ac components and dc are zero
return index;
// Inserting stuff byte
if (value == 0xff)
{
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
}
}
this.target.Write(this.streamWriteBuffer, 0, writeIdx);
}
/// <summary>
/// Writes the spectral data bytes cached while encoding all channel blocks
/// of a single jpeg macroblock via <see cref="WriteBlock"/> to the output stream,
/// then rewinds the emit buffer write position to the end of the buffer.
/// </summary>
/// <remarks>
/// Call this only from the macroblocks encoding routine, and only when
/// <see cref="IsStreamFlushNeeded"/> is true.
/// </remarks>
private void FlushToStream()
{
    // Each emit buffer element is 4 bytes wide, so the element index
    // is scaled to a byte index before delegating to the byte-level flush.
    int endByteIndex = this.emitWriteIndex * 4;
    this.FlushToStream(endByteIndex);

    // The buffer is filled from its end towards its start; point the cursor past the last element again.
    this.emitWriteIndex = this.emitBuffer.Length;
}
/// <summary>
/// Flushes final cached bits to the stream padding 1's to
/// complement full bytes.
/// </summary>
/// <remarks>
/// This must be called only once at the end of the encoding routine.
/// <see cref="IsStreamFlushNeeded"/> check is not needed.
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
private void FlushRemainingBytes()
{
    // Number of bytes in the bit accumulator that carry at least one real (non-padding) bit.
    int valuableBytesCount = (int)Numerics.DivideCeil((uint)this.bitCount, 8);

    // Pad the unused low bits with 1's without touching the bits already stored in accumulatedBits.
    uint packedBytes = this.accumulatedBits | (uint.MaxValue >> this.bitCount);
    this.emitBuffer[--this.emitWriteIndex] = packedBytes;

    // FlushToStream(int) writes bytes from the end of the buffer down to the given index (inclusive).
    // The word cached above occupies bytes [emitWriteIndex * 4, emitWriteIndex * 4 + 3] and its first
    // byte lives at the highest index, so only the top 'valuableBytesCount' bytes of it must be written.
    // The previous expression ((emitWriteIndex * 4) - 4 + valuableBytesCount) was only correct for
    // valuableBytesCount == 4; for smaller counts it also flushed pure padding bytes and stale bytes
    // located below the current write position.
    int lastByteIndex = (this.emitWriteIndex * 4) + 4 - valuableBytesCount;
    this.FlushToStream(lastByteIndex);
}
}
}

10
src/ImageSharp/Formats/Jpeg/Components/Encoder/QuantIndex.cs

@ -1,21 +1,21 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// Enumerates the quantization tables.
/// </summary>
internal enum QuantIndex
{
/// <summary>
/// The luminance quantization table index.
/// </summary>
Luminance = 0,
/// <summary>
/// The chrominance quantization table index.
/// </summary>
Chrominance = 1,
}
}
}

114
src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs

@ -0,0 +1,114 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// Stack-only worker struct that converts 8x8 blocks of TPixel data to <see cref="Rgb24"/>
/// and splits the result into separate red, green and blue <see cref="Block8x8F"/> planes.
/// </summary>
/// <typeparam name="TPixel">The pixel type to work on.</typeparam>
internal ref struct RgbForwardConverter<TPixel>
    where TPixel : unmanaged, IPixel<TPixel>
{
    /// <summary>
    /// Pixel count handled by one <see cref="Convert(int, int, ref RowOctet{TPixel})"/> call (one 8x8 block).
    /// </summary>
    private const int PixelsPerSample = 8 * 8;

    /// <summary>
    /// Byte size of one block of pixels once converted from TPixel to <see cref="Rgb24"/> (3 bytes per pixel).
    /// </summary>
    private const int RgbSpanByteSize = PixelsPerSample * 3;

    /// <summary>
    /// <see cref="Size"/> of the area sampled from the frame pixel buffer on each conversion.
    /// </summary>
    private static readonly Size SampleSize = new Size(8, 8);

    /// <summary>
    /// The Red component plane of the most recently converted block.
    /// </summary>
    public Block8x8F R;

    /// <summary>
    /// The Green component plane of the most recently converted block.
    /// </summary>
    public Block8x8F G;

    /// <summary>
    /// The Blue component plane of the most recently converted block.
    /// </summary>
    public Block8x8F B;

    /// <summary>
    /// Temporary span of 64 pixels holding the unconverted TPixel data.
    /// </summary>
    private readonly Span<TPixel> pixelSpan;

    /// <summary>
    /// Temporary span holding the converted Rgb24 data; its backing array carries
    /// <see cref="RgbToYCbCrConverterVectorized.AvxCompatibilityPadding"/> extra bytes.
    /// </summary>
    private readonly Span<Rgb24> rgbSpan;

    /// <summary>
    /// Size of the frame the blocks are sampled from.
    /// </summary>
    private readonly Size samplingAreaSize;

    /// <summary>
    /// <see cref="Configuration"/> used for the pixel conversion.
    /// </summary>
    private readonly Configuration config;

    /// <summary>
    /// Initializes a new instance of the <see cref="RgbForwardConverter{TPixel}"/> struct
    /// with zeroed component blocks and freshly allocated temporary buffers.
    /// </summary>
    /// <param name="frame">The frame supplying the sampling area size and the configuration.</param>
    public RgbForwardConverter(ImageFrame<TPixel> frame)
    {
        // frame data
        this.config = frame.GetConfiguration();
        this.samplingAreaSize = new Size(frame.Width, frame.Height);

        // temporary pixel buffers
        byte[] rgbBytes = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
        this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBytes.AsSpan());
        this.pixelSpan = new TPixel[PixelsPerSample].AsSpan();

        // component planes start out zeroed
        this.B = default;
        this.G = default;
        this.R = default;
    }

    /// <summary>
    /// Loads the 8x8 area at position (x, y) from the given rows, converts it to Rgb24
    /// and stores the components into <see cref="R"/>, <see cref="G"/> and <see cref="B"/>.
    /// </summary>
    /// <param name="x">The x-coordinate of the sampled area.</param>
    /// <param name="y">The y-coordinate of the sampled area.</param>
    /// <param name="currentRows">The rows the pixels are loaded from.</param>
    public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
    {
        Point origin = new Point(x, y);
        YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, origin, SampleSize, this.samplingAreaSize);

        PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

        CopyToBlock(this.rgbSpan, ref this.R, ref this.G, ref this.B);
    }

    /// <summary>
    /// Splits the first <see cref="Block8x8F.Size"/> pixels of <paramref name="rgbSpan"/>
    /// into the three component blocks.
    /// </summary>
    private static void CopyToBlock(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
    {
        ref Rgb24 first = ref MemoryMarshal.GetReference(rgbSpan);

        int idx = 0;
        while (idx < Block8x8F.Size)
        {
            // Unchecked element access relative to the span start.
            Rgb24 pixel = Unsafe.Add(ref first, (nint)(uint)idx);

            redBlock[idx] = pixel.R;
            greenBlock[idx] = pixel.G;
            blueBlock[idx] = pixel.B;

            idx++;
        }
    }
}
}

8
src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs

@ -58,22 +58,22 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// <summary>
/// Temporal 16x8 block to hold TPixel data
/// </summary>
private Span<TPixel> pixelSpan;
private readonly Span<TPixel> pixelSpan;
/// <summary>
/// Temporal RGB block
/// </summary>
private Span<Rgb24> rgbSpan;
private readonly Span<Rgb24> rgbSpan;
/// <summary>
/// Sampled pixel buffer size
/// </summary>
private Size samplingAreaSize;
private readonly Size samplingAreaSize;
/// <summary>
/// <see cref="Configuration"/> for internal operations
/// </summary>
private Configuration config;
private readonly Configuration config;
public YCbCrForwardConverter420(ImageFrame<TPixel> frame)
{

8
src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs

@ -53,22 +53,22 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// <summary>
/// Temporal 64-byte span to hold unconverted TPixel data
/// </summary>
private Span<TPixel> pixelSpan;
private readonly Span<TPixel> pixelSpan;
/// <summary>
/// Temporal 64-byte span to hold converted Rgb24 data
/// </summary>
private Span<Rgb24> rgbSpan;
private readonly Span<Rgb24> rgbSpan;
/// <summary>
/// Sampled pixel buffer size
/// </summary>
private Size samplingAreaSize;
private readonly Size samplingAreaSize;
/// <summary>
/// <see cref="Configuration"/> for internal operations
/// </summary>
private Configuration config;
private readonly Configuration config;
public YCbCrForwardConverter444(ImageFrame<TPixel> frame)
{

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save