Merge branch 'master' into js/aot-experiments

5 years ago · 0152586509
1259 changed files with 19371 additions and 6750 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,373 +1,436 @@
-###############################################################################
-# EditorConfig is awesome: http://EditorConfig.org
-###############################################################################
+# Version: 1.6.2 (Using https://semver.org/)
+# Updated: 2020-11-02
+# See https://github.com/RehanSaeed/EditorConfig/releases for release notes.
+# See https://github.com/RehanSaeed/EditorConfig for updates to this file.
+# See http://EditorConfig.org for more information about .editorconfig files.

-###############################################################################
-# Top-most EditorConfig file
-###############################################################################
+##########################################
+# Common Settings
+##########################################
+
+# This file is the top-most EditorConfig file
 root = true

-###############################################################################
-# Set default behavior to:
-#   a UTF-8 encoding,
-#   Unix-style line endings,
-#   a newline ending the file,
-#   4 space indentation, and
-#   trimming of trailing whitespace
-###############################################################################
+# All Files
 [*]
 charset = utf-8
-end_of_line = lf
-insert_final_newline = true
 indent_style = space
 indent_size = 4
+end_of_line = lf
+insert_final_newline = true
 trim_trailing_whitespace = true

-###############################################################################
-# Set file behavior to:
-#   2 space indentation
-###############################################################################
-[*.{cmd,config,csproj,json,props,ps1,resx,sh,targets}]
-indent_size = 2
+##########################################
+# File Extension Settings
+##########################################

-###############################################################################
-# Set file behavior to:
-#   Windows-style line endings, and
-#   tabular indentation
-###############################################################################
+# Visual Studio Solution Files
 [*.sln]
+indent_style = tab
+
+# Visual Studio XML Project Files
+[*.{csproj,vbproj,vcxproj.filters,proj,projitems,shproj}]
+indent_size = 2
+
+# T4 Templates Files
+[*.{tt,ttinclude}]
 end_of_line = crlf
+
+# XML Configuration Files
+[*.{xml,config,props,targets,nuspec,resx,ruleset,vsixmanifest,vsct}]
+indent_size = 2
+
+# JSON Files
+[*.{json,json5,webmanifest}]
+indent_size = 2
+
+# YAML Files
+[*.{yml,yaml}]
+indent_size = 2
+
+# Markdown Files
+[*.md]
+trim_trailing_whitespace = false
+
+# Web Files
+[*.{htm,html,js,jsm,ts,tsx,css,sass,scss,less,svg,vue}]
+indent_size = 2
+
+# Batch Files
+[*.{cmd,bat}]
+end_of_line = crlf
+
+# Makefiles
+[Makefile]
 indent_style = tab

-###############################################################################
-# Set dotnet naming rules to:
-#    suggest async members be pascal case suffixed with Async
-#    suggest const declarations be pascal case
-#    suggest interfaces be pascal case prefixed with I
-#    suggest parameters be camel case
-#    suggest private and internal static fields be camel case
-#    suggest private and internal fields be camel case
-#    suggest public and protected declarations be pascal case
-#    suggest static readonly declarations be pascal case
-#    suggest type parameters be prefixed with T
-###############################################################################
-[*.cs]
-dotnet_naming_rule.async_members_should_be_pascal_case_suffixed_with_async.severity = suggestion
-dotnet_naming_rule.async_members_should_be_pascal_case_suffixed_with_async.style = pascal_case_suffixed_with_async
-dotnet_naming_rule.async_members_should_be_pascal_case_suffixed_with_async.symbols = async_members
-
-dotnet_naming_rule.const_declarations_should_be_pascal_case.severity = suggestion
-dotnet_naming_rule.const_declarations_should_be_pascal_case.style = pascal_case
-dotnet_naming_rule.const_declarations_should_be_pascal_case.symbols = const_declarations
-
-dotnet_naming_rule.interfaces_should_be_pascal_case_prefixed_with_i.severity = suggestion
-dotnet_naming_rule.interfaces_should_be_pascal_case_prefixed_with_i.style = pascal_case_prefixed_with_i
-dotnet_naming_rule.interfaces_should_be_pascal_case_prefixed_with_i.symbols = interfaces
-
-dotnet_naming_rule.parameters_should_be_camel_case.severity = suggestion
-dotnet_naming_rule.parameters_should_be_camel_case.style = camel_case
-dotnet_naming_rule.parameters_should_be_camel_case.symbols = parameters
-
-dotnet_naming_rule.private_and_internal_static_fields_should_be_camel_case.severity = suggestion
-dotnet_naming_rule.private_and_internal_static_fields_should_be_camel_case.style = camel_case
-dotnet_naming_rule.private_and_internal_static_fields_should_be_camel_case.symbols = private_and_internal_static_fields
-
-dotnet_naming_rule.private_and_internal_fields_should_be_camel_case.severity = suggestion
-dotnet_naming_rule.private_and_internal_fields_should_be_camel_case.style = camel_case
-dotnet_naming_rule.private_and_internal_fields_should_be_camel_case.symbols = private_and_internal_fields
-
-dotnet_naming_rule.public_and_protected_declarations_should_be_pascal_case.severity = suggestion
-dotnet_naming_rule.public_and_protected_declarations_should_be_pascal_case.style = pascal_case
-dotnet_naming_rule.public_and_protected_declarations_should_be_pascal_case.symbols = public_and_protected_declarations
-dotnet_naming_symbols.public_and_protected_declarations.applicable_kinds = method, field, event, property
-
-dotnet_naming_rule.static_readonly_declarations_should_be_pascal_case.severity = suggestion
-dotnet_naming_rule.static_readonly_declarations_should_be_pascal_case.style = pascal_case
-dotnet_naming_rule.static_readonly_declarations_should_be_pascal_case.symbols = static_readonly_declarations
-
-dotnet_naming_rule.type_parameters_should_be_pascal_case_prefixed_with_t.severity = suggestion
-dotnet_naming_rule.type_parameters_should_be_pascal_case_prefixed_with_t.style = pascal_case_prefixed_with_t
-dotnet_naming_rule.type_parameters_should_be_pascal_case_prefixed_with_t.symbols = type_parameters
-
-###############################################################################
-# Set dotnet naming styles to define:
-#   camel case
-#   pascal case
-#   pascal case suffixed with Async
-#   pascal case prefixed with I
-#   pascal case prefixed with T
-###############################################################################
-[*.cs]
-dotnet_naming_style.camel_case.capitalization = camel_case
-
-dotnet_naming_style.pascal_case.capitalization = pascal_case
-
-dotnet_naming_style.pascal_case_suffixed_with_async.capitalization = pascal_case
-dotnet_naming_style.pascal_case_suffixed_with_async.required_suffix = Async
-
-dotnet_naming_style.pascal_case_prefixed_with_i.capitalization = pascal_case
-dotnet_naming_style.pascal_case_prefixed_with_i.required_prefix = I
-
-dotnet_naming_style.pascal_case_prefixed_with_t.capitalization = pascal_case
-dotnet_naming_style.pascal_case_prefixed_with_t.required_prefix = T
-
-###############################################################################
-# Set dotnet naming symbols to:
-#   async members
-#   const declarations
-#   interfaces
-#   private and internal fields
-#   private and internal static fields
-#   public and protected declarations
-#   static readonly declarations
-#   type parameters
-###############################################################################
-[*.cs]
-dotnet_naming_symbols.async_members.required_modifiers = async
-
-dotnet_naming_symbols.const_declarations.required_modifiers = const
-
-dotnet_naming_symbols.interfaces.applicable_kinds = interface
-
-dotnet_naming_symbols.parameters.applicable_kinds = parameter
-
-dotnet_naming_symbols.private_and_internal_fields.applicable_accessibilities = private, internal
-dotnet_naming_symbols.private_and_internal_fields.applicable_kinds = field
-
-dotnet_naming_symbols.private_and_internal_static_fields.applicable_accessibilities = private, internal
-dotnet_naming_symbols.private_and_internal_static_fields.applicable_kinds = field
-dotnet_naming_symbols.private_and_internal_static_fields.required_modifiers = static
-
-dotnet_naming_symbols.public_and_protected_declarations.applicable_accessibilities = public, protected
-
-dotnet_naming_symbols.static_readonly_declarations.required_modifiers = static, readonly
-
-dotnet_naming_symbols.type_parameters.applicable_kinds = type_parameter
-
-###############################################################################
-# Set dotnet sort options to:
-#   do not separate import directives into groups, and
-#   sort system directives first
-###############################################################################
-[*.cs]
-dotnet_separate_import_directive_groups = false
-dotnet_sort_system_directives_first = true
+##########################################
+# File Header (Uncomment to support file headers)
+# https://docs.microsoft.com/visualstudio/ide/reference/add-file-header
+##########################################

-###############################################################################
-# Set dotnet style options to:
-#   suggest null-coalescing expressions,
-#   suggest collection-initializers,
-#   suggest explicit tuple names,
-#   suggest null-propogation
-#   suggest object-initializers,
-#   generate parentheses in arithmetic binary operators for clarity,
-#   generate parentheses in other binary operators for clarity,
-#   don't generate parentheses in other operators if unnecessary,
-#   generate parentheses in relational binary operators for clarity,
-#   warn when not using predefined-types for locals, parameters, and members,
-#   generate predefined-types of type names for member access,
-#   generate auto properties,
-#   suggest compound assignment,
-#   generate conditional expression over assignment,
-#   generate conditional expression over return,
-#   suggest inferred anonymous types,
-#   suggest inferred tuple names,
-#   suggest 'is null' checks over '== null',
-#   don't generate 'this.' and 'Me.' for events,
-#   warn when not using 'this.' and 'Me.' for fields,
-#   warn when not using 'this.' and 'Me.' for methods,
-#   warn when not using 'this.' and 'Me.' for properties,
-#   suggest readonly fields, and
-#   generate accessibility modifiers for non interface members
-###############################################################################
-[*.cs]
-dotnet_style_coalesce_expression = true:suggestion
-dotnet_style_collection_initializer = true:suggestion
-dotnet_style_explicit_tuple_names = true:suggestion
-dotnet_style_null_propagation = true:suggestion
-dotnet_style_object_initializer = true:suggestion
-
-dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
-dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
-dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
-dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
+[*.{cs,csx,cake,vb,vbx,tt,ttinclude}]
+file_header_template = Copyright (c) Six Labors.\nLicensed under the Apache License, Version 2.0.

-dotnet_style_predefined_type_for_locals_parameters_members = true:warning
-dotnet_style_predefined_type_for_member_access = true:silent
+# SA1636: File header copyright text should match
+# Justification: .editorconfig supports file headers. If this is changed to a value other than "none", a stylecop.json file will need to be added to the project.
+# dotnet_diagnostic.SA1636.severity = none

-dotnet_style_prefer_auto_properties = true:silent
-dotnet_style_prefer_compound_assignment = true:suggestion
-dotnet_style_prefer_conditional_expression_over_assignment = true:silent
-dotnet_style_prefer_conditional_expression_over_return = true:silent
-dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
-dotnet_style_prefer_inferred_tuple_names = true:suggestion
-dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
+##########################################
+# .NET Language Conventions
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions
+##########################################

-dotnet_style_qualification_for_event = false:silent
+# .NET Code Style Settings
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#net-code-style-settings
+[*.{cs,csx,cake,vb,vbx}]
+# "this." and "Me." qualifiers
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#this-and-me
 dotnet_style_qualification_for_field = true:warning
-dotnet_style_qualification_for_method = true:warning
 dotnet_style_qualification_for_property = true:warning
-
-dotnet_style_readonly_field = true:suggestion
-dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
-
-###############################################################################
-# Set dotnet style options to:
-#   suggest removing all unused parameters
-###############################################################################
-[*.cs]
-dotnet_code_quality_unused_parameters = all:suggestion
-
-###############################################################################
-# Set csharp indent options to:
-#   indent block contents,
-#   not indent braces,
-#   indent case contents,
-#   not indent case contents when block,
-#   indent labels one less than the current, and
-#   indent switch labels
-###############################################################################
-[*.cs]
-csharp_indent_block_contents = true
-csharp_indent_braces = false
-csharp_indent_case_contents = true
-csharp_indent_case_contents_when_block = false
-csharp_indent_labels = one_less_than_current
-csharp_indent_switch_labels = true
-
-###############################################################################
-# Set csharp new-line options to:
-#   insert a new-line before "catch",
-#   insert a new-line before "else",
-#   insert a new-line before "finally",
-#   insert a new-line before members in anonymous-types,
-#   insert a new-line before members in object-initializers, and
-#   insert a new-line before all open braces
-###############################################################################
-[*.cs]
-csharp_new_line_before_catch = true
+dotnet_style_qualification_for_method = true:warning
+dotnet_style_qualification_for_event = true:warning
+# Language keywords instead of framework type names for type references
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#language-keywords
+dotnet_style_predefined_type_for_locals_parameters_members = true:warning
+dotnet_style_predefined_type_for_member_access = true:warning
+# Modifier preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#normalize-modifiers
+dotnet_style_require_accessibility_modifiers = always:warning
+csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:warning
+visual_basic_preferred_modifier_order = Partial,Default,Private,Protected,Public,Friend,NotOverridable,Overridable,MustOverride,Overloads,Overrides,MustInherit,NotInheritable,Static,Shared,Shadows,ReadOnly,WriteOnly,Dim,Const,WithEvents,Widening,Narrowing,Custom,Async:warning
+dotnet_style_readonly_field = true:warning
+# Parentheses preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#parentheses-preferences
+dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:warning
+dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:warning
+dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:warning
+dotnet_style_parentheses_in_other_operators = never_if_unnecessary:suggestion
+# Expression-level preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#expression-level-preferences
+dotnet_style_object_initializer = true:warning
+dotnet_style_collection_initializer = true:warning
+dotnet_style_explicit_tuple_names = true:warning
+dotnet_style_prefer_inferred_tuple_names = true:warning
+dotnet_style_prefer_inferred_anonymous_type_member_names = true:warning
+dotnet_style_prefer_auto_properties = true:warning
+dotnet_style_prefer_is_null_check_over_reference_equality_method = true:warning
+dotnet_style_prefer_conditional_expression_over_assignment = false:suggestion
+dotnet_style_prefer_conditional_expression_over_return = false:suggestion
+dotnet_style_prefer_compound_assignment = true:warning
+# Null-checking preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#null-checking-preferences
+dotnet_style_coalesce_expression = true:warning
+dotnet_style_null_propagation = true:warning
+# Parameter preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#parameter-preferences
+dotnet_code_quality_unused_parameters = all:warning
+# More style options (Undocumented)
+# https://github.com/MicrosoftDocs/visualstudio-docs/issues/3641
+dotnet_style_operator_placement_when_wrapping = end_of_line
+# https://github.com/dotnet/roslyn/pull/40070
+dotnet_style_prefer_simplified_interpolation = true:warning
+
+# C# Code Style Settings
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#c-code-style-settings
+[*.{cs,csx,cake}]
+# Implicit and explicit types
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#implicit-and-explicit-types
+csharp_style_var_for_built_in_types = never
+csharp_style_var_when_type_is_apparent = true:warning
+csharp_style_var_elsewhere = false:warning
+# Expression-bodied members
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#expression-bodied-members
+csharp_style_expression_bodied_methods = true:warning
+csharp_style_expression_bodied_constructors = true:warning
+csharp_style_expression_bodied_operators = true:warning
+csharp_style_expression_bodied_properties = true:warning
+csharp_style_expression_bodied_indexers = true:warning
+csharp_style_expression_bodied_accessors = true:warning
+csharp_style_expression_bodied_lambdas = true:warning
+csharp_style_expression_bodied_local_functions = true:warning
+# Pattern matching
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#pattern-matching
+csharp_style_pattern_matching_over_is_with_cast_check = true:warning
+csharp_style_pattern_matching_over_as_with_null_check = true:warning
+# Inlined variable declarations
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#inlined-variable-declarations
+csharp_style_inlined_variable_declaration = true:warning
+# Expression-level preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#expression-level-preferences
+csharp_prefer_simple_default_expression = true:warning
+# "Null" checking preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#c-null-checking-preferences
+csharp_style_throw_expression = true:warning
+csharp_style_conditional_delegate_call = true:warning
+# Code block preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#code-block-preferences
+csharp_prefer_braces = true:warning
+# Unused value preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#unused-value-preferences
+csharp_style_unused_value_expression_statement_preference = discard_variable:suggestion
+csharp_style_unused_value_assignment_preference = discard_variable:suggestion
+# Index and range preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#index-and-range-preferences
+csharp_style_prefer_index_operator = true:warning
+csharp_style_prefer_range_operator = true:warning
+# Miscellaneous preferences
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-language-conventions#miscellaneous-preferences
+csharp_style_deconstructed_variable_declaration = true:warning
+csharp_style_pattern_local_over_anonymous_function = true:warning
+csharp_using_directive_placement = outside_namespace:warning
+csharp_prefer_static_local_function = true:warning
+csharp_prefer_simple_using_statement = true:suggestion
+
+##########################################
+# .NET Formatting Conventions
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-code-style-settings-reference#formatting-conventions
+##########################################
+
+# Organize usings
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-formatting-conventions#organize-using-directives
+dotnet_sort_system_directives_first = true
+# Newline options
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-formatting-conventions#new-line-options
+csharp_new_line_before_open_brace = all
 csharp_new_line_before_else = true
+csharp_new_line_before_catch = true
 csharp_new_line_before_finally = true
-
-csharp_new_line_before_members_in_anonymous_types = true
 csharp_new_line_before_members_in_object_initializers = true
-
-csharp_new_line_before_open_brace = all
-
-###############################################################################
-# Set csharp preserve options to:
-#   preserve single-line blocks, and
-#   preserve single-line statements
-###############################################################################
-[*.cs]
-csharp_preserve_single_line_blocks = true
-csharp_preserve_single_line_statements = true
-
-###############################################################################
-# Set csharp space options to:
-#   remove any space after a cast,
-#   add a space after the colon in an inheritance clause,
-#   add a space after a comma,
-#   remove any space after a dot,
-#   add a space after keywords in control flow statements,
-#   add a space after a semicolon in a "for" statement,
-#   add a space before and after binary operators,
-#   remove space around declaration statements,
-#   add a space before the colon in an inheritance clause,
-#   remove any space before a comma,
-#   remove any space before a dot,
-#   remove any space before an open square-bracket,
-#   remove any space before a semicolon in a "for" statement,
-#   remove any space between empty square-brackets,
-#   remove any space between a method call's empty parameter list parenthesis,
-#   remove any space between a method call's name and its opening parenthesis,
-#   remove any space between a method call's parameter list parenthesis,
-#   remove any space between a method declaration's empty parameter list parenthesis,
-#   remove any space between a method declaration's name and its openening parenthesis,
-#   remove any space between a method declaration's parameter list parenthesis,
-#   remove any space between parentheses, and
-#   remove any space between square brackets
-###############################################################################
-[*.cs]
+csharp_new_line_before_members_in_anonymous_types = true
+csharp_new_line_between_query_expression_clauses = true
+# Indentation options
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-formatting-conventions#indentation-options
+csharp_indent_case_contents = true
+csharp_indent_switch_labels = true
+csharp_indent_labels = no_change
+csharp_indent_block_contents = true
+csharp_indent_braces = false
+csharp_indent_case_contents_when_block = false
+# Spacing options
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-formatting-conventions#spacing-options
 csharp_space_after_cast = false
-csharp_space_after_colon_in_inheritance_clause = true
-csharp_space_after_comma = true
-csharp_space_after_dot = false
 csharp_space_after_keywords_in_control_flow_statements = true
-csharp_space_after_semicolon_in_for_statement = true
-
-csharp_space_around_binary_operators = before_and_after
-csharp_space_around_declaration_statements = do_not_ignore
-
+csharp_space_between_parentheses = false
 csharp_space_before_colon_in_inheritance_clause = true
+csharp_space_after_colon_in_inheritance_clause = true
+csharp_space_around_binary_operators = before_and_after
+csharp_space_between_method_declaration_parameter_list_parentheses = false
+csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
+csharp_space_between_method_declaration_name_and_open_parenthesis = false
+csharp_space_between_method_call_parameter_list_parentheses = false
+csharp_space_between_method_call_empty_parameter_list_parentheses = false
+csharp_space_between_method_call_name_and_opening_parenthesis = false
+csharp_space_after_comma = true
 csharp_space_before_comma = false
+csharp_space_after_dot = false
 csharp_space_before_dot = false
-csharp_space_before_open_square_brackets = false
+csharp_space_after_semicolon_in_for_statement = true
 csharp_space_before_semicolon_in_for_statement = false
-
+csharp_space_around_declaration_statements = false
+csharp_space_before_open_square_brackets = false
 csharp_space_between_empty_square_brackets = false
-csharp_space_between_method_call_empty_parameter_list_parentheses = false
-csharp_space_between_method_call_name_and_opening_parenthesis = false
-csharp_space_between_method_call_parameter_list_parentheses = false
-csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
-csharp_space_between_method_declaration_name_and_open_parenthesis = false
-csharp_space_between_method_declaration_parameter_list_parentheses = false
-csharp_space_between_parentheses = false
 csharp_space_between_square_brackets = false
+# Wrapping options
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-formatting-conventions#wrap-options
+csharp_preserve_single_line_statements = false
+csharp_preserve_single_line_blocks = true

-###############################################################################
-# Set csharp style options to:
-#   generate braces,
-#   suggest simple default expressions,
-#   generate a preferred modifier order,
-#   suggest conditional delegate calls,
-#   suggest deconstructed variable declarations,
-#   generate expression-bodied accessors,
-#   generate expression-bodied constructors,
-#   generate expression-bodied indexers,
-#   generate expression-bodied lambdas,
-#   generate expression-bodied methods,
-#   generate expression-bodied operators,
-#   generate expression-bodied properties,
-#   suggest inlined variable declarations,
-#   suggest local over anonymous functions,
-#   suggest pattern-matching over "as" with "null" check,
-#   suggest pattern-matching over "is" with "cast" check,
-#   suggest throw expressions,
-#   generate a discard variable for unused value expression statements,
-#   suggest a discard variable for unused assignments,
-#   warn when using var for built-in types,
-#   warn when using var when the type is not apparent, and
-#   warn when not using var when the type is apparent
-#   warn when using simplified "using" declaration
-###############################################################################
-[*.cs]
-csharp_prefer_braces = true:silent
-csharp_prefer_simple_default_expression = true:suggestion
-csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:silent
-
-csharp_style_conditional_delegate_call = true:suggestion
-csharp_style_deconstructed_variable_declaration = true:suggestion
-
-csharp_style_expression_bodied_accessors = true:silent
-csharp_style_expression_bodied_constructors = true:silent
-csharp_style_expression_bodied_indexers = true:silent
-csharp_style_expression_bodied_lambdas = true:silent
-csharp_style_expression_bodied_methods = true:silent
-csharp_style_expression_bodied_operators = true:silent
-csharp_style_expression_bodied_properties = true:silent
-
-csharp_style_inlined_variable_declaration = true:suggestion
-
-csharp_style_pattern_local_over_anonymous_function = true:suggestion
-csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
-csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
-
-csharp_style_throw_expression = true:suggestion
-
-csharp_style_unused_value_expression_statement_preference = discard_variable:silent
-csharp_style_unused_value_assignment_preference = discard_variable:suggestion
-
-csharp_style_var_for_built_in_types = never
-csharp_style_var_when_type_is_apparent = true:warning
-csharp_style_var_elsewhere = false:warning
+##########################################
+# .NET Naming Conventions
+# https://docs.microsoft.com/visualstudio/ide/editorconfig-naming-conventions
+##########################################
+
+[*.{cs,csx,cake,vb,vbx}]
+
+##########################################
+# Styles
+##########################################
+
+# camel_case_style - Define the camelCase style
+dotnet_naming_style.camel_case_style.capitalization = camel_case
+# pascal_case_style - Define the PascalCase style
+dotnet_naming_style.pascal_case_style.capitalization = pascal_case
+# first_upper_style - The first character must start with an upper-case character
+dotnet_naming_style.first_upper_style.capitalization = first_word_upper
+# prefix_interface_with_i_style - Interfaces must be PascalCase and the first character of an interface must be an 'I'
+dotnet_naming_style.prefix_interface_with_i_style.capitalization = pascal_case
+dotnet_naming_style.prefix_interface_with_i_style.required_prefix = I
+# prefix_type_parameters_with_t_style - Generic Type Parameters must be PascalCase and the first character must be a 'T'
+dotnet_naming_style.prefix_type_parameters_with_t_style.capitalization = pascal_case
+dotnet_naming_style.prefix_type_parameters_with_t_style.required_prefix = T
+# disallowed_style - Anything that has this style applied is marked as disallowed
+dotnet_naming_style.disallowed_style.capitalization  = pascal_case
+dotnet_naming_style.disallowed_style.required_prefix = ____RULE_VIOLATION____
+dotnet_naming_style.disallowed_style.required_suffix = ____RULE_VIOLATION____
+# internal_error_style - This style should never occur... if it does, it indicates a bug in this file or in the parser using the file
+dotnet_naming_style.internal_error_style.capitalization  = pascal_case
+dotnet_naming_style.internal_error_style.required_prefix = ____INTERNAL_ERROR____
+dotnet_naming_style.internal_error_style.required_suffix = ____INTERNAL_ERROR____
+
+##########################################
+# .NET Design Guideline Field Naming Rules
+# Naming rules for fields follow the .NET Framework design guidelines
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/index
+##########################################
+
+# All public/protected/protected_internal constant fields must be PascalCase
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/field
+dotnet_naming_symbols.public_protected_constant_fields_group.applicable_accessibilities = public, protected, protected_internal
+dotnet_naming_symbols.public_protected_constant_fields_group.required_modifiers         = const
+dotnet_naming_symbols.public_protected_constant_fields_group.applicable_kinds           = field
+dotnet_naming_rule.public_protected_constant_fields_must_be_pascal_case_rule.symbols    = public_protected_constant_fields_group
+dotnet_naming_rule.public_protected_constant_fields_must_be_pascal_case_rule.style      = pascal_case_style
+dotnet_naming_rule.public_protected_constant_fields_must_be_pascal_case_rule.severity   = warning
+
+# All public/protected/protected_internal static readonly fields must be PascalCase
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/field
+dotnet_naming_symbols.public_protected_static_readonly_fields_group.applicable_accessibilities = public, protected, protected_internal
+dotnet_naming_symbols.public_protected_static_readonly_fields_group.required_modifiers         = static, readonly
+dotnet_naming_symbols.public_protected_static_readonly_fields_group.applicable_kinds           = field
+dotnet_naming_rule.public_protected_static_readonly_fields_must_be_pascal_case_rule.symbols    = public_protected_static_readonly_fields_group
+dotnet_naming_rule.public_protected_static_readonly_fields_must_be_pascal_case_rule.style      = pascal_case_style
+dotnet_naming_rule.public_protected_static_readonly_fields_must_be_pascal_case_rule.severity   = warning
+
+# No other public/protected/protected_internal fields are allowed
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/field
+dotnet_naming_symbols.other_public_protected_fields_group.applicable_accessibilities = public, protected, protected_internal
+dotnet_naming_symbols.other_public_protected_fields_group.applicable_kinds           = field
+dotnet_naming_rule.other_public_protected_fields_disallowed_rule.symbols             = other_public_protected_fields_group
+dotnet_naming_rule.other_public_protected_fields_disallowed_rule.style               = disallowed_style
+dotnet_naming_rule.other_public_protected_fields_disallowed_rule.severity            = error
+
+##########################################
+# StyleCop Field Naming Rules
+# Naming rules for fields follow the StyleCop analyzers
+# This does not override any rules using disallowed_style above
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers
+##########################################
+
+# All constant fields must be PascalCase
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1303.md
+dotnet_naming_symbols.stylecop_constant_fields_group.applicable_accessibilities = public, internal, protected_internal, protected, private_protected, private
+dotnet_naming_symbols.stylecop_constant_fields_group.required_modifiers         = const
+dotnet_naming_symbols.stylecop_constant_fields_group.applicable_kinds           = field
+dotnet_naming_rule.stylecop_constant_fields_must_be_pascal_case_rule.symbols    = stylecop_constant_fields_group
+dotnet_naming_rule.stylecop_constant_fields_must_be_pascal_case_rule.style      = pascal_case_style
+dotnet_naming_rule.stylecop_constant_fields_must_be_pascal_case_rule.severity   = warning
+
+# All static readonly fields must be PascalCase
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1311.md
+dotnet_naming_symbols.stylecop_static_readonly_fields_group.applicable_accessibilities = public, internal, protected_internal, protected, private_protected, private
+dotnet_naming_symbols.stylecop_static_readonly_fields_group.required_modifiers         = static, readonly
+dotnet_naming_symbols.stylecop_static_readonly_fields_group.applicable_kinds           = field
+dotnet_naming_rule.stylecop_static_readonly_fields_must_be_pascal_case_rule.symbols    = stylecop_static_readonly_fields_group
+dotnet_naming_rule.stylecop_static_readonly_fields_must_be_pascal_case_rule.style      = pascal_case_style
+dotnet_naming_rule.stylecop_static_readonly_fields_must_be_pascal_case_rule.severity   = warning
+
+# No non-private instance fields are allowed
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1401.md
+dotnet_naming_symbols.stylecop_fields_must_be_private_group.applicable_accessibilities = public, internal, protected_internal, protected, private_protected
+dotnet_naming_symbols.stylecop_fields_must_be_private_group.applicable_kinds           = field
+dotnet_naming_rule.stylecop_instance_fields_must_be_private_rule.symbols               = stylecop_fields_must_be_private_group
+dotnet_naming_rule.stylecop_instance_fields_must_be_private_rule.style                 = disallowed_style
+dotnet_naming_rule.stylecop_instance_fields_must_be_private_rule.severity              = error
+
+# Private fields must be camelCase
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1306.md
+dotnet_naming_symbols.stylecop_private_fields_group.applicable_accessibilities = private
+dotnet_naming_symbols.stylecop_private_fields_group.applicable_kinds           = field
+dotnet_naming_rule.stylecop_private_fields_must_be_camel_case_rule.symbols     = stylecop_private_fields_group
+dotnet_naming_rule.stylecop_private_fields_must_be_camel_case_rule.style       = camel_case_style
+dotnet_naming_rule.stylecop_private_fields_must_be_camel_case_rule.severity    = warning
+
+# Local variables must be camelCase
+# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1312.md
+dotnet_naming_symbols.stylecop_local_fields_group.applicable_accessibilities = local
+dotnet_naming_symbols.stylecop_local_fields_group.applicable_kinds           = local
+dotnet_naming_rule.stylecop_local_fields_must_be_camel_case_rule.symbols     = stylecop_local_fields_group
+dotnet_naming_rule.stylecop_local_fields_must_be_camel_case_rule.style       = camel_case_style
+dotnet_naming_rule.stylecop_local_fields_must_be_camel_case_rule.severity    = silent
+
+# This rule should never fire.  However, it's included for at least two purposes:
+# First, it helps to understand, reason about, and root-cause certain types of issues, such as bugs in .editorconfig parsers.
+# Second, it helps to raise immediate awareness if a new field type is added (as occurred recently in C#).
+dotnet_naming_symbols.sanity_check_uncovered_field_case_group.applicable_accessibilities = *
+dotnet_naming_symbols.sanity_check_uncovered_field_case_group.applicable_kinds           = field
+dotnet_naming_rule.sanity_check_uncovered_field_case_rule.symbols  = sanity_check_uncovered_field_case_group
+dotnet_naming_rule.sanity_check_uncovered_field_case_rule.style    = internal_error_style
+dotnet_naming_rule.sanity_check_uncovered_field_case_rule.severity = error
+
+
+##########################################
+# Other Naming Rules
+##########################################
+
+# All of the following must be PascalCase:
+# - Namespaces
+#   https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-namespaces
+#   https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1300.md
+# - Classes and Enumerations
+#   https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-classes-structs-and-interfaces
+#   https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1300.md
+# - Delegates
+#   https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-classes-structs-and-interfaces#names-of-common-types
+# - Constructors, Properties, Events, Methods
+#   https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-type-members
+dotnet_naming_symbols.element_group.applicable_kinds = namespace, class, enum, struct, delegate, event, method, property
+dotnet_naming_rule.element_rule.symbols              = element_group
+dotnet_naming_rule.element_rule.style                = pascal_case_style
+dotnet_naming_rule.element_rule.severity             = warning
+
+# Interfaces use PascalCase and are prefixed with uppercase 'I'
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-classes-structs-and-interfaces
+dotnet_naming_symbols.interface_group.applicable_kinds = interface
+dotnet_naming_rule.interface_rule.symbols              = interface_group
+dotnet_naming_rule.interface_rule.style                = prefix_interface_with_i_style
+dotnet_naming_rule.interface_rule.severity             = warning
+
+# Generic type parameters use PascalCase and are prefixed with uppercase 'T'
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/names-of-classes-structs-and-interfaces
+dotnet_naming_symbols.type_parameter_group.applicable_kinds = type_parameter
+dotnet_naming_rule.type_parameter_rule.symbols              = type_parameter_group
+dotnet_naming_rule.type_parameter_rule.style                = prefix_type_parameters_with_t_style
+dotnet_naming_rule.type_parameter_rule.severity             = warning
+
+# Function parameters use camelCase
+# https://docs.microsoft.com/dotnet/standard/design-guidelines/naming-parameters
+dotnet_naming_symbols.parameters_group.applicable_kinds = parameter
+dotnet_naming_rule.parameters_rule.symbols              = parameters_group
+dotnet_naming_rule.parameters_rule.style                = camel_case_style
+dotnet_naming_rule.parameters_rule.severity             = warning
+
+##########################################
+# License
+##########################################
+# The following applies as to the .editorconfig file ONLY, and is
+# included below for reference, per the requirements of the license
+# corresponding to this .editorconfig file.
+# See: https://github.com/RehanSaeed/EditorConfig
+#
+# MIT License
+#
+# Copyright (c) 2017-2019 Muhammad Rehan Saeed
+# Copyright (c) 2019 Henry Gabryjelski
+#
+# Permission is hereby granted, free of charge, to any
+# person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute,
+# sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject
+# to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+##########################################
--- a/.gitattributes
+++ b/.gitattributes
@ -2,9 +2,13 @@
 # Set default behavior to:
 #   treat as text and
 #   normalize to Unix-style line endings
+###############################################################################
 *           text        eol=lf
-
+###############################################################################
 # Set explicit file behavior to:
+#   treat as text and
+#   normalize to Unix-style line endings
+###############################################################################
 *.asm               text        eol=lf
 *.c                 text        eol=lf
 *.clj               text        eol=lf
@ -49,45 +53,58 @@
 *.txt               text        eol=lf
 *.vb                text        eol=lf
 *.yml               text        eol=lf
+###############################################################################
+# Set explicit file behavior to:
 #   treat as text
 #   normalize to Unix-style line endings and
 #   diff as csharp
+###############################################################################
 *.cs                text        eol=lf          diff=csharp
+###############################################################################
+# Set explicit file behavior to:
+#   treat as text
+#   normalize to Unix-style line endings and
 #   use a union merge when resoling conflicts
+###############################################################################
 *.csproj            text        eol=lf          merge=union
 *.dbproj            text        eol=lf          merge=union
 *.fsproj            text        eol=lf          merge=union
 *.ncrunchproject    text        eol=lf          merge=union
 *.vbproj            text        eol=lf          merge=union
+###############################################################################
+# Set explicit file behavior to:
+#   treat as text
 #   normalize to Windows-style line endings and
+#   use a union merge when resolving conflicts
+###############################################################################
 *.sln               text        eol=crlf        merge=union
+###############################################################################
+# Set explicit file behavior to:
 #   treat as binary
+###############################################################################
 *.basis             binary
-*.bmp               binary
-*.dds               binary
 *.dll               binary
 *.eot               binary
 *.exe               binary
-*.gif               binary
-*.jpg               binary
 *.ktx               binary
 *.otf               binary
 *.pbm               binary
 *.pdf               binary
-*.png               binary
 *.ppt               binary
 *.pptx              binary
 *.pvr               binary
 *.snk               binary
-*.tga               binary
 *.ttc               binary
 *.ttf               binary
-*.webp              binary
+*.wbmp              binary
 *.woff              binary
 *.woff2             binary
 *.xls               binary
 *.xlsx              binary
+###############################################################################
+# Set explicit file behavior to:
 #   diff as plain text
+###############################################################################
 *.doc               diff=astextplain
 *.docx              diff=astextplain
 *.dot               diff=astextplain
@ -95,12 +112,16 @@
 *.pptx              diff=astextplain
 *.rtf               diff=astextplain
 *.svg               diff=astextplain
-*.jpg filter=lfs diff=lfs merge=lfs -text
-*.jpeg filter=lfs diff=lfs merge=lfs -text
-*.bmp filter=lfs diff=lfs merge=lfs -text
-*.gif filter=lfs diff=lfs merge=lfs -text
-*.png filter=lfs diff=lfs merge=lfs -text
-*.tif filter=lfs diff=lfs merge=lfs -text
-*.tiff filter=lfs diff=lfs merge=lfs -text
-*.tga filter=lfs diff=lfs merge=lfs -text
-*.webp filter=lfs diff=lfs merge=lfs -text
+###############################################################################
+# Handle image files by git lfs
+###############################################################################
+*.jpg               filter=lfs diff=lfs merge=lfs -text
+*.jpeg              filter=lfs diff=lfs merge=lfs -text
+*.bmp               filter=lfs diff=lfs merge=lfs -text
+*.gif               filter=lfs diff=lfs merge=lfs -text
+*.png               filter=lfs diff=lfs merge=lfs -text
+*.tif               filter=lfs diff=lfs merge=lfs -text
+*.tiff              filter=lfs diff=lfs merge=lfs -text
+*.tga               filter=lfs diff=lfs merge=lfs -text
+*.webp              filter=lfs diff=lfs merge=lfs -text
+*.dds               filter=lfs diff=lfs merge=lfs -text
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@ -29,7 +29,7 @@
 #### **Running tests and Debugging**

 * Expected test output is pulled in as a submodule from the [ImageSharp.Tests.Images repository](https://github.com/SixLabors/Imagesharp.Tests.Images/tree/master/ReferenceOutput). To succesfully run tests, make sure that you have updated the submodules!
-* Debugging (running tests in Debug mode) is only supported on .NET Core 2.1, because of JIT Code Generation bugs like [dotnet/coreclr#16443](https://github.com/dotnet/coreclr/issues/16443) or [dotnet/coreclr#20657](https://github.com/dotnet/coreclr/issues/20657)
+* Debugging (running tests in Debug mode) is only supported on .NET Core 2.1+, because of JIT Code Generation bugs like [dotnet/coreclr#16443](https://github.com/dotnet/coreclr/issues/16443) or [dotnet/coreclr#20657](https://github.com/dotnet/coreclr/issues/20657)

 #### **Do you have questions about consuming the library or the source code?**

--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,7 +1,7 @@
 blank_issues_enabled: false
 contact_links:
    - name: Ask a Question
-      url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AHelp
+      url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AQ%26A
      about: Ask a question about this project.
    - name: Feature Request
      url: https://github.com/SixLabors/ImageSharp/discussions?discussions_q=category%3AIdeas
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@ -15,13 +15,29 @@ jobs:
            matrix:
                options:
                    - os: ubuntu-latest
-                      framework: netcoreapp3.1
+                      framework: net5.0
+                      runtime: -x64
+                      codecov: false
+                    - os: macos-latest
+                      framework: net5.0
                      runtime: -x64
                      codecov: false
                    - os: windows-latest
+                      framework: net5.0
+                      runtime: -x64
+                      codecov: false
+                    - os: ubuntu-latest
                      framework: netcoreapp3.1
                      runtime: -x64
                      codecov: true
+                    - os: macos-latest
+                      framework: netcoreapp3.1
+                      runtime: -x64
+                      codecov: false
+                    - os: windows-latest
+                      framework: netcoreapp3.1
+                      runtime: -x64
+                      codecov: false
                    - os: windows-latest
                      framework: netcoreapp2.1
                      runtime: -x64
@ -40,6 +56,20 @@ jobs:

        steps:
            - uses: actions/checkout@v2
+            
+            # See https://github.com/actions/checkout/issues/165#issuecomment-657673315
+            - name: Create LFS file list
+              run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id
+
+            - name: Restore LFS cache
+              uses: actions/cache@v2
+              id: lfs-cache
+              with:
+                path: .git/lfs
+                key: ${{ runner.os }}-lfs-${{ hashFiles('.lfs-assets-id') }}-v1
+
+            - name: Git LFS Pull
+              run: git lfs pull

            - name: Install NuGet
              uses: NuGet/setup-nuget@v1
@ -52,17 +82,34 @@ jobs:
                  git fetch --prune --unshallow
                  git submodule -q update --init --recursive

+            - name: Setup NuGet Cache
+              uses: actions/cache@v2
+              id: nuget-cache
+              with:
+                  path: ~/.nuget
+                  key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj', '**/*.props', '**/*.targets') }}
+                  restore-keys: ${{ runner.os }}-nuget-
+
            - name: Build
              shell: pwsh
              run: ./ci-build.ps1 "${{matrix.options.framework}}"
+              env:
+                SIXLABORS_TESTING: True

            - name: Test
              shell: pwsh
              run: ./ci-test.ps1 "${{matrix.options.os}}" "${{matrix.options.framework}}" "${{matrix.options.runtime}}" "${{matrix.options.codecov}}"
              env:
-                  CI: True
+                  SIXLABORS_TESTING: True
                  XUNIT_PATH: .\tests\ImageSharp.Tests # Required for xunit

+            - name: Export Failed Output
+              uses: actions/upload-artifact@v2
+              if: failure()
+              with:
+                  name: actual_output_${{ runner.os }}_${{ matrix.options.framework }}${{ matrix.options.runtime }}.zip
+                  path: tests/Images/ActualOutput/
+
            - name: Update Codecov
              uses: codecov/codecov-action@v1
              if: matrix.options.codecov == true && startsWith(github.repository, 'SixLabors')
--- a/.gitignore
+++ b/.gitignore
@ -221,3 +221,4 @@ artifacts/
 # Tests
 **/Images/ActualOutput
 **/Images/ReferenceOutput
+.DS_Store
--- a/.gitmodules
+++ b/.gitmodules
@ -1,7 +1,3 @@
-[submodule "tests/Images/External"]
-	path = tests/Images/External
-	url = https://github.com/SixLabors/Imagesharp.Tests.Images.git
-	branch = master
 [submodule "shared-infrastructure"]
 	path = shared-infrastructure
 	url = https://github.com/SixLabors/SharedInfrastructure
--- a/.runsettings
+++ b/.runsettings
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<RunSettings>
+    <RunConfiguration>
+        <!--Used in conjunction with ActiveIssueAttribute to skip tests with known issues-->
+        <TestCaseFilter>category!=failing</TestCaseFilter>
+    </RunConfiguration>
+</RunSettings>
--- a/Directory.Build.props
+++ b/Directory.Build.props
@ -10,131 +10,12 @@
    that is done by the file that imports us.
  -->

-  <!-- Default settings that are used by other settings -->
  <PropertyGroup>
-    <BaseArtifactsPath>$(MSBuildThisFileDirectory)artifacts/</BaseArtifactsPath>
-    <BaseArtifactsPathSuffix>$(SixLaborsProjectCategory)/$(MSBuildProjectName)</BaseArtifactsPathSuffix>
-    <RepositoryUrl Condition="'$(RepositoryUrl)' == ''">https://github.com/SixLabors/ImageSharp/</RepositoryUrl>
+    <!-- This MUST be defined before importing props. -->
+    <SixLaborsSolutionDirectory>$(MSBuildThisFileDirectory)</SixLaborsSolutionDirectory>
  </PropertyGroup>

-  <!-- Default settings that explicitly differ from the Sdk.props defaults  -->
-  <PropertyGroup>
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <BaseIntermediateOutputPath>$(BaseArtifactsPath)obj/$(BaseArtifactsPathSuffix)/</BaseIntermediateOutputPath>
-    <DebugType>portable</DebugType>
-    <DebugType Condition="'$(codecov)' != ''">full</DebugType>
-    <NullableContextOptions>disable</NullableContextOptions>
-    <PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
-    <SuppressNETCoreSdkPreviewMessage>true</SuppressNETCoreSdkPreviewMessage>
-  </PropertyGroup>
-
-  <!--
-    https://apisof.net/
-    +===================+=======+==========+=====================+=============+=================+====================+==============+=========+============|
-    | SUPPORTS          | MATHF | HASHCODE | EXTENDED_INTRINSICS | SPAN_STREAM | ENCODING_STRING | RUNTIME_INTRINSICS | CODECOVERAGE | HOTPATH | CREATESPAN |
-    +===================+=======+==========+=====================+=============+=================+====================+==============+=========|============|
-    | netcoreapp3.1     |   Y   |    Y     |         Y           |      Y      |        Y        |        Y           |      Y       |    Y    |      Y     |
-    | netcoreapp2.1     |   Y   |    Y     |         Y           |      Y      |        Y        |        N           |      Y       |    N    |      Y     |
-    | netcoreapp2.0     |   Y   |    N     |         N           |      N      |        N        |        N           |      Y       |    N    |      Y     |
-    | netstandard2.1    |   Y   |    Y     |         N           |      Y      |        Y        |        N           |      Y       |    N    |      Y     |
-    | netstandard2.0    |   N   |    N     |         N           |      N      |        N        |        N           |      Y       |    N    |      N     |
-    | netstandard1.3    |   N   |    N     |         N           |      N      |        N        |        N           |      N       |    N    |      N     |
-    | net472            |   N   |    N     |         Y           |      N      |        N        |        N           |      Y       |    N    |      N     |
-    +===================+=======+==========+=====================+=============+=================+====================+==============+=========|============|
-    -->
-
-  <PropertyGroup Condition="'$(TargetFramework)' == 'netcoreapp3.1'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_MATHF</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_HASHCODE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_EXTENDED_INTRINSICS</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_SPAN_STREAM</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_ENCODING_STRING</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_RUNTIME_INTRINSICS</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_HOTPATH</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CREATESPAN</DefineConstants>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(TargetFramework)' == 'netcoreapp2.1'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_MATHF</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_HASHCODE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_EXTENDED_INTRINSICS</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_SPAN_STREAM</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_ENCODING_STRING</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CREATESPAN</DefineConstants>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(TargetFramework)' == 'netcoreapp2.0'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_MATHF</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CREATESPAN</DefineConstants>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(TargetFramework)' == 'netstandard2.1'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_MATHF</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_HASHCODE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_SPAN_STREAM</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_ENCODING_STRING</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CREATESPAN</DefineConstants>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(TargetFramework)' == 'net472'">
-    <DefineConstants>$(DefineConstants);SUPPORTS_EXTENDED_INTRINSICS</DefineConstants>
-    <DefineConstants>$(DefineConstants);SUPPORTS_CODECOVERAGE</DefineConstants>
-  </PropertyGroup>
-
-  <!-- Default settings that explicitly differ from the Sdk.targets defaults-->
-  <PropertyGroup>
-    <Authors>Six Labors and contributors</Authors>
-    <BaseOutputPath>$(BaseArtifactsPath)bin/$(BaseArtifactsPathSuffix)/</BaseOutputPath>
-    <Company>Six Labors</Company>
-    <PackageOutputPath>$(BaseArtifactsPath)pkg/$(BaseArtifactsPathSuffix)/$(Configuration)/</PackageOutputPath>
-    <Product>SixLabors.ImageSharp</Product>
-    <VersionPrefix>0.0.1</VersionPrefix>
-    <VersionPrefix Condition="'$(packageversion)' != ''">$(PackageVersion)</VersionPrefix>
-    <VersionSuffix></VersionSuffix>
-  </PropertyGroup>
-
-  <!--MinVer Properties for versioning-->
-  <PropertyGroup>
-    <MinVerTagPrefix>v</MinVerTagPrefix>
-    <MinVerVerbosity>normal</MinVerVerbosity>
-  </PropertyGroup>
-
-  <!-- Default settings that are otherwise undefined -->
-  <PropertyGroup>
-    <Copyright>Copyright © Six Labors</Copyright>
-    <Features>strict;IOperation</Features>
-    <HighEntropyVA>true</HighEntropyVA>
-    <LangVersion>8.0</LangVersion>
-    <NeutralLanguage>en</NeutralLanguage>
-    <OverwriteReadOnlyFiles>true</OverwriteReadOnlyFiles>
-    <PackageIcon>sixlabors.imagesharp.128.png</PackageIcon>
-    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
-    <PackageProjectUrl>$(RepositoryUrl)</PackageProjectUrl>
-    <ProduceReferenceAssembly>true</ProduceReferenceAssembly>
-    <RepositoryType>git</RepositoryType>
-    <RestoreSources>
-      https://www.myget.org/F/sixlabors/api/v3/index.json;
-      https://api.nuget.org/v3/index.json;
-      <!-- Contains RemoteExecutor. Taken from: https://github.com/dotnet/runtime/blob/master/NuGet.config -->
-      https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json;
-    </RestoreSources>
-    <SignAssembly>true</SignAssembly>
-    <AssemblyOriginatorKeyFile>$(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk</AssemblyOriginatorKeyFile>
-    <SixLaborsPublicKey>00240000048000009400000006020000002400005253413100040000010001000147e6fe6766715eec6cfed61f1e7dcdbf69748a3e355c67e9d8dfd953acab1d5e012ba34b23308166fdc61ee1d0390d5f36d814a6091dd4b5ed9eda5a26afced924c683b4bfb4b3d64b0586a57eff9f02b1f84e3cb0ddd518bd1697f2c84dcbb97eb8bb5c7801be12112ed0ec86db934b0e9a5171e6bb1384b6d2f7d54dfa97</SixLaborsPublicKey>
-    <UseSharedCompilation>true</UseSharedCompilation>
-  </PropertyGroup>
-
-  <!-- Package references and additional files which are consumed by all projects -->
-  <ItemGroup>
-    <PackageReference Include="Microsoft.Net.Compilers.Toolset" IsImplicitlyDefined="true" />
-    <PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" IsImplicitlyDefined="true"  />
-    <PackageReference Include="StyleCop.Analyzers" IsImplicitlyDefined="true"  />
-    <AdditionalFiles Include="$(MSBuildThisFileDirectory)shared-infrastructure\stylecop.json" />
-    <!--NuGet package icon source-->
-    <None Include="$(MSBuildThisFileDirectory)shared-infrastructure\branding\icons\imagesharp\sixlabors.imagesharp.128.png" Pack="true" PackagePath="" />
-  </ItemGroup>
+  <!-- Import the shared global .props file -->
+  <Import Project="$(MSBuildThisFileDirectory)shared-infrastructure\msbuild\props\SixLabors.Global.props" />

 </Project>
--- a/Directory.Build.targets
+++ b/Directory.Build.targets
@ -5,39 +5,12 @@
    Directory.Build.targets is automatically picked up and imported by
    Microsoft.Common.targets. This file needs to exist, even if empty so that
    files in the parent directory tree, with the same name, are not imported
-    instead. The import fairly late and most other props/targets will have been
+    instead. They import fairly late and most other props/targets will have been
    imported beforehand. We also don't need to add ourselves to
    MSBuildAllProjects, as that is done by the file that imports us.
  -->

-  <!-- Settings that append the existing setting value -->
-  <PropertyGroup>
-    <DefineConstants>$(DefineConstants);$(OS)</DefineConstants>
-  </PropertyGroup>
-
-  <!-- Package versions for package references across all projects -->
-  <ItemGroup>
-    <!--Global Dependencies-->
-    <PackageReference Update="Microsoft.Net.Compilers.Toolset" PrivateAssets="All" Version="3.3.1" />
-    <PackageReference Update="Microsoft.NETFramework.ReferenceAssemblies" PrivateAssets="All" Version="1.0.0" />
-    <PackageReference Update="StyleCop.Analyzers" PrivateAssets="All" Version="1.1.118" />
-    
-    <!--Src Dependencies-->
-    <PackageReference Update="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="All"/>
-    <PackageReference Update="MinVer" PrivateAssets="All" Version="2.3.0" />
-    <PackageReference Update="System.Buffers" Version="4.5.1" />
-    <PackageReference Update="System.IO.Compression" Version="4.3.0" />
-    <PackageReference Update="System.IO.UnmanagedMemoryStream" Version="4.3.0" />
-    <PackageReference Update="System.Numerics.Vectors" Version="4.5.0" />
-    <!--
-    Do no update System.Memory as it currently breaks the CI build
-    with FileNotFoundException for SixLabors.ImageSharp.Tests.dll.config
-    -->
-    <PackageReference Update="System.Memory" Version="4.5.3" />
-    <PackageReference Update="System.Runtime.CompilerServices.Unsafe" Version="4.7.1" />
-    <PackageReference Update="System.Threading.Tasks.Parallel" Version="4.3.0" />
-    <PackageReference Update="System.ValueTuple" Version="4.5.0" />
-
-  </ItemGroup>
+  <!-- Import the shared global .targets file -->
+  <Import Project="$(MSBuildThisFileDirectory)shared-infrastructure\msbuild\targets\SixLabors.Global.targets"/>

 </Project>
--- a/ImageSharp.sln
+++ b/ImageSharp.sln
@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 16
 VisualStudioVersion = 16.0.28902.138
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_root", "_root", "{C317F1B1-D75E-4C6D-83EB-80367343E0D7}"
 	ProjectSection(SolutionItems) = preProject
 		.editorconfig = .editorconfig
 		.gitattributes = .gitattributes
 		.gitignore = .gitignore
 		.gitmodules = .gitmodules
+		.runsettings = .runsettings
 		ci-build.ps1 = ci-build.ps1
 		ci-pack.ps1 = ci-pack.ps1
 		ci-test.ps1 = ci-test.ps1
 		Directory.Build.props = Directory.Build.props
 		Directory.Build.targets = Directory.Build.targets
-		GitVersion.yml = GitVersion.yml
 		LICENSE = LICENSE
 		README.md = README.md
 	EndProjectSection
@ -45,6 +45,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{56801022
 	ProjectSection(SolutionItems) = preProject
 		tests\Directory.Build.props = tests\Directory.Build.props
 		tests\Directory.Build.targets = tests\Directory.Build.targets
+		tests\ImageSharp.Tests.ruleset = tests\ImageSharp.Tests.ruleset
 	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Images", "Images", "{FA55F5DE-11A6-487D-ABA4-BC93A02717DD}"
@ -53,16 +54,23 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Input", "Input", "{9DA226A1
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Bmp", "Bmp", "{1A82C5F6-90E0-4E97-BE16-A825C046B493}"
 	ProjectSection(SolutionItems) = preProject
+		tests\Images\Input\Bmp\9S.BMP = tests\Images\Input\Bmp\9S.BMP
+		tests\Images\Input\Bmp\ba-bm.bmp = tests\Images\Input\Bmp\ba-bm.bmp
 		tests\Images\Input\Bmp\BitmapCoreHeaderQR.bmp = tests\Images\Input\Bmp\BitmapCoreHeaderQR.bmp
 		tests\Images\Input\Bmp\BITMAPV5HEADER.bmp = tests\Images\Input\Bmp\BITMAPV5HEADER.bmp
 		tests\Images\Input\Bmp\Car.bmp = tests\Images\Input\Bmp\Car.bmp
+		tests\Images\Input\Bmp\DIAMOND.BMP = tests\Images\Input\Bmp\DIAMOND.BMP
 		tests\Images\Input\Bmp\F.bmp = tests\Images\Input\Bmp\F.bmp
+		tests\Images\Input\Bmp\GMARBLE.BMP = tests\Images\Input\Bmp\GMARBLE.BMP
+		tests\Images\Input\Bmp\invalidPaletteSize.bmp = tests\Images\Input\Bmp\invalidPaletteSize.bmp
 		tests\Images\Input\Bmp\issue735.bmp = tests\Images\Input\Bmp\issue735.bmp
 		tests\Images\Input\Bmp\neg_height.bmp = tests\Images\Input\Bmp\neg_height.bmp
 		tests\Images\Input\Bmp\pal1.bmp = tests\Images\Input\Bmp\pal1.bmp
 		tests\Images\Input\Bmp\pal1p1.bmp = tests\Images\Input\Bmp\pal1p1.bmp
 		tests\Images\Input\Bmp\pal4.bmp = tests\Images\Input\Bmp\pal4.bmp
 		tests\Images\Input\Bmp\pal4rle.bmp = tests\Images\Input\Bmp\pal4rle.bmp
+		tests\Images\Input\Bmp\pal4rlecut.bmp = tests\Images\Input\Bmp\pal4rlecut.bmp
+		tests\Images\Input\Bmp\pal4rletrns.bmp = tests\Images\Input\Bmp\pal4rletrns.bmp
 		tests\Images\Input\Bmp\pal8-0.bmp = tests\Images\Input\Bmp\pal8-0.bmp
 		tests\Images\Input\Bmp\pal8gs.bmp = tests\Images\Input\Bmp\pal8gs.bmp
 		tests\Images\Input\Bmp\pal8offs.bmp = tests\Images\Input\Bmp\pal8offs.bmp
@ -70,26 +78,45 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Bmp", "Bmp", "{1A82C5F6-90E
 		tests\Images\Input\Bmp\pal8os2v1_winv2.bmp = tests\Images\Input\Bmp\pal8os2v1_winv2.bmp
 		tests\Images\Input\Bmp\pal8os2v2-16.bmp = tests\Images\Input\Bmp\pal8os2v2-16.bmp
 		tests\Images\Input\Bmp\pal8os2v2.bmp = tests\Images\Input\Bmp\pal8os2v2.bmp
+		tests\Images\Input\Bmp\pal8oversizepal.bmp = tests\Images\Input\Bmp\pal8oversizepal.bmp
+		tests\Images\Input\Bmp\pal8rlecut.bmp = tests\Images\Input\Bmp\pal8rlecut.bmp
+		tests\Images\Input\Bmp\pal8rletrns.bmp = tests\Images\Input\Bmp\pal8rletrns.bmp
 		tests\Images\Input\Bmp\pal8v4.bmp = tests\Images\Input\Bmp\pal8v4.bmp
 		tests\Images\Input\Bmp\pal8v5.bmp = tests\Images\Input\Bmp\pal8v5.bmp
+		tests\Images\Input\Bmp\PINES.BMP = tests\Images\Input\Bmp\PINES.BMP
 		tests\Images\Input\Bmp\rgb16-565.bmp = tests\Images\Input\Bmp\rgb16-565.bmp
 		tests\Images\Input\Bmp\rgb16-565pal.bmp = tests\Images\Input\Bmp\rgb16-565pal.bmp
 		tests\Images\Input\Bmp\rgb16.bmp = tests\Images\Input\Bmp\rgb16.bmp
 		tests\Images\Input\Bmp\rgb16bfdef.bmp = tests\Images\Input\Bmp\rgb16bfdef.bmp
 		tests\Images\Input\Bmp\rgb24.bmp = tests\Images\Input\Bmp\rgb24.bmp
+		tests\Images\Input\Bmp\rgb24jpeg.bmp = tests\Images\Input\Bmp\rgb24jpeg.bmp
+		tests\Images\Input\Bmp\rgb24largepal.bmp = tests\Images\Input\Bmp\rgb24largepal.bmp
+		tests\Images\Input\Bmp\rgb24png.bmp = tests\Images\Input\Bmp\rgb24png.bmp
+		tests\Images\Input\Bmp\rgb24rle24.bmp = tests\Images\Input\Bmp\rgb24rle24.bmp
 		tests\Images\Input\Bmp\rgb32.bmp = tests\Images\Input\Bmp\rgb32.bmp
 		tests\Images\Input\Bmp\rgb32bf.bmp = tests\Images\Input\Bmp\rgb32bf.bmp
 		tests\Images\Input\Bmp\rgb32bfdef.bmp = tests\Images\Input\Bmp\rgb32bfdef.bmp
+		tests\Images\Input\Bmp\rgb32h52.bmp = tests\Images\Input\Bmp\rgb32h52.bmp
 		tests\Images\Input\Bmp\rgba32-1010102.bmp = tests\Images\Input\Bmp\rgba32-1010102.bmp
 		tests\Images\Input\Bmp\rgba32.bmp = tests\Images\Input\Bmp\rgba32.bmp
 		tests\Images\Input\Bmp\rgba32abf.bmp = tests\Images\Input\Bmp\rgba32abf.bmp
 		tests\Images\Input\Bmp\rgba32h56.bmp = tests\Images\Input\Bmp\rgba32h56.bmp
+		tests\Images\Input\Bmp\rgba32v4.bmp = tests\Images\Input\Bmp\rgba32v4.bmp
+		tests\Images\Input\Bmp\rle24rlecut.bmp = tests\Images\Input\Bmp\rle24rlecut.bmp
+		tests\Images\Input\Bmp\rle24rletrns.bmp = tests\Images\Input\Bmp\rle24rletrns.bmp
+		tests\Images\Input\Bmp\rle4-delta-320x240.bmp = tests\Images\Input\Bmp\rle4-delta-320x240.bmp
+		tests\Images\Input\Bmp\rle8-blank-160x120.bmp = tests\Images\Input\Bmp\rle8-blank-160x120.bmp
+		tests\Images\Input\Bmp\rle8-delta-320x240.bmp = tests\Images\Input\Bmp\rle8-delta-320x240.bmp
 		tests\Images\Input\Bmp\RunLengthEncoded-inverted.bmp = tests\Images\Input\Bmp\RunLengthEncoded-inverted.bmp
 		tests\Images\Input\Bmp\RunLengthEncoded.bmp = tests\Images\Input\Bmp\RunLengthEncoded.bmp
+		tests\Images\Input\Bmp\SKATER.BMP = tests\Images\Input\Bmp\SKATER.BMP
+		tests\Images\Input\Bmp\SPADE.BMP = tests\Images\Input\Bmp\SPADE.BMP
+		tests\Images\Input\Bmp\SUNFLOW.BMP = tests\Images\Input\Bmp\SUNFLOW.BMP
 		tests\Images\Input\Bmp\test16-inverted.bmp = tests\Images\Input\Bmp\test16-inverted.bmp
 		tests\Images\Input\Bmp\test16.bmp = tests\Images\Input\Bmp\test16.bmp
 		tests\Images\Input\Bmp\test8-inverted.bmp = tests\Images\Input\Bmp\test8-inverted.bmp
 		tests\Images\Input\Bmp\test8.bmp = tests\Images\Input\Bmp\test8.bmp
+		tests\Images\Input\Bmp\WARPD.BMP = tests\Images\Input\Bmp\WARPD.BMP
 	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Gif", "Gif", "{EE3FB0B3-1C31-41E9-93AB-BA800560A868}"
@ -98,8 +125,16 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Gif", "Gif", "{EE3FB0B3-1C3
 		tests\Images\Input\Gif\base_4x1.gif = tests\Images\Input\Gif\base_4x1.gif
 		tests\Images\Input\Gif\cheers.gif = tests\Images\Input\Gif\cheers.gif
 		tests\Images\Input\Gif\giphy.gif = tests\Images\Input\Gif\giphy.gif
+		tests\Images\Input\Gif\GlobalQuantizationTest.gif = tests\Images\Input\Gif\GlobalQuantizationTest.gif
+		tests\Images\Input\Gif\image-zero-height.gif = tests\Images\Input\Gif\image-zero-height.gif
+		tests\Images\Input\Gif\image-zero-size.gif = tests\Images\Input\Gif\image-zero-size.gif
+		tests\Images\Input\Gif\image-zero-width.gif = tests\Images\Input\Gif\image-zero-width.gif
 		tests\Images\Input\Gif\kumin.gif = tests\Images\Input\Gif\kumin.gif
+		tests\Images\Input\Gif\large_comment.gif = tests\Images\Input\Gif\large_comment.gif
 		tests\Images\Input\Gif\leo.gif = tests\Images\Input\Gif\leo.gif
+		tests\Images\Input\Gif\max-height.gif = tests\Images\Input\Gif\max-height.gif
+		tests\Images\Input\Gif\max-width.gif = tests\Images\Input\Gif\max-width.gif
+		tests\Images\Input\Gif\receipt.gif = tests\Images\Input\Gif\receipt.gif
 		tests\Images\Input\Gif\rings.gif = tests\Images\Input\Gif\rings.gif
 		tests\Images\Input\Gif\trans.gif = tests\Images\Input\Gif\trans.gif
 	EndProjectSection
@ -115,6 +150,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Jpg", "Jpg", "{DB21FED7-E8C
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "baseline", "baseline", "{195BA3D3-3E9F-4BC5-AB40-5F9FEB638146}"
 	ProjectSection(SolutionItems) = preProject
+		tests\Images\Input\Jpg\baseline\640px-Unequalized_Hawkes_Bay_NZ.jpg = tests\Images\Input\Jpg\baseline\640px-Unequalized_Hawkes_Bay_NZ.jpg
 		tests\Images\Input\Jpg\baseline\AsianCarvingLowContrast.jpg = tests\Images\Input\Jpg\baseline\AsianCarvingLowContrast.jpg
 		tests\Images\Input\Jpg\baseline\badeof.jpg = tests\Images\Input\Jpg\baseline\badeof.jpg
 		tests\Images\Input\Jpg\baseline\badrst.jpg = tests\Images\Input\Jpg\baseline\badrst.jpg
@ -124,6 +160,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "baseline", "baseline", "{19
 		tests\Images\Input\Jpg\baseline\Floorplan.jpg = tests\Images\Input\Jpg\baseline\Floorplan.jpg
 		tests\Images\Input\Jpg\baseline\gamma_dalai_lama_gray.jpg = tests\Images\Input\Jpg\baseline\gamma_dalai_lama_gray.jpg
 		tests\Images\Input\Jpg\baseline\Hiyamugi.jpg = tests\Images\Input\Jpg\baseline\Hiyamugi.jpg
+		tests\Images\Input\Jpg\baseline\iptc-psAPP13-wIPTCempty.jpg = tests\Images\Input\Jpg\baseline\iptc-psAPP13-wIPTCempty.jpg
+		tests\Images\Input\Jpg\baseline\iptc.jpg = tests\Images\Input\Jpg\baseline\iptc.jpg
 		tests\Images\Input\Jpg\baseline\jpeg400jfif.jpg = tests\Images\Input\Jpg\baseline\jpeg400jfif.jpg
 		tests\Images\Input\Jpg\baseline\jpeg420exif.jpg = tests\Images\Input\Jpg\baseline\jpeg420exif.jpg
 		tests\Images\Input\Jpg\baseline\jpeg420small.jpg = tests\Images\Input\Jpg\baseline\jpeg420small.jpg
@ -166,6 +204,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JpegSnoopReports", "JpegSno
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "issues", "issues", "{5C9B689F-B96D-47BE-A208-C23B1B2A8570}"
 	ProjectSection(SolutionItems) = preProject
+		tests\Images\Input\Jpg\issues\issue-1076-invalid-subsampling.jpg = tests\Images\Input\Jpg\issues\issue-1076-invalid-subsampling.jpg
+		tests\Images\Input\Jpg\issues\issue-1221-identify-multi-frame.jpg = tests\Images\Input\Jpg\issues\issue-1221-identify-multi-frame.jpg
+		tests\Images\Input\Jpg\issues\issue1006-incorrect-resize.jpg = tests\Images\Input\Jpg\issues\issue1006-incorrect-resize.jpg
+		tests\Images\Input\Jpg\issues\issue1049-exif-resize.jpg = tests\Images\Input\Jpg\issues\issue1049-exif-resize.jpg
 		tests\Images\Input\Jpg\issues\Issue159-MissingFF00-Progressive-Bedroom.jpg = tests\Images\Input\Jpg\issues\Issue159-MissingFF00-Progressive-Bedroom.jpg
 		tests\Images\Input\Jpg\issues\Issue159-MissingFF00-Progressive-Girl.jpg = tests\Images\Input\Jpg\issues\Issue159-MissingFF00-Progressive-Girl.jpg
 		tests\Images\Input\Jpg\issues\Issue178-BadCoeffsProgressive-Lemon.jpg = tests\Images\Input\Jpg\issues\Issue178-BadCoeffsProgressive-Lemon.jpg
@ -213,6 +255,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "fuzz", "fuzz", "{516A3532-6
 		tests\Images\Input\Jpg\issues\fuzz\Issue826-ArgumentException-C.jpg = tests\Images\Input\Jpg\issues\fuzz\Issue826-ArgumentException-C.jpg
 		tests\Images\Input\Jpg\issues\fuzz\Issue827-AccessViolationException.jpg = tests\Images\Input\Jpg\issues\fuzz\Issue827-AccessViolationException.jpg
 		tests\Images\Input\Jpg\issues\fuzz\Issue839-ExecutionEngineException.jpg = tests\Images\Input\Jpg\issues\fuzz\Issue839-ExecutionEngineException.jpg
+		tests\Images\Input\Jpg\issues\fuzz\Issue922-AccessViolationException.jpg = tests\Images\Input\Jpg\issues\fuzz\Issue922-AccessViolationException.jpg
 	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JpegSnoopReports", "JpegSnoopReports", "{714CDEA1-9AE6-4F76-B8B1-A7DB8C1DB82F}"
@ -260,16 +303,24 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Png", "Png", "{E1C42A6F-913
 	ProjectSection(SolutionItems) = preProject
 		tests\Images\Input\Png\banner7-adam.png = tests\Images\Input\Png\banner7-adam.png
 		tests\Images\Input\Png\banner8-index.png = tests\Images\Input\Png\banner8-index.png
+		tests\Images\Input\Png\basn3p01.png = tests\Images\Input\Png\basn3p01.png
+		tests\Images\Input\Png\basn3p02.png = tests\Images\Input\Png\basn3p02.png
+		tests\Images\Input\Png\basn3p04.png = tests\Images\Input\Png\basn3p04.png
+		tests\Images\Input\Png\basn3p08.png = tests\Images\Input\Png\basn3p08.png
 		tests\Images\Input\Png\big-corrupted-chunk.png = tests\Images\Input\Png\big-corrupted-chunk.png
+		tests\Images\Input\Png\bike-small.png = tests\Images\Input\Png\bike-small.png
 		tests\Images\Input\Png\Bike.png = tests\Images\Input\Png\Bike.png
 		tests\Images\Input\Png\BikeGrayscale.png = tests\Images\Input\Png\BikeGrayscale.png
 		tests\Images\Input\Png\blur.png = tests\Images\Input\Png\blur.png
 		tests\Images\Input\Png\bpp1.png = tests\Images\Input\Png\bpp1.png
+		tests\Images\Input\Png\Bradley01.png = tests\Images\Input\Png\Bradley01.png
+		tests\Images\Input\Png\Bradley02.png = tests\Images\Input\Png\Bradley02.png
 		tests\Images\Input\Png\CalliphoraPartial.png = tests\Images\Input\Png\CalliphoraPartial.png
 		tests\Images\Input\Png\CalliphoraPartialGrayscale.png = tests\Images\Input\Png\CalliphoraPartialGrayscale.png
 		tests\Images\Input\Png\chunklength1.png = tests\Images\Input\Png\chunklength1.png
 		tests\Images\Input\Png\chunklength2.png = tests\Images\Input\Png\chunklength2.png
 		tests\Images\Input\Png\cross.png = tests\Images\Input\Png\cross.png
+		tests\Images\Input\Png\david.png = tests\Images\Input\Png\david.png
 		tests\Images\Input\Png\ducky.png = tests\Images\Input\Png\ducky.png
 		tests\Images\Input\Png\filter0.png = tests\Images\Input\Png\filter0.png
 		tests\Images\Input\Png\filter1.png = tests\Images\Input\Png\filter1.png
@ -292,6 +343,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Png", "Png", "{E1C42A6F-913
 		tests\Images\Input\Png\iftbbn0g04.png = tests\Images\Input\Png\iftbbn0g04.png
 		tests\Images\Input\Png\indexed.png = tests\Images\Input\Png\indexed.png
 		tests\Images\Input\Png\interlaced.png = tests\Images\Input\Png\interlaced.png
+		tests\Images\Input\Png\InvalidTextData.png = tests\Images\Input\Png\InvalidTextData.png
 		tests\Images\Input\Png\kaboom.png = tests\Images\Input\Png\kaboom.png
 		tests\Images\Input\Png\low-variance.png = tests\Images\Input\Png\low-variance.png
 		tests\Images\Input\Png\palette-8bpp.png = tests\Images\Input\Png\palette-8bpp.png
@ -299,6 +351,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Png", "Png", "{E1C42A6F-913
 		tests\Images\Input\Png\pd-source.png = tests\Images\Input\Png\pd-source.png
 		tests\Images\Input\Png\pd.png = tests\Images\Input\Png\pd.png
 		tests\Images\Input\Png\pl.png = tests\Images\Input\Png\pl.png
+		tests\Images\Input\Png\PngWithMetaData.png = tests\Images\Input\Png\PngWithMetaData.png
 		tests\Images\Input\Png\pp.png = tests\Images\Input\Png\pp.png
 		tests\Images\Input\Png\rainbow.png = tests\Images\Input\Png\rainbow.png
 		tests\Images\Input\Png\ratio-1x4.png = tests\Images\Input\Png\ratio-1x4.png
@ -316,7 +369,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Png", "Png", "{E1C42A6F-913
 		tests\Images\Input\Png\versioning-1_2.png = tests\Images\Input\Png\versioning-1_2.png
 		tests\Images\Input\Png\vim16x16_1.png = tests\Images\Input\Png\vim16x16_1.png
 		tests\Images\Input\Png\vim16x16_2.png = tests\Images\Input\Png\vim16x16_2.png
+		tests\Images\Input\Png\xc1n0g08.png = tests\Images\Input\Png\xc1n0g08.png
+		tests\Images\Input\Png\xc9n2c08.png = tests\Images\Input\Png\xc9n2c08.png
+		tests\Images\Input\Png\xd0n2c08.png = tests\Images\Input\Png\xd0n2c08.png
+		tests\Images\Input\Png\xd3n2c08.png = tests\Images\Input\Png\xd3n2c08.png
+		tests\Images\Input\Png\xdtn0g01.png = tests\Images\Input\Png\xdtn0g01.png
 		tests\Images\Input\Png\zlib-overflow.png = tests\Images\Input\Png\zlib-overflow.png
+		tests\Images\Input\Png\zlib-overflow2.png = tests\Images\Input\Png\zlib-overflow2.png
+		tests\Images\Input\Png\zlib-ztxt-bad-header.png = tests\Images\Input\Png\zlib-ztxt-bad-header.png
 	EndProjectSection
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageSharp.Tests", "tests\ImageSharp.Tests\ImageSharp.Tests.csproj", "{EA3000E9-2A91-4EC4-8A68-E566DEBDC4F6}"
@ -332,6 +392,87 @@ Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "SharedInfrastructure", "sha
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageSharp.Tests.ProfilingSandbox", "tests\ImageSharp.Tests.ProfilingSandbox\ImageSharp.Tests.ProfilingSandbox.csproj", "{FC527290-2F22-432C-B77B-6E815726B02C}"
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "issues", "issues", "{670DD46C-82E9-499A-B2D2-00A802ED0141}"
+	ProjectSection(SolutionItems) = preProject
+		tests\Images\Input\Png\issues\Issue_1014_1.png = tests\Images\Input\Png\issues\Issue_1014_1.png
+		tests\Images\Input\Png\issues\Issue_1014_2.png = tests\Images\Input\Png\issues\Issue_1014_2.png
+		tests\Images\Input\Png\issues\Issue_1014_3.png = tests\Images\Input\Png\issues\Issue_1014_3.png
+		tests\Images\Input\Png\issues\Issue_1014_4.png = tests\Images\Input\Png\issues\Issue_1014_4.png
+		tests\Images\Input\Png\issues\Issue_1014_5.png = tests\Images\Input\Png\issues\Issue_1014_5.png
+		tests\Images\Input\Png\issues\Issue_1014_6.png = tests\Images\Input\Png\issues\Issue_1014_6.png
+		tests\Images\Input\Png\issues\Issue_1047.png = tests\Images\Input\Png\issues\Issue_1047.png
+		tests\Images\Input\Png\issues\Issue_1127.png = tests\Images\Input\Png\issues\Issue_1127.png
+		tests\Images\Input\Png\issues\Issue_1177_1.png = tests\Images\Input\Png\issues\Issue_1177_1.png
+		tests\Images\Input\Png\issues\Issue_1177_2.png = tests\Images\Input\Png\issues\Issue_1177_2.png
+		tests\Images\Input\Png\issues\Issue_410.png = tests\Images\Input\Png\issues\Issue_410.png
+		tests\Images\Input\Png\issues\Issue_935.png = tests\Images\Input\Png\issues\Issue_935.png
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tga", "Tga", "{5DFC394F-136F-4B76-9BCA-3BA786515EFC}"
+	ProjectSection(SolutionItems) = preProject
+		tests\Images\Input\Tga\16bit_noalphabits.tga = tests\Images\Input\Tga\16bit_noalphabits.tga
+		tests\Images\Input\Tga\16bit_rle_noalphabits.tga = tests\Images\Input\Tga\16bit_rle_noalphabits.tga
+		tests\Images\Input\Tga\32bit_no_alphabits.tga = tests\Images\Input\Tga\32bit_no_alphabits.tga
+		tests\Images\Input\Tga\32bit_rle_no_alphabits.tga = tests\Images\Input\Tga\32bit_rle_no_alphabits.tga
+		tests\Images\Input\Tga\ccm8.tga = tests\Images\Input\Tga\ccm8.tga
+		tests\Images\Input\Tga\grayscale_a_LL.tga = tests\Images\Input\Tga\grayscale_a_LL.tga
+		tests\Images\Input\Tga\grayscale_a_LR.tga = tests\Images\Input\Tga\grayscale_a_LR.tga
+		tests\Images\Input\Tga\grayscale_a_rle_LL.tga = tests\Images\Input\Tga\grayscale_a_rle_LL.tga
+		tests\Images\Input\Tga\grayscale_a_rle_LR.tga = tests\Images\Input\Tga\grayscale_a_rle_LR.tga
+		tests\Images\Input\Tga\grayscale_a_rle_UL.tga = tests\Images\Input\Tga\grayscale_a_rle_UL.tga
+		tests\Images\Input\Tga\grayscale_a_rle_UR.tga = tests\Images\Input\Tga\grayscale_a_rle_UR.tga
+		tests\Images\Input\Tga\grayscale_a_UL.tga = tests\Images\Input\Tga\grayscale_a_UL.tga
+		tests\Images\Input\Tga\grayscale_a_UR.tga = tests\Images\Input\Tga\grayscale_a_UR.tga
+		tests\Images\Input\Tga\grayscale_LL.tga = tests\Images\Input\Tga\grayscale_LL.tga
+		tests\Images\Input\Tga\grayscale_LR.tga = tests\Images\Input\Tga\grayscale_LR.tga
+		tests\Images\Input\Tga\grayscale_rle_LR.tga = tests\Images\Input\Tga\grayscale_rle_LR.tga
+		tests\Images\Input\Tga\grayscale_rle_UL.tga = tests\Images\Input\Tga\grayscale_rle_UL.tga
+		tests\Images\Input\Tga\grayscale_rle_UR.tga = tests\Images\Input\Tga\grayscale_rle_UR.tga
+		tests\Images\Input\Tga\grayscale_UL.tga = tests\Images\Input\Tga\grayscale_UL.tga
+		tests\Images\Input\Tga\grayscale_UR.tga = tests\Images\Input\Tga\grayscale_UR.tga
+		tests\Images\Input\Tga\indexed_a_LL.tga = tests\Images\Input\Tga\indexed_a_LL.tga
+		tests\Images\Input\Tga\indexed_a_LR.tga = tests\Images\Input\Tga\indexed_a_LR.tga
+		tests\Images\Input\Tga\indexed_a_rle_LL.tga = tests\Images\Input\Tga\indexed_a_rle_LL.tga
+		tests\Images\Input\Tga\indexed_a_rle_LR.tga = tests\Images\Input\Tga\indexed_a_rle_LR.tga
+		tests\Images\Input\Tga\indexed_a_rle_UL.tga = tests\Images\Input\Tga\indexed_a_rle_UL.tga
+		tests\Images\Input\Tga\indexed_a_rle_UR.tga = tests\Images\Input\Tga\indexed_a_rle_UR.tga
+		tests\Images\Input\Tga\indexed_a_UL.tga = tests\Images\Input\Tga\indexed_a_UL.tga
+		tests\Images\Input\Tga\indexed_a_UR.tga = tests\Images\Input\Tga\indexed_a_UR.tga
+		tests\Images\Input\Tga\indexed_LR.tga = tests\Images\Input\Tga\indexed_LR.tga
+		tests\Images\Input\Tga\indexed_rle_LL.tga = tests\Images\Input\Tga\indexed_rle_LL.tga
+		tests\Images\Input\Tga\indexed_rle_LR.tga = tests\Images\Input\Tga\indexed_rle_LR.tga
+		tests\Images\Input\Tga\indexed_rle_UL.tga = tests\Images\Input\Tga\indexed_rle_UL.tga
+		tests\Images\Input\Tga\indexed_rle_UR.tga = tests\Images\Input\Tga\indexed_rle_UR.tga
+		tests\Images\Input\Tga\indexed_UL.tga = tests\Images\Input\Tga\indexed_UL.tga
+		tests\Images\Input\Tga\indexed_UR.tga = tests\Images\Input\Tga\indexed_UR.tga
+		tests\Images\Input\Tga\rgb15.tga = tests\Images\Input\Tga\rgb15.tga
+		tests\Images\Input\Tga\rgb15rle.tga = tests\Images\Input\Tga\rgb15rle.tga
+		tests\Images\Input\Tga\rgb24_top_left.tga = tests\Images\Input\Tga\rgb24_top_left.tga
+		tests\Images\Input\Tga\rgb_a_LL.tga = tests\Images\Input\Tga\rgb_a_LL.tga
+		tests\Images\Input\Tga\rgb_a_LR.tga = tests\Images\Input\Tga\rgb_a_LR.tga
+		tests\Images\Input\Tga\rgb_a_rle_LR.tga = tests\Images\Input\Tga\rgb_a_rle_LR.tga
+		tests\Images\Input\Tga\rgb_a_rle_UL.tga = tests\Images\Input\Tga\rgb_a_rle_UL.tga
+		tests\Images\Input\Tga\rgb_a_rle_UR.tga = tests\Images\Input\Tga\rgb_a_rle_UR.tga
+		tests\Images\Input\Tga\rgb_a_UL.tga = tests\Images\Input\Tga\rgb_a_UL.tga
+		tests\Images\Input\Tga\rgb_a_UR.tga = tests\Images\Input\Tga\rgb_a_UR.tga
+		tests\Images\Input\Tga\rgb_LR.tga = tests\Images\Input\Tga\rgb_LR.tga
+		tests\Images\Input\Tga\rgb_rle_LR.tga = tests\Images\Input\Tga\rgb_rle_LR.tga
+		tests\Images\Input\Tga\rgb_rle_UR.tga = tests\Images\Input\Tga\rgb_rle_UR.tga
+		tests\Images\Input\Tga\rgb_UR.tga = tests\Images\Input\Tga\rgb_UR.tga
+		tests\Images\Input\Tga\targa_16bit.tga = tests\Images\Input\Tga\targa_16bit.tga
+		tests\Images\Input\Tga\targa_16bit_pal.tga = tests\Images\Input\Tga\targa_16bit_pal.tga
+		tests\Images\Input\Tga\targa_16bit_rle.tga = tests\Images\Input\Tga\targa_16bit_rle.tga
+		tests\Images\Input\Tga\targa_24bit.tga = tests\Images\Input\Tga\targa_24bit.tga
+		tests\Images\Input\Tga\targa_24bit_pal.tga = tests\Images\Input\Tga\targa_24bit_pal.tga
+		tests\Images\Input\Tga\targa_24bit_pal_origin_topleft.tga = tests\Images\Input\Tga\targa_24bit_pal_origin_topleft.tga
+		tests\Images\Input\Tga\targa_24bit_rle.tga = tests\Images\Input\Tga\targa_24bit_rle.tga
+		tests\Images\Input\Tga\targa_24bit_rle_origin_topleft.tga = tests\Images\Input\Tga\targa_24bit_rle_origin_topleft.tga
+		tests\Images\Input\Tga\targa_32bit.tga = tests\Images\Input\Tga\targa_32bit.tga
+		tests\Images\Input\Tga\targa_32bit_rle.tga = tests\Images\Input\Tga\targa_32bit_rle.tga
+		tests\Images\Input\Tga\targa_8bit.tga = tests\Images\Input\Tga\targa_8bit.tga
+		tests\Images\Input\Tga\targa_8bit_rle.tga = tests\Images\Input\Tga\targa_8bit_rle.tga
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SharedMSBuildProjectFiles) = preSolution
 		shared-infrastructure\src\SharedInfrastructure\SharedInfrastructure.projitems*{2aa31a1f-142c-43f4-8687-09abca4b3a26}*SharedItemsImports = 5
@ -399,6 +540,7 @@ Global
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
+		{1799C43E-5C54-4A8F-8D64-B1475241DB0D} = {C317F1B1-D75E-4C6D-83EB-80367343E0D7}
 		{FBE8C1AD-5AEC-4514-9B64-091D8E145865} = {1799C43E-5C54-4A8F-8D64-B1475241DB0D}
 		{2AA31A1F-142C-43F4-8687-09ABCA4B3A26} = {815C0625-CD3D-440F-9F80-2D83856AB7AE}
 		{FA55F5DE-11A6-487D-ABA4-BC93A02717DD} = {56801022-D71A-4FBE-BC5B-CBA08E2284EC}
@ -420,6 +562,8 @@ Global
 		{C0D7754B-5277-438E-ABEB-2BA34401B5A7} = {1799C43E-5C54-4A8F-8D64-B1475241DB0D}
 		{68A8CC40-6AED-4E96-B524-31B1158FDEEA} = {815C0625-CD3D-440F-9F80-2D83856AB7AE}
 		{FC527290-2F22-432C-B77B-6E815726B02C} = {56801022-D71A-4FBE-BC5B-CBA08E2284EC}
+		{670DD46C-82E9-499A-B2D2-00A802ED0141} = {E1C42A6F-913B-4A7B-B1A8-2BB62843B254}
+		{5DFC394F-136F-4B76-9BCA-3BA786515EFC} = {9DA226A1-8656-49A8-A58A-A8B5C081AD66}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {5F8B9D1F-CD8B-4CC5-8216-D531E25BD795}
--- a/README.md
+++ b/README.md
@ -70,13 +70,15 @@ To clone ImageSharp locally, click the "Clone in [YOUR_OS]" button above or run
 git clone https://github.com/SixLabors/ImageSharp
 ```

-If working with Windows please ensure that you have enabled log file paths in git (run as Administrator).
+If working on Windows, please ensure that you have enabled long file paths in git (run as Administrator).

 ```bash
 git config --system core.longpaths true
 ```

-This repository contains [git submodules](https://blog.github.com/2016-02-01-working-with-submodules/). To add the submodules to the project, navigate to the repository root and type:
+This repository uses [Git Large File Storage](https://docs.github.com/en/github/managing-large-files/installing-git-large-file-storage). Please follow the linked instructions to ensure you have it set up in your environment.
+
+This repository contains [Git Submodules](https://blog.github.com/2016-02-01-working-with-submodules/). To add the submodules to the project, navigate to the repository root and type:

 ``` bash
 git submodule update --init --recursive
@ -130,4 +132,4 @@ Become a bronze sponsor with a monthly donation of $100 and get your logo (small
 <a href="https://opencollective.com/sixlabors/tiers/bronze-sponsors/7/website" target="_blank"><img src="https://opencollective.com/sixlabors/tiers/bronze-sponsors/7/avatar.svg?avatarHeight=96"></a>
 <a href="https://opencollective.com/sixlabors/tiers/bronze-sponsors/8/website" target="_blank"><img src="https://opencollective.com/sixlabors/tiers/bronze-sponsors/8/avatar.svg?avatarHeight=96"></a>
 <a href="https://opencollective.com/sixlabors/tiers/bronze-sponsors/9/website" target="_blank"><img src="https://opencollective.com/sixlabors/tiers/bronze-sponsors/9/avatar.svg?avatarHeight=96"></a>
-<a href="https://opencollective.com/sixlabors/tiers/bronze-sponsors/10/website" target="_blank"><img src="https://opencollective.com/sixlabors/tiers/bronze-sponsors/10/avatar.svg?avatarHeight=96"></a>
+<a href="https://opencollective.com/sixlabors/tiers/bronze-sponsors/10/website" target="_blank"><img src="https://opencollective.com/sixlabors/tiers/bronze-sponsors/10/avatar.svg?avatarHeight=96"></a>
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit b0d4cd98647996265a668e852574d901b27f22d6
+Subproject commit 06a733983486638b9e38197c7c6eb197ecac43e6
--- a/src/Directory.Build.props
+++ b/src/Directory.Build.props
@ -5,40 +5,22 @@
    Directory.Build.props is automatically picked up and imported by
    Microsoft.Common.props. This file needs to exist, even if empty so that
    files in the parent directory tree, with the same name, are not imported
-    instead. The import fairly early and only Sdk.props will have been
+    instead. They import fairly early and only Sdk.props will have been
    imported beforehand. We also don't need to add ourselves to
    MSBuildAllProjects, as that is done by the file that imports us.
  -->

-  <PropertyGroup>
-    <MSBuildAllProjects>$(MSBuildAllProjects);$(MSBuildThisFileDirectory)..\Directory.Build.props</MSBuildAllProjects>
-    <SixLaborsProjectCategory>src</SixLaborsProjectCategory>
-  </PropertyGroup>
+  <!-- Import the shared src .props file -->
+  <Import Project="$(MSBuildThisFileDirectory)..\shared-infrastructure\msbuild\props\SixLabors.Src.props" />

+  <!-- Import the solution .props file. -->
  <Import Project="$(MSBuildThisFileDirectory)..\Directory.Build.props" />

-  <PropertyGroup>
-    <CodeAnalysisRuleSet>$(MSBuildThisFileDirectory)..\shared-infrastructure\SixLabors.ruleset</CodeAnalysisRuleSet>
-    <GenerateDocumentationFile>true</GenerateDocumentationFile>
-  </PropertyGroup>
-
+  <!-- Compilation properties. -->
  <PropertyGroup Condition="'$(Configuration)' == 'Release'">
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
  </PropertyGroup>

-  <!--Add deterministic builds in CI-->
-  <PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">
-    <ContinuousIntegrationBuild>true</ContinuousIntegrationBuild>
-    <EmbedUntrackedSources>true</EmbedUntrackedSources>
-  </PropertyGroup>
-
-  <PropertyGroup>
-    <PublishRepositoryUrl>true</PublishRepositoryUrl>
-    <!-- Build symbol package (.snupkg) to distribute the PDB containing Source Link -->
-    <IncludeSymbols>true</IncludeSymbols>
-    <SymbolPackageFormat>snupkg</SymbolPackageFormat>
-  </PropertyGroup>
-  
  <ItemGroup>
    <!-- DynamicProxyGenAssembly2 is needed so Moq can use our internals -->
    <InternalsVisibleTo Include="DynamicProxyGenAssembly2" Key="0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7" />
@ -47,6 +29,4 @@
    <InternalsVisibleTo Include="SixLabors.ImageSharp.Tests" Key="$(SixLaborsPublicKey)" />
  </ItemGroup>

-
-  
 </Project>
--- a/src/Directory.Build.targets
+++ b/src/Directory.Build.targets
@ -5,82 +5,15 @@
    Directory.Build.targets is automatically picked up and imported by
    Microsoft.Common.targets. This file needs to exist, even if empty so that
    files in the parent directory tree, with the same name, are not imported
-    instead. The import fairly late and most other props/targets will have
+    instead. It is imported fairly late, so most other props/targets will have
    been imported beforehand. We also don't need to add ourselves to
    MSBuildAllProjects, as that is done by the file that imports us.
  -->

-  <PropertyGroup>
-    <MSBuildAllProjects>$(MSBuildAllProjects);$(MSBuildThisFileDirectory)..\Directory.Build.targets</MSBuildAllProjects>
-  </PropertyGroup>
+  <!-- Import the shared src .targets file -->
+  <Import Project="$(MSBuildThisFileDirectory)..\shared-infrastructure\msbuild\targets\SixLabors.Src.targets" />

+  <!-- Import the solution .targets file. -->
  <Import Project="$(MSBuildThisFileDirectory)..\Directory.Build.targets" />

-  <PropertyGroup>
-    <GeneratedInternalsVisibleToFile Condition="'$(GeneratedInternalsVisibleToFile)' == ''">$(IntermediateOutputPath)$(MSBuildProjectName).InternalsVisibleTo$(DefaultLanguageSourceExtension)</GeneratedInternalsVisibleToFile>
-  </PropertyGroup>
-
-  <!-- Workaround for running Coverlet with Determenistic builds -->
-  <!-- https://github.com/coverlet-coverage/coverlet/blob/master/Documentation/DeterministicBuild.md -->
-  <Target Name="CoverletGetPathMap"
-            DependsOnTargets="InitializeSourceRootMappedPaths"
-            Returns="@(_LocalTopLevelSourceRoot)"
-            Condition="'$(DeterministicSourcePaths)' == 'true'">
-    <ItemGroup>
-      <_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/>
-    </ItemGroup>
-  </Target>
-  
-  <ItemDefinitionGroup>
-    <InternalsVisibleTo>
-      <Visible>false</Visible>
-    </InternalsVisibleTo>
-  </ItemDefinitionGroup>
-
-  <Target Name="GenerateInternalsVisibleTo"
-          BeforeTargets="CoreCompile"
-          DependsOnTargets="PrepareForBuild;CoreGenerateInternalsVisibleTo"
-          Condition="'@(InternalsVisibleTo)' != ''" />
-
-  <Target Name="CoreGenerateInternalsVisibleTo"
-          Condition="'$(Language)' == 'VB' or '$(Language)' == 'C#'"
-          Inputs="$(MSBuildAllProjects)"
-          Outputs="$(GeneratedInternalsVisibleToFile)">
-    <CreateItem Include="System.Runtime.CompilerServices.InternalsVisibleToAttribute" AdditionalMetadata="_Parameter1=%(InternalsVisibleTo.Identity)" Condition="'%(InternalsVisibleTo.Key)' == ''">
-      <Output TaskParameter="Include" ItemName="InternalsVisibleToAttribute" />
-    </CreateItem>
-    <CreateItem Include="System.Runtime.CompilerServices.InternalsVisibleToAttribute" AdditionalMetadata="_Parameter1=%(InternalsVisibleTo.Identity), PublicKey=%(InternalsVisibleTo.Key)" Condition="'%(InternalsVisibleTo.Key)' != ''">
-      <Output TaskParameter="Include" ItemName="InternalsVisibleToAttribute" />
-    </CreateItem>
-
-    <WriteCodeFragment AssemblyAttributes="@(InternalsVisibleToAttribute)" Language="$(Language)" OutputFile="$(GeneratedInternalsVisibleToFile)">
-      <Output TaskParameter="OutputFile" ItemName="Compile" />
-      <Output TaskParameter="OutputFile" ItemName="FileWrites" />
-    </WriteCodeFragment>
-  </Target>
-
-  <!-- Empty target so that `dotnet test` will work on the solution -->
-  <!-- https://github.com/Microsoft/vstest/issues/411 -->
-  <Target Name="VSTest" Condition="'$(IsTestProject)' == 'true'"/>
-  
-  <ItemGroup>
-    <!--Shared config files that have to exist at root level.-->
-    <ConfigFilesToCopy Include="..\..\shared-infrastructure\.editorconfig;..\..\shared-infrastructure\.gitattributes" />
-  </ItemGroup>
-
-  <!--Ensures our config files are up to date.-->
-  <Target Name="CopyFiles" BeforeTargets="Build">
-    <Copy SourceFiles="@(ConfigFilesToCopy)"
-          SkipUnchangedFiles = "true"
-          DestinationFolder="..\..\" />
-  </Target>
-  
-  <!-- Allows regenerating T4-generated files at build time using MsBuild -->
-  <!-- Enable on Windows OS to build all T4 templates. TODO: XPlat
-  <Import Project="$(MSBuildExtensionsPath)\Microsoft\VisualStudio\v$(VisualStudioVersion)\TextTemplating\Microsoft.TextTemplating.targets" />
-  <PropertyGroup>
-    <TransformOnBuild>true</TransformOnBuild>
-  </PropertyGroup>
-  -->
-  
 </Project>
--- a/src/ImageSharp/Color/Color.cs
+++ b/src/ImageSharp/Color/Color.cs
@ -27,19 +27,19 @@ namespace SixLabors.ImageSharp
        private Color(byte r, byte g, byte b, byte a)
        {
            this.data = new Rgba64(
-                ImageMaths.UpscaleFrom8BitTo16Bit(r),
-                ImageMaths.UpscaleFrom8BitTo16Bit(g),
-                ImageMaths.UpscaleFrom8BitTo16Bit(b),
-                ImageMaths.UpscaleFrom8BitTo16Bit(a));
+                ColorNumerics.UpscaleFrom8BitTo16Bit(r),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(g),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(b),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(a));
        }

        [MethodImpl(InliningOptions.ShortMethod)]
        private Color(byte r, byte g, byte b)
        {
            this.data = new Rgba64(
-                ImageMaths.UpscaleFrom8BitTo16Bit(r),
-                ImageMaths.UpscaleFrom8BitTo16Bit(g),
-                ImageMaths.UpscaleFrom8BitTo16Bit(b),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(r),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(g),
+                ColorNumerics.UpscaleFrom8BitTo16Bit(b),
                ushort.MaxValue);
        }

--- a/src/ImageSharp/ColorSpaces/Cmyk.cs
+++ b/src/ImageSharp/ColorSpaces/Cmyk.cs
@ -59,7 +59,7 @@ namespace SixLabors.ImageSharp.ColorSpaces
        [MethodImpl(InliningOptions.ShortMethod)]
        public Cmyk(Vector4 vector)
        {
-            vector = Vector4Utilities.FastClamp(vector, Min, Max);
+            vector = Numerics.Clamp(vector, Min, Max);
            this.C = vector.X;
            this.M = vector.Y;
            this.Y = vector.Z;
--- a/src/ImageSharp/ColorSpaces/Companding/LCompanding.cs
+++ b/src/ImageSharp/ColorSpaces/Companding/LCompanding.cs
@ -24,7 +24,7 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
        /// <returns>The <see cref="float"/> representing the linear channel value.</returns>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static float Expand(float channel)
-            => channel <= 0.08F ? (100F * channel) / CieConstants.Kappa : ImageMaths.Pow3((channel + 0.16F) / 1.16F);
+            => channel <= 0.08F ? (100F * channel) / CieConstants.Kappa : Numerics.Pow3((channel + 0.16F) / 1.16F);

        /// <summary>
        /// Compresses an uncompanded channel (linear) to its nonlinear equivalent.
--- a/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs
+++ b/src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs
@ -1,10 +1,14 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif

 namespace SixLabors.ImageSharp.ColorSpaces.Companding
 {
@ -18,19 +22,83 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
    /// </remarks>
    public static class SRgbCompanding
    {
+        private const int Length = Scale + 2; // 256kb @ 16bit precision.
+        private const int Scale = (1 << 16) - 1;
+
+        private static readonly Lazy<float[]> LazyCompressTable = new Lazy<float[]>(
+            () =>
+            {
+                var result = new float[Length];
+
+                for (int i = 0; i < result.Length; i++)
+                {
+                    double d = (double)i / Scale;
+                    if (d <= (0.04045 / 12.92))
+                    {
+                        d *= 12.92;
+                    }
+                    else
+                    {
+                        d = (1.055 * Math.Pow(d, 1.0 / 2.4)) - 0.055;
+                    }
+
+                    result[i] = (float)d;
+                }
+
+                return result;
+            },
+            true);
+
+        private static readonly Lazy<float[]> LazyExpandTable = new Lazy<float[]>(
+            () =>
+            {
+                var result = new float[Length];
+
+                for (int i = 0; i < result.Length; i++)
+                {
+                    double d = (double)i / Scale;
+                    if (d <= 0.04045)
+                    {
+                        d /= 12.92;
+                    }
+                    else
+                    {
+                        d = Math.Pow((d + 0.055) / 1.055, 2.4);
+                    }
+
+                    result[i] = (float)d;
+                }
+
+                return result;
+            },
+            true);
+
+        private static float[] ExpandTable => LazyExpandTable.Value;
+
+        private static float[] CompressTable => LazyCompressTable.Value;
+
        /// <summary>
        /// Expands the companded vectors to their linear equivalents with respect to the energy.
        /// </summary>
        /// <param name="vectors">The span of vectors.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static void Expand(Span<Vector4> vectors)
        {
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx2.IsSupported && vectors.Length >= 2)
+            {
+                CompandAvx2(vectors, ExpandTable);

-            for (int i = 0; i < vectors.Length; i++)
+                if (Numerics.Modulo2(vectors.Length) != 0)
+                {
+                    // Vectors are processed in pairs, so an odd length leaves exactly one trailing element.
+                    Expand(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+                }
+            }
+            else
+#endif
            {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Expand(ref v);
+                CompandScalar(vectors, ExpandTable);
            }
        }

@ -38,15 +106,24 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
        /// Compresses the uncompanded vectors to their nonlinear equivalents with respect to the energy.
        /// </summary>
        /// <param name="vectors">The span of vectors.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Compress(Span<Vector4> vectors)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void Compress(Span<Vector4> vectors)
        {
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx2.IsSupported && vectors.Length >= 2)
+            {
+                CompandAvx2(vectors, CompressTable);

-            for (int i = 0; i < vectors.Length; i++)
+                if (Numerics.Modulo2(vectors.Length) != 0)
+                {
+                    // Vectors are processed in pairs, so an odd length leaves exactly one trailing element.
+                    Compress(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+                }
+            }
+            else
+#endif
            {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Compress(ref v);
+                CompandScalar(vectors, CompressTable);
            }
        }

@ -54,9 +131,10 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
        /// Expands a companded vector to its linear equivalent with respect to the energy.
        /// </summary>
        /// <param name="vector">The vector.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static void Expand(ref Vector4 vector)
        {
+            // Alpha is already a linear representation of opacity so we do not want to convert it.
            vector.X = Expand(vector.X);
            vector.Y = Expand(vector.Y);
            vector.Z = Expand(vector.Z);
@ -66,9 +144,10 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
        /// Compresses an uncompanded vector (linear) to its nonlinear equivalent.
        /// </summary>
        /// <param name="vector">The vector.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static void Compress(ref Vector4 vector)
        {
+            // Alpha is already a linear representation of opacity so we do not want to convert it.
            vector.X = Compress(vector.X);
            vector.Y = Compress(vector.Y);
            vector.Z = Compress(vector.Z);
@ -79,15 +158,84 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
        /// </summary>
        /// <param name="channel">The channel value.</param>
        /// <returns>The <see cref="float"/> representing the linear channel value.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Expand(float channel) => channel <= 0.04045F ? channel / 12.92F : MathF.Pow((channel + 0.055F) / 1.055F, 2.4F);
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Expand(float channel)
+            => channel <= 0.04045F ? channel / 12.92F : MathF.Pow((channel + 0.055F) / 1.055F, 2.4F);

        /// <summary>
        /// Compresses an uncompanded channel (linear) to its nonlinear equivalent.
        /// </summary>
        /// <param name="channel">The channel value.</param>
        /// <returns>The <see cref="float"/> representing the nonlinear channel value.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Compress(float channel)
+            => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe void CompandAvx2(Span<Vector4> vectors, float[] table)
+        {
+            fixed (float* tablePointer = &table[0])
+            {
+                var scale = Vector256.Create((float)Scale);
+                Vector256<float> zero = Vector256<float>.Zero;
+                var offset = Vector256.Create(1);
+
+                // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+                ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
+                ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+                while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+                {
+                    Vector256<float> multiplied = Avx.Multiply(scale, vectorsBase);
+                    multiplied = Avx.Min(Avx.Max(zero, multiplied), scale);
+
+                    Vector256<int> truncated = Avx.ConvertToVector256Int32WithTruncation(multiplied);
+                    Vector256<float> truncatedF = Avx.ConvertToVector256Single(truncated);
+
+                    Vector256<float> low = Avx2.GatherVector256(tablePointer, truncated, sizeof(float));
+                    Vector256<float> high = Avx2.GatherVector256(tablePointer, Avx2.Add(truncated, offset), sizeof(float));
+
+                    // Alpha is already a linear representation of opacity so we do not want to convert it.
+                    Vector256<float> companded = Numerics.Lerp(low, high, Avx.Subtract(multiplied, truncatedF));
+                    vectorsBase = Avx.Blend(companded, vectorsBase, Numerics.BlendAlphaControl);
+                    vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+                }
+            }
+        }
+#endif
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe void CompandScalar(Span<Vector4> vectors, float[] table)
+        {
+            fixed (float* tablePointer = &table[0])
+            {
+                Vector4 zero = Vector4.Zero;
+                var scale = new Vector4(Scale);
+                ref Vector4 vectorsBase = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsLast = ref Unsafe.Add(ref vectorsBase, vectors.Length);
+
+                while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+                {
+                    Vector4 multiplied = Numerics.Clamp(vectorsBase * Scale, zero, scale);
+
+                    float f0 = multiplied.X;
+                    float f1 = multiplied.Y;
+                    float f2 = multiplied.Z;
+
+                    uint i0 = (uint)f0;
+                    uint i1 = (uint)f1;
+                    uint i2 = (uint)f2;
+
+                    // Alpha is already a linear representation of opacity so we do not want to convert it.
+                    vectorsBase.X = Numerics.Lerp(tablePointer[i0], tablePointer[i0 + 1], f0 - (int)i0);
+                    vectorsBase.Y = Numerics.Lerp(tablePointer[i1], tablePointer[i1 + 1], f1 - (int)i1);
+                    vectorsBase.Z = Numerics.Lerp(tablePointer[i2], tablePointer[i2 + 1], f2 - (int)i2);
+
+                    vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+                }
+            }
+        }
    }
-}
+}
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieLabToCieXyzConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieLabToCieXyzConverter.cs
@ -25,11 +25,11 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
            float fx = (a / 500F) + fy;
            float fz = fy - (b / 200F);

-            float fx3 = ImageMaths.Pow3(fx);
-            float fz3 = ImageMaths.Pow3(fz);
+            float fx3 = Numerics.Pow3(fx);
+            float fz3 = Numerics.Pow3(fz);

            float xr = fx3 > CieConstants.Epsilon ? fx3 : ((116F * fx) - 16F) / CieConstants.Kappa;
-            float yr = l > CieConstants.Kappa * CieConstants.Epsilon ? ImageMaths.Pow3((l + 16F) / 116F) : l / CieConstants.Kappa;
+            float yr = l > CieConstants.Kappa * CieConstants.Epsilon ? Numerics.Pow3((l + 16F) / 116F) : l / CieConstants.Kappa;
            float zr = fz3 > CieConstants.Epsilon ? fz3 : ((116F * fz) - 16F) / CieConstants.Kappa;

            var wxyz = new Vector3(input.WhitePoint.X, input.WhitePoint.Y, input.WhitePoint.Z);
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieLuvToCieXyzConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/CieLuvToCieXyzConverter.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System.Runtime.CompilerServices;
@ -24,7 +24,7 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
            float v0 = ComputeV0(input.WhitePoint);

            float y = l > CieConstants.Kappa * CieConstants.Epsilon
-                        ? ImageMaths.Pow3((l + 16) / 116)
+                        ? Numerics.Pow3((l + 16) / 116)
                        : l / CieConstants.Kappa;

            float a = ((52 * l / (u + (13 * l * u0))) - 1) / 3;
@ -71,4 +71,4 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
        private static float ComputeV0(in CieXyz input)
            => (9 * input.Y) / (input.X + (15 * input.Y) + (3 * input.Z));
    }
-}
+}
--- a/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/HunterLabToCieXyzConverter.cs
+++ b/src/ImageSharp/ColorSpaces/Conversion/Implementation/Converters/HunterLabToCieXyzConverter.cs
@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.ColorSpaces.Conversion
            float ka = ComputeKa(input.WhitePoint);
            float kb = ComputeKb(input.WhitePoint);

-            float pow = ImageMaths.Pow2(l / 100F);
+            float pow = Numerics.Pow2(l / 100F);
            float sqrtPow = MathF.Sqrt(pow);
            float y = pow * yn;

--- a/src/ImageSharp/ColorSpaces/YCbCr.cs
+++ b/src/ImageSharp/ColorSpaces/YCbCr.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
@ -100,4 +100,4 @@ namespace SixLabors.ImageSharp.ColorSpaces
                && this.Cr.Equals(other.Cr);
        }
    }
-}
+}
--- a/src/ImageSharp/Common/Extensions/ComparableExtensions.cs
+++ b/src/ImageSharp/Common/Extensions/ComparableExtensions.cs
@ -1,140 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace SixLabors.ImageSharp
-{
-    /// <summary>
-    /// Extension methods for classes that implement <see cref="IComparable{T}"/>.
-    /// </summary>
-    internal static class ComparableExtensions
-    {
-        /// <summary>
-        /// Restricts a <see cref="byte"/> to be within a specified range.
-        /// </summary>
-        /// <param name="value">The value to clamp.</param>
-        /// <param name="min">The minimum value. If value is less than min, min will be returned.</param>
-        /// <param name="max">The maximum value. If value is greater than max, max will be returned.</param>
-        /// <returns>
-        /// The <see cref="byte"/> representing the clamped value.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static byte Clamp(this byte value, byte min, byte max)
-        {
-            // Order is important here as someone might set min to higher than max.
-            if (value >= max)
-            {
-                return max;
-            }
-
-            if (value <= min)
-            {
-                return min;
-            }
-
-            return value;
-        }
-
-        /// <summary>
-        /// Restricts a <see cref="uint"/> to be within a specified range.
-        /// </summary>
-        /// <param name="value">The The value to clamp.</param>
-        /// <param name="min">The minimum value. If value is less than min, min will be returned.</param>
-        /// <param name="max">The maximum value. If value is greater than max, max will be returned.</param>
-        /// <returns>
-        /// The <see cref="int"/> representing the clamped value.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static uint Clamp(this uint value, uint min, uint max)
-        {
-            if (value >= max)
-            {
-                return max;
-            }
-
-            if (value <= min)
-            {
-                return min;
-            }
-
-            return value;
-        }
-
-        /// <summary>
-        /// Restricts a <see cref="int"/> to be within a specified range.
-        /// </summary>
-        /// <param name="value">The The value to clamp.</param>
-        /// <param name="min">The minimum value. If value is less than min, min will be returned.</param>
-        /// <param name="max">The maximum value. If value is greater than max, max will be returned.</param>
-        /// <returns>
-        /// The <see cref="int"/> representing the clamped value.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int Clamp(this int value, int min, int max)
-        {
-            if (value >= max)
-            {
-                return max;
-            }
-
-            if (value <= min)
-            {
-                return min;
-            }
-
-            return value;
-        }
-
-        /// <summary>
-        /// Restricts a <see cref="float"/> to be within a specified range.
-        /// </summary>
-        /// <param name="value">The The value to clamp.</param>
-        /// <param name="min">The minimum value. If value is less than min, min will be returned.</param>
-        /// <param name="max">The maximum value. If value is greater than max, max will be returned.</param>
-        /// <returns>
-        /// The <see cref="float"/> representing the clamped value.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Clamp(this float value, float min, float max)
-        {
-            if (value >= max)
-            {
-                return max;
-            }
-
-            if (value <= min)
-            {
-                return min;
-            }
-
-            return value;
-        }
-
-        /// <summary>
-        /// Restricts a <see cref="double"/> to be within a specified range.
-        /// </summary>
-        /// <param name="value">The The value to clamp.</param>
-        /// <param name="min">The minimum value. If value is less than min, min will be returned.</param>
-        /// <param name="max">The maximum value. If value is greater than max, max will be returned.</param>
-        /// <returns>
-        /// The <see cref="double"/> representing the clamped value.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static double Clamp(this double value, double min, double max)
-        {
-            if (value >= max)
-            {
-                return max;
-            }
-
-            if (value <= min)
-            {
-                return min;
-            }
-
-            return value;
-        }
-    }
-}
--- a/src/ImageSharp/Common/Helpers/Buffer2DUtils.cs
+++ b/src/ImageSharp/Common/Helpers/Buffer2DUtils.cs
@ -1,109 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-using SixLabors.ImageSharp.Memory;
-using SixLabors.ImageSharp.PixelFormats;
-
-namespace SixLabors.ImageSharp
-{
-    /// <summary>
-    /// Extension methods for <see cref="Buffer2D{T}"/>.
-    /// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement.
-    /// </summary>
-    internal static class Buffer2DUtils
-    {
-        /// <summary>
-        /// Computes the sum of vectors in <paramref name="targetRow"/> weighted by the kernel weight values.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="kernel">The 1D convolution kernel.</param>
-        /// <param name="sourcePixels">The source frame.</param>
-        /// <param name="targetRow">The target row.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        public static void Convolve4<TPixel>(
-            Span<Complex64> kernel,
-            Buffer2D<TPixel> sourcePixels,
-            Span<ComplexVector4> targetRow,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            ComplexVector4 vector = default;
-            int kernelLength = kernel.Length;
-            int radiusY = kernelLength >> 1;
-            int sourceOffsetColumnBase = column + minColumn;
-            ref Complex64 baseRef = ref MemoryMarshal.GetReference(kernel);
-
-            for (int i = 0; i < kernelLength; i++)
-            {
-                int offsetY = (row + i - radiusY).Clamp(minRow, maxRow);
-                int offsetX = sourceOffsetColumnBase.Clamp(minColumn, maxColumn);
-                Span<TPixel> sourceRowSpan = sourcePixels.GetRowSpan(offsetY);
-                var currentColor = sourceRowSpan[offsetX].ToVector4();
-
-                vector.Sum(Unsafe.Add(ref baseRef, i) * currentColor);
-            }
-
-            targetRow[column] = vector;
-        }
-
-        /// <summary>
-        /// Computes the sum of vectors in <paramref name="targetRow"/> weighted by the kernel weight values and accumulates the partial results.
-        /// </summary>
-        /// <param name="kernel">The 1D convolution kernel.</param>
-        /// <param name="sourceValues">The source frame.</param>
-        /// <param name="targetRow">The target row.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        /// <param name="z">The weight factor for the real component of the complex pixel values.</param>
-        /// <param name="w">The weight factor for the imaginary component of the complex pixel values.</param>
-        public static void Convolve4AndAccumulatePartials(
-            Span<Complex64> kernel,
-            Buffer2D<ComplexVector4> sourceValues,
-            Span<Vector4> targetRow,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn,
-            float z,
-            float w)
-        {
-            ComplexVector4 vector = default;
-            int kernelLength = kernel.Length;
-            int radiusX = kernelLength >> 1;
-            int sourceOffsetColumnBase = column + minColumn;
-
-            int offsetY = row.Clamp(minRow, maxRow);
-            ref ComplexVector4 sourceRef = ref MemoryMarshal.GetReference(sourceValues.GetRowSpan(offsetY));
-            ref Complex64 baseRef = ref MemoryMarshal.GetReference(kernel);
-
-            for (int x = 0; x < kernelLength; x++)
-            {
-                int offsetX = (sourceOffsetColumnBase + x - radiusX).Clamp(minColumn, maxColumn);
-                vector.Sum(Unsafe.Add(ref baseRef, x) * Unsafe.Add(ref sourceRef, offsetX));
-            }
-
-            targetRow[column] += vector.WeightedSum(z, w);
-        }
-    }
-}
--- a/src/ImageSharp/Common/Helpers/ColorNumerics.cs
+++ b/src/ImageSharp/Common/Helpers/ColorNumerics.cs
@ -0,0 +1,177 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Provides optimized static methods for common mathematical functions specific
+    /// to color processing.
+    /// </summary>
+    internal static class ColorNumerics
+    {
+        /// <summary>
+        /// Vector for converting pixel to gray value as specified by
+        /// ITU-R Recommendation BT.709.
+        /// </summary>
+        private static readonly Vector4 Bt709 = new Vector4(.2126f, .7152f, .0722f, 0.0f);
+
+        /// <summary>
+        /// Convert a pixel value to grayscale using ITU-R Recommendation BT.709.
+        /// </summary>
+        /// <param name="vector">The vector to get the luminance from.</param>
+        /// <param name="luminanceLevels">
+        /// The number of luminance levels (256 for 8 bit, 65536 for 16 bit grayscale images).
+        /// </param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int GetBT709Luminance(ref Vector4 vector, int luminanceLevels)
+            => (int)MathF.Round(Vector4.Dot(vector, Bt709) * (luminanceLevels - 1));
+
+        /// <summary>
+        /// Gets the luminance from the rgb components using the formula
+        /// as specified by ITU-R Recommendation BT.709.
+        /// </summary>
+        /// <param name="r">The red component.</param>
+        /// <param name="g">The green component.</param>
+        /// <param name="b">The blue component.</param>
+        /// <returns>The <see cref="byte"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte Get8BitBT709Luminance(byte r, byte g, byte b)
+            => (byte)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+
+        /// <summary>
+        /// Gets the luminance from the rgb components using the formula as
+        /// specified by ITU-R Recommendation BT.709.
+        /// </summary>
+        /// <param name="r">The red component.</param>
+        /// <param name="g">The green component.</param>
+        /// <param name="b">The blue component.</param>
+        /// <returns>The <see cref="ushort"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ushort Get16BitBT709Luminance(ushort r, ushort g, ushort b)
+            => (ushort)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+
+        /// <summary>
+        /// Gets the luminance from the rgb components using the formula as specified
+        /// by ITU-R Recommendation BT.709.
+        /// </summary>
+        /// <param name="r">The red component.</param>
+        /// <param name="g">The green component.</param>
+        /// <param name="b">The blue component.</param>
+        /// <returns>The <see cref="ushort"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ushort Get16BitBT709Luminance(float r, float g, float b)
+            => (ushort)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+
+        /// <summary>
+        /// Scales a value from a 16 bit <see cref="ushort"/> to an
+        /// 8 bit <see cref="byte"/> equivalent.
+        /// </summary>
+        /// <param name="component">The 16 bit component value.</param>
+        /// <returns>The <see cref="byte"/></returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte DownScaleFrom16BitTo8Bit(ushort component)
+        {
+            // To scale to 8 bits from a 16-bit value V the required value (from the PNG specification) is:
+            //
+            //    (V * 255) / 65535
+            //
+            // This reduces to round(V / 257), or floor((V + 128.5)/257)
+            //
+            // Represent V as the two byte value vhi.vlo.  Make a guess that the
+            // result is the top byte of V, vhi, then the correction to this value
+            // is:
+            //
+            //    error = floor(((V-vhi.vhi) + 128.5) / 257)
+            //          = floor(((vlo-vhi) + 128.5) / 257)
+            //
+            // This can be approximated using integer arithmetic (and a signed
+            // shift):
+            //
+            //    error = (vlo-vhi+128) >> 8;
+            //
+            // The approximate differs from the exact answer only when (vlo-vhi) is
+            // 128; it then gives a correction of +1 when the exact correction is
+            // 0.  This gives 128 errors.  The exact answer (correct for all 16-bit
+            // input values) is:
+            //
+            //    error = (vlo-vhi+128)*65535 >> 24;
+            //
+            // An alternative arithmetic calculation which also gives no errors is:
+            //
+            //    (V * 255 + 32895) >> 16
+            return (byte)(((component * 255) + 32895) >> 16);
+        }
+
+        /// <summary>
+        /// Scales a value from an 8 bit <see cref="byte"/> to
+        /// a 16 bit <see cref="ushort"/> equivalent.
+        /// </summary>
+        /// <param name="component">The 8 bit component value.</param>
+        /// <returns>The <see cref="ushort"/></returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ushort UpscaleFrom8BitTo16Bit(byte component)
+            => (ushort)(component * 257);
+
+        /// <summary>
+        /// Returns how many bits are required to store the specified number of colors.
+        /// Computed as the ceiling of Log2() of the value, with a minimum of 1.
+        /// </summary>
+        /// <param name="colors">The number of colors.</param>
+        /// <returns>
+        /// The <see cref="int"/>
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int GetBitsNeededForColorDepth(int colors)
+            => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
+
+        /// <summary>
+        /// Returns how many colors will be created by the specified number of bits.
+        /// </summary>
+        /// <param name="bitDepth">The bit depth.</param>
+        /// <returns>The <see cref="int"/></returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int GetColorCountForBitDepth(int bitDepth)
+            => 1 << bitDepth;
+
+        /// <summary>
+        /// Transforms a vector by the given color matrix.
+        /// </summary>
+        /// <param name="vector">The source vector.</param>
+        /// <param name="matrix">The transformation color matrix.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Transform(ref Vector4 vector, ref ColorMatrix matrix)
+        {
+            float x = vector.X;
+            float y = vector.Y;
+            float z = vector.Z;
+            float w = vector.W;
+
+            vector.X = (x * matrix.M11) + (y * matrix.M21) + (z * matrix.M31) + (w * matrix.M41) + matrix.M51;
+            vector.Y = (x * matrix.M12) + (y * matrix.M22) + (z * matrix.M32) + (w * matrix.M42) + matrix.M52;
+            vector.Z = (x * matrix.M13) + (y * matrix.M23) + (z * matrix.M33) + (w * matrix.M43) + matrix.M53;
+            vector.W = (x * matrix.M14) + (y * matrix.M24) + (z * matrix.M34) + (w * matrix.M44) + matrix.M54;
+        }
+
+        /// <summary>
+        /// Bulk variant of <see cref="Transform(ref Vector4, ref ColorMatrix)"/>.
+        /// </summary>
+        /// <param name="vectors">The span of vectors</param>
+        /// <param name="matrix">The transformation color matrix.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Transform(Span<Vector4> vectors, ref ColorMatrix matrix)
+        {
+            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+
+            for (int i = 0; i < vectors.Length; i++)
+            {
+                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+                Transform(ref v, ref matrix);
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs
+++ b/src/ImageSharp/Common/Helpers/DenseMatrixUtils.cs
@ -1,279 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using SixLabors.ImageSharp.Memory;
-using SixLabors.ImageSharp.PixelFormats;
-
-namespace SixLabors.ImageSharp
-{
-    /// <summary>
-    /// Extension methods for <see cref="DenseMatrix{T}"/>.
-    /// TODO: One day rewrite all this to use SIMD intrinsics. There's a lot of scope for improvement.
-    /// </summary>
-    internal static class DenseMatrixUtils
-    {
-        /// <summary>
-        /// Computes the sum of vectors in the span referenced by <paramref name="targetRowRef"/> weighted by the two kernel weight values.
-        /// Using this method the convolution filter is not applied to alpha in addition to the color channels.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="matrixY">The vertical dense matrix.</param>
-        /// <param name="matrixX">The horizontal dense matrix.</param>
-        /// <param name="sourcePixels">The source frame.</param>
-        /// <param name="targetRowRef">The target row base reference.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Convolve2D3<TPixel>(
-            in DenseMatrix<float> matrixY,
-            in DenseMatrix<float> matrixX,
-            Buffer2D<TPixel> sourcePixels,
-            ref Vector4 targetRowRef,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            Convolve2DImpl(
-                in matrixY,
-                in matrixX,
-                sourcePixels,
-                row,
-                column,
-                minRow,
-                maxRow,
-                minColumn,
-                maxColumn,
-                out Vector4 vector);
-
-            ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
-            vector.W = target.W;
-
-            Vector4Utilities.UnPremultiply(ref vector);
-            target = vector;
-        }
-
-        /// <summary>
-        /// Computes the sum of vectors in the span referenced by <paramref name="targetRowRef"/> weighted by the two kernel weight values.
-        /// Using this method the convolution filter is applied to alpha in addition to the color channels.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="matrixY">The vertical dense matrix.</param>
-        /// <param name="matrixX">The horizontal dense matrix.</param>
-        /// <param name="sourcePixels">The source frame.</param>
-        /// <param name="targetRowRef">The target row base reference.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Convolve2D4<TPixel>(
-            in DenseMatrix<float> matrixY,
-            in DenseMatrix<float> matrixX,
-            Buffer2D<TPixel> sourcePixels,
-            ref Vector4 targetRowRef,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            Convolve2DImpl(
-                in matrixY,
-                in matrixX,
-                sourcePixels,
-                row,
-                column,
-                minRow,
-                maxRow,
-                minColumn,
-                maxColumn,
-                out Vector4 vector);
-
-            ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
-            Vector4Utilities.UnPremultiply(ref vector);
-            target = vector;
-        }
-
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Convolve2DImpl<TPixel>(
-            in DenseMatrix<float> matrixY,
-            in DenseMatrix<float> matrixX,
-            Buffer2D<TPixel> sourcePixels,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn,
-            out Vector4 vector)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            Vector4 vectorY = default;
-            Vector4 vectorX = default;
-            int matrixHeight = matrixY.Rows;
-            int matrixWidth = matrixY.Columns;
-            int radiusY = matrixHeight >> 1;
-            int radiusX = matrixWidth >> 1;
-            int sourceOffsetColumnBase = column + minColumn;
-
-            for (int y = 0; y < matrixHeight; y++)
-            {
-                int offsetY = (row + y - radiusY).Clamp(minRow, maxRow);
-                Span<TPixel> sourceRowSpan = sourcePixels.GetRowSpan(offsetY);
-
-                for (int x = 0; x < matrixWidth; x++)
-                {
-                    int offsetX = (sourceOffsetColumnBase + x - radiusX).Clamp(minColumn, maxColumn);
-                    var currentColor = sourceRowSpan[offsetX].ToVector4();
-                    Vector4Utilities.Premultiply(ref currentColor);
-
-                    vectorX += matrixX[y, x] * currentColor;
-                    vectorY += matrixY[y, x] * currentColor;
-                }
-            }
-
-            vector = Vector4.SquareRoot((vectorX * vectorX) + (vectorY * vectorY));
-        }
-
-        /// <summary>
-        /// Computes the sum of vectors in the span referenced by <paramref name="targetRowRef"/> weighted by the kernel weight values.
-        /// Using this method the convolution filter is not applied to alpha in addition to the color channels.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="matrix">The dense matrix.</param>
-        /// <param name="sourcePixels">The source frame.</param>
-        /// <param name="targetRowRef">The target row base reference.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Convolve3<TPixel>(
-            in DenseMatrix<float> matrix,
-            Buffer2D<TPixel> sourcePixels,
-            ref Vector4 targetRowRef,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            Vector4 vector = default;
-
-            ConvolveImpl(
-                in matrix,
-                sourcePixels,
-                row,
-                column,
-                minRow,
-                maxRow,
-                minColumn,
-                maxColumn,
-                ref vector);
-
-            ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
-            vector.W = target.W;
-
-            Vector4Utilities.UnPremultiply(ref vector);
-            target = vector;
-        }
-
-        /// <summary>
-        /// Computes the sum of vectors in the span referenced by <paramref name="targetRowRef"/> weighted by the kernel weight values.
-        /// Using this method the convolution filter is applied to alpha in addition to the color channels.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="matrix">The dense matrix.</param>
-        /// <param name="sourcePixels">The source frame.</param>
-        /// <param name="targetRowRef">The target row base reference.</param>
-        /// <param name="row">The current row.</param>
-        /// <param name="column">The current column.</param>
-        /// <param name="minRow">The minimum working area row.</param>
-        /// <param name="maxRow">The maximum working area row.</param>
-        /// <param name="minColumn">The minimum working area column.</param>
-        /// <param name="maxColumn">The maximum working area column.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Convolve4<TPixel>(
-            in DenseMatrix<float> matrix,
-            Buffer2D<TPixel> sourcePixels,
-            ref Vector4 targetRowRef,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            Vector4 vector = default;
-
-            ConvolveImpl(
-                in matrix,
-                sourcePixels,
-                row,
-                column,
-                minRow,
-                maxRow,
-                minColumn,
-                maxColumn,
-                ref vector);
-
-            ref Vector4 target = ref Unsafe.Add(ref targetRowRef, column);
-            Vector4Utilities.UnPremultiply(ref vector);
-            target = vector;
-        }
-
-        [MethodImpl(InliningOptions.ShortMethod)]
-        private static void ConvolveImpl<TPixel>(
-            in DenseMatrix<float> matrix,
-            Buffer2D<TPixel> sourcePixels,
-            int row,
-            int column,
-            int minRow,
-            int maxRow,
-            int minColumn,
-            int maxColumn,
-            ref Vector4 vector)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            int matrixHeight = matrix.Rows;
-            int matrixWidth = matrix.Columns;
-            int radiusY = matrixHeight >> 1;
-            int radiusX = matrixWidth >> 1;
-            int sourceOffsetColumnBase = column + minColumn;
-
-            for (int y = 0; y < matrixHeight; y++)
-            {
-                int offsetY = (row + y - radiusY).Clamp(minRow, maxRow);
-                Span<TPixel> sourceRowSpan = sourcePixels.GetRowSpan(offsetY);
-
-                for (int x = 0; x < matrixWidth; x++)
-                {
-                    int offsetX = (sourceOffsetColumnBase + x - radiusX).Clamp(minColumn, maxColumn);
-                    var currentColor = sourceRowSpan[offsetX].ToVector4();
-                    Vector4Utilities.Premultiply(ref currentColor);
-                    vector += matrix[y, x] * currentColor;
-                }
-            }
-        }
-    }
-}
--- a/src/ImageSharp/Common/Helpers/ImageMaths.cs
+++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs
@ -1,373 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-
-using SixLabors.ImageSharp.PixelFormats;
-
-namespace SixLabors.ImageSharp
-{
-    /// <summary>
-    /// Provides common mathematical methods.
-    /// </summary>
-    internal static class ImageMaths
-    {
-        /// <summary>
-        /// Vector for converting pixel to gray value as specified by ITU-R Recommendation BT.709.
-        /// </summary>
-        private static readonly Vector4 Bt709 = new Vector4(.2126f, .7152f, .0722f, 0.0f);
-
-        /// <summary>
-        /// Convert a pixel value to grayscale using ITU-R Recommendation BT.709.
-        /// </summary>
-        /// <param name="vector">The vector to get the luminance from.</param>
-        /// <param name="luminanceLevels">The number of luminance levels (256 for 8 bit, 65536 for 16 bit grayscale images)</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int GetBT709Luminance(ref Vector4 vector, int luminanceLevels)
-            => (int)MathF.Round(Vector4.Dot(vector, Bt709) * (luminanceLevels - 1));
-
-        /// <summary>
-        /// Gets the luminance from the rgb components using the formula as specified by ITU-R Recommendation BT.709.
-        /// </summary>
-        /// <param name="r">The red component.</param>
-        /// <param name="g">The green component.</param>
-        /// <param name="b">The blue component.</param>
-        /// <returns>The <see cref="byte"/>.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static byte Get8BitBT709Luminance(byte r, byte g, byte b) =>
-            (byte)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
-
-        /// <summary>
-        /// Gets the luminance from the rgb components using the formula as specified by ITU-R Recommendation BT.709.
-        /// </summary>
-        /// <param name="r">The red component.</param>
-        /// <param name="g">The green component.</param>
-        /// <param name="b">The blue component.</param>
-        /// <returns>The <see cref="ushort"/>.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static ushort Get16BitBT709Luminance(ushort r, ushort g, ushort b) =>
-            (ushort)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
-
-        /// <summary>
-        /// Gets the luminance from the rgb components using the formula as specified by ITU-R Recommendation BT.709.
-        /// </summary>
-        /// <param name="r">The red component.</param>
-        /// <param name="g">The green component.</param>
-        /// <param name="b">The blue component.</param>
-        /// <returns>The <see cref="ushort"/>.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static ushort Get16BitBT709Luminance(float r, float g, float b) =>
-            (ushort)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
-
-        /// <summary>
-        /// Scales a value from a 16 bit <see cref="ushort"/> to it's 8 bit <see cref="byte"/> equivalent.
-        /// </summary>
-        /// <param name="component">The 8 bit component value.</param>
-        /// <returns>The <see cref="byte"/></returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static byte DownScaleFrom16BitTo8Bit(ushort component)
-        {
-            // To scale to 8 bits From a 16-bit value V the required value (from the PNG specification) is:
-            //
-            //    (V * 255) / 65535
-            //
-            // This reduces to round(V / 257), or floor((V + 128.5)/257)
-            //
-            // Represent V as the two byte value vhi.vlo.  Make a guess that the
-            // result is the top byte of V, vhi, then the correction to this value
-            // is:
-            //
-            //    error = floor(((V-vhi.vhi) + 128.5) / 257)
-            //          = floor(((vlo-vhi) + 128.5) / 257)
-            //
-            // This can be approximated using integer arithmetic (and a signed
-            // shift):
-            //
-            //    error = (vlo-vhi+128) >> 8;
-            //
-            // The approximate differs from the exact answer only when (vlo-vhi) is
-            // 128; it then gives a correction of +1 when the exact correction is
-            // 0.  This gives 128 errors.  The exact answer (correct for all 16-bit
-            // input values) is:
-            //
-            //    error = (vlo-vhi+128)*65535 >> 24;
-            //
-            // An alternative arithmetic calculation which also gives no errors is:
-            //
-            //    (V * 255 + 32895) >> 16
-            return (byte)(((component * 255) + 32895) >> 16);
-        }
-
-        /// <summary>
-        /// Scales a value from an 8 bit <see cref="byte"/> to it's 16 bit <see cref="ushort"/> equivalent.
-        /// </summary>
-        /// <param name="component">The 8 bit component value.</param>
-        /// <returns>The <see cref="ushort"/></returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static ushort UpscaleFrom8BitTo16Bit(byte component) => (ushort)(component * 257);
-
-        /// <summary>
-        /// Determine the Greatest CommonDivisor (GCD) of two numbers.
-        /// </summary>
-        public static int GreatestCommonDivisor(int a, int b)
-        {
-            while (b != 0)
-            {
-                int temp = b;
-                b = a % b;
-                a = temp;
-            }
-
-            return a;
-        }
-
-        /// <summary>
-        /// Determine the Least Common Multiple (LCM) of two numbers.
-        /// </summary>
-        public static int LeastCommonMultiple(int a, int b)
-        {
-            // https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
-            return (a / GreatestCommonDivisor(a, b)) * b;
-        }
-
-        /// <summary>
-        /// Calculates <paramref name="x"/> % 4
-        /// </summary>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int Modulo4(int x) => x & 3;
-
-        /// <summary>
-        /// Calculates <paramref name="x"/> % 8
-        /// </summary>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int Modulo8(int x) => x & 7;
-
-        /// <summary>
-        /// Fast (x mod m) calculator, with the restriction that
-        /// <paramref name="m"/> should be power of 2.
-        /// </summary>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int ModuloP2(int x, int m) => x & (m - 1);
-
-        /// <summary>
-        /// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation.
-        /// </summary>
-        /// <param name="x">
-        /// A number that is greater than <see cref="int.MinValue"/>, but less than or equal to <see cref="int.MaxValue"/>
-        /// </param>
-        /// <returns>The <see cref="int"/></returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int FastAbs(int x)
-        {
-            int y = x >> 31;
-            return (x ^ y) - y;
-        }
-
-        /// <summary>
-        /// Returns a specified number raised to the power of 2
-        /// </summary>
-        /// <param name="x">A single-precision floating-point number</param>
-        /// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Pow2(float x) => x * x;
-
-        /// <summary>
-        /// Returns a specified number raised to the power of 3
-        /// </summary>
-        /// <param name="x">A single-precision floating-point number</param>
-        /// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Pow3(float x) => x * x * x;
-
-        /// <summary>
-        /// Returns how many bits are required to store the specified number of colors.
-        /// Performs a Log2() on the value.
-        /// </summary>
-        /// <param name="colors">The number of colors.</param>
-        /// <returns>
-        /// The <see cref="int"/>
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
-
-        /// <summary>
-        /// Returns how many colors will be created by the specified number of bits.
-        /// </summary>
-        /// <param name="bitDepth">The bit depth.</param>
-        /// <returns>The <see cref="int"/></returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth;
-
-        /// <summary>
-        /// Implementation of 1D Gaussian G(x) function
-        /// </summary>
-        /// <param name="x">The x provided to G(x).</param>
-        /// <param name="sigma">The spread of the blur.</param>
-        /// <returns>The Gaussian G(x)</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float Gaussian(float x, float sigma)
-        {
-            const float Numerator = 1.0f;
-            float denominator = MathF.Sqrt(2 * MathF.PI) * sigma;
-
-            float exponentNumerator = -x * x;
-            float exponentDenominator = 2 * Pow2(sigma);
-
-            float left = Numerator / denominator;
-            float right = MathF.Exp(exponentNumerator / exponentDenominator);
-
-            return left * right;
-        }
-
-        /// <summary>
-        /// Returns the result of a normalized sine cardinal function for the given value.
-        /// SinC(x) = sin(pi*x)/(pi*x).
-        /// </summary>
-        /// <param name="f">A single-precision floating-point number to calculate the result for.</param>
-        /// <returns>
-        /// The sine cardinal of <paramref name="f" />.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float SinC(float f)
-        {
-            if (MathF.Abs(f) > Constants.Epsilon)
-            {
-                f *= MathF.PI;
-                float result = MathF.Sin(f) / f;
-                return MathF.Abs(result) < Constants.Epsilon ? 0F : result;
-            }
-
-            return 1F;
-        }
-
-        /// <summary>
-        /// Gets the bounding <see cref="Rectangle"/> from the given points.
-        /// </summary>
-        /// <param name="topLeft">
-        /// The <see cref="Point"/> designating the top left position.
-        /// </param>
-        /// <param name="bottomRight">
-        /// The <see cref="Point"/> designating the bottom right position.
-        /// </param>
-        /// <returns>
-        /// The bounding <see cref="Rectangle"/>.
-        /// </returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y);
-
-        /// <summary>
-        /// Finds the bounding rectangle based on the first instance of any color component other
-        /// than the given one.
-        /// </summary>
-        /// <typeparam name="TPixel">The pixel format.</typeparam>
-        /// <param name="bitmap">The <see cref="Image{TPixel}"/> to search within.</param>
-        /// <param name="componentValue">The color component value to remove.</param>
-        /// <param name="channel">The <see cref="RgbaComponent"/> channel to test against.</param>
-        /// <returns>
-        /// The <see cref="Rectangle"/>.
-        /// </returns>
-        public static Rectangle GetFilteredBoundingRectangle<TPixel>(ImageFrame<TPixel> bitmap, float componentValue, RgbaComponent channel = RgbaComponent.B)
-            where TPixel : unmanaged, IPixel<TPixel>
-        {
-            int width = bitmap.Width;
-            int height = bitmap.Height;
-            Point topLeft = default;
-            Point bottomRight = default;
-
-            Func<ImageFrame<TPixel>, int, int, float, bool> delegateFunc;
-
-            // Determine which channel to check against
-            switch (channel)
-            {
-                case RgbaComponent.R:
-                    delegateFunc = (pixels, x, y, b) => MathF.Abs(pixels[x, y].ToVector4().X - b) > Constants.Epsilon;
-                    break;
-
-                case RgbaComponent.G:
-                    delegateFunc = (pixels, x, y, b) => MathF.Abs(pixels[x, y].ToVector4().Y - b) > Constants.Epsilon;
-                    break;
-
-                case RgbaComponent.B:
-                    delegateFunc = (pixels, x, y, b) => MathF.Abs(pixels[x, y].ToVector4().Z - b) > Constants.Epsilon;
-                    break;
-
-                default:
-                    delegateFunc = (pixels, x, y, b) => MathF.Abs(pixels[x, y].ToVector4().W - b) > Constants.Epsilon;
-                    break;
-            }
-
-            int GetMinY(ImageFrame<TPixel> pixels)
-            {
-                for (int y = 0; y < height; y++)
-                {
-                    for (int x = 0; x < width; x++)
-                    {
-                        if (delegateFunc(pixels, x, y, componentValue))
-                        {
-                            return y;
-                        }
-                    }
-                }
-
-                return 0;
-            }
-
-            int GetMaxY(ImageFrame<TPixel> pixels)
-            {
-                for (int y = height - 1; y > -1; y--)
-                {
-                    for (int x = 0; x < width; x++)
-                    {
-                        if (delegateFunc(pixels, x, y, componentValue))
-                        {
-                            return y;
-                        }
-                    }
-                }
-
-                return height;
-            }
-
-            int GetMinX(ImageFrame<TPixel> pixels)
-            {
-                for (int x = 0; x < width; x++)
-                {
-                    for (int y = 0; y < height; y++)
-                    {
-                        if (delegateFunc(pixels, x, y, componentValue))
-                        {
-                            return x;
-                        }
-                    }
-                }
-
-                return 0;
-            }
-
-            int GetMaxX(ImageFrame<TPixel> pixels)
-            {
-                for (int x = width - 1; x > -1; x--)
-                {
-                    for (int y = 0; y < height; y++)
-                    {
-                        if (delegateFunc(pixels, x, y, componentValue))
-                        {
-                            return x;
-                        }
-                    }
-                }
-
-                return width;
-            }
-
-            topLeft.Y = GetMinY(bitmap);
-            topLeft.X = GetMinX(bitmap);
-            bottomRight.Y = (GetMaxY(bitmap) + 1).Clamp(0, height);
-            bottomRight.X = (GetMaxX(bitmap) + 1).Clamp(0, width);
-
-            return GetBoundingRectangle(topLeft, bottomRight);
-        }
-    }
-}
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@ -0,0 +1,752 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Provides optimized static methods for trigonometric, logarithmic,
+    /// and other common mathematical functions.
+    /// </summary>
+    internal static class Numerics
+    {
+#if SUPPORTS_RUNTIME_INTRINSICS
+        public const int BlendAlphaControl = 0b_10_00_10_00;
+        private const int ShuffleAlphaControl = 0b_11_11_11_11;
+#endif
+
+        /// <summary>
+        /// Determines the Greatest Common Divisor (GCD) of two numbers
+        /// using the iterative Euclidean algorithm.
+        /// </summary>
+        /// <param name="a">The first number.</param>
+        /// <param name="b">The second number.</param>
+        /// <returns>
+        /// The GCD of <paramref name="a"/> and <paramref name="b"/>;
+        /// <paramref name="a"/> itself when <paramref name="b"/> is zero.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int GreatestCommonDivisor(int a, int b)
+        {
+            // Replace (a, b) with (b, a mod b) until the remainder vanishes.
+            while (b != 0)
+            {
+                int remainder = a % b;
+                a = b;
+                b = remainder;
+            }
+
+            return a;
+        }
+
+        /// <summary>
+        /// Determines the Least Common Multiple (LCM) of two numbers by dividing
+        /// <paramref name="a"/> by the greatest common divisor before multiplying by <paramref name="b"/>.
+        /// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
+        /// </summary>
+        /// <param name="a">The first number.</param>
+        /// <param name="b">The second number.</param>
+        /// <returns>The LCM of <paramref name="a"/> and <paramref name="b"/>.</returns>
+        /// <remarks>
+        /// When both arguments are zero the divisor is zero and the integer division throws
+        /// <see cref="DivideByZeroException"/>.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int LeastCommonMultiple(int a, int b)
+        {
+            int divisor = GreatestCommonDivisor(a, b);
+
+            // Divide first so the intermediate value stays as small as possible.
+            return a / divisor * b;
+        }
+
+        /// <summary>
+        /// Returns the lowest bit of <paramref name="x"/>, which equals
+        /// <paramref name="x"/> % 2 for non-negative values.
+        /// For negative values the result is 0 or 1, whereas the % operator yields 0 or -1.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Modulo2(int x)
+        {
+            return x & 1;
+        }
+
+        /// <summary>
+        /// Returns the two lowest bits of <paramref name="x"/>, which equals
+        /// <paramref name="x"/> % 4 for non-negative values.
+        /// For negative values the result is still in the range 0..3, unlike the % operator.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Modulo4(int x)
+        {
+            return x & 3;
+        }
+
+        /// <summary>
+        /// Returns the three lowest bits of <paramref name="x"/>, which equals
+        /// <paramref name="x"/> % 8 for non-negative values.
+        /// For negative values the result is still in the range 0..7, unlike the % operator.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Modulo8(int x)
+        {
+            return x & 7;
+        }
+
+        /// <summary>
+        /// Fast (x mod m) calculator implemented as a bit mask, with the restriction that
+        /// <paramref name="m"/> should be a power of 2.
+        /// </summary>
+        /// <param name="x">The value to reduce.</param>
+        /// <param name="m">The modulus; the result is only a true modulo when this is a power of 2.</param>
+        /// <returns><paramref name="x"/> masked with <paramref name="m"/> - 1.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int ModuloP2(int x, int m)
+        {
+            int mask = m - 1;
+            return x & mask;
+        }
+
+        /// <summary>
+        /// Returns the absolute value of a 32-bit signed integer without branching.
+        /// Uses bit shifting to speed up the operation compared to <see cref="Math"/>.
+        /// </summary>
+        /// <param name="x">
+        /// A number that is greater than <see cref="int.MinValue"/>, but less than
+        /// or equal to <see cref="int.MaxValue"/>
+        /// </param>
+        /// <returns>The absolute value as an <see cref="int"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Abs(int x)
+        {
+            // The arithmetic shift yields 0 for non-negative input and -1 (all bits set) for negative input.
+            int signMask = x >> 31;
+
+            // XOR with the mask flips the bits of a negative value; subtracting the mask then adds one.
+            return (x ^ signMask) - signMask;
+        }
+
+        /// <summary>
+        /// Squares the given number using a single multiplication.
+        /// </summary>
+        /// <param name="x">A single-precision floating-point number</param>
+        /// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Pow2(float x)
+        {
+            return x * x;
+        }
+
+        /// <summary>
+        /// Cubes the given number using two multiplications.
+        /// </summary>
+        /// <param name="x">A single-precision floating-point number</param>
+        /// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Pow3(float x)
+        {
+            return x * x * x;
+        }
+
+        /// <summary>
+        /// Evaluates the 1D Gaussian function
+        /// G(x) = 1 / (sqrt(2 * pi) * sigma) * exp(-x^2 / (2 * sigma^2)).
+        /// </summary>
+        /// <param name="x">The x provided to G(x).</param>
+        /// <param name="sigma">The spread of the blur.</param>
+        /// <returns>The Gaussian G(x)</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Gaussian(float x, float sigma)
+        {
+            // Exponent: -x^2 / (2 * sigma^2).
+            float twoSigmaSquared = 2 * (sigma * sigma);
+            float exponent = (-x * x) / twoSigmaSquared;
+
+            // Normalization factor: 1 / (sqrt(2 * pi) * sigma).
+            float normalization = 1.0f / (MathF.Sqrt(2 * MathF.PI) * sigma);
+
+            return normalization * MathF.Exp(exponent);
+        }
+
+        /// <summary>
+        /// Evaluates the normalized sine cardinal function
+        /// SinC(x) = sin(pi*x)/(pi*x) for the given value.
+        /// </summary>
+        /// <param name="f">A single-precision floating-point number to calculate the result for.</param>
+        /// <returns>
+        /// The sine cardinal of <paramref name="f" />: 1 when the magnitude of <paramref name="f"/>
+        /// does not exceed <see cref="Constants.Epsilon"/>, and 0 when the magnitude of the computed
+        /// result is below that same threshold.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float SinC(float f)
+        {
+            // Written as a negated "greater than" so that NaN input also takes this branch.
+            if (!(MathF.Abs(f) > Constants.Epsilon))
+            {
+                return 1F;
+            }
+
+            float scaled = f * MathF.PI;
+            float sinc = MathF.Sin(scaled) / scaled;
+
+            // Snap tiny results to exactly zero.
+            return MathF.Abs(sinc) < Constants.Epsilon ? 0F : sinc;
+        }
+
+        /// <summary>
+        /// Restricts a <see cref="byte"/> to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="byte"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static byte Clamp(byte value, byte min, byte max)
+        {
+            // Order is important here as someone might set min to higher than max:
+            // the upper bound is tested first and therefore wins in that case.
+            return value > max
+                ? max
+                : (value < min ? min : value);
+        }
+
+        /// <summary>
+        /// Restricts a <see cref="uint"/> to the inclusive range of min and max.
+        /// The upper bound is tested before the lower bound.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="uint"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint Clamp(uint value, uint min, uint max)
+        {
+            return value > max
+                ? max
+                : (value < min ? min : value);
+        }
+
+        /// <summary>
+        /// Restricts an <see cref="int"/> to the inclusive range of min and max.
+        /// The upper bound is tested before the lower bound.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="int"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Clamp(int value, int min, int max)
+        {
+            return value > max
+                ? max
+                : (value < min ? min : value);
+        }
+
+        /// <summary>
+        /// Restricts a <see cref="float"/> to the inclusive range of min and max.
+        /// The upper bound is tested before the lower bound; a NaN value fails both
+        /// comparisons and is returned unchanged.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="float"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Clamp(float value, float min, float max)
+        {
+            return value > max
+                ? max
+                : (value < min ? min : value);
+        }
+
+        /// <summary>
+        /// Restricts a <see cref="double"/> to the inclusive range of min and max.
+        /// The upper bound is tested before the lower bound; a NaN value fails both
+        /// comparisons and is returned unchanged.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="double"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static double Clamp(double value, double min, double max)
+        {
+            return value > max
+                ? max
+                : (value < min ? min : value);
+        }
+
+        /// <summary>
+        /// Restricts each component of the vector to the inclusive range of min and max.
+        /// 5x Faster than <see cref="Vector4.Clamp(Vector4, Vector4, Vector4)"/>
+        /// on platforms &lt; NET 5.
+        /// </summary>
+        /// <param name="value">The value to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>The clamped <see cref="Vector4"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector4 Clamp(Vector4 value, Vector4 min, Vector4 max)
+        {
+            // Raise to the lower bound first, then cap at the upper bound.
+            Vector4 raised = Vector4.Max(value, min);
+            return Vector4.Min(raised, max);
+        }
+
+        /// <summary>
+        /// Clamps every value in the span, in place, to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Clamp(Span<byte> span, byte min, byte max)
+        {
+            // ClampReduce reports how many leading elements it has already clamped.
+            int processed = ClampReduce(span, min, max);
+            Span<byte> tail = span.Slice(processed);
+
+            if (tail.Length == 0)
+            {
+                return;
+            }
+
+            // Clamp whatever is left one element at a time.
+            ref byte current = ref MemoryMarshal.GetReference(tail);
+            ref byte end = ref Unsafe.Add(ref current, tail.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                current = Clamp(current, min, max);
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Clamps every value in the span, in place, to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Clamp(Span<uint> span, uint min, uint max)
+        {
+            // ClampReduce reports how many leading elements it has already clamped.
+            int processed = ClampReduce(span, min, max);
+            Span<uint> tail = span.Slice(processed);
+
+            if (tail.Length == 0)
+            {
+                return;
+            }
+
+            // Clamp whatever is left one element at a time.
+            ref uint current = ref MemoryMarshal.GetReference(tail);
+            ref uint end = ref Unsafe.Add(ref current, tail.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                current = Clamp(current, min, max);
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Clamps every value in the span, in place, to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Clamp(Span<int> span, int min, int max)
+        {
+            // ClampReduce reports how many leading elements it has already clamped.
+            int processed = ClampReduce(span, min, max);
+            Span<int> tail = span.Slice(processed);
+
+            if (tail.Length == 0)
+            {
+                return;
+            }
+
+            // Clamp whatever is left one element at a time.
+            ref int current = ref MemoryMarshal.GetReference(tail);
+            ref int end = ref Unsafe.Add(ref current, tail.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                current = Clamp(current, min, max);
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Clamps every value in the span, in place, to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Clamp(Span<float> span, float min, float max)
+        {
+            // ClampReduce reports how many leading elements it has already clamped.
+            int processed = ClampReduce(span, min, max);
+            Span<float> tail = span.Slice(processed);
+
+            if (tail.Length == 0)
+            {
+                return;
+            }
+
+            // Clamp whatever is left one element at a time.
+            ref float current = ref MemoryMarshal.GetReference(tail);
+            ref float end = ref Unsafe.Add(ref current, tail.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                current = Clamp(current, min, max);
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Clamps every value in the span, in place, to the inclusive range of min and max.
+        /// </summary>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Clamp(Span<double> span, double min, double max)
+        {
+            // ClampReduce reports how many leading elements it has already clamped.
+            int processed = ClampReduce(span, min, max);
+            Span<double> tail = span.Slice(processed);
+
+            if (tail.Length == 0)
+            {
+                return;
+            }
+
+            // Clamp whatever is left one element at a time.
+            ref double current = ref MemoryMarshal.GetReference(tail);
+            ref double end = ref Unsafe.Add(ref current, tail.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                current = Clamp(current, min, max);
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Clamps the leading portion of the span whose length is a whole multiple of
+        /// <see cref="Vector{T}.Count"/> using <see cref="Vector{T}"/> operations.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        /// <returns>
+        /// The number of leading elements that were clamped; 0 when vector hardware acceleration
+        /// is unavailable or the span is shorter than a single vector.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static int ClampReduce<T>(Span<T> span, T min, T max)
+            where T : unmanaged
+        {
+            if (!Vector.IsHardwareAccelerated || span.Length < Vector<T>.Count)
+            {
+                return 0;
+            }
+
+            // Drop the trailing elements that do not fill a whole vector.
+            int vectorizedLength = span.Length - ModuloP2(span.Length, Vector<T>.Count);
+
+            if (vectorizedLength > 0)
+            {
+                ClampImpl(span.Slice(0, vectorizedLength), min, max);
+            }
+
+            return vectorizedLength;
+        }
+
+        /// <summary>
+        /// Clamps the span in place using <see cref="Vector{T}"/> minimum and maximum operations.
+        /// </summary>
+        /// <remarks>
+        /// The span is reinterpreted as a sequence of <see cref="Vector{T}"/> values and only
+        /// <c>span.Length / Vector&lt;T&gt;.Count</c> whole vectors are processed, so any elements
+        /// beyond the last whole vector are left untouched.
+        /// </remarks>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <param name="span">The span containing the values to clamp.</param>
+        /// <param name="min">The minimum inclusive value.</param>
+        /// <param name="max">The maximum inclusive value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void ClampImpl<T>(Span<T> span, T min, T max)
+            where T : unmanaged
+        {
+            var vmin = new Vector<T>(min);
+            var vmax = new Vector<T>(max);
+
+            // n: whole vectors in the span; m: vectors left over after 4x unrolling; u: vectors handled unrolled.
+            int n = span.Length / Vector<T>.Count;
+            int m = Modulo4(n);
+            int u = n - m;
+
+            ref Vector<T> vs0 = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
+            ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
+            ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
+            ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
+            ref Vector<T> vsEnd = ref Unsafe.Add(ref vs0, u);
+
+            // Main loop: four vectors per iteration.
+            while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
+            {
+                vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
+                vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax);
+                vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax);
+                vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax);
+
+                vs0 = ref Unsafe.Add(ref vs0, 4);
+                vs1 = ref Unsafe.Add(ref vs1, 4);
+                vs2 = ref Unsafe.Add(ref vs2, 4);
+                vs3 = ref Unsafe.Add(ref vs3, 4);
+            }
+
+            // Tail loop: the remaining one to three vectors, one at a time.
+            if (m > 0)
+            {
+                vs0 = ref vsEnd;
+                vsEnd = ref Unsafe.Add(ref vsEnd, m);
+
+                while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
+                {
+                    vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
+
+                    vs0 = ref Unsafe.Add(ref vs0, 1);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Multiplies the "x", "y", "z" components of a vector by its "w" component, in place,
+        /// leaving the "w" component intact.
+        /// </summary>
+        /// <param name="source">The <see cref="Vector4"/> to premultiply</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Premultiply(ref Vector4 source)
+        {
+            float alpha = source.W;
+
+            // Scale all four lanes at once, then put the original W back.
+            source = source * alpha;
+            source.W = alpha;
+        }
+
+        /// <summary>
+        /// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>
+        /// by dividing the "x", "y", "z" components by the "w" component, in place.
+        /// </summary>
+        /// <remarks>
+        /// No special handling exists for a "w" component of zero; the division is performed as is.
+        /// </remarks>
+        /// <param name="source">The <see cref="Vector4"/> to un-premultiply</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void UnPremultiply(ref Vector4 source)
+        {
+            float alpha = source.W;
+
+            // Divide all four lanes at once, then put the original W back.
+            source = source / alpha;
+            source.W = alpha;
+        }
+
+        /// <summary>
+        /// Bulk variant of <see cref="Premultiply(ref Vector4)"/>: premultiplies every
+        /// vector in the span in place.
+        /// </summary>
+        /// <param name="vectors">The span of vectors</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Premultiply(Span<Vector4> vectors)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // NOTE(review): the guard tests Avx2 although only Avx.* methods are called below — confirm intent.
+            if (Avx2.IsSupported && vectors.Length >= 2)
+            {
+                // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+                ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
+                ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+                // Each iteration processes two Vector4 values held in one 256-bit register.
+                while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+                {
+                    Vector256<float> source = vectorsBase;
+
+                    // The shuffle control selects element 3 (W) for every position of each 128-bit lane.
+                    Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
+
+                    // The blend control takes elements 3 and 7 (the W values) from the unmodified source.
+                    vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
+                    vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+                }
+
+                if (Modulo2(vectors.Length) != 0)
+                {
+                    // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+                    Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+                }
+            }
+            else
+#endif
+            {
+                // Scalar fallback: walk the span one Vector4 at a time.
+                ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
+
+                while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
+                {
+                    Premultiply(ref vectorsStart);
+
+                    vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>: reverses premultiplication
+        /// for every vector in the span in place.
+        /// </summary>
+        /// <param name="vectors">The span of vectors</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void UnPremultiply(Span<Vector4> vectors)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // NOTE(review): the guard tests Avx2 although only Avx.* methods are called below — confirm intent.
+            if (Avx2.IsSupported && vectors.Length >= 2)
+            {
+                // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+                ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
+                ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+                // Each iteration processes two Vector4 values held in one 256-bit register.
+                while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+                {
+                    Vector256<float> source = vectorsBase;
+
+                    // Despite its name, this local holds the broadcast W values used as the divisor.
+                    Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
+
+                    // The blend control takes elements 3 and 7 (the W values) from the unmodified source.
+                    vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
+                    vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+                }
+
+                if (Modulo2(vectors.Length) != 0)
+                {
+                    // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+                    UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+                }
+            }
+            else
+#endif
+            {
+                // Scalar fallback: walk the span one Vector4 at a time.
+                ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
+
+                while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
+                {
+                    UnPremultiply(ref vectorsStart);
+
+                    vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Raises the X, Y and Z channels of every input vector to the power of 3, in place,
+        /// leaving the W channel untouched.
+        /// </summary>
+        /// <param name="vectors">The span of vectors</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
+        {
+            ref Vector4 current = ref MemoryMarshal.GetReference(vectors);
+            ref Vector4 end = ref Unsafe.Add(ref current, vectors.Length);
+
+            while (Unsafe.IsAddressLessThan(ref current, ref end))
+            {
+                Vector4 pixel = current;
+
+                // Fast path for the default gamma exposure, which is 3. Instead of calling Math.Pow
+                // once per component, the whole pixel is multiplied by itself in Vector4 form and the
+                // alpha channel is restored afterwards. The iteration can then be inlined and
+                // translated into vectorized instructions, with much better performance.
+                Vector4 cubed = pixel * pixel * pixel;
+                cubed.W = pixel.W;
+
+                current = cubed;
+                current = ref Unsafe.Add(ref current, 1);
+            }
+        }
+
+        /// <summary>
+        /// Calculates an approximation of the cube root of all the XYZ channels of the input vectors,
+        /// in place. The W channel of each vector is written back unchanged.
+        /// </summary>
+        /// <param name="vectors">The span of vectors</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse41.IsSupported)
+            {
+                ref Vector128<float> vectors128Ref = ref Unsafe.As<Vector4, Vector128<float>>(ref MemoryMarshal.GetReference(vectors));
+                ref Vector128<float> vectors128End = ref Unsafe.Add(ref vectors128Ref, vectors.Length);
+
+                // Integer constants: 341 (used with the shift by 10 below, 341 / 1024 is roughly one third),
+                // the sign-bit mask (bit pattern of -0.0f) and the bit pattern of 1.0f.
+                var v128_341 = Vector128.Create(341);
+                Vector128<int> v128_negativeZero = Vector128.Create(-0.0f).AsInt32();
+                Vector128<int> v128_one = Vector128.Create(1.0f).AsInt32();
+
+                var v128_13rd = Vector128.Create(1 / 3f);
+                var v128_23rds = Vector128.Create(2 / 3f);
+
+                while (Unsafe.IsAddressLessThan(ref vectors128Ref, ref vectors128End))
+                {
+                    Vector128<float> vecx = vectors128Ref;
+                    Vector128<int> veax = vecx.AsInt32();
+
+                    // If we can use SSE41 instructions, we can vectorize the entire cube root calculation, and also execute it
+                    // directly on 32 bit floating point values. What follows is a vectorized implementation of this method:
+                    // https://www.musicdsp.org/en/latest/Other/206-fast-cube-root-square-root-and-reciprocal-for-x86-sse-cpus.html.
+                    // Furthermore, after the initial setup in vectorized form, we're doing two Newton approximations here
+                    // using a different succession (the same used below), which should be less unstable due to not having cube pow.
+                    // The first AndNot clears the sign bit; the final Or restores the sign bit of the original value.
+                    veax = Sse2.AndNot(v128_negativeZero, veax);
+                    veax = Sse2.Subtract(veax, v128_one);
+                    veax = Sse2.ShiftRightArithmetic(veax, 10);
+                    veax = Sse41.MultiplyLow(veax, v128_341);
+                    veax = Sse2.Add(veax, v128_one);
+                    veax = Sse2.AndNot(v128_negativeZero, veax);
+                    veax = Sse2.Or(veax, Sse2.And(vecx.AsInt32(), v128_negativeZero));
+
+                    Vector128<float> y4 = veax.AsSingle();
+
+                    // Two refinement steps of the form y = (2/3 * y) + (1/3 * (x / (y * y))).
+                    if (Fma.IsSupported)
+                    {
+                        y4 = Fma.MultiplyAdd(v128_23rds, y4, Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
+                        y4 = Fma.MultiplyAdd(v128_23rds, y4, Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
+                    }
+                    else
+                    {
+                        y4 = Sse.Add(Sse.Multiply(v128_23rds, y4), Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
+                        y4 = Sse.Add(Sse.Multiply(v128_23rds, y4), Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
+                    }
+
+                    // Control 0xF0 copies element 3 (W) of the original vector into element 3 of the result.
+                    y4 = Sse41.Insert(y4, vecx, 0xF0);
+
+                    vectors128Ref = y4;
+                    vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
+                }
+            }
+            else
+#endif
+            {
+                ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
+                ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
+
+                // Fallback with scalar preprocessing and vectorized approximation steps
+                while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
+                {
+                    Vector4 v = vectorsRef;
+
+                    double
+                        x64 = v.X,
+                        y64 = v.Y,
+                        z64 = v.Z;
+                    float a = v.W;
+
+                    // Reinterpret the bits of each double as an unsigned 64-bit integer.
+                    ulong
+                        xl = *(ulong*)&x64,
+                        yl = *(ulong*)&y64,
+                        zl = *(ulong*)&z64;
+
+                    // Here we use a trick to compute the starting value x0 for the cube root. This is because doing
+                    // pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
+                    // this means what we actually want is to find the cube root of our clamped values.
+                    // For more info on the constant below, see:
+                    // https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
+                    // Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
+                    // store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
+                    // register, and use it to accelerate two steps of the Newton approximation using SIMD.
+                    xl = 0x2a9f8a7be393b600 + (xl / 3);
+                    yl = 0x2a9f8a7be393b600 + (yl / 3);
+                    zl = 0x2a9f8a7be393b600 + (zl / 3);
+
+                    // W is zeroed here; whatever the steps below produce in that lane is overwritten
+                    // with the saved alpha value before the result is stored.
+                    Vector4 y4;
+                    y4.X = (float)*(double*)&xl;
+                    y4.Y = (float)*(double*)&yl;
+                    y4.Z = (float)*(double*)&zl;
+                    y4.W = 0;
+
+                    // Two refinement steps of the form y = (2/3 * y) + (1/3 * (x / (y * y))).
+                    y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
+                    y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
+                    y4.W = a;
+
+                    vectorsRef = y4;
+                    vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
+                }
+            }
+        }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+
+        /// <summary>
+        /// Linearly interpolates between two vectors, element by element, using the given weighting:
+        /// <c>value1 + ((value2 - value1) * amount)</c>.
+        /// </summary>
+        /// <param name="value1">The first value.</param>
+        /// <param name="value2">The second value.</param>
+        /// <param name="amount">Values between 0 and 1 that indicates the weight of <paramref name="value2"/>.</param>
+        /// <returns>The <see cref="Vector256{Single}"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Lerp(
+            in Vector256<float> value1,
+            in Vector256<float> value2,
+            in Vector256<float> amount)
+        {
+            Vector256<float> delta = Avx.Subtract(value2, value1);
+
+            // Use a fused multiply-add when the hardware offers it, otherwise multiply and add separately.
+            return Fma.IsSupported
+                ? Fma.MultiplyAdd(delta, amount, value1)
+                : Avx.Add(Avx.Multiply(delta, amount), value1);
+        }
+#endif
+
+        /// <summary>
+        /// Linearly interpolates between two values using the given weighting:
+        /// <c>value1 + ((value2 - value1) * amount)</c>.
+        /// </summary>
+        /// <param name="value1">The first value.</param>
+        /// <param name="value2">The second value.</param>
+        /// <param name="amount">A value between 0 and 1 that indicates the weight of <paramref name="value2"/>.</param>
+        /// <returns>The <see cref="float"/>.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float Lerp(float value1, float value2, float amount)
+        {
+            return ((value2 - value1) * amount) + value1;
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
@ -0,0 +1,193 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
+// and ROTR (Rotate Right) emitting efficient CPU instructions:
+// https://github.com/dotnet/coreclr/pull/1830
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Defines the contract for methods that allow the shuffling of pixel components.
+    /// Used for shuffling on platforms that do not support Hardware Intrinsics.
+    /// </summary>
+    internal interface IComponentShuffle
+    {
+        /// <summary>
+        /// Gets the shuffle control byte describing the component order applied by this shuffle.
+        /// </summary>
+        byte Control { get; }
+
+        /// <summary>
+        /// Shuffles the 8-bit components in <paramref name="source"/> according to this
+        /// instance's component order and stores the results in <paramref name="dest"/>.
+        /// This is the scalar path used when no hardware-accelerated shuffle is available.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
+    }
+
+    /// <summary>
+    /// Marks an <see cref="IComponentShuffle"/> that reorders the bytes of each 4-byte group
+    /// of the source into a 4-byte group of the destination.
+    /// </summary>
+    internal interface IShuffle4 : IComponentShuffle
+    {
+    }
+
+    /// <summary>
+    /// General purpose <see cref="IShuffle4"/>: for every 4-byte group, destination byte <c>k</c>
+    /// receives the source byte whose index within the group is <c>pk</c>.
+    /// </summary>
+    internal readonly struct DefaultShuffle4 : IShuffle4
+    {
+        private readonly byte p3;
+        private readonly byte p2;
+        private readonly byte p1;
+        private readonly byte p0;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct.
+        /// Each argument must lie in the range 0 to 3 (checked by <c>DebugGuard</c>).
+        /// </summary>
+        /// <param name="p3">Index within the group of the source byte written to destination byte 3.</param>
+        /// <param name="p2">Index within the group of the source byte written to destination byte 2.</param>
+        /// <param name="p1">Index within the group of the source byte written to destination byte 1.</param>
+        /// <param name="p0">Index within the group of the source byte written to destination byte 0.</param>
+        public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
+        {
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
+
+            this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
+            this.p0 = p0;
+            this.p1 = p1;
+            this.p2 = p2;
+            this.p3 = p3;
+        }
+
+        /// <inheritdoc/>
+        public byte Control { get; }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte src = ref MemoryMarshal.GetReference(source);
+            ref byte dst = ref MemoryMarshal.GetReference(dest);
+
+            int first = this.p0;
+            int second = this.p1;
+            int third = this.p2;
+            int fourth = this.p3;
+            int length = source.Length;
+
+            // No bounds checks are performed here: the loop walks the source in steps of four
+            // and writes the same offsets in the destination.
+            int offset = 0;
+            while (offset < length)
+            {
+                ref byte s = ref Unsafe.Add(ref src, offset);
+                ref byte d = ref Unsafe.Add(ref dst, offset);
+
+                d = Unsafe.Add(ref s, first);
+                Unsafe.Add(ref d, 1) = Unsafe.Add(ref s, second);
+                Unsafe.Add(ref d, 2) = Unsafe.Add(ref s, third);
+                Unsafe.Add(ref d, 3) = Unsafe.Add(ref s, fourth);
+
+                offset += 4;
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IShuffle4"/> that rotates each 32-bit group left by 8 bits.
+    /// </summary>
+    internal readonly struct WXYZShuffle4 : IShuffle4
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
+            ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
+            int count = source.Length / 4;
+
+            int index = 0;
+            while (index < count)
+            {
+                uint value = Unsafe.Add(ref src, index);
+
+                // Written most significant byte first:
+                // value          = [W Z Y X]
+                // ROTL(8, value) = [Z Y X W]
+                Unsafe.Add(ref dst, index) = (value << 8) | (value >> 24);
+                index++;
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IShuffle4"/> that reverses the byte order of each 32-bit group.
+    /// </summary>
+    internal readonly struct WZYXShuffle4 : IShuffle4
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
+            ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
+            int count = source.Length / 4;
+
+            int index = 0;
+            while (index < count)
+            {
+                uint value = Unsafe.Add(ref src, index);
+
+                // Written most significant byte first:
+                // value          = [W Z Y X]
+                // REVERSE(value) = [X Y Z W]
+                uint reversed = BinaryPrimitives.ReverseEndianness(value);
+                Unsafe.Add(ref dst, index) = reversed;
+                index++;
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IShuffle4"/> that rotates each 32-bit group right by 8 bits.
+    /// </summary>
+    internal readonly struct YZWXShuffle4 : IShuffle4
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
+            ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
+            int count = source.Length / 4;
+
+            int index = 0;
+            while (index < count)
+            {
+                uint value = Unsafe.Add(ref src, index);
+
+                // Written most significant byte first:
+                // value          = [W Z Y X]
+                // ROTR(8, value) = [X W Z Y]
+                Unsafe.Add(ref dst, index) = (value >> 8) | (value << 24);
+                index++;
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IShuffle4"/> that swaps the lowest byte and the second-highest byte of each
+    /// 32-bit group while leaving the other two bytes in place.
+    /// </summary>
+    internal readonly struct ZYXWShuffle4 : IShuffle4
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
+            ref uint dst = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
+            int count = source.Length / 4;
+
+            int index = 0;
+            while (index < count)
+            {
+                uint value = Unsafe.Add(ref src, index);
+
+                // Written most significant byte first:
+                // value                   = [W Z Y X]
+                // kept                    = [W 0 Y 0]
+                // moved                   = [0 Z 0 X]
+                // swapped=ROTL(16, moved) = [0 X 0 Z]
+                // kept | swapped          = [W X Y Z]
+                uint kept = value & 0xFF00FF00;
+                uint moved = value & 0x00FF00FF;
+                uint swapped = (moved << 16) | (moved >> 16);
+
+                // The two halves occupy disjoint bytes, so OR-ing them equals adding them.
+                Unsafe.Add(ref dst, index) = kept | swapped;
+                index++;
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
@ -0,0 +1,103 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Marks an <see cref="IComponentShuffle"/> that expands each 3-byte group of the source
+    /// to a 4-byte group in the destination (padding with 0xFF) and shuffles the result.
+    /// </summary>
+    internal interface IPad3Shuffle4 : IComponentShuffle
+    {
+    }
+
+    /// <summary>
+    /// General purpose <see cref="IPad3Shuffle4"/>: every 3-byte group of the source is padded
+    /// with a fourth byte of 0xFF, then destination byte <c>k</c> of the matching 4-byte group
+    /// receives the padded byte whose index is <c>pk</c>.
+    /// </summary>
+    internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
+    {
+        private readonly byte p3;
+        private readonly byte p2;
+        private readonly byte p1;
+        private readonly byte p0;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
+        /// Each argument must lie in the range 0 to 3 (checked by <c>DebugGuard</c>);
+        /// index 3 selects the 0xFF padding byte.
+        /// </summary>
+        /// <param name="p3">Index of the padded byte written to destination byte 3.</param>
+        /// <param name="p2">Index of the padded byte written to destination byte 2.</param>
+        /// <param name="p1">Index of the padded byte written to destination byte 1.</param>
+        /// <param name="p0">Index of the padded byte written to destination byte 0.</param>
+        public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
+        {
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
+
+            this.p3 = p3;
+            this.p2 = p2;
+            this.p1 = p1;
+            this.p0 = p0;
+            this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
+        }
+
+        /// <inheritdoc/>
+        public byte Control { get; }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte sBase = ref MemoryMarshal.GetReference(source);
+            ref byte dBase = ref MemoryMarshal.GetReference(dest);
+
+            int p3 = this.p3;
+            int p2 = this.p2;
+            int p1 = this.p1;
+            int p0 = this.p0;
+
+            // Scratch space holding one padded group: three source bytes followed by 0xFF.
+            Span<byte> temp = stackalloc byte[4];
+            ref byte t = ref MemoryMarshal.GetReference(temp);
+
+            // The padding byte is the same for every group, so it is written once.
+            Unsafe.Add(ref t, 3) = byte.MaxValue;
+
+            for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
+            {
+                ref byte s = ref Unsafe.Add(ref sBase, i);
+
+                // Copy exactly three bytes. A single 32-bit load would read one byte past the
+                // end of the source on the final group and would place the padding at a
+                // different position on a big-endian machine.
+                t = s;
+                Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
+                Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);
+
+                Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
+                Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
+                Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
+                Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IPad3Shuffle4"/> that keeps the three source bytes in order and appends
+    /// a fourth byte of 0xFF to every group.
+    /// </summary>
+    internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte src = ref MemoryMarshal.GetReference(source);
+            ref byte dst = ref MemoryMarshal.GetReference(dest);
+
+            int length = source.Length;
+
+            // The bulk loop loads four bytes at a time, so it must stop while at least
+            // four readable bytes remain after the current position.
+            int bulkEnd = length - 4;
+            int read = 0;
+            int written = 0;
+
+            for (; read < bulkEnd; read += 3, written += 4)
+            {
+                // One 32-bit load/store per group; the OR forces the most significant byte to 0xFF.
+                uint packed = Unsafe.As<byte, uint>(ref Unsafe.Add(ref src, read));
+                Unsafe.As<byte, uint>(ref Unsafe.Add(ref dst, written)) = packed | 0xFF000000;
+            }
+
+            // Tail: copy byte by byte so nothing beyond the end of the source is read.
+            for (; read < length; read += 3, written += 4)
+            {
+                ref byte s = ref Unsafe.Add(ref src, read);
+                ref byte d = ref Unsafe.Add(ref dst, written);
+
+                d = s;
+                Unsafe.Add(ref d, 1) = Unsafe.Add(ref s, 1);
+                Unsafe.Add(ref d, 2) = Unsafe.Add(ref s, 2);
+                Unsafe.Add(ref d, 3) = byte.MaxValue;
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
@ -0,0 +1,53 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Marks an <see cref="IComponentShuffle"/> that reorders the bytes of each 3-byte group
+    /// of the source into a 3-byte group of the destination.
+    /// </summary>
+    internal interface IShuffle3 : IComponentShuffle
+    {
+    }
+
+    /// <summary>
+    /// General purpose <see cref="IShuffle3"/>: for every 3-byte group, destination byte <c>k</c>
+    /// receives the source byte whose index within the group is <c>pk</c>.
+    /// </summary>
+    internal readonly struct DefaultShuffle3 : IShuffle3
+    {
+        private readonly byte p2;
+        private readonly byte p1;
+        private readonly byte p0;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
+        /// Each argument must lie in the range 0 to 2 (checked by <c>DebugGuard</c>).
+        /// </summary>
+        /// <param name="p2">Index within the group of the source byte written to destination byte 2.</param>
+        /// <param name="p1">Index within the group of the source byte written to destination byte 1.</param>
+        /// <param name="p0">Index within the group of the source byte written to destination byte 0.</param>
+        public DefaultShuffle3(byte p2, byte p1, byte p0)
+        {
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));
+
+            // The control byte always carries 3 in its highest position.
+            this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
+            this.p0 = p0;
+            this.p1 = p1;
+            this.p2 = p2;
+        }
+
+        /// <inheritdoc/>
+        public byte Control { get; }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte src = ref MemoryMarshal.GetReference(source);
+            ref byte dst = ref MemoryMarshal.GetReference(dest);
+
+            int first = this.p0;
+            int second = this.p1;
+            int third = this.p2;
+            int length = source.Length;
+
+            int offset = 0;
+            while (offset < length)
+            {
+                ref byte s = ref Unsafe.Add(ref src, offset);
+                ref byte d = ref Unsafe.Add(ref dst, offset);
+
+                d = Unsafe.Add(ref s, first);
+                Unsafe.Add(ref d, 1) = Unsafe.Add(ref s, second);
+                Unsafe.Add(ref d, 2) = Unsafe.Add(ref s, third);
+
+                offset += 3;
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
@ -0,0 +1,101 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    /// <summary>
+    /// Marks an <see cref="IComponentShuffle"/> that shuffles each 4-byte group of the source
+    /// and stores only three bytes per group in the destination.
+    /// </summary>
+    internal interface IShuffle4Slice3 : IComponentShuffle
+    {
+    }
+
+    /// <summary>
+    /// General purpose <see cref="IShuffle4Slice3"/>: for every 4-byte source group, destination
+    /// byte <c>k</c> (0 to 2) of the matching 3-byte group receives the source byte whose index
+    /// within the group is <c>pk</c>.
+    /// </summary>
+    internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
+    {
+        private readonly byte p2;
+        private readonly byte p1;
+        private readonly byte p0;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DefaultShuffle4Slice3"/> struct.
+        /// Each argument must lie in the range 0 to 3 (checked by <c>DebugGuard</c>).
+        /// </summary>
+        /// <param name="p3">Only validated and folded into <see cref="Control"/>; not used by the fallback.</param>
+        /// <param name="p2">Index within the group of the source byte written to destination byte 2.</param>
+        /// <param name="p1">Index within the group of the source byte written to destination byte 1.</param>
+        /// <param name="p0">Index within the group of the source byte written to destination byte 0.</param>
+        public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
+        {
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
+            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
+
+            this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
+            this.p0 = p0;
+            this.p1 = p1;
+            this.p2 = p2;
+        }
+
+        /// <inheritdoc/>
+        public byte Control { get; }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte src = ref MemoryMarshal.GetReference(source);
+            ref byte dst = ref MemoryMarshal.GetReference(dest);
+
+            int first = this.p0;
+            int second = this.p1;
+            int third = this.p2;
+
+            // The destination length drives the loop: three bytes out for every four bytes in.
+            int written = 0;
+            int read = 0;
+            while (written < dest.Length)
+            {
+                ref byte s = ref Unsafe.Add(ref src, read);
+                ref byte d = ref Unsafe.Add(ref dst, written);
+
+                d = Unsafe.Add(ref s, first);
+                Unsafe.Add(ref d, 1) = Unsafe.Add(ref s, second);
+                Unsafe.Add(ref d, 2) = Unsafe.Add(ref s, third);
+
+                written += 3;
+                read += 4;
+            }
+        }
+    }
+
+    /// <summary>
+    /// An <see cref="IShuffle4Slice3"/> that copies the first three bytes of every 4-byte
+    /// source group unchanged and drops the fourth.
+    /// </summary>
+    internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3
+    {
+        /// <inheritdoc/>
+        public byte Control
+        {
+            [MethodImpl(InliningOptions.ShortMethod)]
+            get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
+        }
+
+        /// <inheritdoc/>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
+            ref Byte3 dst = ref Unsafe.As<byte, Byte3>(ref MemoryMarshal.GetReference(dest));
+
+            int groups = source.Length / 4;
+            int unrolledEnd = groups - Numerics.Modulo4(groups);
+
+            // Main loop, unrolled four groups at a time. Each assignment copies the three
+            // bytes at the start of a 4-byte source group into a 3-byte destination slot.
+            int index = 0;
+            for (; index < unrolledEnd; index += 4)
+            {
+                Unsafe.Add(ref dst, index) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index));
+                Unsafe.Add(ref dst, index + 1) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index + 1));
+                Unsafe.Add(ref dst, index + 2) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index + 2));
+                Unsafe.Add(ref dst, index + 3) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index + 3));
+            }
+
+            // Remaining groups, one at a time.
+            for (; index < groups; index++)
+            {
+                Unsafe.Add(ref dst, index) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index));
+            }
+        }
+    }
+
+    /// <summary>
+    /// An opaque 3-byte value with no fields. It is reinterpreted over raw memory (via
+    /// <c>Unsafe.As</c>) so that three bytes can be copied with a single struct assignment.
+    /// </summary>
+    [StructLayout(LayoutKind.Explicit, Size = 3)]
+    internal readonly struct Byte3
+    {
+    }
+}
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
@ -1,103 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-#if SUPPORTS_RUNTIME_INTRINSICS
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-
-namespace SixLabors.ImageSharp
-{
-    internal static partial class SimdUtils
-    {
-        // AVX2 based float-to-byte conversion helpers. The diff removes this file.
-        public static class Avx2Intrinsics
-        {
-            // 32 bytes loaded below as a Vector256<int>; read as little-endian Int32 values
-            // they are the element indices 0, 4, 1, 5, 2, 6, 3, 7.
-            private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
-
-            /// <summary>
-            /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
-            /// Does nothing when AVX2 is not supported.
-            /// </summary>
-            /// <param name="source">The source span; on return it is advanced past the converted elements.</param>
-            /// <param name="dest">The destination span; on return it is advanced past the written elements.</param>
-            [MethodImpl(InliningOptions.ShortMethod)]
-            internal static void NormalizedFloatToByteSaturateReduce(
-                ref ReadOnlySpan<float> source,
-                ref Span<byte> dest)
-            {
-                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
-
-                if (Avx2.IsSupported)
-                {
-                    // NOTE(review): the block size is taken from Vector<byte>.Count while the worker
-                    // below validates against Vector256<byte>.Count - presumably both are 32 here; confirm.
-                    int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
-                    int adjustedCount = source.Length - remainder;
-
-                    if (adjustedCount > 0)
-                    {
-                        NormalizedFloatToByteSaturate(
-                            source.Slice(0, adjustedCount),
-                            dest.Slice(0, adjustedCount));
-
-                        // Leave only the unprocessed tail for the caller.
-                        source = source.Slice(adjustedCount);
-                        dest = dest.Slice(adjustedCount);
-                    }
-                }
-            }
-
-            /// <summary>
-            /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
-            /// </summary>
-            /// <remarks>
-            /// Implementation is based on MagicScaler code:
-            /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477
-            /// </remarks>
-            /// <param name="source">The source span of floats.</param>
-            /// <param name="dest">The destination span of bytes.</param>
-            internal static void NormalizedFloatToByteSaturate(
-                ReadOnlySpan<float> source,
-                Span<byte> dest)
-            {
-                VerifySpanInput(source, dest, Vector256<byte>.Count);
-
-                // One iteration per Vector256<byte> of output.
-                int n = dest.Length / Vector256<byte>.Count;
-
-                ref Vector256<float> sourceBase =
-                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
-                ref Vector256<byte> destBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
-
-                var maxBytes = Vector256.Create(255f);
-                ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
-                Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
-
-                for (int i = 0; i < n; i++)
-                {
-                    // Four float vectors are consumed for every byte vector produced.
-                    ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
-
-                    Vector256<float> f0 = s;
-                    Vector256<float> f1 = Unsafe.Add(ref s, 1);
-                    Vector256<float> f2 = Unsafe.Add(ref s, 2);
-                    Vector256<float> f3 = Unsafe.Add(ref s, 3);
-
-                    // Scale by 255 and convert to 32-bit integers.
-                    Vector256<int> w0 = ConvertToInt32(f0, maxBytes);
-                    Vector256<int> w1 = ConvertToInt32(f1, maxBytes);
-                    Vector256<int> w2 = ConvertToInt32(f2, maxBytes);
-                    Vector256<int> w3 = ConvertToInt32(f3, maxBytes);
-
-                    // Narrow int -> short -> byte with saturation, then permute the 32-bit elements
-                    // with the mask (presumably to undo the per-128-bit-lane interleaving of the packs).
-                    Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
-                    Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
-                    Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
-                    b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
-
-                    Unsafe.Add(ref destBase, i) = b;
-                }
-            }
-
-            // Multiplies vf by scale and converts the products to 32-bit integers.
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
-            {
-                vf = Avx.Multiply(vf, scale);
-                return Avx.ConvertToVector256Int32(vf);
-            }
-        }
-    }
-}
-#endif
--- a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
@ -35,7 +35,7 @@ namespace SixLabors.ImageSharp
                    return;
                }

-                int remainder = ImageMaths.Modulo8(source.Length);
+                int remainder = Numerics.Modulo8(source.Length);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp
                    return;
                }

-                int remainder = ImageMaths.Modulo8(source.Length);
+                int remainder = Numerics.Modulo8(source.Length);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
--- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp
                    return;
                }

-                int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
+                int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
@ -84,7 +84,7 @@ namespace SixLabors.ImageSharp
                    return;
                }

-                int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
+                int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
--- a/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp
            {
                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

-                int remainder = ImageMaths.Modulo4(source.Length);
+                int remainder = Numerics.Modulo4(source.Length);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
@ -52,7 +52,7 @@ namespace SixLabors.ImageSharp
            {
                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

-                int remainder = ImageMaths.Modulo4(source.Length);
+                int remainder = Numerics.Modulo4(source.Length);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp
                    Vector4 s = Unsafe.Add(ref sBase, i);
                    s *= maxBytes;
                    s += half;
-                    s = Vector4Utilities.FastClamp(s, Vector4.Zero, maxBytes);
+                    s = Numerics.Clamp(s, Vector4.Zero, maxBytes);

                    ref ByteVector4 d = ref Unsafe.Add(ref dBase, i);
                    d.X = (byte)s.X;
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@ -0,0 +1,944 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp
+{
+    internal static partial class SimdUtils
+    {
+        public static class HwIntrinsics
+        {
+            // The "8x32" tables below hold 32 bytes each; read as eight little-endian Int32
+            // values they are element indices (presumably operands for a 256-bit permute; usage not shown here).
+
+            // Indices 0, 4, 1, 5, 2, 6, 3, 7.
+            public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
+
+            // Indices 0, 2, 4, 6, 1, 3, 5, 7 (even elements first, then odd).
+            public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
+
+            // Indices 0, 1, 4, 5, 2, 3, 6, 7 (the two middle pairs exchanged).
+            public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };
+
+            // 16 byte indices: source bytes 0..11 in groups of three, each followed by 0x80.
+            // NOTE(review): 0x80 presumably selects a zero byte in a byte shuffle - confirm at the call site.
+            private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
+
+            // 16 byte indices: the first three bytes of each 4-byte group, followed by four 0x80 entries.
+            private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
+
+            // Two identical 16-byte rows: the first three bytes of each 4-byte group,
+            // then every fourth byte (3, 7, 11, 15) moved to the end of the row.
+            private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha =>
+                new byte[]
+                {
+                    0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
+                    0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15
+                };
+
+            // Read as eight little-endian Int32 values: indices 0, 1, 2, 4, 5, 6, 3, 7.
+            public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32 =>
+                new byte[]
+                {
+                    0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
+                    5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
+                };
+
+            /// <summary>
+            /// Shuffles as many single-precision (32-bit) floating-point elements of
+            /// <paramref name="source"/> as fit whole vectors, using the control, and stores the
+            /// results in <paramref name="dest"/>. Both spans are then advanced past the processed
+            /// elements so that only the remainder is left. Does nothing when neither AVX nor SSE
+            /// is supported.
+            /// </summary>
+            /// <param name="source">The source span of floats.</param>
+            /// <param name="dest">The destination span of floats.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Shuffle4Reduce(
+                ref ReadOnlySpan<float> source,
+                ref Span<float> dest,
+                byte control)
+            {
+                if (!Avx.IsSupported && !Sse.IsSupported)
+                {
+                    return;
+                }
+
+                // Use the widest vector the hardware offers.
+                int vectorLength = Avx.IsSupported
+                    ? Vector256<float>.Count
+                    : Vector128<float>.Count;
+
+                int vectorizable = source.Length - Numerics.ModuloP2(source.Length, vectorLength);
+                if (vectorizable <= 0)
+                {
+                    return;
+                }
+
+                Shuffle4(source.Slice(0, vectorizable), dest.Slice(0, vectorizable), control);
+
+                source = source.Slice(vectorizable);
+                dest = dest.Slice(vectorizable);
+            }
+
+            /// <summary>
+            /// Shuffles as many 8-bit integers of <paramref name="source"/> as fit whole vectors,
+            /// using the control, and stores the results in <paramref name="dest"/>. Both spans are
+            /// then advanced past the processed bytes so that only the remainder is left. Does
+            /// nothing when neither AVX2 nor SSSE3 is supported.
+            /// </summary>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Shuffle4Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (!Avx2.IsSupported && !Ssse3.IsSupported)
+                {
+                    return;
+                }
+
+                // Use the widest vector the hardware offers.
+                int vectorLength = Avx2.IsSupported
+                    ? Vector256<byte>.Count
+                    : Vector128<byte>.Count;
+
+                int vectorizable = source.Length - Numerics.ModuloP2(source.Length, vectorLength);
+                if (vectorizable <= 0)
+                {
+                    return;
+                }
+
+                Shuffle4(source.Slice(0, vectorizable), dest.Slice(0, vectorizable), control);
+
+                source = source.Slice(vectorizable);
+                dest = dest.Slice(vectorizable);
+            }
+
+            /// <summary>
+            /// Shuffles as many 8-bit integer triplets of <paramref name="source"/> as fit whole
+            /// blocks of three 128-bit vectors, using the control, and stores the results in
+            /// <paramref name="dest"/>. Both spans are then advanced past the processed bytes so
+            /// that only the remainder is left. Does nothing when SSSE3 is not supported.
+            /// </summary>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Shuffle3Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (!Ssse3.IsSupported)
+                {
+                    return;
+                }
+
+                // Work in blocks of three vectors so that every block holds whole triplets.
+                int blockSize = Vector128<byte>.Count * 3;
+                int vectorizable = source.Length - (source.Length % blockSize);
+                if (vectorizable <= 0)
+                {
+                    return;
+                }
+
+                Shuffle3(source.Slice(0, vectorizable), dest.Slice(0, vectorizable), control);
+
+                source = source.Slice(vectorizable);
+                dest = dest.Slice(vectorizable);
+            }
+
+            /// <summary>
+            /// Pads then shuffles as many 8-bit integers of <paramref name="source"/> as fit whole
+            /// blocks of three 128-bit vectors, using the control, and stores the results in
+            /// <paramref name="dest"/> (four bytes written for every three bytes read). Both spans
+            /// are then advanced past the processed bytes so that only the remainder is left.
+            /// Does nothing when SSSE3 is not supported.
+            /// </summary>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Pad3Shuffle4Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    int remainder = source.Length % (Vector128<byte>.Count * 3);
+
+                    int sourceCount = source.Length - remainder;
+
+                    // sourceCount is a multiple of 3, so dividing first is exact. Multiplying
+                    // first (sourceCount * 4) silently overflows Int32 for large spans even
+                    // though the final value would still fit.
+                    int destCount = sourceCount / 3 * 4;
+
+                    if (sourceCount > 0)
+                    {
+                        Pad3Shuffle4(
+                            source.Slice(0, sourceCount),
+                            dest.Slice(0, destCount),
+                            control);
+
+                        source = source.Slice(sourceCount);
+                        dest = dest.Slice(destCount);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
+            /// using the control and stores the results in <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Work is only done when <see cref="Ssse3"/> is supported, and only on the largest prefix of
+            /// <paramref name="source"/> whose length is a multiple of four 128-bit vectors of bytes.
+            /// When something was processed, both spans are advanced past the processed elements
+            /// (four source bytes for every three destination bytes) so the caller can handle the remainder;
+            /// otherwise both spans are left untouched.
+            /// </remarks>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Shuffle4Slice3Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    int remainder = source.Length % (Vector128<byte>.Count * 4);
+
+                    int sourceCount = source.Length - remainder;
+
+                    // sourceCount is a multiple of 4, so dividing first is exact and can never
+                    // overflow, unlike 'sourceCount * 3 / 4' for very large spans.
+                    int destCount = sourceCount / 4 * 3;
+
+                    if (sourceCount > 0)
+                    {
+                        Shuffle4Slice3(
+                            source.Slice(0, sourceCount),
+                            dest.Slice(0, destCount),
+                            control);
+
+                        source = source.Slice(sourceCount);
+                        dest = dest.Slice(destCount);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Shuffles every group of four floats in <paramref name="source"/> according to
+            /// <paramref name="control"/> and writes the result to <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Only whole vectors are processed; the number of vectors is derived from the length
+            /// of <paramref name="dest"/>.
+            /// </remarks>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static void Shuffle4(
+                ReadOnlySpan<float> source,
+                Span<float> dest,
+                byte control)
+            {
+                if (Avx.IsSupported)
+                {
+                    ref Vector256<float> input =
+                        ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector256<float> output =
+                        ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = dest.Length / Vector256<float>.Count;
+                    int unrolledCount = vectorCount - Numerics.Modulo4(vectorCount);
+                    int index = 0;
+
+                    // Main loop: four vectors per iteration.
+                    for (; index < unrolledCount; index += 4)
+                    {
+                        Unsafe.Add(ref output, index) = Avx.Permute(Unsafe.Add(ref input, index), control);
+                        Unsafe.Add(ref output, index + 1) = Avx.Permute(Unsafe.Add(ref input, index + 1), control);
+                        Unsafe.Add(ref output, index + 2) = Avx.Permute(Unsafe.Add(ref input, index + 2), control);
+                        Unsafe.Add(ref output, index + 3) = Avx.Permute(Unsafe.Add(ref input, index + 3), control);
+                    }
+
+                    // Tail: the vectors (at most three) that did not fill an unrolled iteration.
+                    for (; index < vectorCount; index++)
+                    {
+                        Unsafe.Add(ref output, index) = Avx.Permute(Unsafe.Add(ref input, index), control);
+                    }
+                }
+                else
+                {
+                    // Sse
+                    ref Vector128<float> input =
+                        ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<float> output =
+                        ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = dest.Length / Vector128<float>.Count;
+                    int unrolledCount = vectorCount - Numerics.Modulo4(vectorCount);
+                    int index = 0;
+
+                    // Main loop: four vectors per iteration. Passing the same vector as both
+                    // operands makes Sse.Shuffle act as a single-input permute.
+                    for (; index < unrolledCount; index += 4)
+                    {
+                        Vector128<float> a = Unsafe.Add(ref input, index);
+                        Unsafe.Add(ref output, index) = Sse.Shuffle(a, a, control);
+
+                        Vector128<float> b = Unsafe.Add(ref input, index + 1);
+                        Unsafe.Add(ref output, index + 1) = Sse.Shuffle(b, b, control);
+
+                        Vector128<float> c = Unsafe.Add(ref input, index + 2);
+                        Unsafe.Add(ref output, index + 2) = Sse.Shuffle(c, c, control);
+
+                        Vector128<float> d = Unsafe.Add(ref input, index + 3);
+                        Unsafe.Add(ref output, index + 3) = Sse.Shuffle(d, d, control);
+                    }
+
+                    // Tail: the vectors (at most three) that did not fill an unrolled iteration.
+                    for (; index < vectorCount; index++)
+                    {
+                        Vector128<float> v = Unsafe.Add(ref input, index);
+                        Unsafe.Add(ref output, index) = Sse.Shuffle(v, v, control);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Shuffles the bytes of <paramref name="source"/> with a byte-shuffle mask built from
+            /// <paramref name="control"/> and writes the result to <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Only whole vectors are processed; the number of vectors is derived from the length
+            /// of <paramref name="dest"/>. The non-AVX2 path uses SSSE3 instructions without checking for support.
+            /// </remarks>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static void Shuffle4(
+                ReadOnlySpan<byte> source,
+                Span<byte> dest,
+                byte control)
+            {
+                if (Avx2.IsSupported)
+                {
+                    // I've chosen to do this for convenience while we determine what
+                    // shuffle controls to add to the library.
+                    // We can add static ROS instances if need be in the future.
+                    Span<byte> maskBytes = stackalloc byte[Vector256<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref maskBytes, control);
+                    Vector256<byte> shuffleMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(maskBytes));
+
+                    ref Vector256<byte> input =
+                        ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector256<byte> output =
+                        ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = dest.Length / Vector256<byte>.Count;
+                    int unrolledCount = vectorCount - Numerics.Modulo4(vectorCount);
+                    int index = 0;
+
+                    // Main loop: four vectors per iteration.
+                    for (; index < unrolledCount; index += 4)
+                    {
+                        Unsafe.Add(ref output, index) = Avx2.Shuffle(Unsafe.Add(ref input, index), shuffleMask);
+                        Unsafe.Add(ref output, index + 1) = Avx2.Shuffle(Unsafe.Add(ref input, index + 1), shuffleMask);
+                        Unsafe.Add(ref output, index + 2) = Avx2.Shuffle(Unsafe.Add(ref input, index + 2), shuffleMask);
+                        Unsafe.Add(ref output, index + 3) = Avx2.Shuffle(Unsafe.Add(ref input, index + 3), shuffleMask);
+                    }
+
+                    // Tail: the vectors (at most three) that did not fill an unrolled iteration.
+                    for (; index < vectorCount; index++)
+                    {
+                        Unsafe.Add(ref output, index) = Avx2.Shuffle(Unsafe.Add(ref input, index), shuffleMask);
+                    }
+                }
+                else
+                {
+                    // Ssse3
+                    Span<byte> maskBytes = stackalloc byte[Vector128<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref maskBytes, control);
+                    Vector128<byte> shuffleMask = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(maskBytes));
+
+                    ref Vector128<byte> input =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<byte> output =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = dest.Length / Vector128<byte>.Count;
+                    int unrolledCount = vectorCount - Numerics.Modulo4(vectorCount);
+                    int index = 0;
+
+                    // Main loop: four vectors per iteration.
+                    for (; index < unrolledCount; index += 4)
+                    {
+                        Unsafe.Add(ref output, index) = Ssse3.Shuffle(Unsafe.Add(ref input, index), shuffleMask);
+                        Unsafe.Add(ref output, index + 1) = Ssse3.Shuffle(Unsafe.Add(ref input, index + 1), shuffleMask);
+                        Unsafe.Add(ref output, index + 2) = Ssse3.Shuffle(Unsafe.Add(ref input, index + 2), shuffleMask);
+                        Unsafe.Add(ref output, index + 3) = Ssse3.Shuffle(Unsafe.Add(ref input, index + 3), shuffleMask);
+                    }
+
+                    // Tail: the vectors (at most three) that did not fill an unrolled iteration.
+                    for (; index < vectorCount; index++)
+                    {
+                        Unsafe.Add(ref output, index) = Ssse3.Shuffle(Unsafe.Add(ref input, index), shuffleMask);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Shuffles the bytes of <paramref name="source"/> three vectors (48 bytes) at a time and
+            /// writes three vectors to <paramref name="dest"/> per iteration. Does nothing without SSSE3.
+            /// </summary>
+            /// <remarks>
+            /// Each 48-byte block is split into four registers that start at 12-byte intervals. Every register
+            /// is run through the pad mask, the shuffle built from <paramref name="control"/> and a slice mask,
+            /// and the four results are then stitched back into three vectors.
+            /// NOTE(review): the pad/slice masks are defined elsewhere; presumably they expand 3-byte groups
+            /// to 4 bytes and compact them again - confirm against their definitions.
+            /// </remarks>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static void Shuffle3(
+                ReadOnlySpan<byte> source,
+                Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    ref byte padMaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
+                    Vector128<byte> padMask = Unsafe.As<byte, Vector128<byte>>(ref padMaskBase);
+                    ref byte sliceMaskBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
+                    Vector128<byte> sliceOdd = Unsafe.As<byte, Vector128<byte>>(ref sliceMaskBase);
+
+                    // The "even" slice mask is the "odd" one rotated right by 12 bytes.
+                    Vector128<byte> sliceEven = Ssse3.AlignRight(sliceOdd, sliceOdd, 12);
+
+                    Span<byte> maskBytes = stackalloc byte[Vector128<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref maskBytes, control);
+                    Vector128<byte> shuffleMask = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(maskBytes));
+
+                    ref Vector128<byte> input =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<byte> output =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = source.Length / Vector128<byte>.Count;
+
+                    for (int index = 0; index < vectorCount; index += 3)
+                    {
+                        // Load the whole 48-byte block before anything is written.
+                        Vector128<byte> s0 = Unsafe.Add(ref input, index);
+                        Vector128<byte> s1 = Unsafe.Add(ref input, index + 1);
+                        Vector128<byte> s2 = Unsafe.Add(ref input, index + 2);
+
+                        // Re-align so each register starts at byte 0, 12, 24 and 36 of the block.
+                        Vector128<byte> p0 = s0;
+                        Vector128<byte> p1 = Ssse3.AlignRight(s1, s0, 12);
+                        Vector128<byte> p2 = Ssse3.AlignRight(s2, s1, 8);
+                        Vector128<byte> p3 = Sse2.ShiftRightLogical128BitLane(s2, 4);
+
+                        // Pad, shuffle, then slice; even/odd registers use different slice masks.
+                        p0 = Ssse3.Shuffle(Ssse3.Shuffle(Ssse3.Shuffle(p0, padMask), shuffleMask), sliceEven);
+                        p1 = Ssse3.Shuffle(Ssse3.Shuffle(Ssse3.Shuffle(p1, padMask), shuffleMask), sliceOdd);
+                        p2 = Ssse3.Shuffle(Ssse3.Shuffle(Ssse3.Shuffle(p2, padMask), shuffleMask), sliceEven);
+                        p3 = Ssse3.Shuffle(Ssse3.Shuffle(Ssse3.Shuffle(p3, padMask), shuffleMask), sliceOdd);
+
+                        // Stitch the four partial registers back into three full vectors.
+                        Vector128<byte> out0 = Ssse3.AlignRight(p1, p0, 4);
+                        Vector128<byte> out1 = Ssse3.AlignRight(
+                            Sse2.ShiftRightLogical128BitLane(p2, 4),
+                            Sse2.ShiftLeftLogical128BitLane(p1, 4),
+                            8);
+                        Vector128<byte> out2 = Ssse3.AlignRight(p3, p2, 12);
+
+                        Unsafe.Add(ref output, index) = out0;
+                        Unsafe.Add(ref output, index + 1) = out1;
+                        Unsafe.Add(ref output, index + 2) = out2;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Reads <paramref name="source"/> three vectors (48 bytes) at a time and writes four vectors
+            /// (64 bytes) to <paramref name="dest"/> per iteration: every register is run through the pad mask,
+            /// OR-ed with a fill pattern, then shuffled using <paramref name="control"/>. Does nothing without SSSE3.
+            /// </summary>
+            /// <remarks>
+            /// NOTE(review): the pad mask is defined elsewhere; presumably it spreads each 3-byte group into
+            /// a 4-byte slot - confirm against its definition.
+            /// </remarks>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static void Pad3Shuffle4(
+                ReadOnlySpan<byte> source,
+                Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    ref byte padMaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
+                    Vector128<byte> padMask = Unsafe.As<byte, Vector128<byte>>(ref padMaskBase);
+
+                    // Sets the top byte of every 32-bit element to 0xFF.
+                    Vector128<byte> fill = Vector128.Create(0xff000000ff000000ul).AsByte();
+
+                    Span<byte> maskBytes = stackalloc byte[Vector128<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref maskBytes, control);
+                    Vector128<byte> shuffleMask = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(maskBytes));
+
+                    ref Vector128<byte> input =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<byte> output =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = source.Length / Vector128<byte>.Count;
+                    int srcIndex = 0;
+                    int dstIndex = 0;
+
+                    while (srcIndex < vectorCount)
+                    {
+                        // Load the whole 48-byte block before anything is written.
+                        Vector128<byte> s0 = Unsafe.Add(ref input, srcIndex);
+                        Vector128<byte> s1 = Unsafe.Add(ref input, srcIndex + 1);
+                        Vector128<byte> s2 = Unsafe.Add(ref input, srcIndex + 2);
+
+                        // Re-align so each register starts at byte 0, 12, 24 and 36 of the block.
+                        Vector128<byte> p1 = Ssse3.AlignRight(s1, s0, 12);
+                        Vector128<byte> p2 = Ssse3.AlignRight(s2, s1, 8);
+                        Vector128<byte> p3 = Sse2.ShiftRightLogical128BitLane(s2, 4);
+
+                        Unsafe.Add(ref output, dstIndex) =
+                            Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(s0, padMask), fill), shuffleMask);
+                        Unsafe.Add(ref output, dstIndex + 1) =
+                            Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(p1, padMask), fill), shuffleMask);
+                        Unsafe.Add(ref output, dstIndex + 2) =
+                            Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(p2, padMask), fill), shuffleMask);
+                        Unsafe.Add(ref output, dstIndex + 3) =
+                            Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(p3, padMask), fill), shuffleMask);
+
+                        srcIndex += 3;
+                        dstIndex += 4;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Reads <paramref name="source"/> four vectors (64 bytes) at a time, shuffles each using
+            /// <paramref name="control"/>, applies a slice mask, and stitches the results into three vectors
+            /// (48 bytes) written to <paramref name="dest"/>. Does nothing without SSSE3.
+            /// </summary>
+            /// <remarks>
+            /// NOTE(review): the slice mask is defined elsewhere; presumably it compacts each 4-byte group
+            /// down to 3 bytes - confirm against its definition.
+            /// </remarks>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static void Shuffle4Slice3(
+                ReadOnlySpan<byte> source,
+                Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    ref byte sliceMaskBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
+                    Vector128<byte> sliceOdd = Unsafe.As<byte, Vector128<byte>>(ref sliceMaskBase);
+
+                    // The "even" slice mask is the "odd" one rotated right by 12 bytes.
+                    Vector128<byte> sliceEven = Ssse3.AlignRight(sliceOdd, sliceOdd, 12);
+
+                    Span<byte> maskBytes = stackalloc byte[Vector128<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref maskBytes, control);
+                    Vector128<byte> shuffleMask = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(maskBytes));
+
+                    ref Vector128<byte> input =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<byte> output =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    int vectorCount = source.Length / Vector128<byte>.Count;
+                    int srcIndex = 0;
+                    int dstIndex = 0;
+
+                    while (srcIndex < vectorCount)
+                    {
+                        // Load, shuffle and slice all four inputs before anything is written.
+                        Vector128<byte> q0 = Ssse3.Shuffle(
+                            Ssse3.Shuffle(Unsafe.Add(ref input, srcIndex), shuffleMask), sliceEven);
+                        Vector128<byte> q1 = Ssse3.Shuffle(
+                            Ssse3.Shuffle(Unsafe.Add(ref input, srcIndex + 1), shuffleMask), sliceOdd);
+                        Vector128<byte> q2 = Ssse3.Shuffle(
+                            Ssse3.Shuffle(Unsafe.Add(ref input, srcIndex + 2), shuffleMask), sliceEven);
+                        Vector128<byte> q3 = Ssse3.Shuffle(
+                            Ssse3.Shuffle(Unsafe.Add(ref input, srcIndex + 3), shuffleMask), sliceOdd);
+
+                        // Stitch the four partial registers into three full vectors.
+                        Vector128<byte> out0 = Ssse3.AlignRight(q1, q0, 4);
+                        Vector128<byte> out1 = Ssse3.AlignRight(
+                            Sse2.ShiftRightLogical128BitLane(q2, 4),
+                            Sse2.ShiftLeftLogical128BitLane(q1, 4),
+                            8);
+                        Vector128<byte> out2 = Ssse3.AlignRight(q3, q2, 12);
+
+                        Unsafe.Add(ref output, dstIndex) = out0;
+                        Unsafe.Add(ref output, dstIndex + 1) = out1;
+                        Unsafe.Add(ref output, dstIndex + 2) = out2;
+
+                        srcIndex += 4;
+                        dstIndex += 3;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Computes <c>(vm0 * vm1) + va</c> element-wise on <see cref="Vector256{T}"/> values,
+            /// using a fused multiply-add instruction when FMA is supported.
+            /// </summary>
+            /// <param name="va">The vector to add to the intermediate result.</param>
+            /// <param name="vm0">The first vector to multiply.</param>
+            /// <param name="vm1">The second vector to multiply.</param>
+            /// <returns>The <see cref="Vector256{T}"/>.</returns>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static Vector256<float> MultiplyAdd(
+                in Vector256<float> va,
+                in Vector256<float> vm0,
+                in Vector256<float> vm1)
+            {
+                if (!Fma.IsSupported)
+                {
+                    // No fused instruction available: multiply first, then add.
+                    return Avx.Add(Avx.Multiply(vm0, vm1), va);
+                }
+
+                return Fma.MultiplyAdd(vm1, vm0, va);
+            }
+
+            /// <summary>
+            /// Runs <see cref="ByteToNormalizedFloat"/> on as many leading elements as fit into whole vectors,
+            /// then slices both spans down so that only the unprocessed remainder is left.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            internal static void ByteToNormalizedFloatReduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<float> dest)
+            {
+                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+                if (!Avx2.IsSupported && !Sse2.IsSupported)
+                {
+                    return;
+                }
+
+                // Elements are consumed in chunks of one byte vector of the widest supported size.
+                int chunkSize = Avx2.IsSupported ? Vector256<byte>.Count : Vector128<byte>.Count;
+                int processed = source.Length - Numerics.ModuloP2(source.Length, chunkSize);
+
+                if (processed <= 0)
+                {
+                    return;
+                }
+
+                ByteToNormalizedFloat(source.Slice(0, processed), dest.Slice(0, processed));
+
+                source = source.Slice(processed);
+                dest = dest.Slice(processed);
+            }
+
+            /// <summary>
+            /// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
+            /// Converts every byte of <paramref name="source"/> to a float scaled by <c>1 / 255</c>
+            /// and stores it in <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Implementation is based on MagicScaler code:
+            /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182
+            /// </remarks>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of floats.</param>
+            internal static unsafe void ByteToNormalizedFloat(
+                ReadOnlySpan<byte> source,
+                Span<float> dest)
+            {
+                // The source is read through a raw pointer, so it must be pinned for the duration of
+                // the loops; otherwise the GC is free to relocate a managed buffer underneath us.
+                fixed (byte* sourceBase = source)
+                {
+                    if (Avx2.IsSupported)
+                    {
+                        VerifySpanInput(source, dest, Vector256<byte>.Count);
+
+                        int n = dest.Length / Vector256<byte>.Count;
+
+                        ref Vector256<float> destBase =
+                            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
+
+                        var scale = Vector256.Create(1 / (float)byte.MaxValue);
+
+                        // Each iteration consumes 32 bytes and produces four vectors of eight floats.
+                        for (int i = 0; i < n; i++)
+                        {
+                            int si = Vector256<byte>.Count * i;
+                            Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
+                            Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
+                            Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
+                            Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
+
+                            Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
+                            Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
+                            Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
+                            Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
+
+                            ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
+
+                            d = f0;
+                            Unsafe.Add(ref d, 1) = f1;
+                            Unsafe.Add(ref d, 2) = f2;
+                            Unsafe.Add(ref d, 3) = f3;
+                        }
+                    }
+                    else
+                    {
+                        // Sse
+                        VerifySpanInput(source, dest, Vector128<byte>.Count);
+
+                        int n = dest.Length / Vector128<byte>.Count;
+
+                        ref Vector128<float> destBase =
+                            ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
+
+                        var scale = Vector128.Create(1 / (float)byte.MaxValue);
+                        Vector128<byte> zero = Vector128<byte>.Zero;
+
+                        // Each iteration consumes 16 bytes and produces four vectors of four floats.
+                        for (int i = 0; i < n; i++)
+                        {
+                            int si = Vector128<byte>.Count * i;
+
+                            Vector128<int> i0, i1, i2, i3;
+                            if (Sse41.IsSupported)
+                            {
+                                i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
+                                i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
+                                i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
+                                i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
+                            }
+                            else
+                            {
+                                // Sse2 has no widening load: zero-extend byte -> short -> int by
+                                // interleaving with zero twice.
+                                Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
+                                Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
+                                Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
+
+                                i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
+                                i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
+                                i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
+                                i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
+                            }
+
+                            Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
+                            Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
+                            Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
+                            Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
+
+                            ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
+
+                            d = f0;
+                            Unsafe.Add(ref d, 1) = f1;
+                            Unsafe.Add(ref d, 2) = f2;
+                            Unsafe.Add(ref d, 3) = f3;
+                        }
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Runs <see cref="NormalizedFloatToByteSaturate"/> on as many leading elements as fit into whole vectors,
+            /// then slices both spans down so that only the unprocessed remainder is left.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            internal static void NormalizedFloatToByteSaturateReduce(
+                ref ReadOnlySpan<float> source,
+                ref Span<byte> dest)
+            {
+                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
+
+                if (!Avx2.IsSupported && !Sse2.IsSupported)
+                {
+                    return;
+                }
+
+                // Elements are consumed in chunks of one byte vector of the widest supported size.
+                int chunkSize = Avx2.IsSupported ? Vector256<byte>.Count : Vector128<byte>.Count;
+                int processed = source.Length - Numerics.ModuloP2(source.Length, chunkSize);
+
+                if (processed <= 0)
+                {
+                    return;
+                }
+
+                NormalizedFloatToByteSaturate(
+                    source.Slice(0, processed),
+                    dest.Slice(0, processed));
+
+                source = source.Slice(processed);
+                dest = dest.Slice(processed);
+            }
+
+            /// <summary>
+            /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
+            /// Multiplies every float of <paramref name="source"/> by 255, converts it to an integer and
+            /// packs it with saturation into a byte of <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Implementation is based on MagicScaler code:
+            /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622
+            /// </remarks>
+            internal static void NormalizedFloatToByteSaturate(
+                ReadOnlySpan<float> source,
+                Span<byte> dest)
+            {
+                if (Avx2.IsSupported)
+                {
+                    VerifySpanInput(source, dest, Vector256<byte>.Count);
+
+                    int blockCount = dest.Length / Vector256<byte>.Count;
+
+                    ref Vector256<float> input =
+                        ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector256<byte> output =
+                        ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    var scale = Vector256.Create((float)byte.MaxValue);
+                    ref byte permuteBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
+                    Vector256<int> permute = Unsafe.As<byte, Vector256<int>>(ref permuteBase);
+
+                    // Each iteration turns four float vectors (32 floats) into one byte vector.
+                    for (int block = 0; block < blockCount; block++)
+                    {
+                        ref Vector256<float> first = ref Unsafe.Add(ref input, block * 4);
+
+                        Vector256<int> w0 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, first));
+                        Vector256<int> w1 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref first, 1)));
+                        Vector256<int> w2 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref first, 2)));
+                        Vector256<int> w3 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref first, 3)));
+
+                        Vector256<short> lower = Avx2.PackSignedSaturate(w0, w1);
+                        Vector256<short> upper = Avx2.PackSignedSaturate(w2, w3);
+                        Vector256<byte> packed = Avx2.PackUnsignedSaturate(lower, upper);
+
+                        // The 256-bit packs work per 128-bit lane, so reorder the 32-bit elements afterwards.
+                        Unsafe.Add(ref output, block) = Avx2.PermuteVar8x32(packed.AsInt32(), permute).AsByte();
+                    }
+                }
+                else
+                {
+                    // Sse
+                    VerifySpanInput(source, dest, Vector128<byte>.Count);
+
+                    int blockCount = dest.Length / Vector128<byte>.Count;
+
+                    ref Vector128<float> input =
+                        ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
+
+                    ref Vector128<byte> output =
+                        ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
+
+                    var scale = Vector128.Create((float)byte.MaxValue);
+
+                    // Each iteration turns four float vectors (16 floats) into one byte vector.
+                    for (int block = 0; block < blockCount; block++)
+                    {
+                        ref Vector128<float> first = ref Unsafe.Add(ref input, block * 4);
+
+                        Vector128<int> w0 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, first));
+                        Vector128<int> w1 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref first, 1)));
+                        Vector128<int> w2 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref first, 2)));
+                        Vector128<int> w3 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref first, 3)));
+
+                        Vector128<short> lower = Sse2.PackSignedSaturate(w0, w1);
+                        Vector128<short> upper = Sse2.PackSignedSaturate(w2, w3);
+
+                        Unsafe.Add(ref output, block) = Sse2.PackUnsignedSaturate(lower, upper);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Packs as many whole 32-byte groups of the three planar channels as possible into
+            /// interleaved <see cref="Rgb24"/> pixels using AVX2, then advances all four spans past
+            /// the processed elements so that only the unprocessed tail remains in them.
+            /// </summary>
+            /// <param name="redChannel">The red plane; sliced past the processed bytes on return.</param>
+            /// <param name="greenChannel">The green plane; sliced past the processed bytes on return.</param>
+            /// <param name="blueChannel">The blue plane; sliced past the processed bytes on return.</param>
+            /// <param name="destination">The destination pixels; sliced past the written pixels on return.</param>
+            /// <remarks>
+            /// Every store writes a full 32-byte vector while the write position only advances by 24 bytes,
+            /// so the last store reaches 8 bytes beyond the final packed pixel. The destination must have
+            /// room for that overrun.
+            /// </remarks>
+            internal static void PackFromRgbPlanesAvx2Reduce(
+                ref ReadOnlySpan<byte> redChannel,
+                ref ReadOnlySpan<byte> greenChannel,
+                ref ReadOnlySpan<byte> blueChannel,
+                ref Span<Rgb24> destination)
+            {
+                // View the planes as 32-byte vectors; the destination stays byte addressed because
+                // the stores below land on 24-byte boundaries rather than vector boundaries.
+                ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
+                ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
+                ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
+                ref byte dBase = ref Unsafe.As<Rgb24, byte>(ref MemoryMarshal.GetReference(destination));
+
+                // Number of whole 32-byte groups; only the red plane's length is consulted.
+                int count = redChannel.Length / Vector256<byte>.Count;
+
+                // Permute control applied to every input vector before the unpack steps.
+                // NOTE(review): presumably pre-orders the 32-bit elements so that the unpacks below
+                // emit pixels in sequential order - confirm against PermuteMaskEvenOdd8x32.
+                ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
+
+                // Permute control applied to every output vector after the byte shuffle.
+                ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
+                Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);
+
+                // Filler value interleaved as the fourth byte next to blue.
+                Vector256<byte> a = Vector256.Create((byte)255);
+
+                Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
+
+                for (int i = 0; i < count; i++)
+                {
+                    // Load 32 samples of each channel.
+                    Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
+                    Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
+                    Vector256<byte> b0 = Unsafe.Add(ref bBase, i);
+
+                    r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
+                    g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
+                    b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
+
+                    // Byte interleave: (r, g) pairs and (b, filler) pairs.
+                    Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
+                    Vector256<byte> b1 = Avx2.UnpackLow(b0, a);
+
+                    // 16-bit interleave of the pairs yields 4-byte (r, g, b, filler) groups.
+                    Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+                    Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+                    // Same two steps for the high halves.
+                    rg = Avx2.UnpackHigh(r0, g0);
+                    b1 = Avx2.UnpackHigh(b0, a);
+
+                    Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+                    Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+                    // NOTE(review): judging by the mask names and the 24-byte store stride below, this
+                    // shuffle + permute pair presumably moves the three colour bytes of each pixel to the
+                    // front of the vector and leaves the filler bytes in the last 8 bytes - confirm
+                    // against the mask definitions.
+                    rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha);
+                    rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha);
+                    rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha);
+                    rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha);
+
+                    rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte();
+                    rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte();
+                    rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte();
+                    rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte();
+
+                    // Each iteration owns 4 x 24 = 96 destination bytes; the four write positions are
+                    // 24 bytes apart.
+                    ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i);
+                    ref byte d2 = ref Unsafe.Add(ref d1, 24);
+                    ref byte d3 = ref Unsafe.Add(ref d2, 24);
+                    ref byte d4 = ref Unsafe.Add(ref d3, 24);
+
+                    // Full 32-byte stores at 24-byte spacing: the trailing 8 bytes of each store are
+                    // overwritten by the next one, so the store order matters. The very last store
+                    // spills 8 bytes past the packed data.
+                    Unsafe.As<byte, Vector256<byte>>(ref d1) = rgb1;
+                    Unsafe.As<byte, Vector256<byte>>(ref d2) = rgb2;
+                    Unsafe.As<byte, Vector256<byte>>(ref d3) = rgb3;
+                    Unsafe.As<byte, Vector256<byte>>(ref d4) = rgb4;
+                }
+
+                // Advance every span by the number of processed elements (32 per iteration).
+                int slice = count * Vector256<byte>.Count;
+                redChannel = redChannel.Slice(slice);
+                greenChannel = greenChannel.Slice(slice);
+                blueChannel = blueChannel.Slice(slice);
+                destination = destination.Slice(slice);
+            }
+
+            /// <summary>
+            /// Packs as many whole 32-byte groups of the three planar channels as possible into
+            /// interleaved <see cref="Rgba32"/> pixels (alpha set to 255) using AVX2, then advances all
+            /// four spans past the processed elements so that only the unprocessed tail remains in them.
+            /// </summary>
+            /// <param name="redChannel">The red plane; sliced past the processed bytes on return.</param>
+            /// <param name="greenChannel">The green plane; sliced past the processed bytes on return.</param>
+            /// <param name="blueChannel">The blue plane; sliced past the processed bytes on return.</param>
+            /// <param name="destination">The destination pixels; sliced past the written pixels on return.</param>
+            internal static void PackFromRgbPlanesAvx2Reduce(
+                ref ReadOnlySpan<byte> redChannel,
+                ref ReadOnlySpan<byte> greenChannel,
+                ref ReadOnlySpan<byte> blueChannel,
+                ref Span<Rgba32> destination)
+            {
+                ref Vector256<byte> rBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(redChannel));
+                ref Vector256<byte> gBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(greenChannel));
+                ref Vector256<byte> bBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(blueChannel));
+                ref Vector256<byte> dBase = ref Unsafe.As<Rgba32, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));
+
+                // Number of whole 32-byte groups; only the red plane's length is consulted.
+                int count = redChannel.Length / Vector256<byte>.Count;
+
+                // Permute control applied to every input vector before the unpack steps.
+                // NOTE(review): presumably pre-orders the 32-bit elements so that the unpacks below
+                // emit pixels in sequential order - confirm against PermuteMaskEvenOdd8x32.
+                ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
+
+                // Constant alpha interleaved next to blue.
+                Vector256<byte> a = Vector256.Create((byte)255);
+
+                for (int i = 0; i < count; i++)
+                {
+                    Vector256<byte> r0 = Unsafe.Add(ref rBase, i);
+                    Vector256<byte> g0 = Unsafe.Add(ref gBase, i);
+                    Vector256<byte> b0 = Unsafe.Add(ref bBase, i);
+
+                    r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte();
+                    g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte();
+                    b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte();
+
+                    // Byte interleave: (r, g) pairs and (b, a) pairs.
+                    Vector256<byte> rg = Avx2.UnpackLow(r0, g0);
+                    Vector256<byte> b1 = Avx2.UnpackLow(b0, a);
+
+                    // 16-bit interleave of the pairs yields complete 4-byte (r, g, b, a) pixels.
+                    Vector256<byte> rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+                    Vector256<byte> rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+                    rg = Avx2.UnpackHigh(r0, g0);
+                    b1 = Avx2.UnpackHigh(b0, a);
+
+                    Vector256<byte> rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+                    Vector256<byte> rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte();
+
+                    // Four output vectors (32 pixels) per iteration, stored back to back.
+                    ref Vector256<byte> d0 = ref Unsafe.Add(ref dBase, i * 4);
+                    d0 = rgb1;
+                    Unsafe.Add(ref d0, 1) = rgb2;
+                    Unsafe.Add(ref d0, 2) = rgb3;
+                    Unsafe.Add(ref d0, 3) = rgb4;
+                }
+
+                // Advance every span by the number of processed elements (32 per iteration).
+                int slice = count * Vector256<byte>.Count;
+                redChannel = redChannel.Slice(slice);
+                greenChannel = greenChannel.Slice(slice);
+                blueChannel = blueChannel.Slice(slice);
+                destination = destination.Slice(slice);
+            }
+        }
+    }
+}
+#endif
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs
@ -0,0 +1,206 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.PixelFormats;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace SixLabors.ImageSharp
+{
+    internal static partial class SimdUtils
+    {
+        /// <summary>
+        /// Interleaves three planar byte channels into <see cref="Rgb24"/> pixels.
+        /// </summary>
+        /// <param name="configuration">Not used by this method.</param>
+        /// <param name="redChannel">The red plane.</param>
+        /// <param name="greenChannel">The green plane; must be as long as <paramref name="redChannel"/>.</param>
+        /// <param name="blueChannel">The blue plane; must be as long as <paramref name="redChannel"/>.</param>
+        /// <param name="destination">
+        /// The destination pixels; must be at least 3 elements longer than the channels.
+        /// </param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        internal static void PackFromRgbPlanes(
+            Configuration configuration,
+            ReadOnlySpan<byte> redChannel,
+            ReadOnlySpan<byte> greenChannel,
+            ReadOnlySpan<byte> blueChannel,
+            Span<Rgb24> destination)
+        {
+            DebugGuard.IsTrue(redChannel.Length == greenChannel.Length, nameof(greenChannel), "Channels must be of same size!");
+            DebugGuard.IsTrue(redChannel.Length == blueChannel.Length, nameof(blueChannel), "Channels must be of same size!");
+            DebugGuard.IsTrue(redChannel.Length + 2 < destination.Length, nameof(destination), "'destination' must contain a padding of 3 elements!");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx2.IsSupported)
+            {
+                // Vectorized bulk pass; the spans come back sliced to the unprocessed tail.
+                HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
+                PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
+                return;
+            }
+#endif
+
+            // Scalar bulk pass in batches of four, followed by the element-wise tail.
+            PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
+            PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
+        }
+
+        /// <summary>
+        /// Interleaves three planar byte channels into <see cref="Rgba32"/> pixels.
+        /// </summary>
+        /// <param name="configuration">Not used by this method.</param>
+        /// <param name="redChannel">The red plane.</param>
+        /// <param name="greenChannel">The green plane; must be as long as <paramref name="redChannel"/>.</param>
+        /// <param name="blueChannel">The blue plane; must be as long as <paramref name="redChannel"/>.</param>
+        /// <param name="destination">
+        /// The destination pixels; must not be shorter than the channels.
+        /// </param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        internal static void PackFromRgbPlanes(
+            Configuration configuration,
+            ReadOnlySpan<byte> redChannel,
+            ReadOnlySpan<byte> greenChannel,
+            ReadOnlySpan<byte> blueChannel,
+            Span<Rgba32> destination)
+        {
+            DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!");
+            DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!");
+
+            // A destination of exactly the channel length is valid: the message only forbids a
+            // shorter one, so the comparison has to be inclusive.
+            DebugGuard.IsTrue(destination.Length >= redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx2.IsSupported)
+            {
+                HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
+            }
+            else
+#endif
+            {
+                PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination);
+            }
+
+            // The bulk pass slices the spans by reference; whatever is left is handled one pixel at a time.
+            PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination);
+        }
+
+        /// <summary>
+        /// Packs the channels into <see cref="Rgb24"/> pixels four at a time without SIMD, then
+        /// advances all four spans past the processed elements.
+        /// </summary>
+        /// <param name="redChannel">The red plane; sliced past the processed bytes on return.</param>
+        /// <param name="greenChannel">The green plane; sliced past the processed bytes on return.</param>
+        /// <param name="blueChannel">The blue plane; sliced past the processed bytes on return.</param>
+        /// <param name="destination">The destination pixels; sliced past the written pixels on return.</param>
+        private static void PackFromRgbPlanesScalarBatchedReduce(
+            ref ReadOnlySpan<byte> redChannel,
+            ref ReadOnlySpan<byte> greenChannel,
+            ref ReadOnlySpan<byte> blueChannel,
+            ref Span<Rgb24> destination)
+        {
+            // Whole groups of four samples; the leftover (0-3) is not touched here.
+            int batches = redChannel.Length / 4;
+            int processed = batches * 4;
+
+            // Read the planes four bytes at a time.
+            ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
+            ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
+            ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
+            ref Rgb24 pixelBase = ref MemoryMarshal.GetReference(destination);
+
+            for (int batch = 0; batch < batches; batch++)
+            {
+                ref Rgb24 p0 = ref Unsafe.Add(ref pixelBase, batch * 4);
+                ref Rgb24 p1 = ref Unsafe.Add(ref p0, 1);
+                ref Rgb24 p2 = ref Unsafe.Add(ref p0, 2);
+                ref Rgb24 p3 = ref Unsafe.Add(ref p0, 3);
+
+                ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
+                ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
+                ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);
+
+                p0.R = red.V0;
+                p0.G = green.V0;
+                p0.B = blue.V0;
+
+                p1.R = red.V1;
+                p1.G = green.V1;
+                p1.B = blue.V1;
+
+                p2.R = red.V2;
+                p2.G = green.V2;
+                p2.B = blue.V2;
+
+                p3.R = red.V3;
+                p3.G = green.V3;
+                p3.B = blue.V3;
+            }
+
+            // Hand the unprocessed tail back to the caller.
+            redChannel = redChannel.Slice(processed);
+            greenChannel = greenChannel.Slice(processed);
+            blueChannel = blueChannel.Slice(processed);
+            destination = destination.Slice(processed);
+        }
+
+        /// <summary>
+        /// Packs the channels into <see cref="Rgba32"/> pixels four at a time without SIMD, then
+        /// advances all four spans past the processed elements.
+        /// </summary>
+        /// <param name="redChannel">The red plane; sliced past the processed bytes on return.</param>
+        /// <param name="greenChannel">The green plane; sliced past the processed bytes on return.</param>
+        /// <param name="blueChannel">The blue plane; sliced past the processed bytes on return.</param>
+        /// <param name="destination">The destination pixels; sliced past the written pixels on return.</param>
+        private static void PackFromRgbPlanesScalarBatchedReduce(
+            ref ReadOnlySpan<byte> redChannel,
+            ref ReadOnlySpan<byte> greenChannel,
+            ref ReadOnlySpan<byte> blueChannel,
+            ref Span<Rgba32> destination)
+        {
+            // Whole groups of four samples; the leftover (0-3) is not touched by the loop.
+            int batches = redChannel.Length / 4;
+            int processed = batches * 4;
+
+            // Read the planes four bytes at a time.
+            ref ByteTuple4 redBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(redChannel));
+            ref ByteTuple4 greenBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(greenChannel));
+            ref ByteTuple4 blueBase = ref Unsafe.As<byte, ByteTuple4>(ref MemoryMarshal.GetReference(blueChannel));
+            ref Rgba32 pixelBase = ref MemoryMarshal.GetReference(destination);
+
+            // Pre-set every destination pixel (the whole span, not just the batched part) to
+            // opaque black; the loop below then only has to write R, G and B.
+            destination.Fill(new Rgba32(0, 0, 0, 255));
+
+            for (int batch = 0; batch < batches; batch++)
+            {
+                ref Rgba32 p0 = ref Unsafe.Add(ref pixelBase, batch * 4);
+                ref Rgba32 p1 = ref Unsafe.Add(ref p0, 1);
+                ref Rgba32 p2 = ref Unsafe.Add(ref p0, 2);
+                ref Rgba32 p3 = ref Unsafe.Add(ref p0, 3);
+
+                ref ByteTuple4 red = ref Unsafe.Add(ref redBase, batch);
+                ref ByteTuple4 green = ref Unsafe.Add(ref greenBase, batch);
+                ref ByteTuple4 blue = ref Unsafe.Add(ref blueBase, batch);
+
+                p0.R = red.V0;
+                p0.G = green.V0;
+                p0.B = blue.V0;
+
+                p1.R = red.V1;
+                p1.G = green.V1;
+                p1.B = blue.V1;
+
+                p2.R = red.V2;
+                p2.G = green.V2;
+                p2.B = blue.V2;
+
+                p3.R = red.V3;
+                p3.G = green.V3;
+                p3.B = blue.V3;
+            }
+
+            // Hand the unprocessed tail back to the caller.
+            redChannel = redChannel.Slice(processed);
+            greenChannel = greenChannel.Slice(processed);
+            blueChannel = blueChannel.Slice(processed);
+            destination = destination.Slice(processed);
+        }
+
+        /// <summary>
+        /// Packs the elements left over after the bulk pass into <see cref="Rgb24"/> pixels,
+        /// one pixel at a time.
+        /// </summary>
+        /// <param name="redChannel">The remaining red samples.</param>
+        /// <param name="greenChannel">The remaining green samples.</param>
+        /// <param name="blueChannel">The remaining blue samples.</param>
+        /// <param name="destination">
+        /// The remaining destination pixels; elements beyond the channel length are left untouched.
+        /// </param>
+        private static void PackFromRgbPlanesRemainder(
+            ReadOnlySpan<byte> redChannel,
+            ReadOnlySpan<byte> greenChannel,
+            ReadOnlySpan<byte> blueChannel,
+            Span<Rgb24> destination)
+        {
+            ref byte r = ref MemoryMarshal.GetReference(redChannel);
+            ref byte g = ref MemoryMarshal.GetReference(greenChannel);
+            ref byte b = ref MemoryMarshal.GetReference(blueChannel);
+            ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination);
+
+            // The destination is allowed to be longer than the channels (it carries padding), and
+            // Unsafe.Add is not bounds checked, so the loop must stop at the shortest span instead
+            // of at destination.Length to avoid reading past the end of the source planes.
+            int count = Math.Min(
+                Math.Min(redChannel.Length, greenChannel.Length),
+                Math.Min(blueChannel.Length, destination.Length));
+
+            for (int i = 0; i < count; i++)
+            {
+                ref Rgb24 d = ref Unsafe.Add(ref rgb, i);
+                d.R = Unsafe.Add(ref r, i);
+                d.G = Unsafe.Add(ref g, i);
+                d.B = Unsafe.Add(ref b, i);
+            }
+        }
+
+        /// <summary>
+        /// Packs the elements left over after the bulk pass into <see cref="Rgba32"/> pixels with
+        /// alpha set to 255, one pixel at a time.
+        /// </summary>
+        /// <param name="redChannel">The remaining red samples.</param>
+        /// <param name="greenChannel">The remaining green samples.</param>
+        /// <param name="blueChannel">The remaining blue samples.</param>
+        /// <param name="destination">
+        /// The remaining destination pixels; elements beyond the channel length are left untouched.
+        /// </param>
+        private static void PackFromRgbPlanesRemainder(
+            ReadOnlySpan<byte> redChannel,
+            ReadOnlySpan<byte> greenChannel,
+            ReadOnlySpan<byte> blueChannel,
+            Span<Rgba32> destination)
+        {
+            ref byte r = ref MemoryMarshal.GetReference(redChannel);
+            ref byte g = ref MemoryMarshal.GetReference(greenChannel);
+            ref byte b = ref MemoryMarshal.GetReference(blueChannel);
+            ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination);
+
+            // The destination may be longer than the channels, and Unsafe.Add is not bounds
+            // checked, so the loop must stop at the shortest span instead of at
+            // destination.Length to avoid reading past the end of the source planes.
+            int count = Math.Min(
+                Math.Min(redChannel.Length, greenChannel.Length),
+                Math.Min(blueChannel.Length, destination.Length));
+
+            for (int i = 0; i < count; i++)
+            {
+                ref Rgba32 d = ref Unsafe.Add(ref rgba, i);
+                d.R = Unsafe.Add(ref r, i);
+                d.G = Unsafe.Add(ref g, i);
+                d.B = Unsafe.Add(ref b, i);
+                d.A = 255;
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
@ -0,0 +1,275 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    internal static partial class SimdUtils
+    {
+        /// <summary>
+        /// Reorders each group of four single-precision values in <paramref name="source"/> as
+        /// described by <paramref name="control"/> and writes the result to <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of floats.</param>
+        /// <param name="dest">The destination span of floats.</param>
+        /// <param name="control">The byte control.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Shuffle4(
+            ReadOnlySpan<float> source,
+            Span<float> dest,
+            byte control)
+        {
+            VerifyShuffle4SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Receives both spans by reference, so it can shorten them to the unprocessed tail.
+            HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control);
+#endif
+
+            // Nothing left for the scalar fallback.
+            if (source.IsEmpty)
+            {
+                return;
+            }
+
+            Shuffle4Remainder(source, dest, control);
+        }
+
+        /// <summary>
+        /// Reorders each group of four bytes in <paramref name="source"/> as described by
+        /// <paramref name="shuffle"/> and writes the result to <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="shuffle">The type of shuffle to perform.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Shuffle4<TShuffle>(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            TShuffle shuffle)
+            where TShuffle : struct, IShuffle4
+        {
+            VerifyShuffle4SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Receives both spans by reference, so it can shorten them to the unprocessed tail.
+            HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control);
+#endif
+
+            // Nothing left for the fallback implementation.
+            if (source.IsEmpty)
+            {
+                return;
+            }
+
+            shuffle.RunFallbackShuffle(source, dest);
+        }
+
+        /// <summary>
+        /// Reorders each byte triplet in <paramref name="source"/> as described by
+        /// <paramref name="shuffle"/> and writes the result to <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="shuffle">The type of shuffle to perform.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Shuffle3<TShuffle>(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            TShuffle shuffle)
+            where TShuffle : struct, IShuffle3
+        {
+            VerifyShuffle3SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Receives both spans by reference, so it can shorten them to the unprocessed tail.
+            HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control);
+#endif
+
+            // Nothing left for the fallback implementation.
+            if (source.IsEmpty)
+            {
+                return;
+            }
+
+            shuffle.RunFallbackShuffle(source, dest);
+        }
+
+        /// <summary>
+        /// Expands each byte triplet in <paramref name="source"/> to four bytes and reorders them as
+        /// described by <paramref name="shuffle"/>, writing the result to <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="shuffle">The type of shuffle to perform.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Pad3Shuffle4<TShuffle>(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            TShuffle shuffle)
+            where TShuffle : struct, IPad3Shuffle4
+        {
+            VerifyPad3Shuffle4SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Receives both spans by reference, so it can shorten them to the unprocessed tail.
+            HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control);
+#endif
+
+            // Nothing left for the fallback implementation.
+            if (source.IsEmpty)
+            {
+                return;
+            }
+
+            shuffle.RunFallbackShuffle(source, dest);
+        }
+
+        /// <summary>
+        /// Reorders each group of four bytes in <paramref name="source"/> as described by
+        /// <paramref name="shuffle"/> and keeps three bytes of every group, writing the result to
+        /// <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="shuffle">The type of shuffle to perform.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Shuffle4Slice3<TShuffle>(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            TShuffle shuffle)
+            where TShuffle : struct, IShuffle4Slice3
+        {
+            VerifyShuffle4Slice3SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Receives both spans by reference, so it can shorten them to the unprocessed tail.
+            HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control);
+#endif
+
+            // Nothing left for the fallback implementation.
+            if (source.IsEmpty)
+            {
+                return;
+            }
+
+            shuffle.RunFallbackShuffle(source, dest);
+        }
+
+        /// <summary>
+        /// Scalar shuffle of four-element groups: within every group, output element <c>k</c> is
+        /// taken from the input element selected by the <c>k</c>-th 2-bit field of
+        /// <paramref name="control"/>.
+        /// </summary>
+        /// <param name="source">The source span of floats.</param>
+        /// <param name="dest">The destination span of floats.</param>
+        /// <param name="control">The byte control.</param>
+        private static void Shuffle4Remainder(
+            ReadOnlySpan<float> source,
+            Span<float> dest,
+            byte control)
+        {
+            ref float input = ref MemoryMarshal.GetReference(source);
+            ref float output = ref MemoryMarshal.GetReference(dest);
+
+            // Split the control byte into its four 2-bit selectors.
+            Shuffle.InverseMmShuffle(control, out int sel3, out int sel2, out int sel1, out int sel0);
+
+            int length = source.Length;
+            for (int group = 0; group < length; group += 4)
+            {
+                Unsafe.Add(ref output, group) = Unsafe.Add(ref input, sel0 + group);
+                Unsafe.Add(ref output, group + 1) = Unsafe.Add(ref input, sel1 + group);
+                Unsafe.Add(ref output, group + 2) = Unsafe.Add(ref input, sel2 + group);
+                Unsafe.Add(ref output, group + 3) = Unsafe.Add(ref input, sel3 + group);
+            }
+        }
+
+        /// <summary>
+        /// Debug-only check that both spans have the same length and that the length is a multiple of 4.
+        /// </summary>
+        [Conditional("DEBUG")]
+        private static void VerifyShuffle4SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
+            where T : struct
+        {
+            bool sameLength = source.Length == dest.Length;
+            DebugGuard.IsTrue(sameLength, nameof(source), "Input spans must be of same length!");
+
+            bool wholeGroups = source.Length % 4 == 0;
+            DebugGuard.IsTrue(wholeGroups, nameof(source), "Input spans must be divisable by 4!");
+        }
+
+        /// <summary>
+        /// Debug-only check that both spans have the same length and that the length is a multiple of 3.
+        /// </summary>
+        [Conditional("DEBUG")]
+        private static void VerifyShuffle3SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
+            where T : struct
+        {
+            bool sameLength = source.Length == dest.Length;
+            DebugGuard.IsTrue(sameLength, nameof(source), "Input spans must be of same length!");
+
+            bool wholeGroups = source.Length % 3 == 0;
+            DebugGuard.IsTrue(wholeGroups, nameof(source), "Input spans must be divisable by 3!");
+        }
+
+        /// <summary>
+        /// Debug-only check for the 3-to-4 byte expansion: the source length must be a multiple of 3,
+        /// the destination length a multiple of 4, and the source exactly 3/4 as long as the destination.
+        /// </summary>
+        [Conditional("DEBUG")]
+        private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            DebugGuard.IsTrue(
+                source.Length % 3 == 0,
+                nameof(source),
+                "Input span must be divisable by 3!");
+
+            DebugGuard.IsTrue(
+                dest.Length % 4 == 0,
+                nameof(dest),
+                "Output span must be divisable by 4!");
+
+            // Widen before multiplying: dest.Length * 3 wraps around in 32-bit arithmetic for
+            // spans longer than int.MaxValue / 3, which would make the guard fail spuriously.
+            DebugGuard.IsTrue(
+                source.Length == (long)dest.Length * 3 / 4,
+                nameof(source),
+                "Input span must be 3/4 the length of the output span!");
+        }
+
+        /// <summary>
+        /// Debug-only check for the 4-to-3 byte reduction: the source length must be a multiple of 4,
+        /// the destination length a multiple of 3, and the destination at least 3/4 as long as the source.
+        /// </summary>
+        [Conditional("DEBUG")]
+        private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            DebugGuard.IsTrue(
+                source.Length % 4 == 0,
+                nameof(source),
+                "Input span must be divisable by 4!");
+
+            DebugGuard.IsTrue(
+                dest.Length % 3 == 0,
+                nameof(dest),
+                "Output span must be divisable by 3!");
+
+            // Widen before multiplying: source.Length * 3 wraps around in 32-bit arithmetic for
+            // spans longer than int.MaxValue / 3, which would let a too-short destination pass.
+            // The guard validates the output span, so that is the parameter reported on failure.
+            DebugGuard.IsTrue(
+                dest.Length >= (long)source.Length * 3 / 4,
+                nameof(dest),
+                "Output span must be at least 3/4 the length of the input span!");
+        }
+
+        /// <summary>
+        /// Helpers for building and decoding 4 x 2-bit shuffle control bytes.
+        /// </summary>
+        public static class Shuffle
+        {
+            /// <summary>
+            /// Combines four selectors into one control byte: <paramref name="p3"/> is shifted into
+            /// bits 6-7, <paramref name="p2"/> into bits 4-5, <paramref name="p1"/> into bits 2-3 and
+            /// <paramref name="p0"/> occupies bits 0-1. The inputs are not masked.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
+            {
+                int packed = (p3 << 6) | (p2 << 4) | (p1 << 2) | p0;
+                return (byte)packed;
+            }
+
+            /// <summary>
+            /// Fills <paramref name="span"/> with byte indices: for every group of four positions,
+            /// each entry is the group's start offset plus the matching selector decoded from
+            /// <paramref name="control"/>.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void MmShuffleSpan(ref Span<byte> span, byte control)
+            {
+                InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
+
+                ref byte first = ref MemoryMarshal.GetReference(span);
+                int length = span.Length;
+
+                for (int offset = 0; offset < length; offset += 4)
+                {
+                    ref byte group = ref Unsafe.Add(ref first, offset);
+                    group = (byte)(p0 + offset);
+                    Unsafe.Add(ref group, 1) = (byte)(p1 + offset);
+                    Unsafe.Add(ref group, 2) = (byte)(p2 + offset);
+                    Unsafe.Add(ref group, 3) = (byte)(p3 + offset);
+                }
+            }
+
+            /// <summary>
+            /// Splits a control byte into its four 2-bit selectors; <paramref name="p0"/> receives the
+            /// lowest two bits and <paramref name="p3"/> the highest two.
+            /// </summary>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void InverseMmShuffle(
+                byte control,
+                out int p3,
+                out int p2,
+                out int p1,
+                out int p0)
+            {
+                const int SelectorMask = 0x3;
+
+                p3 = (control >> 6) & SelectorMask;
+                p2 = (control >> 4) & SelectorMask;
+                p1 = (control >> 2) & SelectorMask;
+                p0 = control & SelectorMask;
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Common/Helpers/SimdUtils.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs
@ -6,6 +6,7 @@ using System.Diagnostics;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.PixelFormats;
 #if SUPPORTS_RUNTIME_INTRINSICS
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
@ -25,6 +26,25 @@ namespace SixLabors.ImageSharp
        public static bool HasVector8 { get; } =
            Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;

+        /// <summary>
+        /// Gets a value indicating whether <see cref="Vector{T}"/> code is being JIT-ed to SSE instructions
+        /// where float and integer registers are 128 bits wide (four <see cref="float"/> values per vector).
+        /// Only the <see cref="float"/> vector width is checked here.
+        /// </summary>
+        public static bool HasVector4 { get; } =
+            Vector.IsHardwareAccelerated && Vector<float>.Count == 4;
+
+        /// <summary>
+        /// Gets a value indicating whether AVX2 hardware intrinsics can be used. Always
+        /// <see langword="false"/> when compiled without SUPPORTS_RUNTIME_INTRINSICS.
+        /// </summary>
+#if SUPPORTS_RUNTIME_INTRINSICS
+        public static bool HasAvx2 => Avx2.IsSupported;
+#else
+        public static bool HasAvx2 => false;
+#endif
+
        /// <summary>
        /// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
        /// </summary>
@ -32,7 +52,7 @@ namespace SixLabors.ImageSharp
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static Vector4 PseudoRound(this Vector4 v)
        {
-            Vector4 sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1));
+            Vector4 sign = Numerics.Clamp(v, new Vector4(-1), new Vector4(1));

            return v + (sign * 0.5f);
        }
@ -79,8 +99,9 @@ namespace SixLabors.ImageSharp
        internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
-
-#if SUPPORTS_EXTENDED_INTRINSICS
+#if SUPPORTS_RUNTIME_INTRINSICS
+            HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);
+#elif SUPPORTS_EXTENDED_INTRINSICS
            ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);
 #else
            BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest);
@ -110,7 +131,7 @@ namespace SixLabors.ImageSharp
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

 #if SUPPORTS_RUNTIME_INTRINSICS
-            Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
+            HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
 #elif SUPPORTS_EXTENDED_INTRINSICS
            ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
 #else
@ -170,7 +191,7 @@ namespace SixLabors.ImageSharp
        }

        [MethodImpl(InliningOptions.ShortMethod)]
-        private static byte ConvertToByte(float f) => (byte)ComparableExtensions.Clamp((f * 255f) + 0.5f, 0, 255f);
+        private static byte ConvertToByte(float f) => (byte)Numerics.Clamp((f * 255F) + 0.5F, 0, 255F);

        [Conditional("DEBUG")]
        private static void VerifyHasVector8(string operation)
@ -186,7 +207,7 @@ namespace SixLabors.ImageSharp
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
            DebugGuard.IsTrue(
-                ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
+                Numerics.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
                nameof(source),
                $"length should be divisible by {shouldBeDivisibleBy}!");
        }
@ -196,9 +217,17 @@ namespace SixLabors.ImageSharp
        {
            DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
            DebugGuard.IsTrue(
-                ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
+                Numerics.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
                nameof(source),
                $"length should be divisible by {shouldBeDivisibleBy}!");
        }
+
+        /// <summary>
+        /// Four consecutive bytes exposed as named fields. The scalar planar-packing helpers
+        /// reinterpret channel memory as this type (via <c>Unsafe.As</c>) to read four samples per step.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): no explicit layout attribute is declared, so this relies on the fields being
+        /// laid out in declaration order (V0 first) - keep the field order unchanged.
+        /// </remarks>
+        private struct ByteTuple4
+        {
+            // First byte of the group.
+            public byte V0;
+
+            // Second byte of the group.
+            public byte V1;
+
+            // Third byte of the group.
+            public byte V2;
+
+            // Fourth byte of the group.
+            public byte V3;
+        }
    }
 }
--- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
@ -1,122 +0,0 @@
-// Copyright (c) Six Labors.
-// Licensed under the Apache License, Version 2.0.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace SixLabors.ImageSharp
-{
-    /// <summary>
-    /// Utility methods for the <see cref="Vector4"/> struct.
-    /// </summary>
-    internal static class Vector4Utilities
-    {
-        /// <summary>
-        /// Restricts a vector between a minimum and a maximum value.
-        /// 5x Faster then <see cref="Vector4.Clamp(Vector4, Vector4, Vector4)"/>.
-        /// </summary>
-        /// <param name="x">The vector to restrict.</param>
-        /// <param name="min">The minimum value.</param>
-        /// <param name="max">The maximum value.</param>
-        /// <returns>The <see cref="Vector4"/>.</returns>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static Vector4 FastClamp(Vector4 x, Vector4 min, Vector4 max)
-            => Vector4.Min(Vector4.Max(x, min), max);
-
-        /// <summary>
-        /// Pre-multiplies the "x", "y", "z" components of a vector by its "w" component leaving the "w" component intact.
-        /// </summary>
-        /// <param name="source">The <see cref="Vector4"/> to premultiply</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Premultiply(ref Vector4 source)
-        {
-            float w = source.W;
-            source *= w;
-            source.W = w;
-        }
-
-        /// <summary>
-        /// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
-        /// </summary>
-        /// <param name="source">The <see cref="Vector4"/> to premultiply</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void UnPremultiply(ref Vector4 source)
-        {
-            float w = source.W;
-            source /= w;
-            source.W = w;
-        }
-
-        /// <summary>
-        /// Bulk variant of <see cref="Premultiply(ref Vector4)"/>
-        /// </summary>
-        /// <param name="vectors">The span of vectors</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Premultiply(Span<Vector4> vectors)
-        {
-            // TODO: This method can be AVX2 optimized using Vector<float>
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
-
-            for (int i = 0; i < vectors.Length; i++)
-            {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Premultiply(ref v);
-            }
-        }
-
-        /// <summary>
-        /// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
-        /// </summary>
-        /// <param name="vectors">The span of vectors</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void UnPremultiply(Span<Vector4> vectors)
-        {
-            // TODO: This method can be AVX2 optimized using Vector<float>
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
-
-            for (int i = 0; i < vectors.Length; i++)
-            {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                UnPremultiply(ref v);
-            }
-        }
-
-        /// <summary>
-        /// Transforms a vector by the given matrix.
-        /// </summary>
-        /// <param name="vector">The source vector.</param>
-        /// <param name="matrix">The transformation matrix.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Transform(ref Vector4 vector, ref ColorMatrix matrix)
-        {
-            float x = vector.X;
-            float y = vector.Y;
-            float z = vector.Z;
-            float w = vector.W;
-
-            vector.X = (x * matrix.M11) + (y * matrix.M21) + (z * matrix.M31) + (w * matrix.M41) + matrix.M51;
-            vector.Y = (x * matrix.M12) + (y * matrix.M22) + (z * matrix.M32) + (w * matrix.M42) + matrix.M52;
-            vector.Z = (x * matrix.M13) + (y * matrix.M23) + (z * matrix.M33) + (w * matrix.M43) + matrix.M53;
-            vector.W = (x * matrix.M14) + (y * matrix.M24) + (z * matrix.M34) + (w * matrix.M44) + matrix.M54;
-        }
-
-        /// <summary>
-        /// Bulk variant of <see cref="Transform(ref Vector4, ref ColorMatrix)"/>.
-        /// </summary>
-        /// <param name="vectors">The span of vectors</param>
-        /// <param name="matrix">The transformation matrix.</param>
-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static void Transform(Span<Vector4> vectors, ref ColorMatrix matrix)
-        {
-            ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
-
-            for (int i = 0; i < vectors.Length; i++)
-            {
-                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
-                Transform(ref v, ref matrix);
-            }
-        }
-    }
-}
--- a/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpDecoderCore.cs
@ -1385,7 +1385,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
                    {
                        case BmpFileMarkerType.Bitmap:
                            colorMapSizeBytes = this.fileHeader.Offset - BmpFileHeader.Size - this.infoHeader.HeaderSize;
-                            int colorCountForBitDepth = ImageMaths.GetColorCountForBitDepth(this.infoHeader.BitsPerPixel);
+                            int colorCountForBitDepth = ColorNumerics.GetColorCountForBitDepth(this.infoHeader.BitsPerPixel);
                            bytesPerColorMapEntry = colorMapSizeBytes / colorCountForBitDepth;

                            // Edge case for less-than-full-sized palette: bytesPerColorMapEntry should be at least 3.
@ -1399,7 +1399,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
                        case BmpFileMarkerType.Pointer:
                            // OS/2 bitmaps always have 3 colors per color palette entry.
                            bytesPerColorMapEntry = 3;
-                            colorMapSizeBytes = ImageMaths.GetColorCountForBitDepth(this.infoHeader.BitsPerPixel) * bytesPerColorMapEntry;
+                            colorMapSizeBytes = ColorNumerics.GetColorCountForBitDepth(this.infoHeader.BitsPerPixel) * bytesPerColorMapEntry;
                            break;
                    }
                }
--- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
+++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
@ -6,7 +6,6 @@ using System.Buffers;
 using System.IO;
 using System.Runtime.InteropServices;
 using System.Threading;
-using System.Threading.Tasks;
 using SixLabors.ImageSharp.Advanced;
 using SixLabors.ImageSharp.Common.Helpers;
 using SixLabors.ImageSharp.Memory;
@ -171,7 +170,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp

            var fileHeader = new BmpFileHeader(
                type: BmpConstants.TypeMarkers.Bitmap,
-                fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize,
+                fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize,
                reserved: 0,
                offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize);

@ -263,7 +262,9 @@ namespace SixLabors.ImageSharp.Formats.Bmp
        private void Write24Bit<TPixel>(Stream stream, Buffer2D<TPixel> pixels)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            using (IManagedByteBuffer row = this.AllocateRow(pixels.Width, 3))
+            int width = pixels.Width;
+            int rowBytesWithoutPadding = width * 3;
+            using (IManagedByteBuffer row = this.AllocateRow(width, 3))
            {
                for (int y = pixels.Height - 1; y >= 0; y--)
                {
@ -271,8 +272,8 @@ namespace SixLabors.ImageSharp.Formats.Bmp
                    PixelOperations<TPixel>.Instance.ToBgr24Bytes(
                        this.configuration,
                        pixelSpan,
-                        row.GetSpan(),
-                        pixelSpan.Length);
+                        row.Slice(0, rowBytesWithoutPadding),
+                        width);
                    stream.Write(row.Array, 0, row.Length());
                }
            }
@ -287,7 +288,9 @@ namespace SixLabors.ImageSharp.Formats.Bmp
        private void Write16Bit<TPixel>(Stream stream, Buffer2D<TPixel> pixels)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            using (IManagedByteBuffer row = this.AllocateRow(pixels.Width, 2))
+            int width = pixels.Width;
+            int rowBytesWithoutPadding = width * 2;
+            using (IManagedByteBuffer row = this.AllocateRow(width, 2))
            {
                for (int y = pixels.Height - 1; y >= 0; y--)
                {
@ -296,7 +299,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
                    PixelOperations<TPixel>.Instance.ToBgra5551Bytes(
                        this.configuration,
                        pixelSpan,
-                        row.GetSpan(),
+                        row.Slice(0, rowBytesWithoutPadding),
                        pixelSpan.Length);

                    stream.Write(row.Array, 0, row.Length());
@ -342,20 +345,12 @@ namespace SixLabors.ImageSharp.Formats.Bmp
            using IndexedImageFrame<TPixel> quantized = frameQuantizer.BuildPaletteAndQuantizeFrame(image, image.Bounds());

            ReadOnlySpan<TPixel> quantizedColors = quantized.Palette.Span;
-            var color = default(Rgba32);
-
-            // TODO: Use bulk conversion here for better perf
-            int idx = 0;
-            foreach (TPixel quantizedColor in quantizedColors)
+            var quantizedColorBytes = quantizedColors.Length * 4;
+            PixelOperations<TPixel>.Instance.ToBgra32(this.configuration, quantizedColors, MemoryMarshal.Cast<byte, Bgra32>(colorPalette.Slice(0, quantizedColorBytes)));
+            Span<uint> colorPaletteAsUInt = MemoryMarshal.Cast<byte, uint>(colorPalette);
+            for (int i = 0; i < colorPaletteAsUInt.Length; i++)
            {
-                quantizedColor.ToRgba32(ref color);
-                colorPalette[idx] = color.B;
-                colorPalette[idx + 1] = color.G;
-                colorPalette[idx + 2] = color.R;
-
-                // Padding byte, always 0.
-                colorPalette[idx + 3] = 0;
-                idx += 4;
+                colorPaletteAsUInt[i] = colorPaletteAsUInt[i] & 0x00FFFFFF; // Padding byte, always 0.
            }

            stream.Write(colorPalette);
--- a/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifDecoderCore.cs
@ -535,7 +535,8 @@ namespace SixLabors.ImageSharp.Formats.Gif
                return;
            }

-            Buffer2DRegion<TPixel> pixelRegion = frame.PixelBuffer.GetRegion(this.restoreArea.Value);
+            var interest = Rectangle.Intersect(frame.Bounds(), this.restoreArea.Value);
+            Buffer2DRegion<TPixel> pixelRegion = frame.PixelBuffer.GetRegion(interest);
            pixelRegion.Clear();

            this.restoreArea = null;
--- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
+++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs
@ -105,7 +105,7 @@ namespace SixLabors.ImageSharp.Formats.Gif
            }

            // Get the number of bits.
-            this.bitDepth = ImageMaths.GetBitsNeededForColorDepth(quantized.Palette.Length);
+            this.bitDepth = ColorNumerics.GetBitsNeededForColorDepth(quantized.Palette.Length);

            // Write the header.
            this.WriteHeader(stream);
@ -212,7 +212,7 @@ namespace SixLabors.ImageSharp.Formats.Gif
                    }
                }

-                this.bitDepth = ImageMaths.GetBitsNeededForColorDepth(quantized.Palette.Length);
+                this.bitDepth = ColorNumerics.GetBitsNeededForColorDepth(quantized.Palette.Length);
                this.WriteGraphicalControlExtension(frameMetadata, this.GetTransparentIndex(quantized), stream);
                this.WriteImageDescriptor(frame, true, stream);
                this.WriteColorTable(quantized, stream);
@ -468,7 +468,7 @@ namespace SixLabors.ImageSharp.Formats.Gif
            where TPixel : unmanaged, IPixel<TPixel>
        {
            // The maximum number of colors for the bit depth
-            int colorTableLength = ImageMaths.GetColorCountForBitDepth(this.bitDepth) * Unsafe.SizeOf<Rgb24>();
+            int colorTableLength = ColorNumerics.GetColorCountForBitDepth(this.bitDepth) * Unsafe.SizeOf<Rgb24>();

            using IManagedByteBuffer colorTable = this.memoryAllocator.AllocateManagedByteBuffer(colorTableLength, AllocationOptions.Clean);
            PixelOperations<TPixel>.Instance.ToRgb24Bytes(
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
@ -10,118 +10,38 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
 	internal partial struct Block8x8F
    {
-		/// <summary>
-        /// Transpose the block into the destination block.
-        /// </summary>
-        /// <param name="d">The destination block</param>
-		[MethodImpl(InliningOptions.ShortMethod)]
-        public void TransposeInto(ref Block8x8F d)
-        {
-            d.V0L.X = V0L.X;
-            d.V1L.X = V0L.Y;
-            d.V2L.X = V0L.Z;
-            d.V3L.X = V0L.W;
-            d.V4L.X = V0R.X;
-            d.V5L.X = V0R.Y;
-            d.V6L.X = V0R.Z;
-            d.V7L.X = V0R.W;
-
-            d.V0L.Y = V1L.X;
-            d.V1L.Y = V1L.Y;
-            d.V2L.Y = V1L.Z;
-            d.V3L.Y = V1L.W;
-            d.V4L.Y = V1R.X;
-            d.V5L.Y = V1R.Y;
-            d.V6L.Y = V1R.Z;
-            d.V7L.Y = V1R.W;
-
-            d.V0L.Z = V2L.X;
-            d.V1L.Z = V2L.Y;
-            d.V2L.Z = V2L.Z;
-            d.V3L.Z = V2L.W;
-            d.V4L.Z = V2R.X;
-            d.V5L.Z = V2R.Y;
-            d.V6L.Z = V2R.Z;
-            d.V7L.Z = V2R.W;
-
-            d.V0L.W = V3L.X;
-            d.V1L.W = V3L.Y;
-            d.V2L.W = V3L.Z;
-            d.V3L.W = V3L.W;
-            d.V4L.W = V3R.X;
-            d.V5L.W = V3R.Y;
-            d.V6L.W = V3R.Z;
-            d.V7L.W = V3R.W;
-
-            d.V0R.X = V4L.X;
-            d.V1R.X = V4L.Y;
-            d.V2R.X = V4L.Z;
-            d.V3R.X = V4L.W;
-            d.V4R.X = V4R.X;
-            d.V5R.X = V4R.Y;
-            d.V6R.X = V4R.Z;
-            d.V7R.X = V4R.W;
-
-            d.V0R.Y = V5L.X;
-            d.V1R.Y = V5L.Y;
-            d.V2R.Y = V5L.Z;
-            d.V3R.Y = V5L.W;
-            d.V4R.Y = V5R.X;
-            d.V5R.Y = V5R.Y;
-            d.V6R.Y = V5R.Z;
-            d.V7R.Y = V5R.W;
-
-            d.V0R.Z = V6L.X;
-            d.V1R.Z = V6L.Y;
-            d.V2R.Z = V6L.Z;
-            d.V3R.Z = V6L.W;
-            d.V4R.Z = V6R.X;
-            d.V5R.Z = V6R.Y;
-            d.V6R.Z = V6R.Z;
-            d.V7R.Z = V6R.W;
-
-            d.V0R.W = V7L.X;
-            d.V1R.W = V7L.Y;
-            d.V2R.W = V7L.Z;
-            d.V3R.W = V7L.W;
-            d.V4R.W = V7R.X;
-            d.V5R.W = V7R.Y;
-            d.V6R.W = V7R.Z;
-            d.V7R.W = V7R.W;
-        }
-
 		/// <summary>
        /// Level shift by +maximum/2, clip to [0, maximum]
        /// </summary>
-        public void NormalizeColorsInplace(float maximum)
+        public void NormalizeColorsInPlace(float maximum)
        {
            var CMin4 = new Vector4(0F);
            var CMax4 = new Vector4(maximum);
            var COff4 = new Vector4(MathF.Ceiling(maximum / 2));

-            this.V0L = Vector4Utilities.FastClamp(this.V0L + COff4, CMin4, CMax4);
-            this.V0R = Vector4Utilities.FastClamp(this.V0R + COff4, CMin4, CMax4);
-            this.V1L = Vector4Utilities.FastClamp(this.V1L + COff4, CMin4, CMax4);
-            this.V1R = Vector4Utilities.FastClamp(this.V1R + COff4, CMin4, CMax4);
-            this.V2L = Vector4Utilities.FastClamp(this.V2L + COff4, CMin4, CMax4);
-            this.V2R = Vector4Utilities.FastClamp(this.V2R + COff4, CMin4, CMax4);
-            this.V3L = Vector4Utilities.FastClamp(this.V3L + COff4, CMin4, CMax4);
-            this.V3R = Vector4Utilities.FastClamp(this.V3R + COff4, CMin4, CMax4);
-            this.V4L = Vector4Utilities.FastClamp(this.V4L + COff4, CMin4, CMax4);
-            this.V4R = Vector4Utilities.FastClamp(this.V4R + COff4, CMin4, CMax4);
-            this.V5L = Vector4Utilities.FastClamp(this.V5L + COff4, CMin4, CMax4);
-            this.V5R = Vector4Utilities.FastClamp(this.V5R + COff4, CMin4, CMax4);
-            this.V6L = Vector4Utilities.FastClamp(this.V6L + COff4, CMin4, CMax4);
-            this.V6R = Vector4Utilities.FastClamp(this.V6R + COff4, CMin4, CMax4);
-            this.V7L = Vector4Utilities.FastClamp(this.V7L + COff4, CMin4, CMax4);
-            this.V7R = Vector4Utilities.FastClamp(this.V7R + COff4, CMin4, CMax4);
+            this.V0L = Numerics.Clamp(this.V0L + COff4, CMin4, CMax4);
+            this.V0R = Numerics.Clamp(this.V0R + COff4, CMin4, CMax4);
+            this.V1L = Numerics.Clamp(this.V1L + COff4, CMin4, CMax4);
+            this.V1R = Numerics.Clamp(this.V1R + COff4, CMin4, CMax4);
+            this.V2L = Numerics.Clamp(this.V2L + COff4, CMin4, CMax4);
+            this.V2R = Numerics.Clamp(this.V2R + COff4, CMin4, CMax4);
+            this.V3L = Numerics.Clamp(this.V3L + COff4, CMin4, CMax4);
+            this.V3R = Numerics.Clamp(this.V3R + COff4, CMin4, CMax4);
+            this.V4L = Numerics.Clamp(this.V4L + COff4, CMin4, CMax4);
+            this.V4R = Numerics.Clamp(this.V4R + COff4, CMin4, CMax4);
+            this.V5L = Numerics.Clamp(this.V5L + COff4, CMin4, CMax4);
+            this.V5R = Numerics.Clamp(this.V5R + COff4, CMin4, CMax4);
+            this.V6L = Numerics.Clamp(this.V6L + COff4, CMin4, CMax4);
+            this.V6R = Numerics.Clamp(this.V6R + COff4, CMin4, CMax4);
+            this.V7L = Numerics.Clamp(this.V7L + COff4, CMin4, CMax4);
+            this.V7R = Numerics.Clamp(this.V7R + COff4, CMin4, CMax4);
        }

        /// <summary>
-        /// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
+        /// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
        /// </summary>
        [MethodImpl(InliningOptions.ShortMethod)]
-        public void NormalizeColorsAndRoundInplaceVector8(float maximum)
+        public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
        {
            var off = new Vector<float>(MathF.Ceiling(maximum / 2));
            var max = new Vector<float>(maximum);
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
@ -23,42 +23,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
 	internal partial struct Block8x8F
    {
-		/// <summary>
-        /// Transpose the block into the destination block.
-        /// </summary>
-        /// <param name="d">The destination block</param>
-		[MethodImpl(InliningOptions.ShortMethod)]
-        public void TransposeInto(ref Block8x8F d)
-        {
-            <#
-			PushIndent("            ");
-
-            for (int i = 0; i < 8; i++)
-            {
-                char destCoord = coordz[i % 4];
-                char destSide = (i / 4) % 2 == 0 ? 'L' : 'R';
-
-                for (int j = 0; j < 8; j++)
-                {
-					if(i > 0 && j == 0){
-					WriteLine("");
-					}
-
-                    char srcCoord = coordz[j % 4];
-                    char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R';
-
-                    var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n";
-					Write(expression);
-                }
-            }
-			PopIndent();
-			#>
-        }
-
 		/// <summary>
        /// Level shift by +maximum/2, clip to [0, maximum]
        /// </summary>
-        public void NormalizeColorsInplace(float maximum)
+        public void NormalizeColorsInPlace(float maximum)
        {
            var CMin4 = new Vector4(0F);
            var CMax4 = new Vector4(maximum);
@ -73,7 +41,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
                for (int j = 0; j < 2; j++)
                {
 					char side = j == 0 ? 'L' : 'R';
-					Write($"this.V{i}{side} = Vector4Utilities.FastClamp(this.V{i}{side} + COff4, CMin4, CMax4);\r\n");
+					Write($"this.V{i}{side} = Numerics.Clamp(this.V{i}{side} + COff4, CMin4, CMax4);\r\n");
                }
            }
 			PopIndent();
@ -81,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        }

        /// <summary>
-        /// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
+        /// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
        /// </summary>
        [MethodImpl(InliningOptions.ShortMethod)]
-        public void NormalizeColorsAndRoundInplaceVector8(float maximum)
+        public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
        {
            var off = new Vector<float>(MathF.Ceiling(maximum / 2));
            var max = new Vector<float>(maximum);
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@ -6,6 +6,10 @@ using System.Diagnostics;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 using System.Text;

 // ReSharper disable InconsistentNaming
@ -14,6 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
    /// <summary>
    /// Represents a Jpeg block with <see cref="float"/> coefficients.
    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
    internal partial struct Block8x8F : IEquatable<Block8x8F>
    {
        /// <summary>
@ -47,9 +52,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        public Vector4 V7R;
 #pragma warning restore SA1600 // ElementsMustBeDocumented

-        private static readonly Vector4 NegativeOne = new Vector4(-1);
-        private static readonly Vector4 Offset = new Vector4(.5F);
-
        /// <summary>
        /// Get/Set scalar elements at a given index
        /// </summary>
@ -57,7 +59,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <returns>The float value at the specified index</returns>
        public float this[int idx]
        {
-            [MethodImpl(InliningOptions.ShortMethod)]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get
            {
                GuardBlockIndex(idx);
@ -65,7 +67,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
                return Unsafe.Add(ref selfRef, idx);
            }

-            [MethodImpl(InliningOptions.ShortMethod)]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            set
            {
                GuardBlockIndex(idx);
@ -151,10 +153,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// </summary>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void Clear()
-        {
-            // The cheapest way to do this in C#:
-            this = default;
-        }
+            => this = default; // Assigning default is the cheapest way to zero the whole block in C#.

        /// <summary>
        /// Load raw 32bit floating point data from source.
@ -176,9 +175,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <param name="source">Source</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static unsafe void LoadFrom(Block8x8F* blockPtr, Span<float> source)
-        {
-            blockPtr->LoadFrom(source);
-        }
+            => blockPtr->LoadFrom(source);

        /// <summary>
        /// Load raw 32bit floating point data from source
@ -232,9 +229,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <param name="dest">The destination.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<float> dest)
-        {
-            blockPtr->ScaledCopyTo(dest);
-        }
+            => blockPtr->ScaledCopyTo(dest);

        /// <summary>
        /// Copy raw 32bit floating point data to dest
@ -277,73 +272,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// </summary>
        /// <param name="value">The value to multiply by.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
-        public void MultiplyInplace(float value)
-        {
-            this.V0L *= value;
-            this.V0R *= value;
-            this.V1L *= value;
-            this.V1R *= value;
-            this.V2L *= value;
-            this.V2R *= value;
-            this.V3L *= value;
-            this.V3R *= value;
-            this.V4L *= value;
-            this.V4R *= value;
-            this.V5L *= value;
-            this.V5R *= value;
-            this.V6L *= value;
-            this.V6R *= value;
-            this.V7L *= value;
-            this.V7R *= value;
+        public void MultiplyInPlace(float value)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx.IsSupported)
+            {
+                // Every 256-bit access starts at a VnL field and covers 8 floats,
+                // so this path relies on VnR being laid out directly after VnL.
+                Vector256<float> factor = Vector256.Create(value);
+
+                ref Vector256<float> row0 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V0L);
+                row0 = Avx.Multiply(row0, factor);
+
+                ref Vector256<float> row1 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V1L);
+                row1 = Avx.Multiply(row1, factor);
+
+                ref Vector256<float> row2 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V2L);
+                row2 = Avx.Multiply(row2, factor);
+
+                ref Vector256<float> row3 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V3L);
+                row3 = Avx.Multiply(row3, factor);
+
+                ref Vector256<float> row4 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V4L);
+                row4 = Avx.Multiply(row4, factor);
+
+                ref Vector256<float> row5 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V5L);
+                row5 = Avx.Multiply(row5, factor);
+
+                ref Vector256<float> row6 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V6L);
+                row6 = Avx.Multiply(row6, factor);
+
+                ref Vector256<float> row7 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V7L);
+                row7 = Avx.Multiply(row7, factor);
+            }
+            else
+#endif
+            {
+                // Portable fallback: scale the sixteen Vector4 halves one by one.
+                Vector4 factor = new Vector4(value);
+                this.V0L = Vector4.Multiply(this.V0L, factor);
+                this.V0R = Vector4.Multiply(this.V0R, factor);
+                this.V1L = Vector4.Multiply(this.V1L, factor);
+                this.V1R = Vector4.Multiply(this.V1R, factor);
+                this.V2L = Vector4.Multiply(this.V2L, factor);
+                this.V2R = Vector4.Multiply(this.V2R, factor);
+                this.V3L = Vector4.Multiply(this.V3L, factor);
+                this.V3R = Vector4.Multiply(this.V3R, factor);
+                this.V4L = Vector4.Multiply(this.V4L, factor);
+                this.V4R = Vector4.Multiply(this.V4R, factor);
+                this.V5L = Vector4.Multiply(this.V5L, factor);
+                this.V5R = Vector4.Multiply(this.V5R, factor);
+                this.V6L = Vector4.Multiply(this.V6L, factor);
+                this.V6R = Vector4.Multiply(this.V6R, factor);
+                this.V7L = Vector4.Multiply(this.V7L, factor);
+                this.V7R = Vector4.Multiply(this.V7R, factor);
+            }
        }

        /// <summary>
        /// Multiply all elements of the block by the corresponding elements of 'other'.
        /// </summary>
        [MethodImpl(InliningOptions.ShortMethod)]
-        public void MultiplyInplace(ref Block8x8F other)
-        {
-            this.V0L *= other.V0L;
-            this.V0R *= other.V0R;
-            this.V1L *= other.V1L;
-            this.V1R *= other.V1R;
-            this.V2L *= other.V2L;
-            this.V2R *= other.V2R;
-            this.V3L *= other.V3L;
-            this.V3R *= other.V3R;
-            this.V4L *= other.V4L;
-            this.V4R *= other.V4R;
-            this.V5L *= other.V5L;
-            this.V5R *= other.V5R;
-            this.V6L *= other.V6L;
-            this.V6R *= other.V6R;
-            this.V7L *= other.V7L;
-            this.V7R *= other.V7R;
+        public void MultiplyInPlace(ref Block8x8F other)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx.IsSupported)
+            {
+                // Each Vector256 access starts at a VnL field and covers 8 floats, so one
+                // multiply handles a whole row. This relies on VnR being laid out directly
+                // after VnL in both blocks.
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V0L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V0L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V0L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V1L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V1L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V1L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V2L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V2L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V2L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V3L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V3L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V3L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V4L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V4L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V4L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V5L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V5L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V5L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V6L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V6L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V6L));
+
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V7L)
+                    = Avx.Multiply(
+                        Unsafe.As<Vector4, Vector256<float>>(ref this.V7L),
+                        Unsafe.As<Vector4, Vector256<float>>(ref other.V7L));
+            }
+            else
+#endif
+            {
+                // Portable fallback: element-wise product, one Vector4 half at a time.
+                this.V0L *= other.V0L;
+                this.V0R *= other.V0R;
+                this.V1L *= other.V1L;
+                this.V1R *= other.V1R;
+                this.V2L *= other.V2L;
+                this.V2R *= other.V2R;
+                this.V3L *= other.V3L;
+                this.V3R *= other.V3R;
+                this.V4L *= other.V4L;
+                this.V4R *= other.V4R;
+                this.V5L *= other.V5L;
+                this.V5R *= other.V5R;
+                this.V6L *= other.V6L;
+                this.V6R *= other.V6R;
+                this.V7L *= other.V7L;
+                this.V7R *= other.V7R;
+            }
        }

        /// <summary>
-        /// Adds a vector to all elements of the block.
+        /// Adds a scalar value to every element of the block.
        /// </summary>
-        /// <param name="diff">The added vector</param>
+        /// <param name="value">The scalar value to add to each element.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
-        public void AddToAllInplace(Vector4 diff)
-        {
-            this.V0L += diff;
-            this.V0R += diff;
-            this.V1L += diff;
-            this.V1R += diff;
-            this.V2L += diff;
-            this.V2R += diff;
-            this.V3L += diff;
-            this.V3R += diff;
-            this.V4L += diff;
-            this.V4R += diff;
-            this.V5L += diff;
-            this.V5R += diff;
-            this.V6L += diff;
-            this.V6R += diff;
-            this.V7L += diff;
-            this.V7R += diff;
+        public void AddInPlace(float value)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx.IsSupported)
+            {
+                // Each Vector256 access starts at a VnL field and covers 8 floats, so one
+                // add handles a whole row. This relies on VnR being laid out directly after VnL.
+                var valueVec = Vector256.Create(value);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
+                Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
+            }
+            else
+#endif
+            {
+                // Portable fallback: broadcast the scalar once, then add it to each Vector4 half.
+                var valueVec = new Vector4(value);
+                this.V0L += valueVec;
+                this.V0R += valueVec;
+                this.V1L += valueVec;
+                this.V1R += valueVec;
+                this.V2L += valueVec;
+                this.V2R += valueVec;
+                this.V3L += valueVec;
+                this.V3R += valueVec;
+                this.V4L += valueVec;
+                this.V4R += valueVec;
+                this.V5L += valueVec;
+                this.V5R += valueVec;
+                this.V6L += valueVec;
+                this.V6R += valueVec;
+                this.V7L += valueVec;
+                this.V7R += valueVec;
+            }
        }

        /// <summary>
@ -352,7 +430,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <param name="blockPtr">The block pointer.</param>
        /// <param name="qtPtr">The qt pointer.</param>
        /// <param name="unzigPtr">Unzig pointer</param>
-        // [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
        {
            float* b = (float*)blockPtr;
@ -398,6 +475,57 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <param name="source">The source block.</param>
        public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
        {
+            // Dispatch: the AVX2 kernel is used when the hardware supports it,
+            // otherwise (or when intrinsics are compiled out) the scalar routine runs.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx2.IsSupported)
+            {
+                Scale16X16To8X8Vectorized(ref destination, source);
+            }
+            else
+#endif
+            {
+                Scale16X16To8X8Scalar(ref destination, source);
+            }
+        }
+
+        /// <summary>
+        /// AVX2 kernel behind <see cref="Scale16X16To8X8"/>: every destination element is
+        /// computed as (sum of a 2x2 group of source elements + 2) * 0.25.
+        /// Compiles to an empty body when runtime intrinsics are not available.
+        /// </summary>
+        /// <param name="destination">The block receiving the 8 downscaled rows.</param>
+        /// <param name="source">The source blocks; indices 0 to 3 are read.</param>
+        private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method");
+
+            Vector256<float> roundingBias = Vector256.Create(2f);
+            Vector256<float> quarter = Vector256.Create(0.25f);
+
+            ref byte maskBytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32);
+            Vector256<int> swapInnerPairs = Unsafe.As<byte, Vector256<int>>(ref maskBytes);
+
+            ref Block8x8F sourceBase = ref MemoryMarshal.GetReference(source);
+            ref Vector256<float> destRow = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);
+
+            // Two passes: source blocks (0, 1) feed the first four destination rows,
+            // source blocks (2, 3) feed the last four.
+            for (int pair = 0; pair < 2; pair++)
+            {
+                ref Vector256<float> leftRows = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceBase, 2 * pair));
+                ref Vector256<float> rightRows = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceBase, (2 * pair) + 1));
+
+                for (int row = 0; row < 8; row += 2)
+                {
+                    Vector256<float> leftTop = Unsafe.Add(ref leftRows, row);
+                    Vector256<float> leftBottom = Unsafe.Add(ref leftRows, row + 1);
+                    Vector256<float> rightTop = Unsafe.Add(ref rightRows, row);
+                    Vector256<float> rightBottom = Unsafe.Add(ref rightRows, row + 1);
+
+                    // Per 128-bit lane: the first shuffle of each pair gathers the even-indexed
+                    // elements of both inputs, the second gathers the odd-indexed ones.
+                    Vector256<float> topEven = Avx.Shuffle(leftTop, rightTop, 0b10_00_10_00);
+                    Vector256<float> topOdd = Avx.Shuffle(leftTop, rightTop, 0b11_01_11_01);
+                    Vector256<float> bottomEven = Avx.Shuffle(leftBottom, rightBottom, 0b10_00_10_00);
+                    Vector256<float> bottomOdd = Avx.Shuffle(leftBottom, rightBottom, 0b11_01_11_01);
+
+                    Vector256<float> topSum = Avx.Add(topEven, topOdd);
+                    Vector256<float> bottomSum = Avx.Add(bottomEven, bottomOdd);
+                    Vector256<float> biased = Avx.Add(Avx.Add(topSum, bottomSum), roundingBias);
+                    Vector256<float> averaged = Avx.Multiply(biased, quarter);
+
+                    // The shuffles leave left/right results interleaved per lane; the permute
+                    // reorders them so all left-block results precede the right-block ones.
+                    destRow = Avx2.PermuteVar8x32(averaged, swapInnerPairs);
+                    destRow = ref Unsafe.Add(ref destRow, 1);
+                }
+            }
+#endif
+        }
+
+        private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
+        {
            for (int i = 0; i < 4; i++)
            {
                int dstOff = ((i & 2) << 4) | ((i & 1) << 2);
@ -418,22 +546,60 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        [MethodImpl(InliningOptions.ShortMethod)]
        private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
        {
-            a.V0L = DivideRound(a.V0L, b.V0L);
-            a.V0R = DivideRound(a.V0R, b.V0R);
-            a.V1L = DivideRound(a.V1L, b.V1L);
-            a.V1R = DivideRound(a.V1R, b.V1R);
-            a.V2L = DivideRound(a.V2L, b.V2L);
-            a.V2R = DivideRound(a.V2R, b.V2R);
-            a.V3L = DivideRound(a.V3L, b.V3L);
-            a.V3R = DivideRound(a.V3R, b.V3R);
-            a.V4L = DivideRound(a.V4L, b.V4L);
-            a.V4R = DivideRound(a.V4R, b.V4R);
-            a.V5L = DivideRound(a.V5L, b.V5L);
-            a.V5R = DivideRound(a.V5R, b.V5R);
-            a.V6L = DivideRound(a.V6L, b.V6L);
-            a.V6R = DivideRound(a.V6R, b.V6R);
-            a.V7L = DivideRound(a.V7L, b.V7L);
-            a.V7R = DivideRound(a.V7R, b.V7R);
+            // Overwrites every element of 'a' with a/b + 0.5 * clamp(a, -1, 1), element by element.
+            // 'b' is only read. Both branches below compute the same expression.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx.IsSupported)
+            {
+                // Clamp bounds (-1, +1) and the 0.5 rounding offset, broadcast to all 8 lanes.
+                var vnegOne = Vector256.Create(-1f);
+                var vadd = Vector256.Create(.5F);
+                var vone = Vector256.Create(1f);
+
+                // View both blocks as sequences of Vector256<float>, starting at their V0L field.
+                // NOTE(review): assumes the 16 Vector4 fields V0L..V7R are laid out contiguously,
+                // so that 8 Vector256<float> cover the whole block - confirm against the struct layout.
+                ref Vector256<float> aBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref a.V0L));
+                ref Vector256<float> bBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref b.V0L));
+
+                // One-past-the-end marker: 8 vectors after the start of 'a'.
+                ref Vector256<float> aEnd = ref Unsafe.Add(ref aBase, 8);
+
+                do
+                {
+                    // voff = 0.5 * min(max(-1, a), 1); computed from the current 'a' row before it is overwritten.
+                    Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd);
+                    Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff);
+
+                    // Advance both cursors by one Vector256 (8 floats).
+                    aBase = ref Unsafe.Add(ref aBase, 1);
+                    bBase = ref Unsafe.Add(ref bBase, 1);
+                }
+                while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd));
+            }
+            else
+#endif
+            {
+                // Fallback: apply DivideRound to each of the 16 Vector4 fields in turn.
+                a.V0L = DivideRound(a.V0L, b.V0L);
+                a.V0R = DivideRound(a.V0R, b.V0R);
+                a.V1L = DivideRound(a.V1L, b.V1L);
+                a.V1R = DivideRound(a.V1R, b.V1R);
+                a.V2L = DivideRound(a.V2L, b.V2L);
+                a.V2R = DivideRound(a.V2R, b.V2R);
+                a.V3L = DivideRound(a.V3L, b.V3L);
+                a.V3R = DivideRound(a.V3R, b.V3R);
+                a.V4L = DivideRound(a.V4L, b.V4L);
+                a.V4R = DivideRound(a.V4R, b.V4R);
+                a.V5L = DivideRound(a.V5L, b.V5L);
+                a.V5R = DivideRound(a.V5R, b.V5R);
+                a.V6L = DivideRound(a.V6L, b.V6L);
+                a.V6R = DivideRound(a.V6R, b.V6R);
+                a.V7L = DivideRound(a.V7L, b.V7L);
+                a.V7R = DivideRound(a.V7R, b.V7R);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
+        {
+            // Returns dividend/divisor plus half of the clamped dividend, per component.
+            // The value is not truncated or rounded to an integer here.
+            // Lower clamp bound (-1) and the 0.5 offset factor, replicated across all four components.
+            var neg = new Vector4(-1);
+            var add = new Vector4(.5F);
+
+            // sign(dividend) = max(min(dividend, 1), -1)
+            // NOTE(review): assuming Numerics.Clamp is a component-wise clamp, components with
+            // |dividend| < 1 yield the dividend itself rather than +/-1 - confirm this is intended.
+            Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
+
+            // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
+            return (dividend / divisor) + (sign * add);
        }

        public void RoundInto(ref Block8x8 dest)
@ -464,23 +630,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <summary>
        /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block.
        /// </summary>
+        /// <param name="maximum">The upper bound of the clip range; forwarded unchanged to the routine(s) called below.</param>
-        public void NormalizeColorsAndRoundInplace(float maximum)
+        public void NormalizeColorsAndRoundInPlace(float maximum)
        {
+            // Pick the implementation based on SimdUtils.HasVector8.
            if (SimdUtils.HasVector8)
            {
+                // Single call that performs both the normalization and the rounding.
-                this.NormalizeColorsAndRoundInplaceVector8(maximum);
+                this.NormalizeColorsAndRoundInPlaceVector8(maximum);
            }
            else
            {
+                // Two separate passes: normalize first, then round.
-                this.NormalizeColorsInplace(maximum);
-                this.RoundInplace();
+                this.NormalizeColorsInPlace(maximum);
+                this.RoundInPlace();
            }
        }

        /// <summary>
        /// Rounds all values in the block.
        /// </summary>
-        public void RoundInplace()
+        public void RoundInPlace()
        {
            for (int i = 0; i < Size; i++)
            {
@ -535,8 +701,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components

        /// <inheritdoc />
        public bool Equals(Block8x8F other)
-        {
-            return this.V0L == other.V0L
+            => this.V0L == other.V0L
            && this.V0R == other.V0R
            && this.V1L == other.V1L
            && this.V1R == other.V1R
@ -552,7 +717,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            && this.V6R == other.V6R
            && this.V7L == other.V7L
            && this.V7R == other.V7R;
-        }

        /// <inheritdoc />
        public override string ToString()
@ -580,21 +744,163 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            return row.FastRound();
        }

-        [MethodImpl(InliningOptions.ShortMethod)]
-        private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
-        {
-            // sign(dividend) = max(min(dividend, 1), -1)
-            Vector4 sign = Vector4Utilities.FastClamp(dividend, NegativeOne, Vector4.One);
-
-            // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
-            return (dividend / divisor) + (sign * Offset);
-        }
-
        [Conditional("DEBUG")]
        private static void GuardBlockIndex(int idx)
        {
+            // Checks that idx lies in [0, Size). Because of [Conditional("DEBUG")], calls to this
+            // method are omitted by the compiler unless the DEBUG symbol is defined.
            DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
            DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
        }
+
+        /// <summary>
+        /// Transpose the block into the destination block.
+        /// </summary>
+        /// <param name="d">The destination block</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void TransposeInto(ref Block8x8F d)
+        {
+            // NOTE(review): the scalar fallback below writes elements of 'd' before it has finished
+            // reading all elements of this block, so 'd' presumably must not alias this block -
+            // confirm with callers.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Avx.IsSupported)
+            {
+                // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
+                // Step 1: build eight 256-bit registers, each holding two 4-float half-rows.
+                // r0..r3 hold the left halves (VnL) of rows 0..3 in the low 128 bits and of rows 4..7
+                // in the high 128 bits; r4..r7 do the same for the right halves (VnR).
+                Vector256<float> r0 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
+                   1);
+
+                Vector256<float> r1 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
+                   1);
+
+                Vector256<float> r2 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
+                   1);
+
+                Vector256<float> r3 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
+                   1);
+
+                Vector256<float> r4 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
+                   1);
+
+                Vector256<float> r5 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
+                   1);
+
+                Vector256<float> r6 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
+                   1);
+
+                Vector256<float> r7 = Avx.InsertVector128(
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
+                   Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
+                   1);
+
+                // Step 2: interleave pairs of registers, then shuffle (0x4E) and blend (0xCC / 0x33)
+                // to assemble one full source column per 256-bit result. Each result is stored as a
+                // 256-bit write starting at a VnL field of 'd'.
+                // NOTE(review): the 256-bit store assumes VnR immediately follows VnL in memory -
+                // confirm against the struct layout.
+
+                // Source columns 0 and 1 -> destination rows 0 and 1.
+                Vector256<float> t0 = Avx.UnpackLow(r0, r1);
+                Vector256<float> t2 = Avx.UnpackLow(r2, r3);
+                Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
+
+                // Source columns 4 and 5 -> destination rows 4 and 5.
+                Vector256<float> t4 = Avx.UnpackLow(r4, r5);
+                Vector256<float> t6 = Avx.UnpackLow(r6, r7);
+                v = Avx.Shuffle(t4, t6, 0x4E);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
+
+                // Source columns 2 and 3 -> destination rows 2 and 3.
+                Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
+                Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
+                v = Avx.Shuffle(t1, t3, 0x4E);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
+
+                // Source columns 6 and 7 -> destination rows 6 and 7.
+                Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
+                Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
+                v = Avx.Shuffle(t5, t7, 0x4E);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
+                Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
+            }
+            else
+#endif
+            {
+                // Scalar fallback: each group of eight assignments below scatters one source row
+                // (VnL.X..W followed by VnR.X..W) into one column of the destination.
+
+                // Source row 0 -> destination column 0.
+                d.V0L.X = this.V0L.X;
+                d.V1L.X = this.V0L.Y;
+                d.V2L.X = this.V0L.Z;
+                d.V3L.X = this.V0L.W;
+                d.V4L.X = this.V0R.X;
+                d.V5L.X = this.V0R.Y;
+                d.V6L.X = this.V0R.Z;
+                d.V7L.X = this.V0R.W;
+
+                // Source row 1 -> destination column 1.
+                d.V0L.Y = this.V1L.X;
+                d.V1L.Y = this.V1L.Y;
+                d.V2L.Y = this.V1L.Z;
+                d.V3L.Y = this.V1L.W;
+                d.V4L.Y = this.V1R.X;
+                d.V5L.Y = this.V1R.Y;
+                d.V6L.Y = this.V1R.Z;
+                d.V7L.Y = this.V1R.W;
+
+                // Source row 2 -> destination column 2.
+                d.V0L.Z = this.V2L.X;
+                d.V1L.Z = this.V2L.Y;
+                d.V2L.Z = this.V2L.Z;
+                d.V3L.Z = this.V2L.W;
+                d.V4L.Z = this.V2R.X;
+                d.V5L.Z = this.V2R.Y;
+                d.V6L.Z = this.V2R.Z;
+                d.V7L.Z = this.V2R.W;
+
+                // Source row 3 -> destination column 3.
+                d.V0L.W = this.V3L.X;
+                d.V1L.W = this.V3L.Y;
+                d.V2L.W = this.V3L.Z;
+                d.V3L.W = this.V3L.W;
+                d.V4L.W = this.V3R.X;
+                d.V5L.W = this.V3R.Y;
+                d.V6L.W = this.V3R.Z;
+                d.V7L.W = this.V3R.W;
+
+                // Source row 4 -> destination column 4.
+                d.V0R.X = this.V4L.X;
+                d.V1R.X = this.V4L.Y;
+                d.V2R.X = this.V4L.Z;
+                d.V3R.X = this.V4L.W;
+                d.V4R.X = this.V4R.X;
+                d.V5R.X = this.V4R.Y;
+                d.V6R.X = this.V4R.Z;
+                d.V7R.X = this.V4R.W;
+
+                // Source row 5 -> destination column 5.
+                d.V0R.Y = this.V5L.X;
+                d.V1R.Y = this.V5L.Y;
+                d.V2R.Y = this.V5L.Z;
+                d.V3R.Y = this.V5L.W;
+                d.V4R.Y = this.V5R.X;
+                d.V5R.Y = this.V5R.Y;
+                d.V6R.Y = this.V5R.Z;
+                d.V7R.Y = this.V5R.W;
+
+                // Source row 6 -> destination column 6.
+                d.V0R.Z = this.V6L.X;
+                d.V1R.Z = this.V6L.Y;
+                d.V2R.Z = this.V6L.Z;
+                d.V3R.Z = this.V6L.W;
+                d.V4R.Z = this.V6R.X;
+                d.V5R.Z = this.V6R.Y;
+                d.V6R.Z = this.V6R.Z;
+                d.V7R.Z = this.V6R.W;
+
+                // Source row 7 -> destination column 7.
+                d.V0R.W = this.V7L.X;
+                d.V1R.W = this.V7L.Y;
+                d.V2R.W = this.V7L.Z;
+                d.V3R.W = this.V7L.W;
+                d.V4R.W = this.V7R.X;
+                d.V5R.W = this.V7R.Y;
+                d.V6R.W = this.V7R.Z;
+                d.V7R.W = this.V7R.W;
+            }
+        }
    }
 }
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs
@ -0,0 +1,18 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// Common base for the AVX2 color converters. It fixes the third base-constructor
+        /// argument to 8 and ties availability to <see cref="SimdUtils.HasAvx2"/>.
+        /// </summary>
+        internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
+        {
+            // The literal 8 is presumably the number of floats processed per vector step
+            // (one Vector256<float>) - confirm against VectorizedJpegColorConverter.
+            protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
+                : base(colorSpace, precision, 8)
+            {
+            }
+
+            // Sealed so derived converters cannot change the availability rule.
+            protected sealed override bool IsAvailable => SimdUtils.HasAvx2;
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs
@ -0,0 +1,18 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// Common base for the scalar ("Basic") color converters. It forwards its constructor
+        /// arguments unchanged and always reports itself as available.
+        /// </summary>
+        internal abstract class BasicJpegColorConverter : JpegColorConverter
+        {
+            protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
+                : base(colorSpace, precision)
+            {
+            }
+
+            // Unconditionally available: this property does not consult any hardware check.
+            protected override bool IsAvailable => true;
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
@ -0,0 +1,81 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static SixLabors.ImageSharp.SimdUtils;
+#endif
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// CMYK converter built on <see cref="Avx2JpegColorConverter"/>. The vectorized path uses
+        /// AVX/AVX2 intrinsics; the non-vectorized path delegates to <see cref="FromCmykBasic"/>.
+        /// </summary>
+        internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
+        {
+            public FromCmykAvx2(int precision)
+                : base(JpegColorSpace.Cmyk, precision)
+            {
+            }
+
+            // Processes result.Length / 8 groups of 8 values per component; any remainder is not
+            // touched here. The whole body is compiled out when SUPPORTS_RUNTIME_INTRINSICS is undefined.
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                // View each component span (Component0..3 read as c, m, y, k) as Vector256<float> elements.
+                ref Vector256<float> cBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector256<float> mBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector256<float> yBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
+                ref Vector256<float> kBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
+
+                // View the Vector4 output as Vector256<float> elements (two Vector4 per element).
+                ref Vector256<float> resultBase =
+                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
+
+                // Used for the color conversion
+                var scale = Vector256.Create(1 / this.MaximumValue);
+                var one = Vector256.Create(1F);
+
+                // Used for packing
+                // NOTE(review): the mask contents are not visible here; presumably it reorders the
+                // inputs so the in-lane unpack/shuffle below emits pixels in sequential order - confirm.
+                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    // Load 8 values of each component and apply the packing permutation.
+                    Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
+                    Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
+                    Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
+                    Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
+
+                    // k is scaled by 1/MaximumValue first ...
+                    k = Avx.Multiply(k, scale);
+
+                    // ... then each color channel becomes channel * k * (1/MaximumValue).
+                    c = Avx.Multiply(Avx.Multiply(c, k), scale);
+                    m = Avx.Multiply(Avx.Multiply(m, k), scale);
+                    y = Avx.Multiply(Avx.Multiply(y, k), scale);
+
+                    // Interleave within each 128-bit lane: (c, m) pairs and (y, 1) pairs.
+                    Vector256<float> cmLo = Avx.UnpackLow(c, m);
+                    Vector256<float> yoLo = Avx.UnpackLow(y, one);
+                    Vector256<float> cmHi = Avx.UnpackHigh(c, m);
+                    Vector256<float> yoHi = Avx.UnpackHigh(y, one);
+
+                    // Each iteration writes 4 Vector256<float>, i.e. 8 Vector4 results.
+                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                    // Combine a (c, m) pair with a (y, 1) pair so every 128-bit lane holds (c, m, y, 1).
+                    destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
+                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
+                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
+                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
+                }
+#endif
+            }
+
+            // Non-vectorized path: reuse the static scalar implementation from FromCmykBasic.
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromCmyk : JpegColorConverter
+        internal sealed class FromCmykBasic : BasicJpegColorConverter
        {
-            public FromCmyk(int precision)
+            public FromCmykBasic(int precision)
                : base(JpegColorSpace.Cmyk, precision)
            {
            }

            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
-                // TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
+                ConvertCore(values, result, this.MaximumValue);
+            }
+
+            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
+            {
                ReadOnlySpan<float> cVals = values.Component0;
                ReadOnlySpan<float> mVals = values.Component1;
                ReadOnlySpan<float> yVals = values.Component2;
@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters

                var v = new Vector4(0, 0, 0, 1F);

-                var maximum = 1 / this.MaximumValue;
+                var maximum = 1 / maxValue;
                var scale = new Vector4(maximum, maximum, maximum, 1F);

                for (int i = 0; i < result.Length; i++)
@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                    float c = cVals[i];
                    float m = mVals[i];
                    float y = yVals[i];
-                    float k = kVals[i] / this.MaximumValue;
+                    float k = kVals[i] / maxValue;

                    v.X = c * k;
                    v.Y = m * k;
@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            }
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
@ -0,0 +1,71 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Tuples;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// CMYK converter built on <see cref="Vector8JpegColorConverter"/>. The vectorized path uses
+        /// <see cref="Vector{T}"/> of float; the non-vectorized path delegates to <see cref="FromCmykBasic"/>.
+        /// </summary>
+        internal sealed class FromCmykVector8 : Vector8JpegColorConverter
+        {
+            public FromCmykVector8(int precision)
+                : base(JpegColorSpace.Cmyk, precision)
+            {
+            }
+
+            // Processes result.Length / 8 groups; any remainder is not touched here.
+            // NOTE(review): the reinterpret casts below assume Vector<float>.Count == 8, presumably
+            // guaranteed by the base class availability check - confirm.
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+                // View each component span (Component0..3 read as c, m, y, k) as Vector<float> elements.
+                ref Vector<float> cBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector<float> mBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector<float> yBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
+                ref Vector<float> kBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
+
+                // View the Vector4 output as Vector4Octet elements.
+                ref Vector4Octet resultBase =
+                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
+
+                // Scratch locals that receive the converted channel vectors. The *RefAsVector aliases
+                // let a Vector<float> be stored directly into the corresponding Vector4Pair local.
+                Vector4Pair cc = default;
+                Vector4Pair mm = default;
+                Vector4Pair yy = default;
+                ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
+                ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
+                ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);
+
+                var scale = new Vector<float>(1 / this.MaximumValue);
+
+                // Walking 8 elements at one step:
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    Vector<float> c = Unsafe.Add(ref cBase, i);
+                    Vector<float> m = Unsafe.Add(ref mBase, i);
+                    Vector<float> y = Unsafe.Add(ref yBase, i);
+
+                    // k is scaled by 1/MaximumValue first ...
+                    Vector<float> k = Unsafe.Add(ref kBase, i) * scale;
+
+                    // ... then each color channel becomes channel * k * (1/MaximumValue).
+                    c = (c * k) * scale;
+                    m = (m * k) * scale;
+                    y = (y * k) * scale;
+
+                    // Store the results into cc / mm / yy through their Vector<float> aliases.
+                    ccRefAsVector = c;
+                    mmRefAsVector = m;
+                    yyRefAsVector = y;
+
+                    // Collect (c0,c1...c7) (m0,m1...m7) (y0,y1...y7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
+                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
+                    destination.Pack(ref cc, ref mm, ref yy);
+                }
+            }
+
+            // Non-vectorized path: reuse the static scalar implementation from FromCmykBasic.
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
@ -0,0 +1,63 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static SixLabors.ImageSharp.SimdUtils;
+#endif
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// Grayscale converter built on <see cref="Avx2JpegColorConverter"/>. The vectorized path uses
+        /// AVX/AVX2 intrinsics; the non-vectorized path delegates to <see cref="FromGrayscaleBasic"/>.
+        /// </summary>
+        internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
+        {
+            public FromGrayscaleAvx2(int precision)
+                : base(JpegColorSpace.Grayscale, precision)
+            {
+            }
+
+            // Processes result.Length / 8 groups of 8 gray values; any remainder is not touched here.
+            // The whole body is compiled out when SUPPORTS_RUNTIME_INTRINSICS is undefined.
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                // View the single input component and the Vector4 output as Vector256<float> elements.
+                ref Vector256<float> gBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
+
+                ref Vector256<float> resultBase =
+                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
+
+                // Used for the color conversion
+                var scale = Vector256.Create(1 / this.MaximumValue);
+                var one = Vector256.Create(1F);
+
+                // Used for packing
+                // NOTE(review): the mask contents are not visible here; presumably it reorders the
+                // inputs so the in-lane broadcasts below emit pixels in sequential order - confirm.
+                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    // Scale 8 gray values by 1/MaximumValue.
+                    Vector256<float> g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);
+
+                    g = Avx2.PermuteVar8x32(g, vcontrol);
+
+                    // Each iteration writes 4 Vector256<float>, i.e. 8 Vector4 results.
+                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                    // For in-lane index 0..3: broadcast that element across its 128-bit lane, then
+                    // blend with mask 0b1000_1000 so elements 3 and 7 (the W of each Vector4) come from 'one'.
+                    // The first store uses Avx.Permute; the others use Avx.Shuffle with both operands equal to g.
+                    destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
+                    Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
+                    Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
+                    Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
+                }
+#endif
+            }
+
+            // Non-vectorized path: reuse the static scalar implementation from FromGrayscaleBasic.
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromGrayscale : JpegColorConverter
+        internal sealed class FromGrayscaleBasic : BasicJpegColorConverter
        {
-            public FromGrayscale(int precision)
+            public FromGrayscaleBasic(int precision)
                : base(JpegColorSpace.Grayscale, precision)
            {
            }

            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
-                var maximum = 1 / this.MaximumValue;
+                ConvertCore(values, result, this.MaximumValue);
+            }
+
+            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
+            {
+                var maximum = 1 / maxValue;
                var scale = new Vector4(maximum, maximum, maximum, 1F);

                ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            }
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
@ -0,0 +1,72 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static SixLabors.ImageSharp.SimdUtils;
+#endif
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// RGB converter built on <see cref="Avx2JpegColorConverter"/>. The vectorized path uses
+        /// AVX/AVX2 intrinsics; the non-vectorized path delegates to <see cref="FromRgbBasic"/>.
+        /// </summary>
+        internal sealed class FromRgbAvx2 : Avx2JpegColorConverter
+        {
+            public FromRgbAvx2(int precision)
+                : base(JpegColorSpace.RGB, precision)
+            {
+            }
+
+            // Processes result.Length / 8 groups of 8 values per component; any remainder is not
+            // touched here. The whole body is compiled out when SUPPORTS_RUNTIME_INTRINSICS is undefined.
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                // View each component span (Component0..2 read as r, g, b) as Vector256<float> elements.
+                ref Vector256<float> rBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector256<float> gBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector256<float> bBase =
+                                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
+
+                // View the Vector4 output as Vector256<float> elements (two Vector4 per element).
+                ref Vector256<float> resultBase =
+                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
+
+                // Used for the color conversion
+                var scale = Vector256.Create(1 / this.MaximumValue);
+                var one = Vector256.Create(1F);
+
+                // Used for packing
+                // NOTE(review): the mask contents are not visible here; presumably it reorders the
+                // inputs so the in-lane unpack/shuffle below emits pixels in sequential order - confirm.
+                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    // Load 8 values per channel, apply the packing permutation, scale by 1/MaximumValue.
+                    Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
+                    Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
+                    Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);
+
+                    // Interleave within each 128-bit lane: (r, g) pairs and (b, 1) pairs.
+                    Vector256<float> rgLo = Avx.UnpackLow(r, g);
+                    Vector256<float> boLo = Avx.UnpackLow(b, one);
+                    Vector256<float> rgHi = Avx.UnpackHigh(r, g);
+                    Vector256<float> boHi = Avx.UnpackHigh(b, one);
+
+                    // Each iteration writes 4 Vector256<float>, i.e. 8 Vector4 results.
+                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                    // Combine an (r, g) pair with a (b, 1) pair so every 128-bit lane holds (r, g, b, 1).
+                    destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
+                    Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
+                    Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
+                    Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
+                }
+#endif
+            }
+
+            // Non-vectorized path: reuse the static scalar implementation from FromRgbBasic.
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromRgb : JpegColorConverter
+        internal sealed class FromRgbBasic : BasicJpegColorConverter
        {
-            public FromRgb(int precision)
+            public FromRgbBasic(int precision)
                : base(JpegColorSpace.RGB, precision)
            {
            }

            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
            {
-                // TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
+                ConvertCore(values, result, this.MaximumValue);
+            }
+
+            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
+            {
                ReadOnlySpan<float> rVals = values.Component0;
                ReadOnlySpan<float> gVals = values.Component1;
                ReadOnlySpan<float> bVals = values.Component2;

                var v = new Vector4(0, 0, 0, 1);

-                var maximum = 1 / this.MaximumValue;
+                var maximum = 1 / maxValue;
                var scale = new Vector4(maximum, maximum, maximum, 1F);

                for (int i = 0; i < result.Length; i++)
@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            }
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
@ -0,0 +1,67 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Tuples;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        internal sealed class FromRgbVector8 : Vector8JpegColorConverter
+        {
+            public FromRgbVector8(int precision)
+                : base(JpegColorSpace.RGB, precision)
+            {
+            }
+
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+                ref Vector<float> rBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector<float> gBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector<float> bBase =
+                                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
+
+                ref Vector4Octet resultBase =
+                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
+
+                Vector4Pair rr = default;
+                Vector4Pair gg = default;
+                Vector4Pair bb = default;
+                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
+                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
+                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
+
+                var scale = new Vector<float>(1 / this.MaximumValue); // Multiplying by the reciprocal maps 0..MaximumValue to 0..1.
+
+                // Walking 8 elements at one step (assumes Vector<float>.Count == 8 - TODO confirm against SimdUtils.HasVector8); the trailing result.Length % 8 elements are not touched by this loop:
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    Vector<float> r = Unsafe.Add(ref rBase, i);
+                    Vector<float> g = Unsafe.Add(ref gBase, i);
+                    Vector<float> b = Unsafe.Add(ref bBase, i);
+                    r *= scale;
+                    g *= scale;
+                    b *= scale;
+
+                    rrRefAsVector = r; // Writes into rr through its Vector<float> view so Pack can read it as a Vector4Pair.
+                    ggRefAsVector = g;
+                    bbRefAsVector = b;
+
+                    // Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
+                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
+                    destination.Pack(ref rr, ref gg, ref bb);
+                }
+            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs
@ -0,0 +1,101 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static SixLabors.ImageSharp.SimdUtils;
+#endif
+
+// ReSharper disable ImpureMethodCallOnReadonlyValueField
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter
+        {
+            public FromYCbCrAvx2(int precision)
+                : base(JpegColorSpace.YCbCr, precision)
+            {
+            }
+
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+                #if SUPPORTS_RUNTIME_INTRINSICS
+                ref Vector256<float> yBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector256<float> cbBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector256<float> crBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
+
+                ref Vector256<float> resultBase =
+                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
+
+                // Used for the color conversion (the g multipliers are negated so that every term can go through MultiplyAdd)
+                var chromaOffset = Vector256.Create(-this.HalfValue);
+                var scale = Vector256.Create(1 / this.MaximumValue);
+                var rCrMult = Vector256.Create(1.402F);
+                var gCbMult = Vector256.Create(-0.344136F);
+                var gCrMult = Vector256.Create(-0.714136F);
+                var bCbMult = Vector256.Create(1.772F);
+
+                // Used for packing: va supplies the constant 1 written to the fourth (W) component of every output Vector4, vcontrol is the permutation applied to y/cb/cr before the unpacks.
+                var va = Vector256.Create(1F);
+                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+                // Walking 8 elements at one step; the trailing result.Length % 8 elements are not touched by this loop:
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    // y = yVals[i];
+                    // cb = cbVals[i] - halfValue;
+                    // cr = crVals[i] - halfValue;
+                    Vector256<float> y = Unsafe.Add(ref yBase, i);
+                    Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
+                    Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
+
+                    y = Avx2.PermuteVar8x32(y, vcontrol);
+                    cb = Avx2.PermuteVar8x32(cb, vcontrol);
+                    cr = Avx2.PermuteVar8x32(cr, vcontrol);
+
+                    // r = y + (1.402F * cr);
+                    // g = y - (0.344136F * cb) - (0.714136F * cr);
+                    // b = y + (1.772F * cb);
+                    // Adding & multiplying 8 elements at one time:
+                    Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
+                    Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
+                    Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
+
+                    // TODO: We should be saving to RGBA not Vector4
+                    r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
+                    g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
+                    b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
+
+                    Vector256<float> vte = Avx.UnpackLow(r, b);
+                    Vector256<float> vto = Avx.UnpackLow(g, va);
+
+                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                    destination = Avx.UnpackLow(vte, vto);
+                    Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
+
+                    vte = Avx.UnpackHigh(r, b);
+                    vto = Avx.UnpackHigh(g, va);
+
+                    Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
+                    Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
+                }
+#endif
+            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs
@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromYCbCrBasic : JpegColorConverter
+        internal sealed class FromYCbCrBasic : BasicJpegColorConverter
        {
            public FromYCbCrBasic(int precision)
                : base(JpegColorSpace.YCbCr, precision)
@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            }
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs
@ -5,37 +5,24 @@ using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-
 using SixLabors.ImageSharp.Tuples;

 namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromYCbCrSimd : JpegColorConverter
+        internal sealed class FromYCbCrVector4 : VectorizedJpegColorConverter
        {
-            public FromYCbCrSimd(int precision)
-                : base(JpegColorSpace.YCbCr, precision)
+            public FromYCbCrVector4(int precision)
+                : base(JpegColorSpace.YCbCr, precision, 8)
            {
            }

-            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
-            {
-                int remainder = result.Length % 8;
-                int simdCount = result.Length - remainder;
-                if (simdCount > 0)
-                {
-                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
-                }
+            protected override bool IsAvailable => SimdUtils.HasVector4;

-                FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
-            }
-
-            /// <summary>
-            /// SIMD convert using buffers of sizes divisible by 8.
-            /// </summary>
-            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
+                // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
                DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!");

                ref Vector4Pair yBase =
@ -48,7 +35,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

-                var chromaOffset = new Vector4(-halfValue);
+                var chromaOffset = new Vector4(-this.HalfValue);
+                var maxValue = this.MaximumValue;

                // Walking 8 elements at one step:
                int n = result.Length / 8;
@ -87,31 +75,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                    tmp.MultiplyInplace(1.772F);
                    b.AddInplace(ref tmp);

-                    if (Vector<float>.Count == 4)
-                    {
-                        // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
-                        r.RoundAndDownscalePreVector8(maxValue);
-                        g.RoundAndDownscalePreVector8(maxValue);
-                        b.RoundAndDownscalePreVector8(maxValue);
-                    }
-                    else if (SimdUtils.HasVector8)
-                    {
-                        r.RoundAndDownscaleVector8(maxValue);
-                        g.RoundAndDownscaleVector8(maxValue);
-                        b.RoundAndDownscaleVector8(maxValue);
-                    }
-                    else
-                    {
-                        // TODO: Run fallback scalar code here
-                        // However, no issues expected before someone implements this: https://github.com/dotnet/coreclr/issues/12007
-                        JpegThrowHelper.ThrowNotImplementedException("Your CPU architecture is too modern!");
-                    }
+                    r.RoundAndDownscalePreVector8(maxValue);
+                    g.RoundAndDownscalePreVector8(maxValue);
+                    b.RoundAndDownscalePreVector8(maxValue);

                    // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
                    destination.Pack(ref r, ref g, ref b);
                }
            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
        }
    }
 }
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
@ -1,11 +1,10 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-
 using SixLabors.ImageSharp.Tuples;

 // ReSharper disable ImpureMethodCallOnReadonlyValueField
@ -13,40 +12,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromYCbCrSimdVector8 : JpegColorConverter
+        internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter
        {
-            public FromYCbCrSimdVector8(int precision)
+            public FromYCbCrVector8(int precision)
                : base(JpegColorSpace.YCbCr, precision)
            {
            }

-            public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
-
-            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
-            {
-                int remainder = result.Length % 8;
-                int simdCount = result.Length - remainder;
-                if (simdCount > 0)
-                {
-                    ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
-                }
-
-                FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
-            }
-
-            /// <summary>
-            /// SIMD convert using buffers of sizes divisible by 8.
-            /// </summary>
-            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
            {
-                // This implementation is actually AVX specific.
-                // An AVX register is capable of storing 8 float-s.
-                if (!IsAvailable)
-                {
-                    throw new InvalidOperationException(
-                        "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
-                }
-
                ref Vector<float> yBase =
                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
                ref Vector<float> cbBase =
@ -57,7 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                ref Vector4Octet resultBase =
                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));

-                var chromaOffset = new Vector<float>(-halfValue);
+                var chromaOffset = new Vector<float>(-this.HalfValue);

                // Walking 8 elements at one step:
                int n = result.Length / 8;
@ -70,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);

-                var scale = new Vector<float>(1 / maxValue);
+                var scale = new Vector<float>(1 / this.MaximumValue);

                for (int i = 0; i < n; i++)
                {
@ -105,6 +79,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                    destination.Pack(ref rr, ref gg, ref bb);
                }
            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
        }
    }
 }
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs
@ -0,0 +1,110 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static SixLabors.ImageSharp.SimdUtils;
+#endif
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        internal sealed class FromYccKAvx2 : Avx2JpegColorConverter
+        {
+            public FromYccKAvx2(int precision)
+                : base(JpegColorSpace.Ycck, precision)
+            {
+            }
+
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                ref Vector256<float> yBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector256<float> cbBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector256<float> crBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
+                ref Vector256<float> kBase =
+                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
+
+                ref Vector256<float> resultBase =
+                    ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
+
+                // Used for the color conversion (the g multipliers are negated so that every term can go through MultiplyAdd)
+                var chromaOffset = Vector256.Create(-this.HalfValue);
+                var scale = Vector256.Create(1 / this.MaximumValue);
+                var max = Vector256.Create(this.MaximumValue);
+                var rCrMult = Vector256.Create(1.402F);
+                var gCbMult = Vector256.Create(-0.344136F);
+                var gCrMult = Vector256.Create(-0.714136F);
+                var bCbMult = Vector256.Create(1.772F);
+
+                // Used for packing: va supplies the constant 1 written to the fourth (W) component of every output Vector4, vcontrol is the permutation applied to y/cb/cr/k before the unpacks.
+                var va = Vector256.Create(1F);
+                ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
+                Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
+
+                // Walking 8 elements at one step; the trailing result.Length % 8 elements are not touched by this loop:
+                int n = result.Length / 8;
+                for (int i = 0; i < n; i++)
+                {
+                    // y = yVals[i];
+                    // cb = cbVals[i] - halfValue;
+                    // cr = crVals[i] - halfValue;
+                    // k = kVals[i] / maxValue;
+                    Vector256<float> y = Unsafe.Add(ref yBase, i);
+                    Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
+                    Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
+                    Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);
+
+                    y = Avx2.PermuteVar8x32(y, vcontrol);
+                    cb = Avx2.PermuteVar8x32(cb, vcontrol);
+                    cr = Avx2.PermuteVar8x32(cr, vcontrol);
+                    k = Avx2.PermuteVar8x32(k, vcontrol);
+
+                    // r = y + (1.402F * cr);
+                    // g = y - (0.344136F * cb) - (0.714136F * cr);
+                    // b = y + (1.772F * cb);
+                    // Adding & multiplying 8 elements at one time:
+                    Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
+                    Vector256<float> g =
+                        HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
+                    Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
+
+                    r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); // NOTE(review): presumably rounds ties to even, while FromYccKBasic uses MidpointRounding.AwayFromZero - confirm.
+                    g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
+                    b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));
+
+                    r = Avx.Multiply(Avx.Multiply(r, k), scale);
+                    g = Avx.Multiply(Avx.Multiply(g, k), scale);
+                    b = Avx.Multiply(Avx.Multiply(b, k), scale);
+
+                    Vector256<float> vte = Avx.UnpackLow(r, b);
+                    Vector256<float> vto = Avx.UnpackLow(g, va);
+
+                    ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
+
+                    destination = Avx.UnpackLow(vte, vto);
+                    Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
+
+                    vte = Avx.UnpackHigh(r, b);
+                    vto = Avx.UnpackHigh(g, va);
+
+                    Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
+                    Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
+                }
+#endif
+            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
 {
    internal abstract partial class JpegColorConverter
    {
-        internal sealed class FromYccK : JpegColorConverter
+        internal sealed class FromYccKBasic : BasicJpegColorConverter
        {
-            public FromYccK(int precision)
+            public FromYccKBasic(int precision)
                : base(JpegColorSpace.Ycck, precision)
            {
            }

            public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
+            {
+                ConvertCore(values, result, this.MaximumValue, this.HalfValue);
+            }
+
+            internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
            {
                // TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
                ReadOnlySpan<float> yVals = values.Component0;
@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters

                var v = new Vector4(0, 0, 0, 1F);

-                var maximum = 1 / this.MaximumValue;
+                var maximum = 1 / maxValue;
                var scale = new Vector4(maximum, maximum, maximum, 1F);

                for (int i = 0; i < result.Length; i++)
                {
                    float y = yVals[i];
-                    float cb = cbVals[i] - this.HalfValue;
-                    float cr = crVals[i] - this.HalfValue;
-                    float k = kVals[i] / this.MaximumValue;
+                    float cb = cbVals[i] - halfValue;
+                    float cr = crVals[i] - halfValue;
+                    float k = kVals[i] / maxValue;

-                    v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
-                    v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
-                    v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
+                    v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
+                    v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
+                    v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
                    v.W = 1F;

                    v *= scale;
@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            }
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs
@ -0,0 +1,91 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Tuples;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        internal sealed class FromYccKVector8 : Vector8JpegColorConverter
+        {
+            public FromYccKVector8(int precision)
+                : base(JpegColorSpace.Ycck, precision)
+            {
+            }
+
+            protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
+            {
+                ref Vector<float> yBase =
+                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
+                ref Vector<float> cbBase =
+                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
+                ref Vector<float> crBase =
+                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
+                ref Vector<float> kBase =
+                    ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
+
+                ref Vector4Octet resultBase =
+                    ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
+
+                var chromaOffset = new Vector<float>(-this.HalfValue);
+
+                // Walking 8 elements at one step (assumes Vector<float>.Count == 8 - TODO confirm against SimdUtils.HasVector8); the trailing result.Length % 8 elements are not touched by this loop:
+                int n = result.Length / 8;
+
+                Vector4Pair rr = default;
+                Vector4Pair gg = default;
+                Vector4Pair bb = default;
+
+                ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
+                ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
+                ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
+
+                var scale = new Vector<float>(1 / this.MaximumValue);
+                var max = new Vector<float>(this.MaximumValue);
+
+                for (int i = 0; i < n; i++)
+                {
+                    // y = yVals[i];
+                    // cb = cbVals[i] - halfValue;
+                    // cr = crVals[i] - halfValue;
+                    // k = kVals[i] / maxValue;
+                    Vector<float> y = Unsafe.Add(ref yBase, i);
+                    Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
+                    Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
+                    Vector<float> k = Unsafe.Add(ref kBase, i) / max;
+
+                    // r = y + (1.402F * cr);
+                    // g = y - (0.344136F * cb) - (0.714136F * cr);
+                    // b = y + (1.772F * cb);
+                    // Adding & multiplying 8 elements at one time:
+                    Vector<float> r = y + (cr * new Vector<float>(1.402F));
+                    Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
+                    Vector<float> b = y + (cb * new Vector<float>(1.772F));
+
+                    r = (max - r.FastRound()) * k;
+                    g = (max - g.FastRound()) * k;
+                    b = (max - b.FastRound()) * k;
+                    r *= scale;
+                    g *= scale;
+                    b *= scale;
+
+                    rrRefAsVector = r; // Writes into rr through its Vector<float> view so Pack can read it as a Vector4Pair.
+                    ggRefAsVector = g;
+                    bbRefAsVector = b;
+
+                    // Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
+                    ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
+                    destination.Pack(ref rr, ref gg, ref bb);
+                }
+            }
+
+            protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
+                FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs
@ -0,0 +1,18 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// Base class for converters whose vectorized part works on multiples of 8 elements
+        /// and which are only available when <see cref="SimdUtils.HasVector8"/> is true.
+        /// </summary>
+        internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter
+        {
+            // The vector size handed to the base class.
+            private const int ElementsPerVector = 8;
+
+            /// <summary>
+            /// Initializes a new instance of the <see cref="Vector8JpegColorConverter"/> class.
+            /// </summary>
+            /// <param name="colorSpace">The color space, forwarded to the base constructor.</param>
+            /// <param name="precision">The precision, forwarded to the base constructor.</param>
+            protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision)
+                : base(colorSpace, precision, ElementsPerVector)
+            {
+            }
+
+            /// <inheritdoc/>
+            protected sealed override bool IsAvailable
+            {
+                get { return SimdUtils.HasVector8; }
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs
@ -0,0 +1,46 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
+{
+    internal abstract partial class JpegColorConverter
+    {
+        /// <summary>
+        /// Base class for converters that split the conversion into a vectorized part
+        /// (a whole multiple of the vector size) and a scalar remainder.
+        /// </summary>
+        internal abstract class VectorizedJpegColorConverter : JpegColorConverter
+        {
+            private readonly int vectorSize;
+
+            /// <summary>
+            /// Initializes a new instance of the <see cref="VectorizedJpegColorConverter"/> class.
+            /// </summary>
+            /// <param name="colorSpace">The color space, forwarded to the base constructor.</param>
+            /// <param name="precision">The precision, forwarded to the base constructor.</param>
+            /// <param name="vectorSize">The number of elements the vectorized part is aligned to.</param>
+            protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize)
+                : base(colorSpace, precision)
+            {
+                this.vectorSize = vectorSize;
+            }
+
+            /// <inheritdoc/>
+            /// <exception cref="InvalidOperationException">
+            /// Thrown when there is vectorized work to do but the converter is not available.
+            /// </exception>
+            public sealed override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
+            {
+                // Only a whole multiple of vectorSize goes through the vectorized path;
+                // the trailing elements are always handled by ConvertCore.
+                int remainder = result.Length % this.vectorSize;
+                int simdCount = result.Length - remainder;
+                if (simdCount > 0)
+                {
+                    // This implementation is actually AVX specific.
+                    // An AVX register is 256 bits wide, i.e. capable of storing 8 float-s.
+                    if (!this.IsAvailable)
+                    {
+                        throw new InvalidOperationException(
+                            "This converter can be used only on architecture having 256 bit floating point SIMD registers!");
+                    }
+
+                    this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount));
+                }
+
+                this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder));
+            }
+
+            /// <summary>
+            /// Converts the part of the input whose length is a whole multiple of the vector size.
+            /// </summary>
+            /// <param name="values">The input component values.</param>
+            /// <param name="result">The destination span.</param>
+            protected abstract void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result);
+
+            /// <summary>
+            /// Converts the remaining elements that did not fit into the vectorized part.
+            /// </summary>
+            /// <param name="values">The input component values.</param>
+            /// <param name="result">The destination span.</param>
+            protected abstract void ConvertCore(in ComponentValues values, Span<Vector4> result);
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
@ -3,8 +3,8 @@

 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Numerics;
-
 using SixLabors.ImageSharp.Memory;
 using SixLabors.ImageSharp.Tuples;

@ -18,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
        /// <summary>
        /// The available converters
        /// </summary>
-        private static readonly JpegColorConverter[] Converters =
-            {
-                // 8-bit converters
-                GetYCbCrConverter(8),
-                new FromYccK(8),
-                new FromCmyk(8),
-                new FromGrayscale(8),
-                new FromRgb(8),
-
-                // 12-bit converters
-                GetYCbCrConverter(12),
-                new FromYccK(12),
-                new FromCmyk(12),
-                new FromGrayscale(12),
-                new FromRgb(12),
-            };
+        private static readonly JpegColorConverter[] Converters = CreateConverters();

        /// <summary>
        /// Initializes a new instance of the <see cref="JpegColorConverter"/> class.
@ -46,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
            this.HalfValue = MathF.Ceiling(this.MaximumValue / 2);
        }

+        /// <summary>
+        /// Gets a value indicating whether this <see cref="JpegColorConverter"/> is available
+        /// on the current runtime and CPU architecture.
+        /// </summary>
+        protected abstract bool IsAvailable { get; }
+
        /// <summary>
        /// Gets the <see cref="JpegColorSpace"/> of this converter.
        /// </summary>
@ -71,8 +62,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
        /// </summary>
        public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int precision)
        {
-            JpegColorConverter converter = Array.Find(Converters, c => c.ColorSpace == colorSpace
-                                                                    && c.Precision == precision);
+            JpegColorConverter converter = Array.Find(
+                Converters,
+                c => c.ColorSpace == colorSpace
+                && c.Precision == precision);

            if (converter is null)
            {
@ -90,10 +83,88 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
        public abstract void ConvertToRgba(in ComponentValues values, Span<Vector4> result);

        /// <summary>
-        /// Returns the <see cref="JpegColorConverter"/> for the YCbCr colorspace that matches the current CPU architecture.
+        /// Returns the <see cref="JpegColorConverter"/>s for all supported colorspaces and precisions.
+        /// </summary>
+        private static JpegColorConverter[] CreateConverters()
+        {
+            var candidates = new List<JpegColorConverter>();
+
+            // 8-bit converters are registered before 12-bit ones. Within each precision
+            // the color spaces are added in a fixed order: YCbCr, YccK, CMYK, grayscale, RGB.
+            foreach (int precision in new[] { 8, 12 })
+            {
+                candidates.AddRange(GetYCbCrConverters(precision));
+                candidates.AddRange(GetYccKConverters(precision));
+                candidates.AddRange(GetCmykConverters(precision));
+                candidates.AddRange(GetGrayScaleConverters(precision));
+                candidates.AddRange(GetRgbConverters(precision));
+            }
+
+            // Drop every converter that reports itself as unavailable.
+            return candidates.Where(candidate => candidate.IsAvailable).ToArray();
+        }
+
+        /// <summary>
+        /// Returns the <see cref="JpegColorConverter"/>s for the YCbCr colorspace.
+        /// </summary>
+        /// <param name="precision">The precision passed to each converter's constructor.</param>
+        /// <returns>The candidate converters, in the order they should be registered.</returns>
+        private static IEnumerable<JpegColorConverter> GetYCbCrConverters(int precision)
+        {
+            // The yield order is the selection priority: GetConverter takes the first
+            // registered converter matching the color space and precision.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Only compiled in when SUPPORTS_RUNTIME_INTRINSICS is defined.
+            yield return new FromYCbCrAvx2(precision);
+#endif
+            yield return new FromYCbCrVector8(precision);
+            yield return new FromYCbCrVector4(precision);
+            yield return new FromYCbCrBasic(precision);
+        }
+
+        /// <summary>
+        /// Returns the <see cref="JpegColorConverter"/>s for the YccK colorspace.
+        /// </summary>
+        /// <param name="precision">The precision passed to each converter's constructor.</param>
+        /// <returns>The candidate converters, in the order they should be registered.</returns>
+        private static IEnumerable<JpegColorConverter> GetYccKConverters(int precision)
+        {
+            // The yield order is the selection priority: GetConverter takes the first
+            // registered converter matching the color space and precision.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Only compiled in when SUPPORTS_RUNTIME_INTRINSICS is defined.
+            yield return new FromYccKAvx2(precision);
+#endif
+            yield return new FromYccKVector8(precision);
+            yield return new FromYccKBasic(precision);
+        }
+
+        /// <summary>
+        /// Returns the <see cref="JpegColorConverter"/>s for the CMYK colorspace.
+        /// </summary>
+        /// <param name="precision">The precision passed to each converter's constructor.</param>
+        /// <returns>The candidate converters, in the order they should be registered.</returns>
+        private static IEnumerable<JpegColorConverter> GetCmykConverters(int precision)
+        {
+            // The yield order is the selection priority: GetConverter takes the first
+            // registered converter matching the color space and precision.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Only compiled in when SUPPORTS_RUNTIME_INTRINSICS is defined.
+            yield return new FromCmykAvx2(precision);
+#endif
+            yield return new FromCmykVector8(precision);
+            yield return new FromCmykBasic(precision);
+        }
+
+        /// <summary>
+        /// Returns the <see cref="JpegColorConverter"/>s for the gray scale colorspace.
+        /// </summary>
+        /// <param name="precision">The precision passed to each converter's constructor.</param>
+        /// <returns>The candidate converters, in the order they should be registered.</returns>
+        private static IEnumerable<JpegColorConverter> GetGrayScaleConverters(int precision)
+        {
+            // The yield order is the selection priority: GetConverter takes the first
+            // registered converter matching the color space and precision.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Only compiled in when SUPPORTS_RUNTIME_INTRINSICS is defined.
+            yield return new FromGrayscaleAvx2(precision);
+#endif
+            yield return new FromGrayscaleBasic(precision);
+        }
+
+        /// <summary>
+        /// Returns the <see cref="JpegColorConverter"/>s for the RGB colorspace.
        /// </summary>
-        private static JpegColorConverter GetYCbCrConverter(int precision) =>
-            FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision);
+        private static IEnumerable<JpegColorConverter> GetRgbConverters(int precision)
+        {
+            // The yield order is the selection priority: GetConverter takes the first
+            // registered converter matching the color space and precision.
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Only compiled in when SUPPORTS_RUNTIME_INTRINSICS is defined.
+            yield return new FromRgbAvx2(precision);
+#endif
+            yield return new FromRgbVector8(precision);
+            yield return new FromRgbBasic(precision);
+        }

        /// <summary>
        /// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s.
@ -230,6 +301,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
                this.V7.Z = b.B.W;
                this.V7.W = 1f;
            }
+
+            /// <summary>
+            /// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ...
+            /// Each input value is replicated into the X, Y and Z components of one output vector
+            /// and W is set to 1.
+            /// </summary>
+            /// <param name="g">The 8 input values: <c>g.A</c> supplies g0..g3 and <c>g.B</c> supplies g4..g7.</param>
+            public void Pack(ref Vector4Pair g)
+            {
+                // g0..g3 come from the components of g.A
+                this.V0.X = g.A.X;
+                this.V0.Y = g.A.X;
+                this.V0.Z = g.A.X;
+                this.V0.W = 1f;
+
+                this.V1.X = g.A.Y;
+                this.V1.Y = g.A.Y;
+                this.V1.Z = g.A.Y;
+                this.V1.W = 1f;
+
+                this.V2.X = g.A.Z;
+                this.V2.Y = g.A.Z;
+                this.V2.Z = g.A.Z;
+                this.V2.W = 1f;
+
+                this.V3.X = g.A.W;
+                this.V3.Y = g.A.W;
+                this.V3.Z = g.A.W;
+                this.V3.W = 1f;
+
+                // g4..g7 come from the components of g.B
+                this.V4.X = g.B.X;
+                this.V4.Y = g.B.X;
+                this.V4.Z = g.B.X;
+                this.V4.W = 1f;
+
+                this.V5.X = g.B.Y;
+                this.V5.Y = g.B.Y;
+                this.V5.Z = g.B.Y;
+                this.V5.W = 1f;
+
+                this.V6.X = g.B.Z;
+                this.V6.Y = g.B.Z;
+                this.V6.Z = g.B.Z;
+                this.V6.W = 1f;
+
+                this.V7.X = g.B.W;
+                this.V7.Y = g.B.W;
+                this.V7.Z = g.B.W;
+                this.V7.W = 1f;
+            }
        }
    }
 }
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            b.LoadFrom(ref sourceBlock);

            // Dequantize:
-            b.MultiplyInplace(ref this.DequantiazationTable);
+            b.MultiplyInPlace(ref this.DequantiazationTable);

            FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);

            // To conform better to libjpeg we actually NEED TO loose precision here.
            // This is because they store blocks as Int16 between all the operations.
            // To be "more accurate", we need to emulate this by rounding!
-            this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue);
+            this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue);

            this.WorkspaceBlock1.ScaledCopyTo(
                ref destAreaOrigin,
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
@ -1,16 +1,17 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

+using System;
 using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;

 namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
 {
    /// <summary>
    /// Provides 8-bit lookup tables for converting from Rgb to YCbCr colorspace.
    /// Methods to build the tables are based on libjpeg implementation.
-    /// TODO: Replace this logic with SIMD conversion (similar to the one in the decoder)!
    /// </summary>
-    internal unsafe struct RgbToYCbCrTables
+    internal unsafe struct RgbToYCbCrConverterLut
    {
        /// <summary>
        /// The red luminance table
@ -63,10 +64,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        /// <summary>
        /// Initializes the YCbCr tables
        /// </summary>
-        /// <returns>The initialized <see cref="RgbToYCbCrTables"/></returns>
-        public static RgbToYCbCrTables Create()
+        /// <returns>The initialized <see cref="RgbToYCbCrConverterLut"/></returns>
+        public static RgbToYCbCrConverterLut Create()
        {
-            RgbToYCbCrTables tables = default;
+            RgbToYCbCrConverterLut tables = default;

            for (int i = 0; i <= 255; i++)
            {
@ -92,11 +93,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        }

        /// <summary>
-        /// TODO: Replace this logic with SIMD conversion (similar to the one in the decoder)!
        /// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public void ConvertPixelInto(
+        private void ConvertPixelInto(
            int r,
            int g,
            int b,
@ -111,10 +111,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
            // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
            cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;

-            // float cr = MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero);
+            // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
            crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
        }

+        /// <summary>
+        /// Converts the first 64 <see cref="Rgb24"/> pixels of <paramref name="rgbSpan"/> using the lookup tables,
+        /// storing the result of pixel <c>i</c> at index <c>i</c> of the destination blocks.
+        /// </summary>
+        /// <param name="rgbSpan">The source pixels; 64 elements are read.</param>
+        /// <param name="yBlock">The destination block for the Y values.</param>
+        /// <param name="cbBlock">The destination block for the Cb values.</param>
+        /// <param name="crBlock">The destination block for the Cr values.</param>
+        public void Convert(Span<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+        {
+            // Only element 0 is bounds checked; the rest is addressed relative to it.
+            ref Rgb24 first = ref rgbSpan[0];
+
+            int index = 0;
+            while (index < 64)
+            {
+                ref Rgb24 pixel = ref Unsafe.Add(ref first, index);
+                this.ConvertPixelInto(pixel.R, pixel.G, pixel.B, ref yBlock, ref cbBlock, ref crBlock, index);
+                index++;
+            }
+        }
+
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int Fix(float x)
            => (int)((x * (1L << ScaleBits)) + 0.5F);
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@ -0,0 +1,120 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
+{
+    /// <summary>
+    /// Converts a block of <see cref="Rgb24"/> pixels into separate Y, Cb and Cr blocks
+    /// using AVX2 hardware intrinsics, 8 pixels per step.
+    /// </summary>
+    internal static class RgbToYCbCrConverterVectorized
+    {
+        /// <summary>
+        /// Gets a value indicating whether the vectorized converter can be used:
+        /// <c>Avx2.IsSupported</c> when compiled with SUPPORTS_RUNTIME_INTRINSICS, otherwise false.
+        /// </summary>
+        public static bool IsSupported
+        {
+            get
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                return Avx2.IsSupported;
+#else
+                return false;
+#endif
+            }
+        }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+        // Control vector for PermuteVar8x32, read as eight little-endian 32-bit indices
+        // (0, 1, 2, 6, 3, 4, 5, 7): source dwords 0-2 go to the lower 128-bit lane and
+        // dwords 3-5 to the upper lane, so the first 24 bytes of a 32 byte load are
+        // split into 12 bytes per lane.
+        private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
+        {
+            0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
+            3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
+        };
+
+        // Same idea for the last 24 bytes of a 32 byte load, indices (2, 3, 4, 0, 5, 6, 7, 1):
+        // source dwords 2-4 go to the lower lane and dwords 5-7 to the upper lane.
+        private static ReadOnlySpan<byte> MoveLast24BytesToSeparateLanes => new byte[]
+        {
+            2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
+            5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0
+        };
+
+        // Per-lane byte shuffle mask: gathers bytes 0,3,6,9 then 1,4,7,10 then 2,5,8,11 of the
+        // 12 packed pixel bytes, i.e. de-interleaves 4 pixels into 4 x first, 4 x second and
+        // 4 x third channel bytes (used below as r, g and b). The 0xFF entries zero the output byte.
+        private static ReadOnlySpan<byte> ExtractRgb => new byte[]
+        {
+            0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+            0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF
+        };
+#endif
+
+        /// <summary>
+        /// Converts the pixels in <paramref name="rgbSpan"/> to Y, Cb and Cr values, writing eight
+        /// 8-float vectors into each destination block.
+        /// When SUPPORTS_RUNTIME_INTRINSICS is not defined the method body only contains the debug assertion.
+        /// </summary>
+        /// <param name="rgbSpan">The source pixels; 192 bytes are read starting at the first element (no length check is made).</param>
+        /// <param name="yBlock">The destination block for the Y values.</param>
+        /// <param name="cbBlock">The destination block for the Cb values.</param>
+        /// <param name="crBlock">The destination block for the Cr values.</param>
+        public static void Convert(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+        {
+            Debug.Assert(IsSupported, "AVX2 is required to run this converter");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Broadcast the conversion coefficients once, outside of the loop.
+            var f0299 = Vector256.Create(0.299f);
+            var f0587 = Vector256.Create(0.587f);
+            var f0114 = Vector256.Create(0.114f);
+            var fn0168736 = Vector256.Create(-0.168736f);
+            var fn0331264 = Vector256.Create(-0.331264f);
+            var f128 = Vector256.Create(128f);
+            var fn0418688 = Vector256.Create(-0.418688f);
+            var fn0081312F = Vector256.Create(-0.081312F);
+            var f05 = Vector256.Create(0.5f);
+            var zero = Vector256.Create(0).AsByte();
+
+            // Reinterpret the source and the destination blocks as 256-bit vectors.
+            ref Vector256<byte> inRef = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
+            ref Vector256<float> destYRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref yBlock);
+            ref Vector256<float> destCbRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock);
+            ref Vector256<float> destCrRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock);
+
+            var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
+            var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
+            Vector256<byte> rgb, rg, bx;
+            Vector256<float> r, g, b;
+
+            // The first 7 groups: each step loads 32 bytes at byte offset 24 * i and uses the
+            // first 24 of them (8 pixels at 3 bytes per pixel).
+            for (int i = 0; i < 7; i++)
+            {
+                rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();
+
+                rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+                // Interleaving with zero bytes widens the channel bytes: first to 16 bits
+                // (rg holds r and g, bx holds b), then to 32 bits before the float conversion.
+                rg = Avx2.UnpackLow(rgb, zero);
+                bx = Avx2.UnpackHigh(rgb, zero);
+
+                r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+                g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+                b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+
+                // (0.299F * r) + (0.587F * g) + (0.114F * b);
+                Unsafe.Add(ref destYRef, i) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+
+                // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
+                Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+
+                // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
+                Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+            }
+
+            // The last group is loaded at byte offset 160 instead of 168, so the 32 byte load ends
+            // exactly at byte 192; the pixel data is then taken from the last 24 bytes of the load.
+            // NOTE(review): presumably done to avoid reading past the end of a 64 pixel buffer - confirm.
+            extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes));
+            rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
+            rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+            rg = Avx2.UnpackLow(rgb, zero);
+            bx = Avx2.UnpackHigh(rgb, zero);
+
+            r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+            g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+            b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+
+            // (0.299F * r) + (0.587F * g) + (0.114F * b);
+            Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+
+            // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
+            Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+
+            // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
+            Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+#endif
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
@ -2,7 +2,6 @@
 // Licensed under the Apache License, Version 2.0.

 using System;
-using System.Runtime.CompilerServices;
 using SixLabors.ImageSharp.Advanced;
 using SixLabors.ImageSharp.PixelFormats;

@ -33,7 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        /// <summary>
        /// The color conversion tables
        /// </summary>
-        private RgbToYCbCrTables colorTables;
+        private RgbToYCbCrConverterLut colorTables;

        /// <summary>
        /// Temporal 8x8 block to hold TPixel data
@ -48,16 +47,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public static YCbCrForwardConverter<TPixel> Create()
        {
            var result = default(YCbCrForwardConverter<TPixel>);
-            result.colorTables = RgbToYCbCrTables.Create();
+            if (!RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                // Avoid creating lookup tables, when vectorized converter is supported
+                result.colorTables = RgbToYCbCrConverterLut.Create();
+            }
+
            return result;
        }

        /// <summary>
        /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (<see cref="Y"/>, <see cref="Cb"/>, <see cref="Cr"/>)
        /// </summary>
-        public void Convert(ImageFrame<TPixel> frame, int x, int y, in RowOctet<TPixel> currentRows)
+        public void Convert(ImageFrame<TPixel> frame, int x, int y, ref RowOctet<TPixel> currentRows)
        {
-            this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, currentRows);
+            this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows);

            Span<Rgb24> rgbSpan = this.rgbBlock.AsSpanUnsafe();
            PixelOperations<TPixel>.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan);
@ -65,20 +69,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
            ref Block8x8F yBlock = ref this.Y;
            ref Block8x8F cbBlock = ref this.Cb;
            ref Block8x8F crBlock = ref this.Cr;
-            ref Rgb24 rgbStart = ref rgbSpan[0];

-            for (int i = 0; i < 64; i++)
+            if (RgbToYCbCrConverterVectorized.IsSupported)
            {
-                ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i);
-
-                this.colorTables.ConvertPixelInto(
-                    c.R,
-                    c.G,
-                    c.B,
-                    ref yBlock,
-                    ref cbBlock,
-                    ref crBlock,
-                    i);
+                RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
+            }
+            else
+            {
+                this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
            }
        }
    }
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System.Numerics;
@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// <param name="temp">Temporary block provided by the caller</param>
        public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
        {
-            // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
-            // https://github.com/dotnet/corefx/issues/22940
            src.TransposeInto(ref temp);

            IDCT8x4_LeftPart(ref temp, ref dest);
@ -63,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            IDCT8x4_RightPart(ref temp, ref dest);

            // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
-            dest.MultiplyInplace(C_0_125);
+            dest.MultiplyInPlace(C_0_125);
        }

        /// <summary>
@ -326,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            src.TransposeInto(ref temp);
            if (offsetSourceByNeg128)
            {
-                temp.AddToAllInplace(new Vector4(-128));
+                temp.AddInPlace(-128F);
            }

            FDCT8x4_LeftPart(ref temp, ref dest);
@ -337,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            FDCT8x4_LeftPart(ref temp, ref dest);
            FDCT8x4_RightPart(ref temp, ref dest);

-            dest.MultiplyInplace(C_0_125);
+            dest.MultiplyInPlace(C_0_125);
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs
@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// Load a 8x8 region of an image into the block.
        /// The "outlying" area of the block will be stretched out with pixels on the right and bottom edge of the image.
        /// </summary>
-        public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, in RowOctet<T> currentRows)
+        public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, ref RowOctet<T> currentRows)
        {
            int width = Math.Min(8, source.Width - sourceX);
            int height = Math.Min(8, source.Height - sourceY);
--- a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
@ -12,39 +12,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
    /// Cache 8 pixel rows on the stack, which may originate from different buffers of a <see cref="MemoryGroup{T}"/>.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
-    internal readonly ref struct RowOctet<T>
+    internal ref struct RowOctet<T>
        where T : struct
    {
-        private readonly Span<T> row0;
-        private readonly Span<T> row1;
-        private readonly Span<T> row2;
-        private readonly Span<T> row3;
-        private readonly Span<T> row4;
-        private readonly Span<T> row5;
-        private readonly Span<T> row6;
-        private readonly Span<T> row7;
-
-        public RowOctet(Buffer2D<T> buffer, int startY)
-        {
-            int y = startY;
-            int height = buffer.Height;
-            this.row0 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row1 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row2 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row3 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row4 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row5 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row6 = y < height ? buffer.GetRowSpan(y++) : default;
-            this.row7 = y < height ? buffer.GetRowSpan(y) : default;
-        }
+        private Span<T> row0;
+        private Span<T> row1;
+        private Span<T> row2;
+        private Span<T> row3;
+        private Span<T> row4;
+        private Span<T> row5;
+        private Span<T> row6;
+        private Span<T> row7;

+        // No unsafe tricks, since Span<T> can't be used as a generic argument
        public Span<T> this[int y]
        {
-            [MethodImpl(InliningOptions.ShortMethod)]
-            get
-            {
-                // No unsafe tricks, since Span<T> can't be used as a generic argument
-                return y switch
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get =>
+                y switch
                {
                    0 => this.row0,
                    1 => this.row1,
@ -56,13 +41,57 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
                    7 => this.row7,
                    _ => ThrowIndexOutOfRangeException()
                };
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            private set
+            {
+                switch (y)
+                {
+                    case 0:
+                        this.row0 = value;
+                        break;
+                    case 1:
+                        this.row1 = value;
+                        break;
+                    case 2:
+                        this.row2 = value;
+                        break;
+                    case 3:
+                        this.row3 = value;
+                        break;
+                    case 4:
+                        this.row4 = value;
+                        break;
+                    case 5:
+                        this.row5 = value;
+                        break;
+                    case 6:
+                        this.row6 = value;
+                        break;
+                    default:
+                        this.row7 = value;
+                        break;
+                }
            }
        }

-        [MethodImpl(InliningOptions.ColdPath)]
-        private static Span<T> ThrowIndexOutOfRangeException()
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void Update(Buffer2D<T> buffer, int startY)
        {
-            throw new IndexOutOfRangeException();
+            // We don't actually have to assign values outside of the
+            // frame pixel buffer since they are never requested.
+            int y = startY;
+            int yEnd = Math.Min(y + 8, buffer.Height);
+
+            int i = 0;
+            while (y < yEnd)
+            {
+                this[i++] = buffer.GetRowSpan(y++);
+            }
        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private static Span<T> ThrowIndexOutOfRangeException()
+        => throw new IndexOutOfRangeException();
    }
 }
--- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
@ -6,6 +6,7 @@ using System.Buffers.Binary;
 using System.IO;
 using System.Linq;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Threading;
 using SixLabors.ImageSharp.Common.Helpers;
 using SixLabors.ImageSharp.Formats.Jpeg.Components;
@ -212,8 +213,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            ImageMetadata metadata = image.Metadata;

            // System.Drawing produces identical output for jpegs with a quality parameter of 0 and 1.
-            int qlty = (this.quality ?? metadata.GetJpegMetadata().Quality).Clamp(1, 100);
-            this.subsample = this.subsample ?? (qlty >= 91 ? JpegSubsample.Ratio444 : JpegSubsample.Ratio420);
+            int qlty = Numerics.Clamp(this.quality ?? metadata.GetJpegMetadata().Quality, 1, 100);
+            this.subsample ??= qlty >= 91 ? JpegSubsample.Ratio444 : JpegSubsample.Ratio420;

            // Convert from a quality rating to a scaling factor.
            int scale;
@ -313,7 +314,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// </summary>
        /// <param name="bits">The packed bits.</param>
        /// <param name="count">The number of bits</param>
-        private void Emit(uint bits, uint count)
+        /// <param name="emitBufferBase">The reference to the emitBuffer.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private void Emit(uint bits, uint count, ref byte emitBufferBase)
        {
            count += this.bitCount;
            bits <<= (int)(32 - count);
@ -327,10 +330,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                while (count >= 8)
                {
                    byte b = (byte)(bits >> 24);
-                    this.emitBuffer[len++] = b;
-                    if (b == 0xff)
+                    Unsafe.Add(ref emitBufferBase, len++) = b;
+                    if (b == byte.MaxValue)
                    {
-                        this.emitBuffer[len++] = 0x00;
+                        Unsafe.Add(ref emitBufferBase, len++) = byte.MinValue;
                    }

                    bits <<= 8;
@ -352,11 +355,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// </summary>
        /// <param name="index">The index of the Huffman encoder</param>
        /// <param name="value">The value to encode.</param>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private void EmitHuff(HuffIndex index, int value)
+        /// <param name="emitBufferBase">The reference to the emit buffer.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase)
        {
            uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value];
+
+            // The low 24 bits of the table entry are passed on as the bits to emit,
+            // and the top 8 bits are passed on as the number of bits.
-            this.Emit(x & ((1 << 24) - 1), x >> 24);
+            this.Emit(x & ((1 << 24) - 1), x >> 24, ref emitBufferBase);
        }

        /// <summary>
@ -365,8 +369,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// <param name="index">The index of the Huffman encoder</param>
        /// <param name="runLength">The number of copies to encode.</param>
        /// <param name="value">The value to encode.</param>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private void EmitHuffRLE(HuffIndex index, int runLength, int value)
+        /// <param name="emitBufferBase">The reference to the emit buffer.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase)
        {
            int a = value;
            int b = value;
@ -386,10 +391,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                bt = 8 + (uint)BitCountLut[a >> 8];
            }

-            this.EmitHuff(index, (int)((uint)(runLength << 4) | bt));
+            this.EmitHuff(index, (int)((uint)(runLength << 4) | bt), ref emitBufferBase);
            if (bt > 0)
            {
-                this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt);
+                this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt, ref emitBufferBase);
            }
        }

@ -399,7 +404,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// <typeparam name="TPixel">The pixel format.</typeparam>
        /// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
        /// <param name="cancellationToken">The token to monitor for cancellation.</param>
-        private void Encode444<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken)
+        /// <param name="emitBufferBase">The reference to the emit buffer.</param>
+        private void Encode444<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken, ref byte emitBufferBase)
            where TPixel : unmanaged, IPixel<TPixel>
        {
            // TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
@ -418,15 +424,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            var pixelConverter = YCbCrForwardConverter<TPixel>.Create();
            ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
            Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
+            RowOctet<TPixel> currentRows = default;

            for (int y = 0; y < pixels.Height; y += 8)
            {
                cancellationToken.ThrowIfCancellationRequested();
-                var currentRows = new RowOctet<TPixel>(pixelBuffer, y);
+                currentRows.Update(pixelBuffer, y);

                for (int x = 0; x < pixels.Width; x += 8)
                {
-                    pixelConverter.Convert(frame, x, y, currentRows);
+                    pixelConverter.Convert(frame, x, y, ref currentRows);

                    prevDCY = this.WriteBlock(
                        QuantIndex.Luminance,
@ -435,7 +442,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        ref temp1,
                        ref temp2,
                        ref onStackLuminanceQuantTable,
-                        ref unzig);
+                        ref unzig,
+                        ref emitBufferBase);
+
                    prevDCCb = this.WriteBlock(
                        QuantIndex.Chrominance,
                        prevDCCb,
@ -443,7 +452,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        ref temp1,
                        ref temp2,
                        ref onStackChrominanceQuantTable,
-                        ref unzig);
+                        ref unzig,
+                        ref emitBufferBase);
+
                    prevDCCr = this.WriteBlock(
                        QuantIndex.Chrominance,
                        prevDCCr,
@ -451,7 +462,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        ref temp1,
                        ref temp2,
                        ref onStackChrominanceQuantTable,
-                        ref unzig);
+                        ref unzig,
+                        ref emitBufferBase);
                }
            }
        }
@ -517,9 +529,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// <param name="tempDest2">Temporal block 2</param>
        /// <param name="quant">Quantization table</param>
        /// <param name="unZig">The 8x8 Unzig block.</param>
-        /// <returns>
-        /// The <see cref="int"/>
-        /// </returns>
+        /// <param name="emitBufferBase">The reference to the emit buffer.</param>
+        /// <returns>The <see cref="int"/>.</returns>
        private int WriteBlock(
            QuantIndex index,
            int prevDC,
@ -527,7 +538,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            ref Block8x8F tempDest1,
            ref Block8x8F tempDest2,
            ref Block8x8F quant,
-            ref ZigZag unZig)
+            ref ZigZag unZig,
+            ref byte emitBufferBase)
        {
            FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2);

@ -536,7 +548,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            int dc = (int)tempDest2[0];

            // Emit the DC delta.
-            this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC);
+            this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC, ref emitBufferBase);

            // Emit the AC components.
            var h = (HuffIndex)((2 * (int)index) + 1);
@ -554,18 +566,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                {
                    while (runLength > 15)
                    {
-                        this.EmitHuff(h, 0xf0);
+                        this.EmitHuff(h, 0xf0, ref emitBufferBase);
                        runLength -= 16;
                    }

-                    this.EmitHuffRLE(h, runLength, ac);
+                    this.EmitHuffRLE(h, runLength, ac, ref emitBufferBase);
                    runLength = 0;
                }
            }

            if (runLength > 0)
            {
-                this.EmitHuff(h, 0x00);
+                this.EmitHuff(h, 0x00, ref emitBufferBase);
            }

            return dc;
@ -747,9 +759,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// </summary>
        /// <param name="app1Length">The length of the data the app1 marker contains.</param>
        private void WriteApp1Header(int app1Length)
-        {
-            this.WriteAppHeader(app1Length, JpegConstants.Markers.APP1);
-        }
+            => this.WriteAppHeader(app1Length, JpegConstants.Markers.APP1);

        /// <summary>
        /// Writes a AppX header.
@ -953,19 +963,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            // TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
            // TODO: We should allow grayscale writing.
            this.outputStream.Write(SosHeaderYCbCr);
-
+            ref byte emitBufferBase = ref MemoryMarshal.GetReference<byte>(this.emitBuffer);
            switch (this.subsample)
            {
                case JpegSubsample.Ratio444:
-                    this.Encode444(image, cancellationToken);
+                    this.Encode444(image, cancellationToken, ref emitBufferBase);
                    break;
                case JpegSubsample.Ratio420:
-                    this.Encode420(image, cancellationToken);
+                    this.Encode420(image, cancellationToken, ref emitBufferBase);
                    break;
            }

            // Pad the last byte with 1's.
-            this.Emit(0x7f, 7);
+            this.Emit(0x7f, 7, ref emitBufferBase);
        }

        /// <summary>
@ -975,7 +985,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
        /// <typeparam name="TPixel">The pixel format.</typeparam>
        /// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
        /// <param name="cancellationToken">The token to monitor for cancellation.</param>
-        private void Encode420<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken)
+        /// <param name="emitBufferBase">The reference to the emit buffer.</param>
+        private void Encode420<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken, ref byte emitBufferBase)
            where TPixel : unmanaged, IPixel<TPixel>
        {
            // TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
@ -997,6 +1008,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
            int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
            ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
            Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
+            RowOctet<TPixel> currentRows = default;

            for (int y = 0; y < pixels.Height; y += 16)
            {
@ -1008,10 +1020,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        int xOff = (i & 1) * 8;
                        int yOff = (i & 2) * 4;

-                        // TODO: Try pushing this to the outer loop!
-                        var currentRows = new RowOctet<TPixel>(pixelBuffer, y + yOff);
-
-                        pixelConverter.Convert(frame, x + xOff, y + yOff, currentRows);
+                        currentRows.Update(pixelBuffer, y + yOff);
+                        pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows);

                        cb[i] = pixelConverter.Cb;
                        cr[i] = pixelConverter.Cr;
@ -1023,7 +1033,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                            ref temp1,
                            ref temp2,
                            ref onStackLuminanceQuantTable,
-                            ref unzig);
+                            ref unzig,
+                            ref emitBufferBase);
                    }

                    Block8x8F.Scale16X16To8X8(ref b, cb);
@ -1034,7 +1045,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        ref temp1,
                        ref temp2,
                        ref onStackChrominanceQuantTable,
-                        ref unzig);
+                        ref unzig,
+                        ref emitBufferBase);

                    Block8x8F.Scale16X16To8X8(ref b, cr);
                    prevDCCr = this.WriteBlock(
@ -1044,7 +1056,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                        ref temp1,
                        ref temp2,
                        ref onStackChrominanceQuantTable,
-                        ref unzig);
+                        ref unzig,
+                        ref emitBufferBase);
                }
            }
        }
--- a/src/ImageSharp/Formats/PixelTypeInfo.cs
+++ b/src/ImageSharp/Formats/PixelTypeInfo.cs
@ -1,8 +1,7 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System.Runtime.CompilerServices;
-
 using SixLabors.ImageSharp.PixelFormats;

 namespace SixLabors.ImageSharp.Formats
@ -16,9 +15,11 @@ namespace SixLabors.ImageSharp.Formats
        /// Initializes a new instance of the <see cref="PixelTypeInfo"/> class.
        /// </summary>
        /// <param name="bitsPerPixel">Color depth, in number of bits per pixel.</param>
-        internal PixelTypeInfo(int bitsPerPixel)
+        /// <param name="alpha">The pixel alpha transparency behavior.</param>
+        internal PixelTypeInfo(int bitsPerPixel, PixelAlphaRepresentation? alpha = null)
        {
            this.BitsPerPixel = bitsPerPixel;
+            this.AlphaRepresentation = alpha;
        }

        /// <summary>
@ -26,8 +27,20 @@ namespace SixLabors.ImageSharp.Formats
        /// </summary>
        public int BitsPerPixel { get; }

+        /// <summary>
+        /// Gets the pixel alpha transparency behavior.
+        /// <see langword="null"/> means unknown or unspecified.
+        /// </summary>
+        public PixelAlphaRepresentation? AlphaRepresentation { get; }
+
        internal static PixelTypeInfo Create<TPixel>()
            where TPixel : unmanaged, IPixel<TPixel> =>
            new PixelTypeInfo(Unsafe.SizeOf<TPixel>() * 8);
+
+        /// <summary>
+        /// Creates a <see cref="PixelTypeInfo"/> for <typeparamref name="TPixel"/> with the given alpha representation.
+        /// The bits-per-pixel value is the size of <typeparamref name="TPixel"/> in bytes multiplied by 8.
+        /// </summary>
+        /// <typeparam name="TPixel">The pixel format.</typeparam>
+        /// <param name="alpha">The pixel alpha transparency behavior.</param>
+        /// <returns>The <see cref="PixelTypeInfo"/>.</returns>
+        internal static PixelTypeInfo Create<TPixel>(PixelAlphaRepresentation alpha)
+            where TPixel : unmanaged, IPixel<TPixel>
+        {
+            return new PixelTypeInfo(Unsafe.SizeOf<TPixel>() * 8, alpha);
+        }
    }
 }
--- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
@ -76,7 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - (above >> 1));
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
@ -87,7 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - Average(left, above));
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            sum -= 3;
@ -102,4 +102,4 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int Average(byte left, byte above) => (left + above) >> 1;
    }
-}
+}
--- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - PaethPredictor(0, above, 0));
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
@ -91,7 +91,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - PaethPredictor(left, above, upperLeft));
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            sum -= 4;
@ -111,9 +111,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
        private static byte PaethPredictor(byte left, byte above, byte upperLeft)
        {
            int p = left + above - upperLeft;
-            int pa = ImageMaths.FastAbs(p - left);
-            int pb = ImageMaths.FastAbs(p - above);
-            int pc = ImageMaths.FastAbs(p - upperLeft);
+            int pa = Numerics.Abs(p - left);
+            int pb = Numerics.Abs(p - above);
+            int pc = Numerics.Abs(p - upperLeft);

            if (pa <= pb && pa <= pc)
            {
@ -128,4 +128,4 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
            return upperLeft;
        }
    }
-}
+}
--- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = scan;
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
@ -71,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - prev);
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            sum -= 1;
--- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                ++x;
                ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
                res = (byte)(scan - above);
-                sum += ImageMaths.FastAbs(unchecked((sbyte)res));
+                sum += Numerics.Abs(unchecked((sbyte)res));
            }

            sum -= 2;
--- a/src/ImageSharp/Formats/Png/PngEncoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngEncoderCore.cs
@ -284,7 +284,7 @@ namespace SixLabors.ImageSharp.Formats.Png
                            rowSpan.Length,
                            AllocationOptions.Clean))
                        {
-                            int scaleFactor = 255 / (ImageMaths.GetColorCountForBitDepth(this.bitDepth) - 1);
+                            int scaleFactor = 255 / (ColorNumerics.GetColorCountForBitDepth(this.bitDepth) - 1);
                            Span<byte> tempSpan = temp.GetSpan();

                            // We need to first create an array of luminance bytes then scale them down to the correct bit depth.
@ -314,7 +314,7 @@ namespace SixLabors.ImageSharp.Formats.Png
                        for (int x = 0, o = 0; x < rgbaSpan.Length; x++, o += 4)
                        {
                            Rgba64 rgba = Unsafe.Add(ref rgbaRef, x);
-                            ushort luminance = ImageMaths.Get16BitBT709Luminance(rgba.R, rgba.G, rgba.B);
+                            ushort luminance = ColorNumerics.Get16BitBT709Luminance(rgba.R, rgba.G, rgba.B);
                            BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o, 2), luminance);
                            BinaryPrimitives.WriteUInt16BigEndian(rawScanlineSpan.Slice(o + 2, 2), rgba.A);
                        }
@ -329,7 +329,7 @@ namespace SixLabors.ImageSharp.Formats.Png
                    {
                        Unsafe.Add(ref rowSpanRef, x).ToRgba32(ref rgba);
                        Unsafe.Add(ref rawScanlineSpanRef, o) =
-                            ImageMaths.Get8BitBT709Luminance(rgba.R, rgba.G, rgba.B);
+                            ColorNumerics.Get8BitBT709Luminance(rgba.R, rgba.G, rgba.B);
                        Unsafe.Add(ref rawScanlineSpanRef, o + 1) = rgba.A;
                    }
                }
--- a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs
+++ b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs
@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png
            options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType<TPixel>();
            options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth<TPixel>();

+            // Ensure bit depth and color type are a supported combination.
+            // Bit8 is the only bit depth supported by all color types.
+            byte bits = (byte)options.BitDepth;
+            byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value];
+            if (Array.IndexOf(validBitDepths, bits) == -1)
+            {
+                options.BitDepth = PngBitDepth.Bit8;
+            }
+
            options.InterlaceMethod ??= pngMetadata.InterlaceMethod;

            use16Bit = options.BitDepth == PngBitDepth.Bit16;
@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png
            {
                options.ChunkFilter = PngChunkFilter.ExcludeAll;
            }
-
-            // Ensure we are not allowing impossible combinations.
-            if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value))
-            {
-                throw new NotSupportedException("Color type is not supported or not valid.");
-            }
        }

        /// <summary>
@ -68,16 +71,11 @@ namespace SixLabors.ImageSharp.Formats.Png
                return null;
            }

-            byte bits = (byte)options.BitDepth;
-            if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1)
-            {
-                throw new NotSupportedException("Bit depth is not supported or not valid.");
-            }
-
            // Use the metadata to determine what quantization depth to use if no quantizer has been set.
            if (options.Quantizer is null)
            {
-                var maxColors = ImageMaths.GetColorCountForBitDepth(bits);
+                byte bits = (byte)options.BitDepth;
+                var maxColors = ColorNumerics.GetColorCountForBitDepth(bits);
                options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors });
            }

@ -103,7 +101,7 @@ namespace SixLabors.ImageSharp.Formats.Png
            byte bitDepth;
            if (options.ColorType == PngColorType.Palette)
            {
-                byte quantizedBits = (byte)ImageMaths.GetBitsNeededForColorDepth(quantizedFrame.Palette.Length).Clamp(1, 8);
+                byte quantizedBits = (byte)Numerics.Clamp(ColorNumerics.GetBitsNeededForColorDepth(quantizedFrame.Palette.Length), 1, 8);
                byte bits = Math.Max((byte)options.BitDepth, quantizedBits);

                // Png only supports in four pixel depths: 1, 2, 4, and 8 bits when using the PLTE chunk
--- a/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs
+++ b/src/ImageSharp/Formats/Png/PngScanlineProcessor.cs
@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Formats.Png
            TPixel pixel = default;
            ref byte scanlineSpanRef = ref MemoryMarshal.GetReference(scanlineSpan);
            ref TPixel rowSpanRef = ref MemoryMarshal.GetReference(rowSpan);
-            int scaleFactor = 255 / (ImageMaths.GetColorCountForBitDepth(header.BitDepth) - 1);
+            int scaleFactor = 255 / (ColorNumerics.GetColorCountForBitDepth(header.BitDepth) - 1);

            if (!hasTrans)
            {
@ -96,7 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Png
            TPixel pixel = default;
            ref byte scanlineSpanRef = ref MemoryMarshal.GetReference(scanlineSpan);
            ref TPixel rowSpanRef = ref MemoryMarshal.GetReference(rowSpan);
-            int scaleFactor = 255 / (ImageMaths.GetColorCountForBitDepth(header.BitDepth) - 1);
+            int scaleFactor = 255 / (ColorNumerics.GetColorCountForBitDepth(header.BitDepth) - 1);

            if (!hasTrans)
            {
--- a/src/ImageSharp/Formats/Tga/TgaEncoderCore.cs
+++ b/src/ImageSharp/Formats/Tga/TgaEncoderCore.cs
@ -365,7 +365,7 @@ namespace SixLabors.ImageSharp.Formats.Tga
            where TPixel : unmanaged, IPixel<TPixel>
        {
            var vector = sourcePixel.ToVector4();
-            return ImageMaths.GetBT709Luminance(ref vector, 256);
+            return ColorNumerics.GetBT709Luminance(ref vector, 256);
        }
    }
 }
--- a/src/ImageSharp/Image.FromBytes.cs
+++ b/src/ImageSharp/Image.FromBytes.cs
@ -91,9 +91,9 @@ namespace SixLabors.ImageSharp
        /// <param name="data">The byte array containing image data.</param>
        /// <exception cref="ArgumentNullException">The configuration is null.</exception>
        /// <exception cref="ArgumentNullException">The data is null.</exception>
-        /// <returns>A new <see cref="Image{Rgba32}"/>.</returns>
-        public static Image<Rgba32> Load(byte[] data)
-            => Load<Rgba32>(Configuration.Default, data);
+        /// <returns>A new <see cref="Image"/>.</returns>
+        public static Image Load(byte[] data)
+            => Load(Configuration.Default, data);

        /// <summary>
        /// Load a new instance of <see cref="Image{TPixel}"/> from the given encoded byte array.
--- a/src/ImageSharp/Image.WrapMemory.cs
+++ b/src/ImageSharp/Image.WrapMemory.cs
@ -16,8 +16,22 @@ namespace SixLabors.ImageSharp
    public abstract partial class Image
    {
        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type</typeparam>
        /// <param name="configuration">The <see cref="Configuration"/></param>
@ -38,15 +52,29 @@ namespace SixLabors.ImageSharp
        {
            Guard.NotNull(configuration, nameof(configuration));
            Guard.NotNull(metadata, nameof(metadata));
-            Guard.IsTrue(pixelMemory.Length == width * height, nameof(pixelMemory), "The length of the input memory doesn't match the specified image size");
+            Guard.IsTrue(pixelMemory.Length >= width * height, nameof(pixelMemory), "The length of the input memory is less than the specified image size");

            var memorySource = MemoryGroup<TPixel>.Wrap(pixelMemory);
            return new Image<TPixel>(configuration, memorySource, width, height, metadata);
        }

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type</typeparam>
        /// <param name="configuration">The <see cref="Configuration"/></param>
@ -64,9 +92,22 @@ namespace SixLabors.ImageSharp
            => WrapMemory(configuration, pixelMemory, width, height, new ImageMetadata());

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
-        /// The memory is being observed, the caller remains responsible for managing it's lifecycle.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type.</typeparam>
        /// <param name="pixelMemory">The pixel memory.</param>
@ -81,7 +122,7 @@ namespace SixLabors.ImageSharp
            => WrapMemory(Configuration.Default, pixelMemory, width, height);

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels,
        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
        /// The ownership of the <paramref name="pixelMemoryOwner"/> is being transferred to the new <see cref="Image{TPixel}"/> instance,
        /// meaning that the caller is not allowed to dispose <paramref name="pixelMemoryOwner"/>.
@ -106,14 +147,14 @@ namespace SixLabors.ImageSharp
        {
            Guard.NotNull(configuration, nameof(configuration));
            Guard.NotNull(metadata, nameof(metadata));
-            Guard.IsTrue(pixelMemoryOwner.Memory.Length == width * height, nameof(pixelMemoryOwner), "The length of the input memory doesn't match the specified image size");
+            Guard.IsTrue(pixelMemoryOwner.Memory.Length >= width * height, nameof(pixelMemoryOwner), "The length of the input memory is less than the specified image size");

            var memorySource = MemoryGroup<TPixel>.Wrap(pixelMemoryOwner);
            return new Image<TPixel>(configuration, memorySource, width, height, metadata);
        }

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels,
        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
        /// The ownership of the <paramref name="pixelMemoryOwner"/> is being transferred to the new <see cref="Image{TPixel}"/> instance,
        /// meaning that the caller is not allowed to dispose <paramref name="pixelMemoryOwner"/>.
@ -135,7 +176,7 @@ namespace SixLabors.ImageSharp
            => WrapMemory(configuration, pixelMemoryOwner, width, height, new ImageMetadata());

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels,
        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
        /// The ownership of the <paramref name="pixelMemoryOwner"/> is being transferred to the new <see cref="Image{TPixel}"/> instance,
        /// meaning that the caller is not allowed to dispose <paramref name="pixelMemoryOwner"/>.
@ -154,8 +195,22 @@ namespace SixLabors.ImageSharp
            => WrapMemory(Configuration.Default, pixelMemoryOwner, width, height);

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained, (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type</typeparam>
        /// <param name="configuration">The <see cref="Configuration"/></param>
@ -179,15 +234,29 @@ namespace SixLabors.ImageSharp

            var memoryManager = new ByteMemoryManager<TPixel>(byteMemory);

-            Guard.IsTrue(memoryManager.Memory.Length == width * height, nameof(byteMemory), "The length of the input memory doesn't match the specified image size");
+            Guard.IsTrue(memoryManager.Memory.Length >= width * height, nameof(byteMemory), "The length of the input memory is less than the specified image size");

            var memorySource = MemoryGroup<TPixel>.Wrap(memoryManager.Memory);
            return new Image<TPixel>(configuration, memorySource, width, height, metadata);
        }

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained, (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type</typeparam>
        /// <param name="configuration">The <see cref="Configuration"/></param>
@ -205,9 +274,22 @@ namespace SixLabors.ImageSharp
            => WrapMemory<TPixel>(configuration, byteMemory, width, height, new ImageMetadata());

        /// <summary>
-        /// Wraps an existing contiguous memory area of 'width' x 'height' pixels,
-        /// allowing to view/manipulate it as an <see cref="Image{TPixel}"/> instance.
-        /// The memory is being observed, the caller remains responsible for managing it's lifecycle.
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: using this method does not transfer the ownership of the underlying buffer of the input <see cref="Memory{T}"/>
+        /// to the new <see cref="Image{TPixel}"/> instance. This means that consumers of this method must ensure that the input buffer
+        /// is either self-contained, (for example, a <see cref="Memory{T}"/> instance wrapping a new array that was
+        /// created), or that the owning object is not disposed until the returned <see cref="Image{TPixel}"/> is disposed.
+        /// </para>
+        /// <para>
+        /// If the input <see cref="Memory{T}"/> instance is one retrieved from an <see cref="IMemoryOwner{T}"/> instance
+        /// rented from a memory pool (such as <see cref="MemoryPool{T}"/>), and that owning instance is disposed while the image is still
+        /// in use, this will lead to undefined behavior and possibly runtime crashes (as the same buffer might then be modified by other
+        /// consumers while the returned image is still working on it). Make sure to control the lifetime of the input buffers appropriately.
+        /// </para>
        /// </summary>
        /// <typeparam name="TPixel">The pixel type.</typeparam>
        /// <param name="byteMemory">The byte memory representing the pixel data.</param>
@ -220,5 +302,128 @@ namespace SixLabors.ImageSharp
            int height)
            where TPixel : unmanaged, IPixel<TPixel>
            => WrapMemory<TPixel>(Configuration.Default, byteMemory, width, height);
+
+        /// <summary>
+        /// <para>
+        /// Wraps an existing contiguous memory area of at least 'width' x 'height' pixels allowing viewing/manipulation as
+        /// an <see cref="Image{TPixel}"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: this method relies on callers to carefully manage the target memory area being referenced by the
+        /// pointer and that the lifetime of such a memory area is at least equal to that of the returned
+        /// <see cref="Image{TPixel}"/> instance. For example, if the input pointer references an unmanaged memory area,
+        /// callers must ensure that the memory area is not freed as long as the returned <see cref="Image{TPixel}"/> is
+        /// in use and not disposed. The same applies if the input memory area points to a pinned managed object, as callers
+        /// must ensure that objects will remain pinned as long as the <see cref="Image{TPixel}"/> instance is in use.
+        /// Failing to do so constitutes undefined behavior and will likely lead to memory corruption and runtime crashes.
+        /// </para>
+        /// <para>
+        /// Note also that if you have a <see cref="Memory{T}"/> or an array (which can be cast to <see cref="Memory{T}"/>) of
+        /// either <see cref="byte"/> or <typeparamref name="TPixel"/> values, it is highly recommended to use one of the other
+        /// available overloads of this method instead (such as <see cref="WrapMemory{TPixel}(Configuration, Memory{byte}, int, int)"/>
+        /// or <see cref="WrapMemory{TPixel}(Configuration, Memory{TPixel}, int, int)"/>), to make the resulting code less error
+        /// prone and avoid having to pin the underlying memory buffer in use. This method is primarily meant to be used when
+        /// doing interop or working with buffers that are located in unmanaged memory.
+        /// </para>
+        /// </summary>
+        /// <typeparam name="TPixel">The pixel type</typeparam>
+        /// <param name="configuration">The <see cref="Configuration"/></param>
+        /// <param name="pointer">The pointer to the target memory buffer to wrap. Must not be null.</param>
+        /// <param name="width">The width of the memory image.</param>
+        /// <param name="height">The height of the memory image.</param>
+        /// <param name="metadata">The <see cref="ImageMetadata"/>.</param>
+        /// <exception cref="ArgumentNullException">The configuration is null.</exception>
+        /// <exception cref="ArgumentNullException">The metadata is null.</exception>
+        /// <exception cref="OverflowException">The product of width and height does not fit in an <see cref="int"/>.</exception>
+        /// <returns>An <see cref="Image{TPixel}"/> instance</returns>
+        public static unsafe Image<TPixel> WrapMemory<TPixel>(
+            Configuration configuration,
+            void* pointer,
+            int width,
+            int height,
+            ImageMetadata metadata)
+            where TPixel : unmanaged, IPixel<TPixel>
+        {
+            Guard.IsFalse(pointer == null, nameof(pointer), "Pointer must be not null");
+            Guard.NotNull(configuration, nameof(configuration));
+            Guard.NotNull(metadata, nameof(metadata));
+
+            // A raw pointer carries no length of its own, so the pixel count computed here is the only
+            // bound the wrapper gets. Compute it in a checked context so that an overflowing product
+            // fails loudly instead of wrapping around to a length that no longer matches the dimensions.
+            int pixelCount = checked(width * height);
+            var memoryManager = new UnmanagedMemoryManager<TPixel>(pointer, pixelCount);
+
+            var memorySource = MemoryGroup<TPixel>.Wrap(memoryManager.Memory);
+            return new Image<TPixel>(configuration, memorySource, width, height, metadata);
+        }
+
+        /// <summary>
+        /// <para>
+        /// Creates an <see cref="Image{TPixel}"/> that views and manipulates an existing contiguous memory area
+        /// holding at least 'width' x 'height' pixels, using a new <see cref="ImageMetadata"/> instance.
+        /// </para>
+        /// <para>
+        /// Please note: callers are responsible for the memory area referenced by the pointer. That memory area
+        /// must stay valid for at least as long as the returned <see cref="Image{TPixel}"/> instance: unmanaged
+        /// memory must not be freed, and a pinned managed object must remain pinned, while the image is in use
+        /// and not disposed. Failing to do so constitutes undefined behavior and will likely lead to memory
+        /// corruption and runtime crashes.
+        /// </para>
+        /// <para>
+        /// If the pixel data is available as a <see cref="Memory{T}"/> or an array (which can be cast to
+        /// <see cref="Memory{T}"/>) of either <see cref="byte"/> or <typeparamref name="TPixel"/> values, it is
+        /// highly recommended to use one of the other overloads of this method instead (such as
+        /// <see cref="WrapMemory{TPixel}(Configuration, Memory{byte}, int, int)"/> or
+        /// <see cref="WrapMemory{TPixel}(Configuration, Memory{TPixel}, int, int)"/>), which makes the resulting
+        /// code less error prone and avoids having to pin the underlying buffer. This overload is primarily
+        /// meant for interop and for buffers that are located in unmanaged memory.
+        /// </para>
+        /// </summary>
+        /// <typeparam name="TPixel">The pixel type</typeparam>
+        /// <param name="configuration">The <see cref="Configuration"/></param>
+        /// <param name="pointer">The pointer to the target memory buffer to wrap.</param>
+        /// <param name="width">The width of the memory image.</param>
+        /// <param name="height">The height of the memory image.</param>
+        /// <exception cref="ArgumentNullException">The configuration is null.</exception>
+        /// <returns>An <see cref="Image{TPixel}"/> instance.</returns>
+        public static unsafe Image<TPixel> WrapMemory<TPixel>(
+            Configuration configuration,
+            void* pointer,
+            int width,
+            int height)
+            where TPixel : unmanaged, IPixel<TPixel>
+        {
+            // Forward to the full overload with a fresh, empty metadata instance.
+            var defaultMetadata = new ImageMetadata();
+            return WrapMemory<TPixel>(configuration, pointer, width, height, defaultMetadata);
+        }
+
+        /// <summary>
+        /// <para>
+        /// Creates an <see cref="Image{TPixel}"/> that views and manipulates an existing contiguous memory area
+        /// holding at least 'width' x 'height' pixels, using <see cref="Configuration.Default"/>.
+        /// </para>
+        /// <para>
+        /// Please note: callers are responsible for the memory area referenced by the pointer. That memory area
+        /// must stay valid for at least as long as the returned <see cref="Image{TPixel}"/> instance: unmanaged
+        /// memory must not be freed, and a pinned managed object must remain pinned, while the image is in use
+        /// and not disposed. Failing to do so constitutes undefined behavior and will likely lead to memory
+        /// corruption and runtime crashes.
+        /// </para>
+        /// <para>
+        /// If the pixel data is available as a <see cref="Memory{T}"/> or an array (which can be cast to
+        /// <see cref="Memory{T}"/>) of either <see cref="byte"/> or <typeparamref name="TPixel"/> values, it is
+        /// highly recommended to use one of the other overloads of this method instead (such as
+        /// <see cref="WrapMemory{TPixel}(Configuration, Memory{byte}, int, int)"/> or
+        /// <see cref="WrapMemory{TPixel}(Configuration, Memory{TPixel}, int, int)"/>), which makes the resulting
+        /// code less error prone and avoids having to pin the underlying buffer. This overload is primarily
+        /// meant for interop and for buffers that are located in unmanaged memory.
+        /// </para>
+        /// </summary>
+        /// <typeparam name="TPixel">The pixel type.</typeparam>
+        /// <param name="pointer">The pointer to the target memory buffer to wrap.</param>
+        /// <param name="width">The width of the memory image.</param>
+        /// <param name="height">The height of the memory image.</param>
+        /// <returns>An <see cref="Image{TPixel}"/> instance.</returns>
+        public static unsafe Image<TPixel> WrapMemory<TPixel>(
+            void* pointer,
+            int width,
+            int height)
+            where TPixel : unmanaged, IPixel<TPixel>
+        {
+            // Forward to the configuration-aware overload using the library-wide default configuration.
+            Configuration defaultConfiguration = Configuration.Default;
+            return WrapMemory<TPixel>(defaultConfiguration, pointer, width, height);
+        }
    }
 }
--- a/src/ImageSharp/ImageSharp.csproj
+++ b/src/ImageSharp/ImageSharp.csproj
@ -4,41 +4,48 @@
  <PropertyGroup>
    <AssemblyName>SixLabors.ImageSharp</AssemblyName>
    <AssemblyTitle>SixLabors.ImageSharp</AssemblyTitle>
-    <Description>A cross-platform library for the processing of image files; written in C#</Description>
-    <NeutralLanguage>en</NeutralLanguage>
-
-    <VersionPrefix Condition="$(packageversion) != ''">$(packageversion)</VersionPrefix>
-    <VersionPrefix Condition="$(packageversion) == ''">0.0.1</VersionPrefix>
-
-    <TargetFrameworks>netcoreapp3.1;netcoreapp2.1;netstandard2.1;netstandard2.0;netstandard1.3;net472</TargetFrameworks>
-
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <GenerateDocumentationFile>true</GenerateDocumentationFile>
-    <PackageId>SixLabors.ImageSharp</PackageId>
-    <PackageTags>Image Resize Crop Gif Jpg Jpeg Bitmap Png Core</PackageTags>
    <RootNamespace>SixLabors.ImageSharp</RootNamespace>
+    <PackageId>SixLabors.ImageSharp</PackageId>
+    <PackageIcon>sixlabors.imagesharp.128.png</PackageIcon>
+    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
+    <RepositoryUrl Condition="'$(RepositoryUrl)' == ''">https://github.com/SixLabors/ImageSharp/</RepositoryUrl>
+    <PackageProjectUrl>$(RepositoryUrl)</PackageProjectUrl>
+    <PackageTags>Image Resize Crop Gif Jpg Jpeg Bitmap Png Tga NetCore</PackageTags>
+    <Description>A new, fully featured, fully managed, cross-platform, 2D graphics API for .NET</Description>
  </PropertyGroup>

+  <Choose>
+    <When Condition="$(SIXLABORS_TESTING) == true">
+      <PropertyGroup>
+        <TargetFrameworks>net5.0;netcoreapp3.1;netcoreapp2.1;netstandard2.1;netstandard2.0;netstandard1.3;net472</TargetFrameworks>
+      </PropertyGroup>
+    </When>
+    <Otherwise>
+      <PropertyGroup>
+        <TargetFrameworks>netcoreapp3.1;netcoreapp2.1;netstandard2.1;netstandard2.0;netstandard1.3;net472</TargetFrameworks>
+      </PropertyGroup>
+    </Otherwise>
+  </Choose>
+
  <ItemGroup>
-    <PackageReference Include="Microsoft.SourceLink.GitHub" />
-    <PackageReference Include="MinVer" PrivateAssets="All" />
+    <None Include="..\..\shared-infrastructure\branding\icons\imagesharp\sixlabors.imagesharp.128.png" Pack="true" PackagePath="" />
  </ItemGroup>

-  <ItemGroup Condition=" '$(TargetFramework)' == 'netcoreapp2.1' ">
-    <PackageReference Include="System.Runtime.CompilerServices.Unsafe" />
+  <ItemGroup>
+    <PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="5.0.0" />
  </ItemGroup>

  <ItemGroup Condition=" $(TargetFramework.StartsWith('netstandard')) OR '$(TargetFramework)' == 'net472'">
-    <PackageReference Include="System.Numerics.Vectors" />
-    <PackageReference Include="System.Buffers" />
-    <PackageReference Include="System.Memory" />
+    <PackageReference Include="System.Numerics.Vectors" Version="4.5.0" />
+    <PackageReference Include="System.Buffers" Version="4.5.1" />
+    <PackageReference Include="System.Memory" Version="4.5.4" />
  </ItemGroup>

  <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard1.3'">
-    <PackageReference Include="System.IO.Compression" />
-    <PackageReference Include="System.IO.UnmanagedMemoryStream" />
-    <PackageReference Include="System.Threading.Tasks.Parallel" />
-    <PackageReference Include="System.ValueTuple" />
+    <PackageReference Include="System.IO.Compression" Version="4.3.0" />
+    <PackageReference Include="System.IO.UnmanagedMemoryStream" Version="4.3.0" />
+    <PackageReference Include="System.Threading.Tasks.Parallel" Version="4.3.0" />
+    <PackageReference Include="System.ValueTuple" Version="4.5.0" />
  </ItemGroup>

  <ItemGroup>
@ -62,62 +69,62 @@
      <AutoGen>True</AutoGen>
      <DependentUpon>PixelOperations{TPixel}.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Argb32.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Argb32.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Argb32.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Bgr24.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgr24.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Bgr24.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Bgra32.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgra32.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Bgra32.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Bgra5551.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgra5551.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Bgra5551.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\L8.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\L16.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
-      <DependentUpon>L8.PixelOperations.Generated.tt</DependentUpon>
+      <DependentUpon>L16.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\L16.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\L8.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
-      <DependentUpon>L16.PixelOperations.Generated.tt</DependentUpon>
+      <DependentUpon>L8.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\La16.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\La16.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>La16.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\La32.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\La32.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>La32.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Rgb24.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgb24.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Rgb24.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Rgba32.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgb48.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
-      <DependentUpon>Rgba32.PixelOperations.Generated.tt</DependentUpon>
+      <DependentUpon>Rgb48.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Rgb48.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgba32.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
-      <DependentUpon>Rgb48.PixelOperations.Generated.tt</DependentUpon>
+      <DependentUpon>Rgba32.PixelOperations.Generated.tt</DependentUpon>
    </Compile>
-    <Compile Update="PixelFormats\PixelImplementations\Generated\Rgba64.PixelOperations.Generated.cs">
+    <Compile Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgba64.PixelOperations.Generated.cs">
      <DesignTime>True</DesignTime>
      <AutoGen>True</AutoGen>
      <DependentUpon>Rgba64.PixelOperations.Generated.tt</DependentUpon>
@ -156,51 +163,51 @@
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>PixelOperations{TPixel}.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Argb32.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Argb32.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Argb32.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Bgr24.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgr24.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Bgr24.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Bgra32.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgra32.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Bgra32.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Bgra5551.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Bgra5551.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Bgra5551.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\L8.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\L8.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>L8.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\L16.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\L16.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>L16.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\La16.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\La16.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>La16.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\La32.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\La32.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>La32.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Rgb24.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgb24.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Rgb24.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Rgba32.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgba32.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Rgba32.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Rgb48.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgb48.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Rgb48.PixelOperations.Generated.cs</LastGenOutput>
    </None>
-    <None Update="PixelFormats\PixelImplementations\Generated\Rgba64.PixelOperations.Generated.tt">
+    <None Update="PixelFormats\PixelImplementations\PixelOperations\Generated\Rgba64.PixelOperations.Generated.tt">
      <Generator>TextTemplatingFileGenerator</Generator>
      <LastGenOutput>Rgba64.PixelOperations.Generated.cs</LastGenOutput>
    </None>
--- a/src/ImageSharp/Image{TPixel}.cs
+++ b/src/ImageSharp/Image{TPixel}.cs
@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp
        public bool TryGetSinglePixelSpan(out Span<TPixel> span)
        {
            IMemoryGroup<TPixel> mg = this.GetPixelMemoryGroup();
-            if (mg.Count > 1)
+            if (mg.Count == 1)
            {
-                span = default;
-                return false;
+                span = mg[0].Span;
+                return true;
            }

-            span = mg.Single().Span;
-            return true;
+            span = default;
+            return false;
        }

        /// <summary>
--- a/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
+++ b/src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
@ -53,8 +53,13 @@ namespace SixLabors.ImageSharp.Memory
                {
                    ThrowObjectDisposedException();
                }
-
+#if SUPPORTS_CREATESPAN
+                ref byte r0 = ref MemoryMarshal.GetReference<byte>(this.Data);
+                return MemoryMarshal.CreateSpan(ref Unsafe.As<byte, T>(ref r0), this.length);
+#else
                return MemoryMarshal.Cast<byte, T>(this.Data.AsSpan()).Slice(0, this.length);
+#endif
+
            }

            /// <inheritdoc />
--- a/src/ImageSharp/Memory/ByteMemoryManager{T}.cs
+++ b/src/ImageSharp/Memory/ByteMemoryManager{T}.cs
@ -1,5 +1,6 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.
+
 using System;
 using System.Buffers;
 using System.Runtime.CompilerServices;
--- a/src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs
+++ b/src/ImageSharp/Memory/UnmanagedMemoryManager{T}.cs
@ -0,0 +1,60 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Buffers;
+
+namespace SixLabors.ImageSharp.Memory
+{
+    /// <summary>
+    /// A custom <see cref="MemoryManager{T}"/> that can wrap a raw pointer to a buffer of a specified type.
+    /// </summary>
+    /// <typeparam name="T">The unmanaged element type of the buffer that the wrapped pointer refers to.</typeparam>
+    /// <remarks>This manager doesn't own the memory buffer that it points to: disposing and unpinning are no-ops, and the pointer, length and pin index are not validated.</remarks>
+    internal sealed unsafe class UnmanagedMemoryManager<T> : MemoryManager<T>
+        where T : unmanaged
+    {
+        /// <summary>
+        /// The pointer to the memory buffer.
+        /// </summary>
+        private readonly void* pointer;
+
+        /// <summary>
+        /// The length of the memory area, in <typeparamref name="T"/> elements.
+        /// </summary>
+        private readonly int length;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="UnmanagedMemoryManager{T}"/> class.
+        /// </summary>
+        /// <param name="pointer">The pointer to the memory buffer.</param>
+        /// <param name="length">The length of the memory area, in <typeparamref name="T"/> elements.</param>
+        public UnmanagedMemoryManager(void* pointer, int length)
+        {
+            this.pointer = pointer;
+            this.length = length;
+        }
+
+        /// <inheritdoc/>
+        protected override void Dispose(bool disposing)
+        {
+        }
+
+        /// <inheritdoc/>
+        public override Span<T> GetSpan()
+        {
+            return new Span<T>(this.pointer, this.length);
+        }
+
+        /// <inheritdoc/>
+        public override MemoryHandle Pin(int elementIndex = 0)
+        {
+            return new MemoryHandle(((T*)this.pointer) + elementIndex);
+        }
+
+        /// <inheritdoc/>
+        public override void Unpin()
+        {
+        }
+    }
+}
--- a/src/ImageSharp/Metadata/Profiles/Exif/DC-008-Translation-2019-E.pdf
+++ b/src/ImageSharp/Metadata/Profiles/Exif/DC-008-Translation-2019-E.pdf
--- a/src/ImageSharp/Metadata/Profiles/Exif/DC-X008-Translation-2019-E.pdf
+++ b/src/ImageSharp/Metadata/Profiles/Exif/DC-X008-Translation-2019-E.pdf
--- a/src/ImageSharp/Metadata/Profiles/ICC/DataReader/IccDataReader.cs
+++ b/src/ImageSharp/Metadata/Profiles/ICC/DataReader/IccDataReader.cs
@ -40,7 +40,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
        /// <param name="index">The new index position</param>
        public void SetIndex(int index)
        {
-            this.currentIndex = index.Clamp(0, this.data.Length);
+            this.currentIndex = Numerics.Clamp(index, 0, this.data.Length);
        }

        /// <summary>
--- a/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.Lut.cs
+++ b/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.Lut.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
@ -17,7 +17,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
        {
            foreach (float item in value.Values)
            {
-                this.WriteByte((byte)((item * byte.MaxValue) + 0.5f).Clamp(0, byte.MaxValue));
+                this.WriteByte((byte)Numerics.Clamp((item * byte.MaxValue) + 0.5F, 0, byte.MaxValue));
            }

            return value.Values.Length;
@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
        {
            foreach (float item in value.Values)
            {
-                this.WriteUInt16((ushort)((item * ushort.MaxValue) + 0.5f).Clamp(0, ushort.MaxValue));
+                this.WriteUInt16((ushort)Numerics.Clamp((item * ushort.MaxValue) + 0.5F, 0, ushort.MaxValue));
            }

            return value.Values.Length * 2;
@ -78,7 +78,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            {
                foreach (float item in inArray)
                {
-                    count += this.WriteByte((byte)((item * byte.MaxValue) + 0.5f).Clamp(0, byte.MaxValue));
+                    count += this.WriteByte((byte)Numerics.Clamp((item * byte.MaxValue) + 0.5F, 0, byte.MaxValue));
                }
            }

@ -97,7 +97,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            {
                foreach (float item in inArray)
                {
-                    count += this.WriteUInt16((ushort)((item * ushort.MaxValue) + 0.5f).Clamp(0, ushort.MaxValue));
+                    count += this.WriteUInt16((ushort)Numerics.Clamp((item * ushort.MaxValue) + 0.5F, 0, ushort.MaxValue));
                }
            }

--- a/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.NonPrimitives.cs
+++ b/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.NonPrimitives.cs
@ -33,9 +33,9 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
        /// <returns>the number of bytes written</returns>
        public int WriteVersionNumber(in IccVersion value)
        {
-            int major = value.Major.Clamp(0, byte.MaxValue);
-            int minor = value.Minor.Clamp(0, 15);
-            int bugfix = value.Patch.Clamp(0, 15);
+            int major = Numerics.Clamp(value.Major, 0, byte.MaxValue);
+            int minor = Numerics.Clamp(value.Minor, 0, 15);
+            int bugfix = Numerics.Clamp(value.Patch, 0, 15);

            int version = (major << 24) | (minor << 20) | (bugfix << 16);
            return this.WriteInt32(version);
--- a/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.Primitives.cs
+++ b/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.Primitives.cs
@ -1,4 +1,4 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

 using System;
@ -112,7 +112,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            const double Max = short.MaxValue + (65535d / 65536d);
            const double Min = short.MinValue;

-            value = value.Clamp(Min, Max);
+            value = Numerics.Clamp(value, Min, Max);
            value *= 65536d;

            return this.WriteInt32((int)Math.Round(value, MidpointRounding.AwayFromZero));
@ -128,7 +128,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            const double Max = ushort.MaxValue + (65535d / 65536d);
            const double Min = ushort.MinValue;

-            value = value.Clamp(Min, Max);
+            value = Numerics.Clamp(value, Min, Max);
            value *= 65536d;

            return this.WriteUInt32((uint)Math.Round(value, MidpointRounding.AwayFromZero));
@ -144,7 +144,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            const double Max = 1 + (32767d / 32768d);
            const double Min = 0;

-            value = value.Clamp(Min, Max);
+            value = Numerics.Clamp(value, Min, Max);
            value *= 32768d;

            return this.WriteUInt16((ushort)Math.Round(value, MidpointRounding.AwayFromZero));
@ -160,7 +160,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
            const double Max = byte.MaxValue + (255d / 256d);
            const double Min = byte.MinValue;

-            value = value.Clamp(Min, Max);
+            value = Numerics.Clamp(value, Min, Max);
            value *= 256d;

            return this.WriteUInt16((ushort)Math.Round(value, MidpointRounding.AwayFromZero));
--- a/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.TagDataEntry.cs
+++ b/src/ImageSharp/Metadata/Profiles/ICC/DataWriter/IccDataWriter.TagDataEntry.cs
@ -240,7 +240,7 @@ namespace SixLabors.ImageSharp.Metadata.Profiles.Icc
                count += this.WriteUInt32((uint)value.CurveData.Length);
                for (int i = 0; i < value.CurveData.Length; i++)
                {
-                    count += this.WriteUInt16((ushort)((value.CurveData[i] * ushort.MaxValue) + 0.5f).Clamp(0, ushort.MaxValue));
+                    count += this.WriteUInt16((ushort)Numerics.Clamp((value.CurveData[i] * ushort.MaxValue) + 0.5F, 0, ushort.MaxValue));
                }
            }