diff --git a/src/ImageProcessorCore/Formats/Jpg/Block.cs b/src/ImageProcessorCore/Formats/Jpg/Block.cs
index 5145a6f061..3e5a4e5342 100644
--- a/src/ImageProcessorCore/Formats/Jpg/Block.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/Block.cs
@@ -3,16 +3,16 @@ namespace ImageProcessorCore.Formats.Jpg
     using System;
     using System.IO;
 
-	internal class Block
-	{
-		public const int blockSize = 64;
-		private int[] _data;
+    internal class Block
+    {
+        public const int blockSize = 64;
+        private int[] _data;
 
-		public Block()
-		{
-			_data = new int[blockSize];
-		}
+        public Block()
+        {
+            _data = new int[blockSize];
+        }
 
-		public int this[int idx] { get { return _data[idx]; } set { _data[idx] = value; } }
-	}
+        public int this[int idx] { get { return _data[idx]; } set { _data[idx] = value; } }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/Colors.cs b/src/ImageProcessorCore/Formats/Jpg/Colors.cs
index 6bccfb49a6..6bd030b544 100644
--- a/src/ImageProcessorCore/Formats/Jpg/Colors.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/Colors.cs
@@ -3,43 +3,43 @@ namespace ImageProcessorCore.Formats.Jpg
     using System;
     using System.IO;
 
-	internal static class Colors
-	{
-		public static void RGBToYCbCr(byte r, byte g, byte b, out byte yy, out byte cb, out byte cr)
-		{
-			// The JFIF specification says:
-			//	Y' =  0.2990*R + 0.5870*G + 0.1140*B
-			//	Cb = -0.1687*R - 0.3313*G + 0.5000*B + 128
-			//	Cr =  0.5000*R - 0.4187*G - 0.0813*B + 128
-			// http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
+    internal static class Colors
+    {
+        public static void RGBToYCbCr(byte r, byte g, byte b, out byte yy, out byte cb, out byte cr)
+        {
+            // The JFIF specification says:
+            //  Y' =  0.2990*R + 0.5870*G + 0.1140*B
+            //  Cb = -0.1687*R - 0.3313*G + 0.5000*B + 128
+            //  Cr =  0.5000*R - 0.4187*G - 0.0813*B + 128
+            // http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
 
-			int iyy = (19595*r + 38470*g + 7471*b + (1<<15)) >> 16;
-			int icb = (-11056*r - 21712*g + 32768*b + (257<<15)) >> 16;
-			int icr = (32768*r - 27440*g - 5328*b + (257<<15)) >> 16;
+            int iyy = (19595*r + 38470*g + 7471*b + (1<<15)) >> 16;
+            int icb = (-11056*r - 21712*g + 32768*b + (257<<15)) >> 16;
+            int icr = (32768*r - 27440*g - 5328*b + (257<<15)) >> 16;
 
-			if(iyy < 0) yy = 0; else if(iyy > 255) yy = 255; else yy = (byte)iyy;
-			if(icb < 0) cb = 0; else if(icb > 255) cb = 255; else cb = (byte)icb;
-			if(icr < 0) cr = 0; else if(icr > 255) cr = 255; else cr = (byte)icr;
-		}
+            if(iyy < 0) yy = 0; else if(iyy > 255) yy = 255; else yy = (byte)iyy;
+            if(icb < 0) cb = 0; else if(icb > 255) cb = 255; else cb = (byte)icb;
+            if(icr < 0) cr = 0; else if(icr > 255) cr = 255; else cr = (byte)icr;
+        }
 
-		public static void YCbCrToRGB(byte yy, byte cb, byte cr, out byte r, out byte g, out byte b)
-		{
-			// The JFIF specification says:
-			//	R = Y' + 1.40200*(Cr-128)
-			//	G = Y' - 0.34414*(Cb-128) - 0.71414*(Cr-128)
-			//	B = Y' + 1.77200*(Cb-128)
-			// http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
+        public static void YCbCrToRGB(byte yy, byte cb, byte cr, out byte r, out byte g, out byte b)
+        {
+            // The JFIF specification says:
+            //  R = Y' + 1.40200*(Cr-128)
+            //  G = Y' - 0.34414*(Cb-128) - 0.71414*(Cr-128)
+            //  B = Y' + 1.77200*(Cb-128)
+            // http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
 
-			int yy1 = yy * 0x10100; // Convert 0x12 to 0x121200.
-			int cb1 = cb - 128;
-			int cr1 = cr - 128;
-			int ir = (yy1 + 91881*cr1) >> 16;
-			int ig = (yy1 - 22554*cb1 - 46802*cr1) >> 16;
-			int ib = (yy1 + 116130*cb1) >> 16;
+            int yy1 = yy * 0x10100; // Convert 0x12 to 0x121200.
+            int cb1 = cb - 128;
+            int cr1 = cr - 128;
+            int ir = (yy1 + 91881*cr1) >> 16;
+            int ig = (yy1 - 22554*cb1 - 46802*cr1) >> 16;
+            int ib = (yy1 + 116130*cb1) >> 16;
 
-			if(ir < 0) r = 0; else if(ir > 255) r = 255; else r = (byte)ir;
-			if(ig < 0) g = 0; else if(ig > 255) g = 255; else g = (byte)ig;
-			if(ib < 0) b = 0; else if(ib > 255) b = 255; else b = (byte)ib;
-		}
-	}
+            if(ir < 0) r = 0; else if(ir > 255) r = 255; else r = (byte)ir;
+            if(ig < 0) g = 0; else if(ig > 255) g = 255; else g = (byte)ig;
+            if(ib < 0) b = 0; else if(ib > 255) b = 255; else b = (byte)ib;
+        }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/Decoder.cs b/src/ImageProcessorCore/Formats/Jpg/Decoder.cs
index de7ff752ad..0a968bbbe5 100644
--- a/src/ImageProcessorCore/Formats/Jpg/Decoder.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/Decoder.cs
@@ -1,1654 +1,1654 @@
 namespace ImageProcessorCore.Formats.Jpg
 {
-	using System;
-	using System.IO;
-	using System.Threading.Tasks;
-
-	internal partial class Decoder
-	{
-		private class errMissingFF00 : Exception { }
-		private class errShortHuffmanData : Exception { }
-		private class errEOF : Exception { }
-
-		// maxCodeLength is the maximum (inclusive) number of bits in a Huffman code.
-		private const int maxCodeLength = 16;
-		// maxNCodes is the maximum (inclusive) number of codes in a Huffman tree.
-		private const int maxNCodes = 256;
-		// lutSize is the log-2 size of the Huffman decoder's look-up table.
-		private const int lutSize = 8;
-		private const int maxComponents = 4;
-		private const int maxTc = 1;
-		private const int maxTh = 3;
-		private const int maxTq = 3;
-		private const int dcTable = 0;
-		private const int acTable = 1;
-
-		private const int sof0Marker = 0xc0; // Start Of Frame (Baseline).
-		private const int sof1Marker = 0xc1; // Start Of Frame (Extended Sequential).
-		private const int sof2Marker = 0xc2; // Start Of Frame (Progressive).
-		private const int dhtMarker  = 0xc4; // Define Huffman Table.
-		private const int rst0Marker = 0xd0; // ReSTart (0).
-		private const int rst7Marker = 0xd7; // ReSTart (7).
-		private const int soiMarker  = 0xd8; // Start Of Image.
-		private const int eoiMarker  = 0xd9; // End Of Image.
-		private const int sosMarker  = 0xda; // Start Of Scan.
-		private const int dqtMarker  = 0xdb; // Define Quantization Table.
-		private const int driMarker  = 0xdd; // Define Restart Interval.
-		private const int comMarker  = 0xfe; // COMment.
-		// "APPlication specific" markers aren't part of the JPEG spec per se,
-		// but in practice, their use is described at
-		// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
-		private const int app0Marker  = 0xe0;
-		private const int app14Marker = 0xee;
-		private const int app15Marker = 0xef;
-
-		// See http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
-		private const int adobeTransformUnknown = 0;
-		private const int adobeTransformYCbCr = 1;
-		private const int adobeTransformYCbCrK = 2;
-
-		// unzig maps from the zig-zag ordering to the natural ordering. For example,
-		// unzig[3] is the column and row of the fourth element in zig-zag order. The
-		// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
-		private static readonly int[] unzig = new int[] {
-			0, 1, 8, 16, 9, 2, 3, 10,
-			17, 24, 32, 25, 18, 11, 4, 5,
-			12, 19, 26, 33, 40, 48, 41, 34,
-			27, 20, 13, 6, 7, 14, 21, 28,
-			35, 42, 49, 56, 57, 50, 43, 36,
-			29, 22, 15, 23, 30, 37, 44, 51,
-			58, 59, 52, 45, 38, 31, 39, 46,
-			53, 60, 61, 54, 47, 55, 62, 63,
-		};
-
-		private class component
-		{
-			public int h; // Horizontal sampling factor.
-			public int v; // Vertical sampling factor.
-			public byte c; // Component identifier.
-			public byte tq; // Quantization table destination selector.
-		}
-
-		public Decoder()
-		{
-			huff = new huffman_class[maxTc + 1, maxTh + 1];
-			quant = new Block[maxTq + 1];
-			tmp = new byte[2 * Block.blockSize];
-			comp = new component[maxComponents];
-			progCoeffs = new Block[maxComponents][];
-			bits = new bits_class();
-			bytes = new bytes_class();
-
-			for(int i = 0; i < maxTc + 1; i++)
-				for(int j = 0; j < maxTh + 1; j++)
-					huff[i, j] = new huffman_class();
-
-			for(int i = 0; i < quant.Length; i++)
-				quant[i] = new Block();
-
-			for(int i = 0; i < comp.Length; i++)
-				comp[i] = new component();
-		}
-
-		private class img_ycbcr
-		{
-			public enum YCbCrSubsampleRatio
-			{
-				YCbCrSubsampleRatio444,
-				YCbCrSubsampleRatio422,
-				YCbCrSubsampleRatio420,
-				YCbCrSubsampleRatio440,
-				YCbCrSubsampleRatio411,
-				YCbCrSubsampleRatio410,
-			}
-
-			private static void yCbCrSize(int w, int h, YCbCrSubsampleRatio ratio, out int cw, out int ch)
-			{
-				switch(ratio)
-				{
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio422:
-						cw = (w+1)/2;
-						ch = h;
-						break;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio420:
-						cw = (w+1)/2;
-						ch = (h+1)/2;
-						break;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio440:
-						cw = w;
-						ch = (h+1)/2;
-						break;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio411:
-						cw = (w+3)/4;
-						ch = h;
-						break;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio410:
-						cw = (w+3)/4;
-						ch = (h+1)/2;
-						break;
-					default:
-						// Default to 4:4:4 subsampling.
-						cw = w;
-						ch = h;
-						break;
-				}
-			}
-
-			private img_ycbcr()
-			{
-			}
-
-			public byte[] pix_y, pix_cb, pix_cr;
-			public int y_stride, c_stride;
-			public int y_offset, c_offset;
-			public int x, y, w, h;
-			public YCbCrSubsampleRatio ratio;
-
-			public img_ycbcr(int w, int h, YCbCrSubsampleRatio ratio)
-			{
-				int cw, ch;
-				yCbCrSize(w, h, ratio, out cw, out ch);
-				this.pix_y = new byte[w*h];
-				this.pix_cb = new byte[cw*ch];
-				this.pix_cr = new byte[cw*ch];
-				this.ratio = ratio;
-				this.y_stride = w;
-				this.c_stride = cw;
-				this.x = 0;
-				this.y = 0;
-				this.w = w;
-				this.h = h;
-			}
-
-			public img_ycbcr subimage(int x, int y, int w, int h)
-			{
-				var ret = new img_ycbcr();
-				ret.w = w;
-				ret.h = h;
-				ret.pix_y = pix_y;
-				ret.pix_cb = pix_cb;
-				ret.pix_cr = pix_cr;
-				ret.ratio = ratio;
-				ret.y_stride = y_stride;
-				ret.c_stride = c_stride;
-				ret.y_offset = y * y_stride + x;
-				ret.c_offset = y * c_stride + x;
-				return ret;
-			}
-
-			public int get_row_y_offset(int y)
-			{
-				return y * y_stride;
-			}
-
-			public int get_row_c_offset(int y)
-			{
-				switch(ratio)
-				{
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio422:
-						return y * c_stride;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio420:
-						return (y/2) * c_stride;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio440:
-						return (y/2) * c_stride;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio411:
-						return y * c_stride;
-					case YCbCrSubsampleRatio.YCbCrSubsampleRatio410:
-						return (y/2) * c_stride;
-					default:
-						return y * c_stride;
-				}
-			}
-		}
-
-		public class img_rgb
-		{
-			public byte[] pixels;
-			public int stride;
-			public int w, h;
-
-			private img_rgb()
-			{
-			}
-
-			public img_rgb(int w, int h)
-			{
-				this.w = w;
-				this.h = h;
-				this.pixels = new byte[w*h*3];
-				this.stride = w*3;
-			}
-
-			public int get_row_offset(int y)
-			{
-				return y * stride;
-			}
-		}
-
-		public class img_gray
-		{
-			public byte[] pixels;
-			public int stride;
-			public int x, y, w, h;
-			public int offset;
-
-			private img_gray()
-			{
-			}
-
-			public img_gray(int w, int h)
-			{
-				this.w = w;
-				this.h = h;
-				this.pixels = new byte[w*h];
-				this.stride = w;
-				this.offset = 0;
-			}
-
-			public img_gray subimage(int x, int y, int w, int h)
-			{
-				var ret = new img_gray();
-				ret.w = w;
-				ret.h = h;
-				ret.pixels = pixels;
-				ret.stride = stride;
-				ret.offset = y * stride + x;
-				return ret;
-			}
-
-			public int get_row_offset(int y)
-			{
-				return offset + y * stride;
-			}
-		}
-
-		private class huffman_class
-		{
-			public huffman_class()
-			{
-				lut = new ushort[1 << lutSize];
-				vals = new byte[maxNCodes];
-				minCodes = new int[maxCodeLength];
-				maxCodes = new int[maxCodeLength];
-				valsIndices = new int[maxCodeLength];
-				nCodes = 0;
-			}
-
-			// length is the number of codes in the tree.
-			public int nCodes;
-			// lut is the look-up table for the next lutSize bits in the bit-stream.
-			// The high 8 bits of the uint16 are the encoded value. The low 8 bits
-			// are 1 plus the code length, or 0 if the value is too large to fit in
-			// lutSize bits.
-			public ushort[] lut;
-			// vals are the decoded values, sorted by their encoding.
-			public byte[] vals;
-			// minCodes[i] is the minimum code of length i, or -1 if there are no
-			// codes of that length.
-			public int[] minCodes;
-			// maxCodes[i] is the maximum code of length i, or -1 if there are no
-			// codes of that length.
-			public int[] maxCodes;
-			// valsIndices[i] is the index into vals of minCodes[i].
-			public int[] valsIndices;
-		}
-
-		// bytes is a byte buffer, similar to a bufio.Reader, except that it
-		// has to be able to unread more than 1 byte, due to byte stuffing.
-		// Byte stuffing is specified in section F.1.2.3.
-		private class bytes_class
-		{
-			public bytes_class()
-			{
-				buf = new byte[4096];
-				i = 0;
-				j = 0;
-				nUnreadable = 0;
-			}
-
-			// buf[i:j] are the buffered bytes read from the underlying
-			// io.Reader that haven't yet been passed further on.
-			public byte[] buf;
-			public int i, j;
-			// nUnreadable is the number of bytes to back up i after
-			// overshooting. It can be 0, 1 or 2.
-			public int nUnreadable;
-		}
-
-		// bits holds the unprocessed bits that have been taken from the byte-stream.
-		// The n least significant bits of a form the unread bits, to be read in MSB to
-		// LSB order.
-		private class bits_class
-		{
-			public uint a; // accumulator.
-			public uint m; // mask. m==1<<(n-1) when n>0, with m==0 when n==0.
-			public int n; // the number of unread bits in a.
-		}
-
-		public int width, height;
-		public int nComp;
-		public img_gray img1; //grayscale
-		public img_rgb imgrgb; //RGB
-
-		private img_ycbcr img3; //YCrCb
-		private Stream r;
-		private bits_class bits;
-		private bytes_class bytes;
-		private byte[] blackPix;
-		private int blackStride;
-
-		private int ri; // Restart Interval.
-		private bool progressive;
-		private bool jfif;
-		private bool adobeTransformValid;
-		private byte adobeTransform;
-		private ushort eobRun; // End-of-Band run, specified in section G.1.2.2.
-		private component[] comp;
-		private Block[][] progCoeffs; // Saved state between progressive-mode scans.
-		private huffman_class[,] huff;
-		private Block[] quant; // Quantization tables, in zig-zag order.
-		private byte[] tmp;
-
-		// ensureNBits reads bytes from the byte buffer to ensure that bits.n is at
-		// least n. For best performance (avoiding function calls inside hot loops),
-		// the caller is the one responsible for first checking that bits.n < n.
-		private void ensureNBits(int n)
-		{
-			while(true)
-			{
-				var c = readByteStuffedByte();
-				bits.a = bits.a<<8 | (uint)c;
-				bits.n += 8;
-				if(bits.m == 0)
-					bits.m = 1 << 7;
-				else
-					bits.m <<= 8;
-				if(bits.n >= n)
-					break;
-			}
-		}
-
-		// receiveExtend is the composition of RECEIVE and EXTEND, specified in section
-		// F.2.2.1.
-		private int receiveExtend(byte t)
-		{
-			if(bits.n < t)
-				ensureNBits(t);
-
-			bits.n -= t;
-			bits.m >>= t;
-			int s = 1 << t;
-			int x = (int)((bits.a>>bits.n) & (s - 1));
-			if(x < (s>>1))
-				x += ((-1) << t) + 1;
-			return x;
-		}
-
-		// processDHT processes a Define Huffman Table marker, and initializes a huffman
-		// struct from its contents. Specified in section B.2.4.2.
-		private void processDHT(int n)
-		{
-			while(n > 0)
-			{
-				if(n < 17)
-					throw new Exception("DHT has wrong length");
-
-				readFull(tmp, 0, 17);
-
-				int tc = tmp[0] >> 4;
-				if(tc > maxTc)
-					throw new Exception("bad Tc value");
-
-				int th = tmp[0] & 0x0f;
-				if(th > maxTh || !progressive && th > 1)
-					throw new Exception("bad Th value");
-
-				huffman_class h = huff[tc, th];
-
-				// Read nCodes and h.vals (and derive h.nCodes).
-				// nCodes[i] is the number of codes with code length i.
-				// h.nCodes is the total number of codes.
-				h.nCodes = 0;
-
-				int[] ncodes = new int[maxCodeLength];
-				for(int i = 0; i < ncodes.Length; i++)
-				{
-					ncodes[i] = tmp[i+1];
-					h.nCodes += ncodes[i];
-				}
-
-				if(h.nCodes == 0)
-					throw new Exception("Huffman table has zero length");
-				if(h.nCodes > maxNCodes)
-					throw new Exception("Huffman table has excessive length");
-
-				n -= h.nCodes + 17;
-				if(n < 0)
-					throw new Exception("DHT has wrong length");
-
-				readFull(h.vals, 0, h.nCodes);
-
-				// Derive the look-up table.
-				for(int i = 0; i < h.lut.Length; i++)
-					h.lut[i] = 0;
-
-				uint x = 0, code = 0;
-
-				for(int i = 0; i < lutSize; i++)
-				{
-					code <<= 1;
-
-					for(int j = 0; j < ncodes[i]; j++)
-					{
-						// The codeLength is 1+i, so shift code by 8-(1+i) to
-						// calculate the high bits for every 8-bit sequence
-						// whose codeLength's high bits matches code.
-						// The high 8 bits of lutValue are the encoded value.
-						// The low 8 bits are 1 plus the codeLength.
-						byte base2 = (byte)(code << (7 - i));
-						ushort lutValue = (ushort)(((ushort)h.vals[x]<<8) | (2+i));
-						for(int k = 0; k < 1<<(7-i); k++)
-							h.lut[base2|k] = lutValue;
-						code++;
-						x++;
-					}
-				}
-
-				// Derive minCodes, maxCodes, and valsIndices.
-				int c = 0, index = 0;
-				for(int i = 0; i < ncodes.Length; i++)
-				{
-					int nc = ncodes[i];
-					if(nc == 0)
-					{
-						h.minCodes[i] = -1;
-						h.maxCodes[i] = -1;
-						h.valsIndices[i] = -1;
-					}
-					else
-					{
-						h.minCodes[i] = c;
-						h.maxCodes[i] = c + nc - 1;
-						h.valsIndices[i] = index;
-						c += nc;
-						index += nc;
-					}
-					c <<= 1;
-				}
-			}
-		}
-
-		// decodeHuffman returns the next Huffman-coded value from the bit-stream,
-		// decoded according to h.
-		private byte decodeHuffman(huffman_class h)
-		{
-			if(h.nCodes == 0)
-				throw new Exception("uninitialized Huffman table");
-
-			if(bits.n < 8)
-			{
-				try
-				{
-					ensureNBits(8);
-				}
-				catch(errMissingFF00)
-				{
-					if(bytes.nUnreadable != 0)
-						unreadByteStuffedByte();
-					goto slowPath;
-				}
-				catch(errShortHuffmanData)
-				{
-					if(bytes.nUnreadable != 0)
-						unreadByteStuffedByte();
-					goto slowPath;
-				}
-			}
-
-			ushort v = h.lut[(bits.a>>(bits.n-lutSize))&0xff];
-			if(v != 0)
-			{
-				byte n = (byte)((v & 0xff) - 1);
-				bits.n -= n;
-				bits.m >>= n;
-				return (byte)(v >> 8);
-			}
-
-		slowPath:
-			int code = 0;
-			for(int i = 0; i < maxCodeLength; i++)
-			{
-				if(bits.n == 0)
-					ensureNBits(1);
-				if((bits.a & bits.m) != 0)
-					code |= 1;
-				bits.n--;
-				bits.m >>= 1;
-				if(code <= h.maxCodes[i])
-					return h.vals[h.valsIndices[i]+code-h.minCodes[i]];
-				code <<= 1;
-			}
-
-			throw new Exception("bad Huffman code");
-		}
-
-		private bool decodeBit()
-		{
-			if(bits.n == 0)
-				ensureNBits(1);
-
-			bool ret = (bits.a & bits.m) != 0;
-			bits.n--;
-			bits.m >>= 1;
-			return ret;
-		}
-
-		private uint decodeBits(int n)
-		{
-			if(bits.n < n)
-				ensureNBits(n);
-
-			uint ret = bits.a >> (bits.n-n);
-			ret = (uint)(ret & ((1 << n) - 1));
-			bits.n -= n;
-			bits.m >>= n;
-			return ret;
-		}
-
-		// fill fills up the bytes.buf buffer from the underlying io.Reader. It
-		// should only be called when there are no unread bytes in bytes.
-		private void fill()
-		{
-			if(bytes.i != bytes.j)
-				throw new Exception("jpeg: fill called when unread bytes exist");
-
-			// Move the last 2 bytes to the start of the buffer, in case we need
-			// to call unreadByteStuffedByte.
-			if(bytes.j > 2)
-			{
-				bytes.buf[0] = bytes.buf[bytes.j-2];
-				bytes.buf[1] = bytes.buf[bytes.j-1];
-				bytes.i = 2;
-				bytes.j = 2;
-			}
-
-			// Fill in the rest of the buffer.
-			int n = r.Read(bytes.buf, bytes.j, bytes.buf.Length - bytes.j);
-			if(n == 0)
-				throw new errEOF();
-			bytes.j += n;
-		}
-
-		// unreadByteStuffedByte undoes the most recent readByteStuffedByte call,
-		// giving a byte of data back from bits to bytes. The Huffman look-up table
-		// requires at least 8 bits for look-up, which means that Huffman decoding can
-		// sometimes overshoot and read one or two too many bytes. Two-byte overshoot
-		// can happen when expecting to read a 0xff 0x00 byte-stuffed byte.
-		private void unreadByteStuffedByte()
-		{
-			bytes.i -= bytes.nUnreadable;
-			bytes.nUnreadable = 0;
-			if(bits.n >= 8)
-			{
-				bits.a >>= 8;
-				bits.n -= 8;
-				bits.m >>= 8;
-			}
-		}
-
-		// readByte returns the next byte, whether buffered or not buffere It does
-		// not care about byte stuffing.
-		private byte readByte()
-		{
-			while(bytes.i == bytes.j)
-				fill();
-			byte x = bytes.buf[bytes.i];
-			bytes.i++;
-			bytes.nUnreadable = 0;
-			return x;
-		}
-
-		// readByteStuffedByte is like readByte but is for byte-stuffed Huffman data.
-		private byte readByteStuffedByte()
-		{
-			byte x;
-
-			// Take the fast path if bytes.buf contains at least two bytes.
-			if(bytes.i+2 <= bytes.j)
-			{
-				x = bytes.buf[bytes.i];
-				bytes.i++;
-				bytes.nUnreadable = 1;
-				if(x != 0xff)
-					return x;
-				if(bytes.buf[bytes.i] != 0x00)
-					throw new errMissingFF00();
-				bytes.i++;
-				bytes.nUnreadable = 2;
-				return 0xff;
-			}
-
-			bytes.nUnreadable = 0;
-
-			x = readByte();
-			bytes.nUnreadable = 1;
-			if(x != 0xff)
-				return x;
-			x = readByte();
-			bytes.nUnreadable = 2;
-			if(x != 0x00)
-				throw new errMissingFF00();
-			return 0xff;
-		}
-
-		// readFull reads exactly len(p) bytes into p. It does not care about byte
-		// stuffing.
-		void readFull(byte[] data, int offset, int len)
-		{
-			// Unread the overshot bytes, if any.
-			if(bytes.nUnreadable != 0)
-			{
-				if(bits.n >= 8)
-					unreadByteStuffedByte();
-				bytes.nUnreadable = 0;
-			}
-
-			while(len > 0)
-			{
-				if(bytes.j - bytes.i >= len)
-				{
-					Array.Copy(bytes.buf, bytes.i, data, offset, len);
-					bytes.i += len;
-					len -= len;
-				}
-				else
-				{
-					Array.Copy(bytes.buf, bytes.i, data, offset, bytes.j - bytes.i);
-					bytes.i += bytes.j - bytes.i;
-					offset += bytes.j - bytes.i;
-					len -= bytes.j - bytes.i;
-
-					fill();
-				}
-			}
-		}
-
-		// ignore ignores the next n bytes.
-		private void ignore(int n)
-		{
-			// Unread the overshot bytes, if any.
-			if(bytes.nUnreadable != 0)
-			{
-				if(bits.n >= 8)
-					unreadByteStuffedByte();
-				bytes.nUnreadable = 0;
-			}
-
-			while(true)
-			{
-				int m = bytes.j - bytes.i;
-				if(m > n) m = n;
-				bytes.i += m;
-				n -= m;
-				if(n == 0)
-					break;
-				else
-					fill();
-			}
-		}
-
-		// Specified in section B.2.2.
-		private void processSOF(int n)
-		{
-			if(nComp != 0)
-				throw new Exception("multiple SOF markers");
-
-			switch(n)
-			{
-				case 6 + 3*1: // Grayscale image.
-					nComp = 1;
-					break;
-				case 6 + 3*3: // YCbCr or RGB image.
-					nComp = 3;
-					break;
-				case 6 + 3*4: // YCbCrK or CMYK image.
-					nComp = 4;
-					break;
-				default:
-					throw new Exception("number of components");
-			}
-
-			readFull(tmp, 0, n);
-
-			// We only support 8-bit precision.
-			if(tmp[0] != 8)
-				throw new Exception("precision");
-
-			height = (tmp[1]<<8) + tmp[2];
-			width = (tmp[3]<<8) + tmp[4];
-			if(tmp[5] != nComp)
-				throw new Exception("SOF has wrong length");
-
-			for(int i = 0; i < nComp; i++)
-			{
-				comp[i].c = tmp[6+3*i];
-				// Section B.2.2 states that "the value of C_i shall be different from
-				// the values of C_1 through C_(i-1)".
-				for(int j = 0; j < i; j++)
-				{
-					if(comp[i].c == comp[j].c)
-						throw new Exception("repeated component identifier");
-				}
-
-				comp[i].tq = tmp[8+3*i];
-				if(comp[i].tq > maxTq)
-					throw new Exception("bad Tq value");
-
-				byte hv = tmp[7+3*i];
-				int h = hv>>4;
-				int v = hv&0x0f;
-				if(h < 1 || 4 < h || v < 1 || 4 < v)
-					throw new Exception("luma/chroma subsampling ratio");
-				if(h == 3 || v == 3)
-					throw new Exception("unsupported subsampling ratio");
-
-				switch(nComp)
-				{
-					case 1:
-					{
-						// If a JPEG image has only one component, section A.2 says "this data
-						// is non-interleaved by definition" and section A.2.2 says "[in this
-						// case...] the order of data units within a scan shall be left-to-right
-						// and top-to-bottom... regardless of the values of H_1 and V_1". Section
-						// 4.8.2 also says "[for non-interleaved data], the MCU is defined to be
-						// one data unit". Similarly, section A.1.1 explains that it is the ratio
-						// of H_i to max_j(H_j) that matters, and similarly for V. For grayscale
-						// images, H_1 is the maximum H_j for all components j, so that ratio is
-						// always 1. The component's (h, v) is effectively always (1, 1): even if
-						// the nominal (h, v) is (2, 1), a 20x5 image is encoded in three 8x8
-						// MCUs, not two 16x8 MCUs.
-						h = 1;
-						v = 1;
-						break;
-					}
-
-					case 3:
-					{
-						// For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
-						// 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
-						// (h, v) values for the Y component are either (1, 1), (1, 2),
-						// (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
-						// must be a multiple of the Cb and Cr component's values. We also
-						// assume that the two chroma components have the same subsampling
-						// ratio.
-						switch(i)
-						{
-							case 0: // Y.
-							{
-								// We have already verified, above, that h and v are both
-								// either 1, 2 or 4, so invalid (h, v) combinations are those
-								// with v == 4.
-								if(v == 4)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-							}
-							case 1: // Cb.
-							{
-								if(comp[0].h%h != 0 || comp[0].v%v != 0)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-							}
-							case 2: // Cr.
-							{
-								if(comp[1].h != h || comp[1].v != v)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-							}
-						}
-						break;
-					}
-
-					case 4:
-					{
-						// For 4-component images (either CMYK or YCbCrK), we only support two
-						// hv vectors: [0x11 0x11 0x11 0x11] and [0x22 0x11 0x11 0x22].
-						// Theoretically, 4-component JPEG images could mix and match hv values
-						// but in practice, those two combinations are the only ones in use,
-						// and it simplifies the applyBlack code below if we can assume that:
-						//	- for CMYK, the C and K channels have full samples, and if the M
-						//	  and Y channels subsample, they subsample both horizontally and
-						//	  vertically.
-						//	- for YCbCrK, the Y and K channels have full samples.
-						switch(i)
-						{
-							case 0:
-								if(hv != 0x11 && hv != 0x22)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-							case 1:
-							case 2:
-								if(hv != 0x11)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-							case 3:
-								if(comp[0].h != h || comp[0].v != v)
-									throw new Exception("unsupported subsampling ratio");
-								break;
-						}
-						break;
-					}
-				}
-
-				comp[i].h = h;
-				comp[i].v = v;
-			}
-		}
-
-		// Specified in section B.2.4.1.
-		private void processDQT(int n)
-		{
-			while(n > 0)
-			{
-				bool done = false;
-
-				n--;
-				byte x = readByte();
-				byte tq = (byte)(x & 0x0f);
-				if(tq > maxTq)
-					throw new Exception("bad Tq value");
-
-				switch(x >> 4)
-				{
-					case 0:
-						if(n < Block.blockSize)
-						{
-							done = true;
-							break;
-						}
-						n -= Block.blockSize;
-						readFull(tmp, 0, Block.blockSize);
-
-						for(int i = 0; i < Block.blockSize; i++)
-							quant[tq][i] = tmp[i];
-						break;
-					case 1:
-						if(n < 2*Block.blockSize)
-						{
-							done = true;
-							break;
-						}
-						n -= 2 * Block.blockSize;
-						readFull(tmp, 0, 2*Block.blockSize);
-
-						for(int i = 0; i < Block.blockSize; i++)
-							quant[tq][i] = ((int)tmp[2*i]<<8) | (int)tmp[2*i+1];
-						break;
-					default:
-						throw new Exception("bad Pq value");
-				}
-
-				if(done)
-					break;
-			}
-
-			if(n != 0)
-				throw new Exception("DQT has wrong length");
-		}
-
-		// Specified in section B.2.4.4.
-		private void processDRI(int n)
-		{
-			if(n != 2)
-				throw new Exception("DRI has wrong length");
-
-			readFull(tmp, 0, 2);
-			ri = ((int)tmp[0]<<8) + (int)tmp[1];
-		}
-
-		private void processApp0Marker(int n)
-		{
-			if(n < 5)
-			{
-				ignore(n);
-				return;
-			}
-
-			readFull(tmp, 0, 5);
-			n -= 5;
-
-			jfif = tmp[0] == 'J' && tmp[1] == 'F' && tmp[2] == 'I' && tmp[3] == 'F' && tmp[4] == '\x00';
-			if(n > 0)
-				ignore(n);
-		}
-
-		private void processApp14Marker(int n)
-		{
-			if(n < 12)
-			{
-				ignore(n);
-				return;
-			}
-
-			readFull(tmp, 0, 12);
-			n -= 12;
-
-			if(tmp[0] == 'A' && tmp[1] == 'd' && tmp[2] == 'o' && tmp[3] == 'b' && tmp[4] == 'e')
-			{
-				adobeTransformValid = true;
-				adobeTransform = tmp[11];
-			}
-
-			if(n > 0)
-				ignore(n);
-		}
-
-		// decode reads a JPEG image from r and returns it as an image.Image.
-		public void decode(Stream r, bool configOnly)
-		{
-			this.r = r;
-
-			// Check for the Start Of Image marker.
-			readFull(tmp, 0, 2);
-			if(tmp[0] != 0xff || tmp[1] != soiMarker)
-				throw new Exception("missing SOI marker");
-
-			// Process the remaining segments until the End Of Image marker.
-			while(true)
-			{
-				readFull(tmp, 0, 2);
-				while(tmp[0] != 0xff)
-				{
-					// Strictly speaking, this is a format error. However, libjpeg is
-					// liberal in what it accepts. As of version 9, next_marker in
-					// jdmarker.c treats this as a warning (JWRN_EXTRANEOUS_DATA) and
-					// continues to decode the stream. Even before next_marker sees
-					// extraneous data, jpeg_fill_bit_buffer in jdhuff.c reads as many
-					// bytes as it can, possibly past the end of a scan's data. It
-					// effectively puts back any markers that it overscanned (e.g. an
-					// "\xff\xd9" EOI marker), but it does not put back non-marker data,
-					// and thus it can silently ignore a small number of extraneous
-					// non-marker bytes before next_marker has a chance to see them (and
-					// print a warning).
-					//
-					// We are therefore also liberal in what we accept. Extraneous data
-					// is silently ignore
-					//
-					// This is similar to, but not exactly the same as, the restart
-					// mechanism within a scan (the RST[0-7] markers).
-					//
-					// Note that extraneous 0xff bytes in e.g. SOS data are escaped as
-					// "\xff\x00", and so are detected a little further down below.
-					tmp[0] = tmp[1];
-					tmp[1] = readByte();
-				}
-				byte marker = tmp[1];
-				if(marker == 0) {
-					// Treat "\xff\x00" as extraneous data.
-					continue;
-				}
-
-				while(marker == 0xff)
-				{
-					// Section B.1.1.2 says, "Any marker may optionally be preceded by any
-					// number of fill bytes, which are bytes assigned code X'FF'".
-					marker = readByte();
-				}
-
-				if(marker == eoiMarker) // End Of Image.
-					break;
-
-				if(rst0Marker <= marker && marker <= rst7Marker)
-				{
-					// Figures B.2 and B.16 of the specification suggest that restart markers should
-					// only occur between Entropy Coded Segments and not after the final ECS.
-					// However, some encoders may generate incorrect JPEGs with a final restart
-					// marker. That restart marker will be seen here instead of inside the processSOS
-					// method, and is ignored as a harmless error. Restart markers have no extra data,
-					// so we check for this before we read the 16-bit length of the segment.
-					continue;
-				}
-
-				// Read the 16-bit length of the segment. The value includes the 2 bytes for the
-				// length itself, so we subtract 2 to get the number of remaining bytes.
-				readFull(tmp, 0, 2);
-				int n = ((int)tmp[0]<<8) + (int)tmp[1] - 2;
-				if(n < 0)
-					throw new Exception("short segment length");
-
-				switch(marker)
-				{
-					case sof0Marker:
-					case sof1Marker:
-					case sof2Marker:
-						progressive = (marker == sof2Marker);
-						processSOF(n);
-						if(configOnly && jfif)
-							return;
-						break;
-					case dhtMarker:
-						if(configOnly)
-							ignore(n);
-						else
-							processDHT(n);
-						break;
-					case dqtMarker:
-						if(configOnly)
-							ignore(n);
-						else
-							processDQT(n);
-						break;
-					case sosMarker:
-						if(configOnly)
-							return;
-						else
-							processSOS(n);
-						break;
-					case driMarker:
-						if(configOnly)
-							ignore(n);
-						else
-							processDRI(n);
-						break;
-					case app0Marker:
-						processApp0Marker(n);
-						break;
-					case app14Marker:
-						processApp14Marker(n);
-						break;
-					default:
-						if(app0Marker <= marker && marker <= app15Marker || marker == comMarker)
-							ignore(n);
-						else if(marker < 0xc0) // See Table B.1 "Marker code assignments".
-							throw new Exception("unknown marker");
-						else
-							throw new Exception("unknown marker");
-						break;
-					}
-			}
-
-			if(img1 != null)
-				return;
-			else if(img3 != null)
-			{
-				if(comp[0].c == 'R' && comp[1].c == 'G' && comp[2].c == 'B')
-					imgrgb = convert_direct_to_rgb(width, height);
-				else
-					imgrgb = convert_to_rgb(width, height);
-			}
-
-			/*if img3 != nil {
-				if blackPix != nil {
-					return applyBlack()
-				} else if isRGB() {
-					return convertToRGB()
-				}
-				return img3, nil
-			}*/
-
-			throw new Exception("missing SOS marker");
-		}
-
-		private img_rgb convert_to_rgb(int w, int h)
-		{
-			var ret = new img_rgb(w, h);
-			int cScale = comp[0].h / comp[1].h;
-
-			Parallel.For(
-				0,
-				h,
-				y =>
-				{
-					int po = ret.get_row_offset(y);
-					int yo = img3.get_row_y_offset(y);
-					int co = img3.get_row_c_offset(y);
-
-					for (int x = 0; x < w; x++)
-					{
-						byte yy = img3.pix_y[yo+x];
-						byte cb = img3.pix_cb[co+x/cScale];
-						byte cr = img3.pix_cr[co+x/cScale];
-
-						byte r, g, b;
-						Colors.YCbCrToRGB(yy, cb, cr, out r, out g, out b);
-						ret.pixels[po+3*x+0] = r;
-						ret.pixels[po+3*x+1] = g;
-						ret.pixels[po+3*x+2] = b;
-					}
-				}
-			);
-
-			return ret;
-		}
-
-		private img_rgb convert_direct_to_rgb(int w, int h)
-		{
-			var ret = new img_rgb(w, h);
-			
-			int cScale = comp[0].h / comp[1].h;
-			for(var y = 0; y < h; y++)
-			{
-				int po = ret.get_row_offset(y);
-				int yo = img3.get_row_y_offset(y);
-				int co = img3.get_row_c_offset(y);
-				for(int x = 0; x < w; x++)
-				{
-					ret.pixels[po+3*x+0] = img3.pix_y[yo+x];
-					ret.pixels[po+3*x+1] = img3.pix_cb[co+x/cScale];
-					ret.pixels[po+3*x+2] = img3.pix_cr[co+x/cScale];
-				}
-			}
-
-			return ret;
-		}
-
-		private struct scan_scruct {
-			public byte compIndex;
-			public byte td;
-			public byte ta;
-		}
-
-		// Specified in section B.2.3.
-		void processSOS(int n)
-		{
-			if(nComp == 0)
-				throw new Exception("missing SOF marker");
-
-			if(n < 6 || 4+2*nComp < n || n%2 != 0)
-				throw new Exception("SOS has wrong length");
-
-			readFull(tmp, 0, n);
-			nComp = tmp[0];
-			if(n != 4+2*nComp)
-				throw new Exception("SOS length inconsistent with number of components");
-
-			var scan = new scan_scruct[maxComponents];
-			int totalHV = 0;
-			for(int i = 0; i < nComp; i++)
-			{
-				int cs = tmp[1+2*i]; // Component selector.
-				int compIndex = -1;
-				for(int j = 0; j < nComp; j++)
-				{
-					var compv = comp[j];
-					if(cs == compv.c)
-						compIndex = j;
-				}
-				if(compIndex < 0)
-					throw new Exception("unknown component selector");
-
-				scan[i].compIndex = (byte)compIndex;
-				// Section B.2.3 states that "the value of Cs_j shall be different from
-				// the values of Cs_1 through Cs_(j-1)". Since we have previously
-				// verified that a frame's component identifiers (C_i values in section
-				// B.2.2) are unique, it suffices to check that the implicit indexes
-				// into comp are unique.
-				for(int j = 0; j < i; j++)
-				{
-					if(scan[i].compIndex == scan[j].compIndex)
-						throw new Exception("repeated component selector");
-				}
-
-				totalHV += comp[compIndex].h * comp[compIndex].v;
-
-				scan[i].td = (byte)(tmp[2+2*i] >> 4);
-				if(scan[i].td > maxTh)
-					throw new Exception("bad Td value");
-
-				scan[i].ta = (byte)(tmp[2+2*i] & 0x0f);
-				if(scan[i].ta > maxTh)
-					throw new Exception("bad Ta value");
-			}
-
-			// Section B.2.3 states that if there is more than one component then the
-			// total H*V values in a scan must be <= 10.
-			if(nComp > 1 && totalHV > 10)
-				throw new Exception("total sampling factors too large");
-
-			// zigStart and zigEnd are the spectral selection bounds.
-			// ah and al are the successive approximation high and low values.
-			// The spec calls these values Ss, Se, Ah and Al.
-			//
-			// For progressive JPEGs, these are the two more-or-less independent
-			// aspects of progression. Spectral selection progression is when not
-			// all of a block's 64 DCT coefficients are transmitted in one pass.
-			// For example, three passes could transmit coefficient 0 (the DC
-			// component), coefficients 1-5, and coefficients 6-63, in zig-zag
-			// order. Successive approximation is when not all of the bits of a
-			// band of coefficients are transmitted in one pass. For example,
-			// three passes could transmit the 6 most significant bits, followed
-			// by the second-least significant bit, followed by the least
-			// significant bit.
-			//
-			// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
-			int zigStart = 0;
-			int zigEnd = Block.blockSize - 1;
-			int ah = 0;
-			int al = 0;
-
-			if(progressive)
-			{
-				zigStart = (int)(tmp[1+2*nComp]);
-				zigEnd = (int)(tmp[2+2*nComp]);
-				ah = (int)(tmp[3+2*nComp] >> 4);
-				al = (int)(tmp[3+2*nComp] & 0x0f);
-				if((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.blockSize <= zigEnd)
-					throw new Exception("bad spectral selection bounds");;
-				if(zigStart != 0 && nComp != 1)
-					throw new Exception("progressive AC coefficients for more than one component");
-				if(ah != 0 && ah != al+1)
-					throw new Exception("bad successive approximation values");
-			}
-
-			// mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
-			int h0 = comp[0].h;
-			int v0 = comp[0].v;
-			int mxx = (width + 8*h0 - 1) / (8 * h0);
-			int myy = (height + 8*v0 - 1) / (8 * v0);
-			
-			if(img1 == null && img3 == null)
-				makeImg(mxx, myy);
-
-			if(progressive)
-			{
-				for(int i = 0; i < nComp; i++)
-				{
-					int compIndex = scan[i].compIndex;
-					if(progCoeffs[compIndex] == null)
-					{
-						progCoeffs[compIndex] = new Block[mxx*myy*comp[compIndex].h*comp[compIndex].v];
-
-						for(int j = 0; j < progCoeffs[compIndex].Length; j++)
-							progCoeffs[compIndex][j] = new Block();
-					}
-				}
-			}
-
-			bits = new bits_class();
-
-			byte mcu = 0;
-			byte expectedRST = rst0Marker;
-
-			// b is the decoded coefficients, in natural (not zig-zag) order.
-			Block b = new Block();
-			int[] dc = new int[maxComponents];
-			// bx and by are the location of the current block, in units of 8x8
-			// blocks: the third block in the first row has (bx, by) = (2, 0).
-			int bx, by, blockCount = 0;
-
-			for(int my = 0; my < myy; my++)
-			{
-				for(int mx = 0; mx < mxx; mx++)
-				{
-					for(int i = 0; i < nComp; i++)
-					{
-						int compIndex = scan[i].compIndex;
-						int hi = comp[compIndex].h;
-						int vi = comp[compIndex].v;
-						Block qt = quant[comp[compIndex].tq];
-
-						for(int j = 0; j < hi*vi; j++)
-						{
-							// The blocks are traversed one MCU at a time. For 4:2:0 chroma
-							// subsampling, there are four Y 8x8 blocks in every 16x16 MCU.
-							//
-							// For a baseline 32x16 pixel image, the Y blocks visiting order is:
-							//	0 1 4 5
-							//	2 3 6 7
-							//
-							// For progressive images, the interleaved scans (those with nComp > 1)
-							// are traversed as above, but non-interleaved scans are traversed left
-							// to right, top to bottom:
-							//	0 1 2 3
-							//	4 5 6 7
-							// Only DC scans (zigStart == 0) can be interleave AC scans must have
-							// only one component.
-							//
-							// To further complicate matters, for non-interleaved scans, there is no
-							// data for any blocks that are inside the image at the MCU level but
-							// outside the image at the pixel level. For example, a 24x16 pixel 4:2:0
-							// progressive image consists of two 16x16 MCUs. The interleaved scans
-							// will process 8 Y blocks:
-							//	0 1 4 5
-							//	2 3 6 7
-							// The non-interleaved scans will process only 6 Y blocks:
-							//	0 1 2
-							//	3 4 5
-							if(nComp != 1)
-							{
-								bx = hi*mx + j%hi;
-								by = vi*my + j/hi;
-							}
-							else
-							{
-								int q = mxx * hi;
-								bx = blockCount % q;
-								by = blockCount / q;
-								blockCount++;
-								if(bx*8 >= width || by*8 >= height)
-									continue;
-							}
-
-							// Load the previous partially decoded coefficients, if applicable.
-							if(progressive)
-								b = progCoeffs[compIndex][by*mxx*hi+bx];
-							else
-								b = new Block();
-
-							if(ah != 0)
-							{
-								refine(b, huff[acTable, scan[i].ta], zigStart, zigEnd, 1<<al);
-							}
-							else
-							{
-								int zig = zigStart;
-								if(zig == 0)
-								{
-									zig++;
-									// Decode the DC coefficient, as specified in section F.2.2.1.
-									byte value = decodeHuffman(huff[dcTable, scan[i].td]);
-									if(value > 16)
-										throw new Exception("excessive DC component");
-									int dcDelta = receiveExtend(value);
-									dc[compIndex] += dcDelta;
-									b[0] = dc[compIndex] << al;
-								}
-
-								if(zig <= zigEnd && eobRun > 0)
-								{
-									eobRun--;
-								}
-								else
-								{
-									// Decode the AC coefficients, as specified in section F.2.2.2.
-									var huffv = huff[acTable, scan[i].ta];
-									for( ; zig <= zigEnd; zig++)
-									{
-										byte value = decodeHuffman(huffv);
-										byte val0 = (byte)(value >> 4);
-										byte val1 = (byte)(value & 0x0f);
-										if(val1 != 0)
-										{
-											zig += val0;
-											if(zig > zigEnd)
-												break;
-
-											var ac = receiveExtend(val1);
-											b[unzig[zig]] = ac << al;
-										}
-										else
-										{
-											if(val0 != 0x0f)
-											{
-												eobRun = (ushort)(1 << val0);
-												if(val0 != 0)
-												{
-													uint bits = decodeBits(val0);
-													eobRun |= (ushort)(bits);
-												}
-												eobRun--;
-												break;
-											}
-											zig += 0x0f;
-										}
-									}
-								}
-							}
-
-							if(progressive)
-							{
-								if(zigEnd != Block.blockSize-1 || al != 0)
-								{
-									// We haven't completely decoded this 8x8 block. Save the coefficients.
-									progCoeffs[compIndex][by*mxx*hi+bx] = b;
-									// At this point, we could execute the rest of the loop body to dequantize and
-									// perform the inverse DCT, to save early stages of a progressive image to the
-									// *image.YCbCr buffers (the whole point of progressive encoding), but in Go,
-									// the jpeg.Decode function does not return until the entire image is decoded,
-									// so we "continue" here to avoid wasted computation.
-									continue;
-								}
-							}
-
-							// Dequantize, perform the inverse DCT and store the block to the image.
-							for(int zig = 0; zig < Block.blockSize; zig++)
-								b[unzig[zig]] *= qt[zig];
-
-							IDCT(b);
-							
-							byte[] dst = null;
-							int offset = 0;
-							int stride = 0;
-
-							if(nComp == 1)
-							{
-								dst = img1.pixels;
-								stride = img1.stride;
-								offset = img1.offset + 8*(by*img1.stride+bx);
-							}
-							else
-							{
-								switch(compIndex)
-								{
-									case 0:
-									{
-										dst = img3.pix_y;
-										stride = img3.y_stride;
-										offset = img3.y_offset + 8*(by*img3.y_stride+bx);
-										break;
-									}
-									case 1:
-									{
-										dst = img3.pix_cb;
-										stride = img3.c_stride;
-										offset = img3.c_offset + 8*(by*img3.c_stride+bx);
-										break;
-									}
-									case 2:
-									{
-										dst = img3.pix_cr;
-										stride = img3.c_stride;
-										offset = img3.c_offset + 8*(by*img3.c_stride+bx);
-										break;
-									}
-									case 3:
-									{
-										//dst, stride = blackPix[8*(by*blackStride+bx):], blackStride
-										break;
-									}
-									default:
-										throw new Exception("too many components");
-								}
-							}
-
-							// Level shift by +128, clip to [0, 255], and write to dst.
-							for(int y = 0; y < 8; y++)
-							{
-								int y8 = y * 8;
-								int yStride = y * stride;
-								for(int x = 0; x < 8; x++)
-								{
-									int c = b[y8+x];
-									if(c < -128)
-										c = 0;
-									else if(c > 127)
-										c = 255;
-									else
-										c += 128;
-
-									dst[yStride+x+offset] = (byte)c;
-								}
-							}
-						} // for j
-					} // for i
-
-					mcu++;
-
-					if(ri > 0 && mcu%ri == 0 && mcu < mxx*myy)
-					{
-						// A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,
-						// but this one assumes well-formed input, and hence the restart marker follows immediately.
-						readFull(tmp, 0, 2);
-						if(tmp[0] != 0xff || tmp[1] != expectedRST)
-							throw new Exception("bad RST marker");
-
-						expectedRST++;
-						if(expectedRST == rst7Marker+1)
-							expectedRST = rst0Marker;
-
-						// Reset the Huffman decoder.
-						bits = new bits_class();
-						// Reset the DC components, as per section F.2.1.3.1.
-						dc = new int[maxComponents];
-						// Reset the progressive decoder state, as per section G.1.2.2.
-						eobRun = 0;
-					}
-				} // for mx
-			} // for my
-		}
-
-		// refine decodes a successive approximation refinement block, as specified in
-		// section G.1.2.
-		private void refine(Block b, huffman_class h, int zigStart, int zigEnd, int delta)
-		{
-			// Refining a DC component is trivial.
-			if(zigStart == 0)
-			{
-				if(zigEnd != 0)
-					throw new Exception("Invalid state for zig DC component");
-
-				bool bit = decodeBit();
-				if(bit)
-					b[0] |= delta;
-
-				return;
-			}
-
-			// Refining AC components is more complicated; see sections G.1.2.2 and G.1.2.3.
-			int zig = zigStart;
-			if(eobRun == 0)
-			{
-				for( ; zig <= zigEnd; zig++)
-				{
-					bool done = false;
-					int z = 0;
-					var val = decodeHuffman(h);
-					int val0 = val >> 4;
-					int val1 = val & 0x0f;
-
-					switch(val1)
-					{
-						case 0:
-							if(val0 != 0x0f)
-							{
-								eobRun = (ushort)(1 << val0);
-								if(val0 != 0)
-								{
-									uint bits = decodeBits(val0);
-									eobRun |= (ushort)bits;
-								}
-							}
-							done = true;
-							break;
-						case 1:
-							z = delta;
-							bool bit = decodeBit();
-							if(!bit)
-								z = -z;
-							break;
-						default:
-							throw new Exception("unexpected Huffman code");
-					}
-
-					zig = refineNonZeroes(b, zig, zigEnd, val0, delta);
-					if(zig > zigEnd)
-						throw new Exception("too many coefficients");
-
-					if(z != 0)
-						b[unzig[zig]] = z;
-				}
-			}
-
-			if(eobRun > 0)
-			{
-				eobRun--;
-				refineNonZeroes(b, zig, zigEnd, -1, delta);
-			}
-		}
-
-		// refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
-		// the first nz zero entries are skipped over.
-		public int refineNonZeroes(Block b, int zig, int zigEnd, int nz, int delta)
-		{
-			for( ; zig <= zigEnd; zig++)
-			{
-				int u = unzig[zig];
-				if(b[u] == 0)
-				{
-					if(nz == 0)
-						break;
-					nz--;
-					continue;
-				}
-
-				bool bit = decodeBit();
-				if(!bit)
-					continue;
-
-				if(b[u] >= 0)
-					b[u] += delta;
-				else
-					b[u] -= delta;
-			}
-
-			return zig;
-		}
-
-		private void makeImg(int mxx, int myy)
-		{
-			if(nComp == 1)
-			{
-				var m = new img_gray(8*mxx, 8*myy);
-				img1 = m.subimage(0, 0, width, height);
-			}
-			else
-			{
-				var h0 = comp[0].h;
-				var v0 = comp[0].v;
-				var hRatio = h0 / comp[1].h;
-				var vRatio = v0 / comp[1].v;
-
-				var ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio444;
-				switch((hRatio<<4) | vRatio)
-				{
-					case 0x11:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio444;
-						break;
-					case 0x12:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio440;
-						break;
-					case 0x21:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio422;
-						break;
-					case 0x22:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio420;
-						break;
-					case 0x41:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio411;
-						break;
-					case 0x42:
-						ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio410;
-						break;
-				}
-
-				var m = new img_ycbcr(8*h0*mxx, 8*v0*myy, ratio);
-				img3 = m.subimage(0, 0, width, height);
-
-				/*if d.nComp == 4 {
-					h3, v3 := d.comp[3].h, d.comp[3].v
-					d.blackPix = make([]byte, 8*h3*mxx*8*v3*myy)
-					d.blackStride = 8 * h3 * mxx
-				}*/
-			}
-		}
-	}
+    using System;
+    using System.IO;
+    using System.Threading.Tasks;
+
+    internal partial class Decoder
+    {
+        private class errMissingFF00 : Exception { }
+        private class errShortHuffmanData : Exception { }
+        private class errEOF : Exception { }
+
+        // maxCodeLength is the maximum (inclusive) number of bits in a Huffman code.
+        private const int maxCodeLength = 16;
+        // maxNCodes is the maximum (inclusive) number of codes in a Huffman tree.
+        private const int maxNCodes = 256;
+        // lutSize is the log-2 size of the Huffman decoder's look-up table.
+        private const int lutSize = 8;
+        private const int maxComponents = 4;
+        private const int maxTc = 1;
+        private const int maxTh = 3;
+        private const int maxTq = 3;
+        private const int dcTable = 0;
+        private const int acTable = 1;
+
+        private const int sof0Marker = 0xc0; // Start Of Frame (Baseline).
+        private const int sof1Marker = 0xc1; // Start Of Frame (Extended Sequential).
+        private const int sof2Marker = 0xc2; // Start Of Frame (Progressive).
+        private const int dhtMarker  = 0xc4; // Define Huffman Table.
+        private const int rst0Marker = 0xd0; // ReSTart (0).
+        private const int rst7Marker = 0xd7; // ReSTart (7).
+        private const int soiMarker  = 0xd8; // Start Of Image.
+        private const int eoiMarker  = 0xd9; // End Of Image.
+        private const int sosMarker  = 0xda; // Start Of Scan.
+        private const int dqtMarker  = 0xdb; // Define Quantization Table.
+        private const int driMarker  = 0xdd; // Define Restart Interval.
+        private const int comMarker  = 0xfe; // COMment.
+        // "APPlication specific" markers aren't part of the JPEG spec per se,
+        // but in practice, their use is described at
+        // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
+        private const int app0Marker  = 0xe0;
+        private const int app14Marker = 0xee;
+        private const int app15Marker = 0xef;
+
+        // See http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
+        private const int adobeTransformUnknown = 0;
+        private const int adobeTransformYCbCr = 1;
+        private const int adobeTransformYCbCrK = 2;
+
+        // unzig maps from the zig-zag ordering to the natural ordering. For example,
+        // unzig[3] is the column and row of the fourth element in zig-zag order. The
+        // value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
+        private static readonly int[] unzig = new int[] {
+            0, 1, 8, 16, 9, 2, 3, 10,
+            17, 24, 32, 25, 18, 11, 4, 5,
+            12, 19, 26, 33, 40, 48, 41, 34,
+            27, 20, 13, 6, 7, 14, 21, 28,
+            35, 42, 49, 56, 57, 50, 43, 36,
+            29, 22, 15, 23, 30, 37, 44, 51,
+            58, 59, 52, 45, 38, 31, 39, 46,
+            53, 60, 61, 54, 47, 55, 62, 63,
+        };
+
+        private class component
+        {
+            public int h; // Horizontal sampling factor.
+            public int v; // Vertical sampling factor.
+            public byte c; // Component identifier.
+            public byte tq; // Quantization table destination selector.
+        }
+
+        public Decoder()
+        {
+            huff = new huffman_class[maxTc + 1, maxTh + 1];
+            quant = new Block[maxTq + 1];
+            tmp = new byte[2 * Block.blockSize];
+            comp = new component[maxComponents];
+            progCoeffs = new Block[maxComponents][];
+            bits = new bits_class();
+            bytes = new bytes_class();
+
+            for(int i = 0; i < maxTc + 1; i++)
+                for(int j = 0; j < maxTh + 1; j++)
+                    huff[i, j] = new huffman_class();
+
+            for(int i = 0; i < quant.Length; i++)
+                quant[i] = new Block();
+
+            for(int i = 0; i < comp.Length; i++)
+                comp[i] = new component();
+        }
+
+        private class img_ycbcr
+        {
+            public enum YCbCrSubsampleRatio
+            {
+                YCbCrSubsampleRatio444,
+                YCbCrSubsampleRatio422,
+                YCbCrSubsampleRatio420,
+                YCbCrSubsampleRatio440,
+                YCbCrSubsampleRatio411,
+                YCbCrSubsampleRatio410,
+            }
+
+            private static void yCbCrSize(int w, int h, YCbCrSubsampleRatio ratio, out int cw, out int ch)
+            {
+                switch(ratio)
+                {
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio422:
+                        cw = (w+1)/2;
+                        ch = h;
+                        break;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio420:
+                        cw = (w+1)/2;
+                        ch = (h+1)/2;
+                        break;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio440:
+                        cw = w;
+                        ch = (h+1)/2;
+                        break;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio411:
+                        cw = (w+3)/4;
+                        ch = h;
+                        break;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio410:
+                        cw = (w+3)/4;
+                        ch = (h+1)/2;
+                        break;
+                    default:
+                        // Default to 4:4:4 subsampling.
+                        cw = w;
+                        ch = h;
+                        break;
+                }
+            }
+
+            private img_ycbcr()
+            {
+            }
+
+            public byte[] pix_y, pix_cb, pix_cr;
+            public int y_stride, c_stride;
+            public int y_offset, c_offset;
+            public int x, y, w, h;
+            public YCbCrSubsampleRatio ratio;
+
+            public img_ycbcr(int w, int h, YCbCrSubsampleRatio ratio)
+            {
+                int cw, ch;
+                yCbCrSize(w, h, ratio, out cw, out ch);
+                this.pix_y = new byte[w*h];
+                this.pix_cb = new byte[cw*ch];
+                this.pix_cr = new byte[cw*ch];
+                this.ratio = ratio;
+                this.y_stride = w;
+                this.c_stride = cw;
+                this.x = 0;
+                this.y = 0;
+                this.w = w;
+                this.h = h;
+            }
+
+            public img_ycbcr subimage(int x, int y, int w, int h)
+            {
+                var ret = new img_ycbcr();
+                ret.w = w;
+                ret.h = h;
+                ret.pix_y = pix_y;
+                ret.pix_cb = pix_cb;
+                ret.pix_cr = pix_cr;
+                ret.ratio = ratio;
+                ret.y_stride = y_stride;
+                ret.c_stride = c_stride;
+                ret.y_offset = y * y_stride + x;
+                ret.c_offset = y * c_stride + x;
+                return ret;
+            }
+
+            public int get_row_y_offset(int y)
+            {
+                return y * y_stride;
+            }
+
+            public int get_row_c_offset(int y)
+            {
+                switch(ratio)
+                {
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio422:
+                        return y * c_stride;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio420:
+                        return (y/2) * c_stride;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio440:
+                        return (y/2) * c_stride;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio411:
+                        return y * c_stride;
+                    case YCbCrSubsampleRatio.YCbCrSubsampleRatio410:
+                        return (y/2) * c_stride;
+                    default:
+                        return y * c_stride;
+                }
+            }
+        }
+
+        public class img_rgb
+        {
+            public byte[] pixels;
+            public int stride;
+            public int w, h;
+
+            private img_rgb()
+            {
+            }
+
+            public img_rgb(int w, int h)
+            {
+                this.w = w;
+                this.h = h;
+                this.pixels = new byte[w*h*3];
+                this.stride = w*3;
+            }
+
+            public int get_row_offset(int y)
+            {
+                return y * stride;
+            }
+        }
+
+        public class img_gray
+        {
+            public byte[] pixels;
+            public int stride;
+            public int x, y, w, h;
+            public int offset;
+
+            private img_gray()
+            {
+            }
+
+            public img_gray(int w, int h)
+            {
+                this.w = w;
+                this.h = h;
+                this.pixels = new byte[w*h];
+                this.stride = w;
+                this.offset = 0;
+            }
+
+            public img_gray subimage(int x, int y, int w, int h)
+            {
+                var ret = new img_gray();
+                ret.w = w;
+                ret.h = h;
+                ret.pixels = pixels;
+                ret.stride = stride;
+                ret.offset = y * stride + x;
+                return ret;
+            }
+
+            public int get_row_offset(int y)
+            {
+                return offset + y * stride;
+            }
+        }
+
+        private class huffman_class
+        {
+            public huffman_class()
+            {
+                lut = new ushort[1 << lutSize];
+                vals = new byte[maxNCodes];
+                minCodes = new int[maxCodeLength];
+                maxCodes = new int[maxCodeLength];
+                valsIndices = new int[maxCodeLength];
+                nCodes = 0;
+            }
+
+            // length is the number of codes in the tree.
+            public int nCodes;
+            // lut is the look-up table for the next lutSize bits in the bit-stream.
+            // The high 8 bits of the uint16 are the encoded value. The low 8 bits
+            // are 1 plus the code length, or 0 if the value is too large to fit in
+            // lutSize bits.
+            public ushort[] lut;
+            // vals are the decoded values, sorted by their encoding.
+            public byte[] vals;
+            // minCodes[i] is the minimum code of length i, or -1 if there are no
+            // codes of that length.
+            public int[] minCodes;
+            // maxCodes[i] is the maximum code of length i, or -1 if there are no
+            // codes of that length.
+            public int[] maxCodes;
+            // valsIndices[i] is the index into vals of minCodes[i].
+            public int[] valsIndices;
+        }
+
+        // bytes is a byte buffer, similar to a bufio.Reader, except that it
+        // has to be able to unread more than 1 byte, due to byte stuffing.
+        // Byte stuffing is specified in section F.1.2.3.
+        private class bytes_class
+        {
+            public bytes_class()
+            {
+                buf = new byte[4096];
+                i = 0;
+                j = 0;
+                nUnreadable = 0;
+            }
+
+            // buf[i:j] are the buffered bytes read from the underlying
+            // io.Reader that haven't yet been passed further on.
+            public byte[] buf;
+            public int i, j;
+            // nUnreadable is the number of bytes to back up i after
+            // overshooting. It can be 0, 1 or 2.
+            public int nUnreadable;
+        }
+
+        // bits holds the unprocessed bits that have been taken from the byte-stream.
+        // The n least significant bits of a form the unread bits, to be read in MSB to
+        // LSB order.
+        private class bits_class
+        {
+            public uint a; // accumulator.
+            public uint m; // mask. m==1<<(n-1) when n>0, with m==0 when n==0.
+            public int n; // the number of unread bits in a.
+        }
+
+        public int width, height;
+        public int nComp;
+        public img_gray img1; //grayscale
+        public img_rgb imgrgb; //RGB
+
+        private img_ycbcr img3; //YCrCb
+        private Stream r;
+        private bits_class bits;
+        private bytes_class bytes;
+        private byte[] blackPix;
+        private int blackStride;
+
+        private int ri; // Restart Interval.
+        private bool progressive;
+        private bool jfif;
+        private bool adobeTransformValid;
+        private byte adobeTransform;
+        private ushort eobRun; // End-of-Band run, specified in section G.1.2.2.
+        private component[] comp;
+        private Block[][] progCoeffs; // Saved state between progressive-mode scans.
+        private huffman_class[,] huff;
+        private Block[] quant; // Quantization tables, in zig-zag order.
+        private byte[] tmp;
+
+        // ensureNBits reads bytes from the byte buffer to ensure that bits.n is at
+        // least n. For best performance (avoiding function calls inside hot loops),
+        // the caller is the one responsible for first checking that bits.n < n.
+        private void ensureNBits(int n)
+        {
+            while(true)
+            {
+                var c = readByteStuffedByte();
+                bits.a = bits.a<<8 | (uint)c;
+                bits.n += 8;
+                if(bits.m == 0)
+                    bits.m = 1 << 7;
+                else
+                    bits.m <<= 8;
+                if(bits.n >= n)
+                    break;
+            }
+        }
+
+        // receiveExtend is the composition of RECEIVE and EXTEND, specified in section
+        // F.2.2.1.
+        private int receiveExtend(byte t)
+        {
+            if(bits.n < t)
+                ensureNBits(t);
+
+            bits.n -= t;
+            bits.m >>= t;
+            int s = 1 << t;
+            int x = (int)((bits.a>>bits.n) & (s - 1));
+            if(x < (s>>1))
+                x += ((-1) << t) + 1;
+            return x;
+        }
+
+        // processDHT processes a Define Huffman Table marker, and initializes a huffman
+        // struct from its contents. Specified in section B.2.4.2.
+        private void processDHT(int n)
+        {
+            while(n > 0)
+            {
+                if(n < 17)
+                    throw new Exception("DHT has wrong length");
+
+                readFull(tmp, 0, 17);
+
+                int tc = tmp[0] >> 4;
+                if(tc > maxTc)
+                    throw new Exception("bad Tc value");
+
+                int th = tmp[0] & 0x0f;
+                if(th > maxTh || !progressive && th > 1)
+                    throw new Exception("bad Th value");
+
+                huffman_class h = huff[tc, th];
+
+                // Read nCodes and h.vals (and derive h.nCodes).
+                // nCodes[i] is the number of codes with code length i.
+                // h.nCodes is the total number of codes.
+                h.nCodes = 0;
+
+                int[] ncodes = new int[maxCodeLength];
+                for(int i = 0; i < ncodes.Length; i++)
+                {
+                    ncodes[i] = tmp[i+1];
+                    h.nCodes += ncodes[i];
+                }
+
+                if(h.nCodes == 0)
+                    throw new Exception("Huffman table has zero length");
+                if(h.nCodes > maxNCodes)
+                    throw new Exception("Huffman table has excessive length");
+
+                n -= h.nCodes + 17;
+                if(n < 0)
+                    throw new Exception("DHT has wrong length");
+
+                readFull(h.vals, 0, h.nCodes);
+
+                // Derive the look-up table.
+                for(int i = 0; i < h.lut.Length; i++)
+                    h.lut[i] = 0;
+
+                uint x = 0, code = 0;
+
+                for(int i = 0; i < lutSize; i++)
+                {
+                    code <<= 1;
+
+                    for(int j = 0; j < ncodes[i]; j++)
+                    {
+                        // The codeLength is 1+i, so shift code by 8-(1+i) to
+                        // calculate the high bits for every 8-bit sequence
+                        // whose codeLength's high bits matches code.
+                        // The high 8 bits of lutValue are the encoded value.
+                        // The low 8 bits are 1 plus the codeLength.
+                        byte base2 = (byte)(code << (7 - i));
+                        ushort lutValue = (ushort)(((ushort)h.vals[x]<<8) | (2+i));
+                        for(int k = 0; k < 1<<(7-i); k++)
+                            h.lut[base2|k] = lutValue;
+                        code++;
+                        x++;
+                    }
+                }
+
+                // Derive minCodes, maxCodes, and valsIndices.
+                int c = 0, index = 0;
+                for(int i = 0; i < ncodes.Length; i++)
+                {
+                    int nc = ncodes[i];
+                    if(nc == 0)
+                    {
+                        h.minCodes[i] = -1;
+                        h.maxCodes[i] = -1;
+                        h.valsIndices[i] = -1;
+                    }
+                    else
+                    {
+                        h.minCodes[i] = c;
+                        h.maxCodes[i] = c + nc - 1;
+                        h.valsIndices[i] = index;
+                        c += nc;
+                        index += nc;
+                    }
+                    c <<= 1;
+                }
+            }
+        }
+
+        // decodeHuffman returns the next Huffman-coded value from the bit-stream,
+        // decoded according to h.
+        private byte decodeHuffman(huffman_class h)
+        {
+            if(h.nCodes == 0)
+                throw new Exception("uninitialized Huffman table");
+
+            if(bits.n < 8)
+            {
+                try
+                {
+                    ensureNBits(8);
+                }
+                catch(errMissingFF00)
+                {
+                    if(bytes.nUnreadable != 0)
+                        unreadByteStuffedByte();
+                    goto slowPath;
+                }
+                catch(errShortHuffmanData)
+                {
+                    if(bytes.nUnreadable != 0)
+                        unreadByteStuffedByte();
+                    goto slowPath;
+                }
+            }
+
+            ushort v = h.lut[(bits.a>>(bits.n-lutSize))&0xff];
+            if(v != 0)
+            {
+                byte n = (byte)((v & 0xff) - 1);
+                bits.n -= n;
+                bits.m >>= n;
+                return (byte)(v >> 8);
+            }
+
+        slowPath:
+            int code = 0;
+            for(int i = 0; i < maxCodeLength; i++)
+            {
+                if(bits.n == 0)
+                    ensureNBits(1);
+                if((bits.a & bits.m) != 0)
+                    code |= 1;
+                bits.n--;
+                bits.m >>= 1;
+                if(code <= h.maxCodes[i])
+                    return h.vals[h.valsIndices[i]+code-h.minCodes[i]];
+                code <<= 1;
+            }
+
+            throw new Exception("bad Huffman code");
+        }
+
+        private bool decodeBit()
+        {
+            if(bits.n == 0)
+                ensureNBits(1);
+
+            bool ret = (bits.a & bits.m) != 0;
+            bits.n--;
+            bits.m >>= 1;
+            return ret;
+        }
+
+        private uint decodeBits(int n)
+        {
+            if(bits.n < n)
+                ensureNBits(n);
+
+            uint ret = bits.a >> (bits.n-n);
+            ret = (uint)(ret & ((1 << n) - 1));
+            bits.n -= n;
+            bits.m >>= n;
+            return ret;
+        }
+
+        // fill fills up the bytes.buf buffer from the underlying io.Reader. It
+        // should only be called when there are no unread bytes in bytes.
+        private void fill()
+        {
+            if(bytes.i != bytes.j)
+                throw new Exception("jpeg: fill called when unread bytes exist");
+
+            // Move the last 2 bytes to the start of the buffer, in case we need
+            // to call unreadByteStuffedByte.
+            if(bytes.j > 2)
+            {
+                bytes.buf[0] = bytes.buf[bytes.j-2];
+                bytes.buf[1] = bytes.buf[bytes.j-1];
+                bytes.i = 2;
+                bytes.j = 2;
+            }
+
+            // Fill in the rest of the buffer.
+            int n = r.Read(bytes.buf, bytes.j, bytes.buf.Length - bytes.j);
+            if(n == 0)
+                throw new errEOF();
+            bytes.j += n;
+        }
+
+        // unreadByteStuffedByte undoes the most recent readByteStuffedByte call,
+        // giving a byte of data back from bits to bytes. The Huffman look-up table
+        // requires at least 8 bits for look-up, which means that Huffman decoding can
+        // sometimes overshoot and read one or two too many bytes. Two-byte overshoot
+        // can happen when expecting to read a 0xff 0x00 byte-stuffed byte.
+        private void unreadByteStuffedByte()
+        {
+            bytes.i -= bytes.nUnreadable;
+            bytes.nUnreadable = 0;
+            if(bits.n >= 8)
+            {
+                bits.a >>= 8;
+                bits.n -= 8;
+                bits.m >>= 8;
+            }
+        }
+
+        // readByte returns the next byte, whether buffered or not buffere It does
+        // not care about byte stuffing.
+        private byte readByte()
+        {
+            while(bytes.i == bytes.j)
+                fill();
+            byte x = bytes.buf[bytes.i];
+            bytes.i++;
+            bytes.nUnreadable = 0;
+            return x;
+        }
+
+        // readByteStuffedByte is like readByte but is for byte-stuffed Huffman data.
+        private byte readByteStuffedByte()
+        {
+            byte x;
+
+            // Take the fast path if bytes.buf contains at least two bytes.
+            if(bytes.i+2 <= bytes.j)
+            {
+                x = bytes.buf[bytes.i];
+                bytes.i++;
+                bytes.nUnreadable = 1;
+                if(x != 0xff)
+                    return x;
+                if(bytes.buf[bytes.i] != 0x00)
+                    throw new errMissingFF00();
+                bytes.i++;
+                bytes.nUnreadable = 2;
+                return 0xff;
+            }
+
+            bytes.nUnreadable = 0;
+
+            x = readByte();
+            bytes.nUnreadable = 1;
+            if(x != 0xff)
+                return x;
+            x = readByte();
+            bytes.nUnreadable = 2;
+            if(x != 0x00)
+                throw new errMissingFF00();
+            return 0xff;
+        }
+
+        // readFull reads exactly len(p) bytes into p. It does not care about byte
+        // stuffing.
+        void readFull(byte[] data, int offset, int len)
+        {
+            // Unread the overshot bytes, if any.
+            if(bytes.nUnreadable != 0)
+            {
+                if(bits.n >= 8)
+                    unreadByteStuffedByte();
+                bytes.nUnreadable = 0;
+            }
+
+            while(len > 0)
+            {
+                if(bytes.j - bytes.i >= len)
+                {
+                    Array.Copy(bytes.buf, bytes.i, data, offset, len);
+                    bytes.i += len;
+                    len -= len;
+                }
+                else
+                {
+                    Array.Copy(bytes.buf, bytes.i, data, offset, bytes.j - bytes.i);
+                    bytes.i += bytes.j - bytes.i;
+                    offset += bytes.j - bytes.i;
+                    len -= bytes.j - bytes.i;
+
+                    fill();
+                }
+            }
+        }
+
+        // ignore ignores the next n bytes.
+        private void ignore(int n)
+        {
+            // Unread the overshot bytes, if any.
+            if(bytes.nUnreadable != 0)
+            {
+                if(bits.n >= 8)
+                    unreadByteStuffedByte();
+                bytes.nUnreadable = 0;
+            }
+
+            while(true)
+            {
+                int m = bytes.j - bytes.i;
+                if(m > n) m = n;
+                bytes.i += m;
+                n -= m;
+                if(n == 0)
+                    break;
+                else
+                    fill();
+            }
+        }
+
+        // Specified in section B.2.2.
+        private void processSOF(int n)
+        {
+            if(nComp != 0)
+                throw new Exception("multiple SOF markers");
+
+            switch(n)
+            {
+                case 6 + 3*1: // Grayscale image.
+                    nComp = 1;
+                    break;
+                case 6 + 3*3: // YCbCr or RGB image.
+                    nComp = 3;
+                    break;
+                case 6 + 3*4: // YCbCrK or CMYK image.
+                    nComp = 4;
+                    break;
+                default:
+                    throw new Exception("number of components");
+            }
+
+            readFull(tmp, 0, n);
+
+            // We only support 8-bit precision.
+            if(tmp[0] != 8)
+                throw new Exception("precision");
+
+            height = (tmp[1]<<8) + tmp[2];
+            width = (tmp[3]<<8) + tmp[4];
+            if(tmp[5] != nComp)
+                throw new Exception("SOF has wrong length");
+
+            for(int i = 0; i < nComp; i++)
+            {
+                comp[i].c = tmp[6+3*i];
+                // Section B.2.2 states that "the value of C_i shall be different from
+                // the values of C_1 through C_(i-1)".
+                for(int j = 0; j < i; j++)
+                {
+                    if(comp[i].c == comp[j].c)
+                        throw new Exception("repeated component identifier");
+                }
+
+                comp[i].tq = tmp[8+3*i];
+                if(comp[i].tq > maxTq)
+                    throw new Exception("bad Tq value");
+
+                byte hv = tmp[7+3*i];
+                int h = hv>>4;
+                int v = hv&0x0f;
+                if(h < 1 || 4 < h || v < 1 || 4 < v)
+                    throw new Exception("luma/chroma subsampling ratio");
+                if(h == 3 || v == 3)
+                    throw new Exception("unsupported subsampling ratio");
+
+                switch(nComp)
+                {
+                    case 1:
+                    {
+                        // If a JPEG image has only one component, section A.2 says "this data
+                        // is non-interleaved by definition" and section A.2.2 says "[in this
+                        // case...] the order of data units within a scan shall be left-to-right
+                        // and top-to-bottom... regardless of the values of H_1 and V_1". Section
+                        // 4.8.2 also says "[for non-interleaved data], the MCU is defined to be
+                        // one data unit". Similarly, section A.1.1 explains that it is the ratio
+                        // of H_i to max_j(H_j) that matters, and similarly for V. For grayscale
+                        // images, H_1 is the maximum H_j for all components j, so that ratio is
+                        // always 1. The component's (h, v) is effectively always (1, 1): even if
+                        // the nominal (h, v) is (2, 1), a 20x5 image is encoded in three 8x8
+                        // MCUs, not two 16x8 MCUs.
+                        h = 1;
+                        v = 1;
+                        break;
+                    }
+
+                    case 3:
+                    {
+                        // For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
+                        // 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
+                        // (h, v) values for the Y component are either (1, 1), (1, 2),
+                        // (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
+                        // must be a multiple of the Cb and Cr component's values. We also
+                        // assume that the two chroma components have the same subsampling
+                        // ratio.
+                        switch(i)
+                        {
+                            case 0: // Y.
+                            {
+                                // We have already verified, above, that h and v are both
+                                // either 1, 2 or 4, so invalid (h, v) combinations are those
+                                // with v == 4.
+                                if(v == 4)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                            }
+                            case 1: // Cb.
+                            {
+                                if(comp[0].h%h != 0 || comp[0].v%v != 0)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                            }
+                            case 2: // Cr.
+                            {
+                                if(comp[1].h != h || comp[1].v != v)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                            }
+                        }
+                        break;
+                    }
+
+                    case 4:
+                    {
+                        // For 4-component images (either CMYK or YCbCrK), we only support two
+                        // hv vectors: [0x11 0x11 0x11 0x11] and [0x22 0x11 0x11 0x22].
+                        // Theoretically, 4-component JPEG images could mix and match hv values
+                        // but in practice, those two combinations are the only ones in use,
+                        // and it simplifies the applyBlack code below if we can assume that:
+                        //  - for CMYK, the C and K channels have full samples, and if the M
+                        //    and Y channels subsample, they subsample both horizontally and
+                        //    vertically.
+                        //  - for YCbCrK, the Y and K channels have full samples.
+                        switch(i)
+                        {
+                            case 0:
+                                if(hv != 0x11 && hv != 0x22)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                            case 1:
+                            case 2:
+                                if(hv != 0x11)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                            case 3:
+                                if(comp[0].h != h || comp[0].v != v)
+                                    throw new Exception("unsupported subsampling ratio");
+                                break;
+                        }
+                        break;
+                    }
+                }
+
+                comp[i].h = h;
+                comp[i].v = v;
+            }
+        }
+
+        // Specified in section B.2.4.1.
+        private void processDQT(int n)
+        {
+            while(n > 0)
+            {
+                bool done = false;
+
+                n--;
+                byte x = readByte();
+                byte tq = (byte)(x & 0x0f);
+                if(tq > maxTq)
+                    throw new Exception("bad Tq value");
+
+                switch(x >> 4)
+                {
+                    case 0:
+                        if(n < Block.blockSize)
+                        {
+                            done = true;
+                            break;
+                        }
+                        n -= Block.blockSize;
+                        readFull(tmp, 0, Block.blockSize);
+
+                        for(int i = 0; i < Block.blockSize; i++)
+                            quant[tq][i] = tmp[i];
+                        break;
+                    case 1:
+                        if(n < 2*Block.blockSize)
+                        {
+                            done = true;
+                            break;
+                        }
+                        n -= 2 * Block.blockSize;
+                        readFull(tmp, 0, 2*Block.blockSize);
+
+                        for(int i = 0; i < Block.blockSize; i++)
+                            quant[tq][i] = ((int)tmp[2*i]<<8) | (int)tmp[2*i+1];
+                        break;
+                    default:
+                        throw new Exception("bad Pq value");
+                }
+
+                if(done)
+                    break;
+            }
+
+            if(n != 0)
+                throw new Exception("DQT has wrong length");
+        }
+
+        // Specified in section B.2.4.4.
+        private void processDRI(int n)
+        {
+            if(n != 2)
+                throw new Exception("DRI has wrong length");
+
+            readFull(tmp, 0, 2);
+            ri = ((int)tmp[0]<<8) + (int)tmp[1];
+        }
+
+        private void processApp0Marker(int n)
+        {
+            if(n < 5)
+            {
+                ignore(n);
+                return;
+            }
+
+            readFull(tmp, 0, 5);
+            n -= 5;
+
+            jfif = tmp[0] == 'J' && tmp[1] == 'F' && tmp[2] == 'I' && tmp[3] == 'F' && tmp[4] == '\x00';
+            if(n > 0)
+                ignore(n);
+        }
+
+        private void processApp14Marker(int n)
+        {
+            if(n < 12)
+            {
+                ignore(n);
+                return;
+            }
+
+            readFull(tmp, 0, 12);
+            n -= 12;
+
+            if(tmp[0] == 'A' && tmp[1] == 'd' && tmp[2] == 'o' && tmp[3] == 'b' && tmp[4] == 'e')
+            {
+                adobeTransformValid = true;
+                adobeTransform = tmp[11];
+            }
+
+            if(n > 0)
+                ignore(n);
+        }
+
+        // decode reads a JPEG image from r and returns it as an image.Image.
+        public void decode(Stream r, bool configOnly)
+        {
+            this.r = r;
+
+            // Check for the Start Of Image marker.
+            readFull(tmp, 0, 2);
+            if(tmp[0] != 0xff || tmp[1] != soiMarker)
+                throw new Exception("missing SOI marker");
+
+            // Process the remaining segments until the End Of Image marker.
+            while(true)
+            {
+                readFull(tmp, 0, 2);
+                while(tmp[0] != 0xff)
+                {
+                    // Strictly speaking, this is a format error. However, libjpeg is
+                    // liberal in what it accepts. As of version 9, next_marker in
+                    // jdmarker.c treats this as a warning (JWRN_EXTRANEOUS_DATA) and
+                    // continues to decode the stream. Even before next_marker sees
+                    // extraneous data, jpeg_fill_bit_buffer in jdhuff.c reads as many
+                    // bytes as it can, possibly past the end of a scan's data. It
+                    // effectively puts back any markers that it overscanned (e.g. an
+                    // "\xff\xd9" EOI marker), but it does not put back non-marker data,
+                    // and thus it can silently ignore a small number of extraneous
+                    // non-marker bytes before next_marker has a chance to see them (and
+                    // print a warning).
+                    //
+                    // We are therefore also liberal in what we accept. Extraneous data
+                    // is silently ignore
+                    //
+                    // This is similar to, but not exactly the same as, the restart
+                    // mechanism within a scan (the RST[0-7] markers).
+                    //
+                    // Note that extraneous 0xff bytes in e.g. SOS data are escaped as
+                    // "\xff\x00", and so are detected a little further down below.
+                    tmp[0] = tmp[1];
+                    tmp[1] = readByte();
+                }
+                byte marker = tmp[1];
+                if(marker == 0) {
+                    // Treat "\xff\x00" as extraneous data.
+                    continue;
+                }
+
+                while(marker == 0xff)
+                {
+                    // Section B.1.1.2 says, "Any marker may optionally be preceded by any
+                    // number of fill bytes, which are bytes assigned code X'FF'".
+                    marker = readByte();
+                }
+
+                if(marker == eoiMarker) // End Of Image.
+                    break;
+
+                if(rst0Marker <= marker && marker <= rst7Marker)
+                {
+                    // Figures B.2 and B.16 of the specification suggest that restart markers should
+                    // only occur between Entropy Coded Segments and not after the final ECS.
+                    // However, some encoders may generate incorrect JPEGs with a final restart
+                    // marker. That restart marker will be seen here instead of inside the processSOS
+                    // method, and is ignored as a harmless error. Restart markers have no extra data,
+                    // so we check for this before we read the 16-bit length of the segment.
+                    continue;
+                }
+
+                // Read the 16-bit length of the segment. The value includes the 2 bytes for the
+                // length itself, so we subtract 2 to get the number of remaining bytes.
+                readFull(tmp, 0, 2);
+                int n = ((int)tmp[0]<<8) + (int)tmp[1] - 2;
+                if(n < 0)
+                    throw new Exception("short segment length");
+
+                switch(marker)
+                {
+                    case sof0Marker:
+                    case sof1Marker:
+                    case sof2Marker:
+                        progressive = (marker == sof2Marker);
+                        processSOF(n);
+                        if(configOnly && jfif)
+                            return;
+                        break;
+                    case dhtMarker:
+                        if(configOnly)
+                            ignore(n);
+                        else
+                            processDHT(n);
+                        break;
+                    case dqtMarker:
+                        if(configOnly)
+                            ignore(n);
+                        else
+                            processDQT(n);
+                        break;
+                    case sosMarker:
+                        if(configOnly)
+                            return;
+                        else
+                            processSOS(n);
+                        break;
+                    case driMarker:
+                        if(configOnly)
+                            ignore(n);
+                        else
+                            processDRI(n);
+                        break;
+                    case app0Marker:
+                        processApp0Marker(n);
+                        break;
+                    case app14Marker:
+                        processApp14Marker(n);
+                        break;
+                    default:
+                        if(app0Marker <= marker && marker <= app15Marker || marker == comMarker)
+                            ignore(n);
+                        else if(marker < 0xc0) // See Table B.1 "Marker code assignments".
+                            throw new Exception("unknown marker");
+                        else
+                            throw new Exception("unknown marker");
+                        break;
+                    }
+            }
+
+            if(img1 != null)
+                return;
+            else if(img3 != null)
+            {
+                if(comp[0].c == 'R' && comp[1].c == 'G' && comp[2].c == 'B')
+                    imgrgb = convert_direct_to_rgb(width, height);
+                else
+                    imgrgb = convert_to_rgb(width, height);
+            }
+
+            /*if img3 != nil {
+                if blackPix != nil {
+                    return applyBlack()
+                } else if isRGB() {
+                    return convertToRGB()
+                }
+                return img3, nil
+            }*/
+
+            throw new Exception("missing SOS marker");
+        }
+
+        private img_rgb convert_to_rgb(int w, int h)
+        {
+            var ret = new img_rgb(w, h);
+            int cScale = comp[0].h / comp[1].h;
+
+            Parallel.For(
+                0,
+                h,
+                y =>
+                {
+                    int po = ret.get_row_offset(y);
+                    int yo = img3.get_row_y_offset(y);
+                    int co = img3.get_row_c_offset(y);
+
+                    for (int x = 0; x < w; x++)
+                    {
+                        byte yy = img3.pix_y[yo+x];
+                        byte cb = img3.pix_cb[co+x/cScale];
+                        byte cr = img3.pix_cr[co+x/cScale];
+
+                        byte r, g, b;
+                        Colors.YCbCrToRGB(yy, cb, cr, out r, out g, out b);
+                        ret.pixels[po+3*x+0] = r;
+                        ret.pixels[po+3*x+1] = g;
+                        ret.pixels[po+3*x+2] = b;
+                    }
+                }
+            );
+
+            return ret;
+        }
+
+        private img_rgb convert_direct_to_rgb(int w, int h)
+        {
+            var ret = new img_rgb(w, h);
+            
+            int cScale = comp[0].h / comp[1].h;
+            for(var y = 0; y < h; y++)
+            {
+                int po = ret.get_row_offset(y);
+                int yo = img3.get_row_y_offset(y);
+                int co = img3.get_row_c_offset(y);
+                for(int x = 0; x < w; x++)
+                {
+                    ret.pixels[po+3*x+0] = img3.pix_y[yo+x];
+                    ret.pixels[po+3*x+1] = img3.pix_cb[co+x/cScale];
+                    ret.pixels[po+3*x+2] = img3.pix_cr[co+x/cScale];
+                }
+            }
+
+            return ret;
+        }
+
+        private struct scan_scruct {
+            public byte compIndex;
+            public byte td;
+            public byte ta;
+        }
+
+        // Specified in section B.2.3.
+        void processSOS(int n)
+        {
+            if(nComp == 0)
+                throw new Exception("missing SOF marker");
+
+            if(n < 6 || 4+2*nComp < n || n%2 != 0)
+                throw new Exception("SOS has wrong length");
+
+            readFull(tmp, 0, n);
+            nComp = tmp[0];
+            if(n != 4+2*nComp)
+                throw new Exception("SOS length inconsistent with number of components");
+
+            var scan = new scan_scruct[maxComponents];
+            int totalHV = 0;
+            for(int i = 0; i < nComp; i++)
+            {
+                int cs = tmp[1+2*i]; // Component selector.
+                int compIndex = -1;
+                for(int j = 0; j < nComp; j++)
+                {
+                    var compv = comp[j];
+                    if(cs == compv.c)
+                        compIndex = j;
+                }
+                if(compIndex < 0)
+                    throw new Exception("unknown component selector");
+
+                scan[i].compIndex = (byte)compIndex;
+                // Section B.2.3 states that "the value of Cs_j shall be different from
+                // the values of Cs_1 through Cs_(j-1)". Since we have previously
+                // verified that a frame's component identifiers (C_i values in section
+                // B.2.2) are unique, it suffices to check that the implicit indexes
+                // into comp are unique.
+                for(int j = 0; j < i; j++)
+                {
+                    if(scan[i].compIndex == scan[j].compIndex)
+                        throw new Exception("repeated component selector");
+                }
+
+                totalHV += comp[compIndex].h * comp[compIndex].v;
+
+                scan[i].td = (byte)(tmp[2+2*i] >> 4);
+                if(scan[i].td > maxTh)
+                    throw new Exception("bad Td value");
+
+                scan[i].ta = (byte)(tmp[2+2*i] & 0x0f);
+                if(scan[i].ta > maxTh)
+                    throw new Exception("bad Ta value");
+            }
+
+            // Section B.2.3 states that if there is more than one component then the
+            // total H*V values in a scan must be <= 10.
+            if(nComp > 1 && totalHV > 10)
+                throw new Exception("total sampling factors too large");
+
+            // zigStart and zigEnd are the spectral selection bounds.
+            // ah and al are the successive approximation high and low values.
+            // The spec calls these values Ss, Se, Ah and Al.
+            //
+            // For progressive JPEGs, these are the two more-or-less independent
+            // aspects of progression. Spectral selection progression is when not
+            // all of a block's 64 DCT coefficients are transmitted in one pass.
+            // For example, three passes could transmit coefficient 0 (the DC
+            // component), coefficients 1-5, and coefficients 6-63, in zig-zag
+            // order. Successive approximation is when not all of the bits of a
+            // band of coefficients are transmitted in one pass. For example,
+            // three passes could transmit the 6 most significant bits, followed
+            // by the second-least significant bit, followed by the least
+            // significant bit.
+            //
+            // For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
+            int zigStart = 0;
+            int zigEnd = Block.blockSize - 1;
+            int ah = 0;
+            int al = 0;
+
+            if(progressive)
+            {
+                zigStart = (int)(tmp[1+2*nComp]);
+                zigEnd = (int)(tmp[2+2*nComp]);
+                ah = (int)(tmp[3+2*nComp] >> 4);
+                al = (int)(tmp[3+2*nComp] & 0x0f);
+                if((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.blockSize <= zigEnd)
+                    throw new Exception("bad spectral selection bounds");;
+                if(zigStart != 0 && nComp != 1)
+                    throw new Exception("progressive AC coefficients for more than one component");
+                if(ah != 0 && ah != al+1)
+                    throw new Exception("bad successive approximation values");
+            }
+
+            // mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
+            int h0 = comp[0].h;
+            int v0 = comp[0].v;
+            int mxx = (width + 8*h0 - 1) / (8 * h0);
+            int myy = (height + 8*v0 - 1) / (8 * v0);
+            
+            if(img1 == null && img3 == null)
+                makeImg(mxx, myy);
+
+            if(progressive)
+            {
+                for(int i = 0; i < nComp; i++)
+                {
+                    int compIndex = scan[i].compIndex;
+                    if(progCoeffs[compIndex] == null)
+                    {
+                        progCoeffs[compIndex] = new Block[mxx*myy*comp[compIndex].h*comp[compIndex].v];
+
+                        for(int j = 0; j < progCoeffs[compIndex].Length; j++)
+                            progCoeffs[compIndex][j] = new Block();
+                    }
+                }
+            }
+
+            bits = new bits_class();
+
+            byte mcu = 0;
+            byte expectedRST = rst0Marker;
+
+            // b is the decoded coefficients, in natural (not zig-zag) order.
+            Block b = new Block();
+            int[] dc = new int[maxComponents];
+            // bx and by are the location of the current block, in units of 8x8
+            // blocks: the third block in the first row has (bx, by) = (2, 0).
+            int bx, by, blockCount = 0;
+
+            for(int my = 0; my < myy; my++)
+            {
+                for(int mx = 0; mx < mxx; mx++)
+                {
+                    for(int i = 0; i < nComp; i++)
+                    {
+                        int compIndex = scan[i].compIndex;
+                        int hi = comp[compIndex].h;
+                        int vi = comp[compIndex].v;
+                        Block qt = quant[comp[compIndex].tq];
+
+                        for(int j = 0; j < hi*vi; j++)
+                        {
+                            // The blocks are traversed one MCU at a time. For 4:2:0 chroma
+                            // subsampling, there are four Y 8x8 blocks in every 16x16 MCU.
+                            //
+                            // For a baseline 32x16 pixel image, the Y blocks visiting order is:
+                            //  0 1 4 5
+                            //  2 3 6 7
+                            //
+                            // For progressive images, the interleaved scans (those with nComp > 1)
+                            // are traversed as above, but non-interleaved scans are traversed left
+                            // to right, top to bottom:
+                            //  0 1 2 3
+                            //  4 5 6 7
+                            // Only DC scans (zigStart == 0) can be interleave AC scans must have
+                            // only one component.
+                            //
+                            // To further complicate matters, for non-interleaved scans, there is no
+                            // data for any blocks that are inside the image at the MCU level but
+                            // outside the image at the pixel level. For example, a 24x16 pixel 4:2:0
+                            // progressive image consists of two 16x16 MCUs. The interleaved scans
+                            // will process 8 Y blocks:
+                            //  0 1 4 5
+                            //  2 3 6 7
+                            // The non-interleaved scans will process only 6 Y blocks:
+                            //  0 1 2
+                            //  3 4 5
+                            if(nComp != 1)
+                            {
+                                bx = hi*mx + j%hi;
+                                by = vi*my + j/hi;
+                            }
+                            else
+                            {
+                                int q = mxx * hi;
+                                bx = blockCount % q;
+                                by = blockCount / q;
+                                blockCount++;
+                                if(bx*8 >= width || by*8 >= height)
+                                    continue;
+                            }
+
+                            // Load the previous partially decoded coefficients, if applicable.
+                            if(progressive)
+                                b = progCoeffs[compIndex][by*mxx*hi+bx];
+                            else
+                                b = new Block();
+
+                            if(ah != 0)
+                            {
+                                refine(b, huff[acTable, scan[i].ta], zigStart, zigEnd, 1<<al);
+                            }
+                            else
+                            {
+                                int zig = zigStart;
+                                if(zig == 0)
+                                {
+                                    zig++;
+                                    // Decode the DC coefficient, as specified in section F.2.2.1.
+                                    byte value = decodeHuffman(huff[dcTable, scan[i].td]);
+                                    if(value > 16)
+                                        throw new Exception("excessive DC component");
+                                    int dcDelta = receiveExtend(value);
+                                    dc[compIndex] += dcDelta;
+                                    b[0] = dc[compIndex] << al;
+                                }
+
+                                if(zig <= zigEnd && eobRun > 0)
+                                {
+                                    eobRun--;
+                                }
+                                else
+                                {
+                                    // Decode the AC coefficients, as specified in section F.2.2.2.
+                                    var huffv = huff[acTable, scan[i].ta];
+                                    for( ; zig <= zigEnd; zig++)
+                                    {
+                                        byte value = decodeHuffman(huffv);
+                                        byte val0 = (byte)(value >> 4);
+                                        byte val1 = (byte)(value & 0x0f);
+                                        if(val1 != 0)
+                                        {
+                                            zig += val0;
+                                            if(zig > zigEnd)
+                                                break;
+
+                                            var ac = receiveExtend(val1);
+                                            b[unzig[zig]] = ac << al;
+                                        }
+                                        else
+                                        {
+                                            if(val0 != 0x0f)
+                                            {
+                                                eobRun = (ushort)(1 << val0);
+                                                if(val0 != 0)
+                                                {
+                                                    uint bits = decodeBits(val0);
+                                                    eobRun |= (ushort)(bits);
+                                                }
+                                                eobRun--;
+                                                break;
+                                            }
+                                            zig += 0x0f;
+                                        }
+                                    }
+                                }
+                            }
+
+                            if(progressive)
+                            {
+                                if(zigEnd != Block.blockSize-1 || al != 0)
+                                {
+                                    // We haven't completely decoded this 8x8 block. Save the coefficients.
+                                    progCoeffs[compIndex][by*mxx*hi+bx] = b;
+                                    // At this point, we could execute the rest of the loop body to dequantize and
+                                    // perform the inverse DCT, to save early stages of a progressive image to the
+                                    // *image.YCbCr buffers (the whole point of progressive encoding), but in Go,
+                                    // the jpeg.Decode function does not return until the entire image is decoded,
+                                    // so we "continue" here to avoid wasted computation.
+                                    continue;
+                                }
+                            }
+
+                            // Dequantize, perform the inverse DCT and store the block to the image.
+                            for(int zig = 0; zig < Block.blockSize; zig++)
+                                b[unzig[zig]] *= qt[zig];
+
+                            IDCT(b);
+                            
+                            byte[] dst = null;
+                            int offset = 0;
+                            int stride = 0;
+
+                            if(nComp == 1)
+                            {
+                                dst = img1.pixels;
+                                stride = img1.stride;
+                                offset = img1.offset + 8*(by*img1.stride+bx);
+                            }
+                            else
+                            {
+                                switch(compIndex)
+                                {
+                                    case 0:
+                                    {
+                                        dst = img3.pix_y;
+                                        stride = img3.y_stride;
+                                        offset = img3.y_offset + 8*(by*img3.y_stride+bx);
+                                        break;
+                                    }
+                                    case 1:
+                                    {
+                                        dst = img3.pix_cb;
+                                        stride = img3.c_stride;
+                                        offset = img3.c_offset + 8*(by*img3.c_stride+bx);
+                                        break;
+                                    }
+                                    case 2:
+                                    {
+                                        dst = img3.pix_cr;
+                                        stride = img3.c_stride;
+                                        offset = img3.c_offset + 8*(by*img3.c_stride+bx);
+                                        break;
+                                    }
+                                    case 3:
+                                    {
+                                        //dst, stride = blackPix[8*(by*blackStride+bx):], blackStride
+                                        break;
+                                    }
+                                    default:
+                                        throw new Exception("too many components");
+                                }
+                            }
+
+                            // Level shift by +128, clip to [0, 255], and write to dst.
+                            for(int y = 0; y < 8; y++)
+                            {
+                                int y8 = y * 8;
+                                int yStride = y * stride;
+                                for(int x = 0; x < 8; x++)
+                                {
+                                    int c = b[y8+x];
+                                    if(c < -128)
+                                        c = 0;
+                                    else if(c > 127)
+                                        c = 255;
+                                    else
+                                        c += 128;
+
+                                    dst[yStride+x+offset] = (byte)c;
+                                }
+                            }
+                        } // for j
+                    } // for i
+
+                    mcu++;
+
+                    if(ri > 0 && mcu%ri == 0 && mcu < mxx*myy)
+                    {
+                        // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,
+                        // but this one assumes well-formed input, and hence the restart marker follows immediately.
+                        readFull(tmp, 0, 2);
+                        if(tmp[0] != 0xff || tmp[1] != expectedRST)
+                            throw new Exception("bad RST marker");
+
+                        expectedRST++;
+                        if(expectedRST == rst7Marker+1)
+                            expectedRST = rst0Marker;
+
+                        // Reset the Huffman decoder.
+                        bits = new bits_class();
+                        // Reset the DC components, as per section F.2.1.3.1.
+                        dc = new int[maxComponents];
+                        // Reset the progressive decoder state, as per section G.1.2.2.
+                        eobRun = 0;
+                    }
+                } // for mx
+            } // for my
+        }
+
+        // refine decodes a successive approximation refinement block, as specified in
+        // section G.1.2.
+        private void refine(Block b, huffman_class h, int zigStart, int zigEnd, int delta)
+        {
+            // Refining a DC component is trivial.
+            if(zigStart == 0)
+            {
+                if(zigEnd != 0)
+                    throw new Exception("Invalid state for zig DC component");
+
+                bool bit = decodeBit();
+                if(bit)
+                    b[0] |= delta;
+
+                return;
+            }
+
+            // Refining AC components is more complicated; see sections G.1.2.2 and G.1.2.3.
+            int zig = zigStart;
+            if(eobRun == 0)
+            {
+                for( ; zig <= zigEnd; zig++)
+                {
+                    bool done = false;
+                    int z = 0;
+                    var val = decodeHuffman(h);
+                    int val0 = val >> 4;
+                    int val1 = val & 0x0f;
+
+                    switch(val1)
+                    {
+                        case 0:
+                            if(val0 != 0x0f)
+                            {
+                                eobRun = (ushort)(1 << val0);
+                                if(val0 != 0)
+                                {
+                                    uint bits = decodeBits(val0);
+                                    eobRun |= (ushort)bits;
+                                }
+                            }
+                            done = true;
+                            break;
+                        case 1:
+                            z = delta;
+                            bool bit = decodeBit();
+                            if(!bit)
+                                z = -z;
+                            break;
+                        default:
+                            throw new Exception("unexpected Huffman code");
+                    }
+
+                    zig = refineNonZeroes(b, zig, zigEnd, val0, delta);
+                    if(zig > zigEnd)
+                        throw new Exception("too many coefficients");
+
+                    if(z != 0)
+                        b[unzig[zig]] = z;
+                }
+            }
+
+            if(eobRun > 0)
+            {
+                eobRun--;
+                refineNonZeroes(b, zig, zigEnd, -1, delta);
+            }
+        }
+
+        // refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
+        // the first nz zero entries are skipped over.
+        public int refineNonZeroes(Block b, int zig, int zigEnd, int nz, int delta)
+        {
+            for( ; zig <= zigEnd; zig++)
+            {
+                int u = unzig[zig];
+                if(b[u] == 0)
+                {
+                    if(nz == 0)
+                        break;
+                    nz--;
+                    continue;
+                }
+
+                bool bit = decodeBit();
+                if(!bit)
+                    continue;
+
+                if(b[u] >= 0)
+                    b[u] += delta;
+                else
+                    b[u] -= delta;
+            }
+
+            return zig;
+        }
+
+        private void makeImg(int mxx, int myy)
+        {
+            if(nComp == 1)
+            {
+                var m = new img_gray(8*mxx, 8*myy);
+                img1 = m.subimage(0, 0, width, height);
+            }
+            else
+            {
+                var h0 = comp[0].h;
+                var v0 = comp[0].v;
+                var hRatio = h0 / comp[1].h;
+                var vRatio = v0 / comp[1].v;
+
+                var ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio444;
+                switch((hRatio<<4) | vRatio)
+                {
+                    case 0x11:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio444;
+                        break;
+                    case 0x12:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio440;
+                        break;
+                    case 0x21:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio422;
+                        break;
+                    case 0x22:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio420;
+                        break;
+                    case 0x41:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio411;
+                        break;
+                    case 0x42:
+                        ratio = img_ycbcr.YCbCrSubsampleRatio.YCbCrSubsampleRatio410;
+                        break;
+                }
+
+                var m = new img_ycbcr(8*h0*mxx, 8*v0*myy, ratio);
+                img3 = m.subimage(0, 0, width, height);
+
+                /*if d.nComp == 4 {
+                    h3, v3 := d.comp[3].h, d.comp[3].v
+                    d.blackPix = make([]byte, 8*h3*mxx*8*v3*myy)
+                    d.blackStride = 8 * h3 * mxx
+                }*/
+            }
+        }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/Encoder.cs b/src/ImageProcessorCore/Formats/Jpg/Encoder.cs
index d9f4b39a29..7bac861eb6 100644
--- a/src/ImageProcessorCore/Formats/Jpg/Encoder.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/Encoder.cs
@@ -1,623 +1,622 @@
 namespace ImageProcessorCore.Formats.Jpg
 {
-	using System;
-	using System.IO;
-
-	internal partial class Encoder
-	{
-		private const int sof0Marker = 0xc0; // Start Of Frame (Baseline).
-		private const int sof1Marker = 0xc1; // Start Of Frame (Extended Sequential).
-		private const int sof2Marker = 0xc2; // Start Of Frame (Progressive).
-		private const int dhtMarker  = 0xc4; // Define Huffman Table.
-		private const int rst0Marker = 0xd0; // ReSTart (0).
-		private const int rst7Marker = 0xd7; // ReSTart (7).
-		private const int soiMarker  = 0xd8; // Start Of Image.
-		private const int eoiMarker  = 0xd9; // End Of Image.
-		private const int sosMarker  = 0xda; // Start Of Scan.
-		private const int dqtMarker  = 0xdb; // Define Quantization Table.
-		private const int driMarker  = 0xdd; // Define Restart Interval.
-		private const int comMarker  = 0xfe; // COMment.
-		// "APPlication specific" markers aren't part of the JPEG spec per se,
-		// but in practice, their use is described at
-		// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
-		private const int app0Marker  = 0xe0;
-		private const int app14Marker = 0xee;
-		private const int app15Marker = 0xef;
-
-		// bitCount counts the number of bits needed to hold an integer.
-		private readonly byte[] bitCount = {
-			0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-			5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-			6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-			6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-			7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-			7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-			7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-			7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-			8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, };
-
-		// unzig maps from the zig-zag ordering to the natural ordering. For example,
-		// unzig[3] is the column and row of the fourth element in zig-zag order. The
-		// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
-		private static readonly int[] unzig = new int[] {
-			0, 1, 8, 16, 9, 2, 3, 10,
-			17, 24, 32, 25, 18, 11, 4, 5,
-			12, 19, 26, 33, 40, 48, 41, 34,
-			27, 20, 13, 6, 7, 14, 21, 28,
-			35, 42, 49, 56, 57, 50, 43, 36,
-			29, 22, 15, 23, 30, 37, 44, 51,
-			58, 59, 52, 45, 38, 31, 39, 46,
-			53, 60, 61, 54, 47, 55, 62, 63,
-		};
-
-		private const int nQuantIndex = 2;
-		private const int nHuffIndex = 4;
-
-		private enum quantIndex
-		{
-			quantIndexLuminance = 0,
-			quantIndexChrominance = 1,
-		}
-
-		private enum huffIndex
-		{
-			huffIndexLuminanceDC = 0,
-			huffIndexLuminanceAC = 1,
-			huffIndexChrominanceDC = 2,
-			huffIndexChrominanceAC = 3,
-		}
-
-		// unscaledQuant are the unscaled quantization tables in zig-zag order. Each
-		// encoder copies and scales the tables according to its quality parameter.
-		// The values are derived from section K.1 after converting from natural to
-		// zig-zag order.
-		private byte[,] unscaledQuant = new byte[,] {
-			// Luminance.
-			{
-				16, 11, 12, 14, 12, 10, 16, 14,
-				13, 14, 18, 17, 16, 19, 24, 40,
-				26, 24, 22, 22, 24, 49, 35, 37,
-				29, 40, 58, 51, 61, 60, 57, 51,
-				56, 55, 64, 72, 92, 78, 64, 68,
-				87, 69, 55, 56, 80, 109, 81, 87,
-				95, 98, 103, 104, 103, 62, 77, 113,
-				121, 112, 100, 120, 92, 101, 103, 99,
-			},
-			// Chrominance.
-			{
-				17, 18, 18, 24, 21, 24, 47, 26,
-				26, 47, 99, 66, 56, 66, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-				99, 99, 99, 99, 99, 99, 99, 99,
-			},
-		};
-
-		private class huffmanSpec
-		{
-			public huffmanSpec(byte[] c, byte[] v) { count = c; values = v; }
-			public byte[] count;
-			public byte[] values;
-		};
-
-		// theHuffmanSpec is the Huffman encoding specifications.
-		// This encoder uses the same Huffman encoding for all images.
-		private huffmanSpec[] theHuffmanSpec = new huffmanSpec[] {
-			// Luminance DC.
-			new huffmanSpec(
-				new byte[] {0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0},
-				new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-			),
-			new huffmanSpec(
-				new byte[] {0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
-				new byte[] {
-					0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-					0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-					0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-					0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-					0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-					0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-					0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-					0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-					0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-					0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-					0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-					0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-					0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-					0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-					0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-					0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-					0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-					0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-					0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-					0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-					0xf9, 0xfa,
-				}
-			),
-			new huffmanSpec(
-				new byte[] {0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
-				new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-			),
-			// Chrominance AC.
-			new huffmanSpec(
-				new byte[] {0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
-				new byte[] {
-					0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-					0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-					0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-					0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-					0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-					0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-					0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-					0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-					0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-					0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-					0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-					0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-					0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-					0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-					0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-					0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-					0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-					0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-					0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-					0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-					0xf9, 0xfa,
-				}
-			),
-		};
-
-		// huffmanLUT is a compiled look-up table representation of a huffmanSpec.
-		// Each value maps to a uint32 of which the 8 most significant bits hold the
-		// codeword size in bits and the 24 least significant bits hold the codeword.
-		// The maximum codeword size is 16 bits.
-		private class huffmanLUT
-		{
-			public uint[] values;
-
-			public huffmanLUT(huffmanSpec s)
-			{
-				int maxValue = 0;
-
-				foreach(var v in s.values)
-				{
-					if(v > maxValue)
-						maxValue = v;
-				}
-
-				values = new uint[maxValue+1];
-
-				int code = 0;
-				int k = 0;
-
-				for(int i = 0; i < s.count.Length; i++)
-				{
-					int nBits = (i+1) << 24;
-					for(int j = 0; j < s.count[i]; j++)
-					{
-						values[s.values[k]] = (uint)(nBits | code);
-						code++;
-						k++;
-					}
-					code <<= 1;
-				}
-			}
-		}
-
-		// w is the writer to write to. err is the first error encountered during
-		// writing. All attempted writes after the first error become no-ops.
-		private Stream w;
-		// buf is a scratch buffer.
-		private byte[] buf = new byte[16];
-		// bits and nBits are accumulated bits to write to w.
-		private uint bits, nBits;
-		// quant is the scaled quantization tables, in zig-zag order.
-		private byte[][] quant = new byte[nQuantIndex][];//[Block.blockSize];
-		// theHuffmanLUT are compiled representations of theHuffmanSpec.
-		private huffmanLUT[] theHuffmanLUT = new huffmanLUT[4];
-
-		private void writeByte(byte b)
-		{
-			var data = new byte[1];
-			data[0] = b;
-			w.Write(data, 0, 1);
-		}
-
-		// emit emits the least significant nBits bits of bits to the bit-stream.
-		// The precondition is bits < 1<<nBits && nBits <= 16.
-		private void emit(uint bits, uint nBits)
-		{
-			nBits += this.nBits;
-			bits <<= (int)(32 - nBits);
-			bits |= this.bits;
-			while(nBits >= 8)
-			{
-				byte b = (byte)(bits >> 24);
-				writeByte(b);
-				if(b == 0xff)
-					writeByte(0x00);
-				bits <<= 8;
-				nBits -= 8;
-			}
-			this.bits = bits;
-			this.nBits = nBits;
-		}
-
-		// emitHuff emits the given value with the given Huffman encoder.
-		private void emitHuff(huffIndex h, int v)
-		{
-			uint x = theHuffmanLUT[(int)h].values[v];
-			emit(x&((1<<24)-1), x>>24);
-		}
-
-		// emitHuffRLE emits a run of runLength copies of value encoded with the given
-		// Huffman encoder.
-		private void emitHuffRLE(huffIndex h, int runLength, int v)
-		{
-			int a = v;
-			int b = v;
-			if(a < 0)
-			{
-				a = -v;
-				b = v-1;
-			}
-			uint nBits = 0;
-			if(a < 0x100)
-				nBits = bitCount[a];
-			else
-				nBits = 8 + (uint)bitCount[a>>8];
-
-			emitHuff(h, (int)((runLength<<4)|nBits));
-			if(nBits > 0)
-				emit((uint)b & (uint)((1 << ((int)nBits)) - 1), nBits);
-		}
-
-		// writeMarkerHeader writes the header for a marker with the given length.
-		private void writeMarkerHeader(byte marker, int markerlen)
-		{
-			buf[0] = 0xff;
-			buf[1] = marker;
-			buf[2] = (byte)(markerlen >> 8);
-			buf[3] = (byte)(markerlen & 0xff);
-			w.Write(buf, 0, 4);
-		}
-
-		// writeDQT writes the Define Quantization Table marker.
-		private void writeDQT()
-		{
-			int markerlen = 2 + nQuantIndex*(1+Block.blockSize);
-			writeMarkerHeader(dqtMarker, markerlen);
-			for(int i = 0; i < nQuantIndex; i++)
-			{ 
-				writeByte((byte)i);
-				w.Write(quant[i], 0, quant[i].Length);
-			}
-		}
-
-		// writeSOF0 writes the Start Of Frame (Baseline) marker.
-		private void writeSOF0(int wid, int hei, int nComponent)
-		{
-			byte[] chroma1 = new byte[] { 0x22, 0x11, 0x11 };
-			byte[] chroma2 = new byte[] { 0x00, 0x01, 0x01 };
-
-			int markerlen = 8 + 3*nComponent;
-			writeMarkerHeader(sof0Marker, markerlen);
-			buf[0] = 8; // 8-bit color.
-			buf[1] = (byte)(hei >> 8);
-			buf[2] = (byte)(hei & 0xff);
-			buf[3] = (byte)(wid >> 8);
-			buf[4] = (byte)(wid & 0xff);
-			buf[5] = (byte)(nComponent);
-			if(nComponent == 1)
-			{
-				buf[6] = 1;
-				// No subsampling for grayscale image.
-				buf[7] = 0x11;
-				buf[8] = 0x00;
-			}
-			else
-			{
-				for(int i = 0; i < nComponent; i++)
-				{
-					buf[3*i+6] = (byte)(i + 1);
-					// We use 4:2:0 chroma subsampling.
-					buf[3*i+7] = chroma1[i];
-					buf[3*i+8] = chroma2[i];
-				}
-			}
-			w.Write(buf, 0, 3*(nComponent-1)+9);
-		}
-
-		// writeDHT writes the Define Huffman Table marker.
-		private void writeDHT(int nComponent)
-		{
-			byte[] headers = new byte[] { 0x00, 0x10, 0x01, 0x11 };
-			int markerlen = 2;
-			huffmanSpec[] specs = theHuffmanSpec;
-
-			if(nComponent == 1)
-			{
-				// Drop the Chrominance tables.
-				specs = new huffmanSpec[] { theHuffmanSpec[0], theHuffmanSpec[1] };
-			}
-
-			foreach(var s in specs)
-				markerlen += 1 + 16 + s.values.Length;
-
-			writeMarkerHeader(dhtMarker, markerlen);
-			for(int i = 0; i < specs.Length; i++)
-			{
-				var s = specs[i];
-
-				writeByte(headers[i]);
-				w.Write(s.count, 0, s.count.Length);
-				w.Write(s.values, 0, s.values.Length);
-			}
-		}
-
-		// writeBlock writes a block of pixel data using the given quantization table,
-		// returning the post-quantized DC value of the DCT-transformed block. b is in
-		// natural (not zig-zag) order.
-		private int writeBlock(Block b, quantIndex q, int prevDC)
-		{
-			FDCT(b);
-
-			// Emit the DC delta.
-			int dc = div(b[0], 8*quant[(int)q][0]);
-			emitHuffRLE((huffIndex)(2*(int)q+0), 0, dc-prevDC);
-
-			// Emit the AC components.
-			var h = (huffIndex)(2*(int)q+1);
-			int runLength = 0;
-			
-			for(int zig = 1; zig < Block.blockSize; zig++)
-			{
-				int ac = div(b[unzig[zig]], 8*quant[(int)q][zig]);
-
-				if(ac == 0)
-				{
-					runLength++;
-				}
-				else
-				{
-					while(runLength > 15)
-					{
-						emitHuff(h, 0xf0);
-						runLength -= 16;
-					}
-
-					emitHuffRLE(h, runLength, ac);
-					runLength = 0;
-				}
-			}
-			if(runLength > 0)
-				emitHuff(h, 0x00);
-			return dc;
-		}
-
-		// toYCbCr converts the 8x8 region of m whose top-left corner is p to its
-		// YCbCr values.
-		private void toYCbCr(ImageBase m, int x, int y, Block yBlock, Block cbBlock, Block crBlock)
-		{
-			int xmax = m.Width - 1;
-			int ymax = m.Height - 1;
-			for(int j = 0; j < 8; j++)
-			{
-				for(int i = 0; i < 8; i++)
-				{
-					byte yy, cb, cr;
-
-					var c = m[Math.Min(x+i, xmax), Math.Min(y+j, ymax)];
-					Colors.RGBToYCbCr((byte)(c.R*255), (byte)(c.G*255), (byte)(c.B*255), out yy, out cb, out cr);
-					yBlock[8*j+i] = yy;
-					cbBlock[8*j+i] = cb;
-					crBlock[8*j+i] = cr;
-				}
-			}
-		}
-
-		// grayToY stores the 8x8 region of m whose top-left corner is p in yBlock.
-		/*func grayToY(m *image.Gray, p image.Point, yBlock *block) {
-			b := m.Bounds()
-			xmax := b.Max.X - 1
-			ymax := b.Max.Y - 1
-			pix := m.Pix
-			for j := 0; j < 8; j++ {
-				for i := 0; i < 8; i++ {
-					idx := m.PixOffset(min(p.X+i, xmax), min(p.Y+j, ymax))
-					yBlock[8*j+i] = int32(pix[idx])
-				}
-			}
-		}
-
-		// rgbaToYCbCr is a specialized version of toYCbCr for image.RGBA images.
-		func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block) {
-			b := m.Bounds()
-			xmax := b.Max.X - 1
-			ymax := b.Max.Y - 1
-			for j := 0; j < 8; j++ {
-				sj := p.Y + j
-				if sj > ymax {
-					sj = ymax
-				}
-				offset := (sj-b.Min.Y)*m.Stride - b.Min.X*4
-				for i := 0; i < 8; i++ {
-					sx := p.X + i
-					if sx > xmax {
-						sx = xmax
-					}
-					pix := m.Pix[offset+sx*4:]
-					yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
-					yBlock[8*j+i] = int32(yy)
-					cbBlock[8*j+i] = int32(cb)
-					crBlock[8*j+i] = int32(cr)
-				}
-			}
-		}*/
-
-		// scale scales the 16x16 region represented by the 4 src blocks to the 8x8
-		// dst block.
-		private void scale(Block dst, Block[] src)
-		{
-			for(int i = 0; i < 4; i++)
-			{
-				int dstOff = ((i&2)<<4) | ((i&1)<<2);
-				for(int y = 0; y < 4; y++)
-				{
-					for(int x = 0; x < 4; x++)
-					{
-						int j = 16*y + 2*x;
-						int sum = src[i][j] + src[i][j+1] + src[i][j+8] + src[i][j+9];
-						dst[8*y+x+dstOff] = (sum + 2) >> 2;
-					}
-				}
-			}
-		}
-
-		// sosHeaderY is the SOS marker "\xff\xda" followed by 8 bytes:
-		//	- the marker length "\x00\x08",
-		//	- the number of components "\x01",
-		//	- component 1 uses DC table 0 and AC table 0 "\x01\x00",
-		//	- the bytes "\x00\x3f\x00". Section B.2.3 of the spec says that for
-		//	  sequential DCTs, those bytes (8-bit Ss, 8-bit Se, 4-bit Ah, 4-bit Al)
-		//	  should be 0x00, 0x3f, 0x00<<4 | 0x00.
-		private readonly byte[] sosHeaderY = new byte[] {
-			0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00,
-		};
-
-		// sosHeaderYCbCr is the SOS marker "\xff\xda" followed by 12 bytes:
-		//	- the marker length "\x00\x0c",
-		//	- the number of components "\x03",
-		//	- component 1 uses DC table 0 and AC table 0 "\x01\x00",
-		//	- component 2 uses DC table 1 and AC table 1 "\x02\x11",
-		//	- component 3 uses DC table 1 and AC table 1 "\x03\x11",
-		//	- the bytes "\x00\x3f\x00". Section B.2.3 of the spec says that for
-		//	  sequential DCTs, those bytes (8-bit Ss, 8-bit Se, 4-bit Ah, 4-bit Al)
-		//	  should be 0x00, 0x3f, 0x00<<4 | 0x00.
-		private readonly byte[] sosHeaderYCbCr = new byte[] {
-			0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
-			0x11, 0x03, 0x11, 0x00, 0x3f, 0x00,
-		};
-
-		// writeSOS writes the StartOfScan marker.
-		private void writeSOS(ImageBase m)
-		{
-			w.Write(sosHeaderYCbCr, 0, sosHeaderYCbCr.Length);
-
-			Block b = new Block();
-			Block[] cb = new Block[4];
-			Block[] cr = new Block[4];
-			int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
-
-			for(int i = 0; i < 4; i++) cb[i] = new Block();
-			for(int i = 0; i < 4; i++) cr[i] = new Block();
-
-			for(int y = 0; y < m.Height; y += 16)
-			{
-				for(int x = 0; x < m.Width; x += 16)
-				{
-					for(int i = 0; i < 4; i++)
-					{
-						int xOff = (i & 1) * 8;
-						int yOff = (i & 2) * 4;
-
-						toYCbCr(m, x+xOff, y+yOff, b, cb[i], cr[i]);
-						prevDCY = writeBlock(b, 0, prevDCY);
-					}
-					scale(b, cb);
-					prevDCCb = writeBlock(b, (quantIndex)1, prevDCCb);
-					scale(b, cr);
-					prevDCCr = writeBlock(b, (quantIndex)1, prevDCCr);
-				}
-			}
-
-			// Pad the last byte with 1's.
-			emit(0x7f, 7);
-		}
-
-		// Encode writes the Image m to w in JPEG 4:2:0 baseline format with the given
-		// options. Default parameters are used if a nil *Options is passed.
-		public void Encode(Stream w, ImageBase m, int quality)
-		{
-			this.w = w;
-
-			for(int i = 0; i < theHuffmanSpec.Length; i++)
-				theHuffmanLUT[i] = new huffmanLUT(theHuffmanSpec[i]);
-
-			for(int i = 0; i < nQuantIndex; i++)
-				quant[i] = new byte[Block.blockSize];
-
-			if(m.Width >= (1<<16) || m.Height >= (1<<16))
-				throw new Exception("jpeg: image is too large to encode");
-
-			if(quality < 1) quality = 1;
-			if(quality > 100) quality = 100;
-
-			// Convert from a quality rating to a scaling factor.
-			int scale;
-			if(quality < 50)
-				scale = 5000 / quality;
-			else
-				scale = 200 - quality*2;
-
-			// Initialize the quantization tables.
-	
-			for(int i = 0; i < nQuantIndex; i++)
-			{
-				for(int j = 0; j < Block.blockSize; j++)
-				{
-					int x = unscaledQuant[i,j];
-					x = (x*scale + 50) / 100;
-					if(x < 1) x = 1;
-					if(x > 255) x = 255;
-					quant[i][j] = (byte)x;
-				}
-			}
-
-			// Compute number of components based on input image type.
-			int nComponent = 3;
-
-			// Write the Start Of Image marker.
-			buf[0] = 0xff;
-			buf[1] = 0xd8;
-			w.Write(buf, 0, 2);
-
-			// Write the quantization tables.
-			writeDQT();
-
-			// Write the image dimensions.
-			writeSOF0(m.Width, m.Height, nComponent);
-
-			// Write the Huffman tables.
-			writeDHT(nComponent);
-
-			// Write the image data.
-			writeSOS(m);
-
-			// Write the End Of Image marker.
-			buf[0] = 0xff;
-			buf[1] = 0xd9;
-			w.Write(buf, 0, 2);
-			w.Flush();
-		}
-
-		// div returns a/b rounded to the nearest integer, instead of rounded to zero.
-		private static int div(int a, int b)
-		{
-			if(a >= 0)
-				return (a + (b >> 1)) / b;
-			else
-				return -((-a + (b >> 1)) / b);
-		}
-	}
+    using System;
+    using System.IO;
+
+    internal partial class Encoder
+    {
+        private const int sof0Marker = 0xc0; // Start Of Frame (Baseline).
+        private const int sof1Marker = 0xc1; // Start Of Frame (Extended Sequential).
+        private const int sof2Marker = 0xc2; // Start Of Frame (Progressive).
+        private const int dhtMarker  = 0xc4; // Define Huffman Table.
+        private const int rst0Marker = 0xd0; // ReSTart (0).
+        private const int rst7Marker = 0xd7; // ReSTart (7).
+        private const int soiMarker  = 0xd8; // Start Of Image.
+        private const int eoiMarker  = 0xd9; // End Of Image.
+        private const int sosMarker  = 0xda; // Start Of Scan.
+        private const int dqtMarker  = 0xdb; // Define Quantization Table.
+        private const int driMarker  = 0xdd; // Define Restart Interval.
+        private const int comMarker  = 0xfe; // COMment.
+        // "APPlication specific" markers aren't part of the JPEG spec per se,
+        // but in practice, their use is described at
+        // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
+        private const int app0Marker  = 0xe0;
+        private const int app14Marker = 0xee;
+        private const int app15Marker = 0xef;
+
+        // bitCount counts the number of bits needed to hold an integer.
+        private readonly byte[] bitCount = {
+            0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, };
+
+        // unzig maps from the zig-zag ordering to the natural ordering. For example,
+        // unzig[3] is the column and row of the fourth element in zig-zag order. The
+        // value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
+        private static readonly int[] unzig = new int[] {
+            0, 1, 8, 16, 9, 2, 3, 10,
+            17, 24, 32, 25, 18, 11, 4, 5,
+            12, 19, 26, 33, 40, 48, 41, 34,
+            27, 20, 13, 6, 7, 14, 21, 28,
+            35, 42, 49, 56, 57, 50, 43, 36,
+            29, 22, 15, 23, 30, 37, 44, 51,
+            58, 59, 52, 45, 38, 31, 39, 46,
+            53, 60, 61, 54, 47, 55, 62, 63,
+        };
+
+        private const int nQuantIndex = 2;
+        private const int nHuffIndex = 4;
+
+        private enum quantIndex
+        {
+            quantIndexLuminance = 0,
+            quantIndexChrominance = 1,
+        }
+
+        private enum huffIndex
+        {
+            huffIndexLuminanceDC = 0,
+            huffIndexLuminanceAC = 1,
+            huffIndexChrominanceDC = 2,
+            huffIndexChrominanceAC = 3,
+        }
+
+        // unscaledQuant are the unscaled quantization tables in zig-zag order. Each
+        // encoder copies and scales the tables according to its quality parameter.
+        // The values are derived from section K.1 after converting from natural to
+        // zig-zag order.
+        private byte[,] unscaledQuant = new byte[,] {
+            // Luminance.
+            {
+                16, 11, 12, 14, 12, 10, 16, 14,
+                13, 14, 18, 17, 16, 19, 24, 40,
+                26, 24, 22, 22, 24, 49, 35, 37,
+                29, 40, 58, 51, 61, 60, 57, 51,
+                56, 55, 64, 72, 92, 78, 64, 68,
+                87, 69, 55, 56, 80, 109, 81, 87,
+                95, 98, 103, 104, 103, 62, 77, 113,
+                121, 112, 100, 120, 92, 101, 103, 99,
+            },
+            // Chrominance.
+            {
+                17, 18, 18, 24, 21, 24, 47, 26,
+                26, 47, 99, 66, 56, 66, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+                99, 99, 99, 99, 99, 99, 99, 99,
+            },
+        };
+
+        private class huffmanSpec
+        {
+            public huffmanSpec(byte[] c, byte[] v) { count = c; values = v; }
+            public byte[] count;
+            public byte[] values;
+        };
+
+        // theHuffmanSpec is the Huffman encoding specifications.
+        // This encoder uses the same Huffman encoding for all images.
+        private huffmanSpec[] theHuffmanSpec = new huffmanSpec[] {
+            // Luminance DC.
+            new huffmanSpec(
+                new byte[] {0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0},
+                new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+            ),
+            new huffmanSpec(
+                new byte[] {0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
+                new byte[] {
+                    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+                    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+                    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+                    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+                    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+                    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+                    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+                    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+                    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+                    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+                    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+                    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+                    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+                    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+                    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+                    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+                    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+                    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+                    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+                    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+                    0xf9, 0xfa,
+                }
+            ),
+            new huffmanSpec(
+                new byte[] {0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
+                new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+            ),
+            // Chrominance AC.
+            new huffmanSpec(
+                new byte[] {0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
+                new byte[] {
+                    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+                    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+                    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+                    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+                    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+                    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+                    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+                    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+                    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+                    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+                    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+                    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+                    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+                    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+                    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+                    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+                    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+                    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+                    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+                    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+                    0xf9, 0xfa,
+                }
+            ),
+        };
+
+        // huffmanLUT is a compiled look-up table representation of a huffmanSpec.
+        // Each value maps to a uint32 of which the 8 most significant bits hold the
+        // codeword size in bits and the 24 least significant bits hold the codeword.
+        // The maximum codeword size is 16 bits.
+        private class huffmanLUT
+        {
+            public uint[] values;
+
+            public huffmanLUT(huffmanSpec s)
+            {
+                int maxValue = 0;
+
+                foreach(var v in s.values)
+                {
+                    if(v > maxValue)
+                        maxValue = v;
+                }
+
+                values = new uint[maxValue+1];
+
+                int code = 0;
+                int k = 0;
+
+                for(int i = 0; i < s.count.Length; i++)
+                {
+                    int nBits = (i+1) << 24;
+                    for(int j = 0; j < s.count[i]; j++)
+                    {
+                        values[s.values[k]] = (uint)(nBits | code);
+                        code++;
+                        k++;
+                    }
+                    code <<= 1;
+                }
+            }
+        }
+
+        // w is the writer to write to. err is the first error encountered during
+        // writing. All attempted writes after the first error become no-ops.
+        private Stream w;
+        // buf is a scratch buffer.
+        private byte[] buf = new byte[16];
+        // bits and nBits are accumulated bits to write to w.
+        private uint bits, nBits;
+        // quant is the scaled quantization tables, in zig-zag order.
+        private byte[][] quant = new byte[nQuantIndex][];//[Block.blockSize];
+        // theHuffmanLUT are compiled representations of theHuffmanSpec.
+        private huffmanLUT[] theHuffmanLUT = new huffmanLUT[4];
+
+        private void writeByte(byte b)
+        {
+            var data = new byte[1];
+            data[0] = b;
+            w.Write(data, 0, 1);
+        }
+
+        // emit emits the least significant nBits bits of bits to the bit-stream.
+        // The precondition is bits < 1<<nBits && nBits <= 16.
+        private void emit(uint bits, uint nBits)
+        {
+            nBits += this.nBits;
+            bits <<= (int)(32 - nBits);
+            bits |= this.bits;
+            while(nBits >= 8)
+            {
+                byte b = (byte)(bits >> 24);
+                writeByte(b);
+                if(b == 0xff)
+                    writeByte(0x00);
+                bits <<= 8;
+                nBits -= 8;
+            }
+            this.bits = bits;
+            this.nBits = nBits;
+        }
+
+        // emitHuff emits the given value with the given Huffman encoder.
+        private void emitHuff(huffIndex h, int v)
+        {
+            uint x = theHuffmanLUT[(int)h].values[v];
+            emit(x&((1<<24)-1), x>>24);
+        }
+
+        // emitHuffRLE emits a run of runLength copies of value encoded with the given
+        // Huffman encoder.
+        private void emitHuffRLE(huffIndex h, int runLength, int v)
+        {
+            int a = v;
+            int b = v;
+            if(a < 0)
+            {
+                a = -v;
+                b = v-1;
+            }
+            uint nBits = 0;
+            if(a < 0x100)
+                nBits = bitCount[a];
+            else
+                nBits = 8 + (uint)bitCount[a>>8];
+
+            emitHuff(h, (int)((runLength<<4)|nBits));
+            if(nBits > 0)
+                emit((uint)b & (uint)((1 << ((int)nBits)) - 1), nBits);
+        }
+
+        // writeMarkerHeader writes the header for a marker with the given length.
+        private void writeMarkerHeader(byte marker, int markerlen)
+        {
+            buf[0] = 0xff;
+            buf[1] = marker;
+            buf[2] = (byte)(markerlen >> 8);
+            buf[3] = (byte)(markerlen & 0xff);
+            w.Write(buf, 0, 4);
+        }
+
+        // writeDQT writes the Define Quantization Table marker.
+        private void writeDQT()
+        {
+            int markerlen = 2 + nQuantIndex*(1+Block.blockSize);
+            writeMarkerHeader(dqtMarker, markerlen);
+            for(int i = 0; i < nQuantIndex; i++)
+            { 
+                writeByte((byte)i);
+                w.Write(quant[i], 0, quant[i].Length);
+            }
+        }
+
+        // writeSOF0 writes the Start Of Frame (Baseline) marker.
+        private void writeSOF0(int wid, int hei, int nComponent)
+        {
+            byte[] chroma1 = new byte[] { 0x22, 0x11, 0x11 };
+            byte[] chroma2 = new byte[] { 0x00, 0x01, 0x01 };
+
+            int markerlen = 8 + 3*nComponent;
+            writeMarkerHeader(sof0Marker, markerlen);
+            buf[0] = 8; // 8-bit color.
+            buf[1] = (byte)(hei >> 8);
+            buf[2] = (byte)(hei & 0xff);
+            buf[3] = (byte)(wid >> 8);
+            buf[4] = (byte)(wid & 0xff);
+            buf[5] = (byte)(nComponent);
+            if(nComponent == 1)
+            {
+                buf[6] = 1;
+                // No subsampling for grayscale image.
+                buf[7] = 0x11;
+                buf[8] = 0x00;
+            }
+            else
+            {
+                for(int i = 0; i < nComponent; i++)
+                {
+                    buf[3*i+6] = (byte)(i + 1);
+                    // We use 4:2:0 chroma subsampling.
+                    buf[3*i+7] = chroma1[i];
+                    buf[3*i+8] = chroma2[i];
+                }
+            }
+            w.Write(buf, 0, 3*(nComponent-1)+9);
+        }
+
+        // writeDHT writes the Define Huffman Table marker.
+        private void writeDHT(int nComponent)
+        {
+            byte[] headers = new byte[] { 0x00, 0x10, 0x01, 0x11 };
+            int markerlen = 2;
+            huffmanSpec[] specs = theHuffmanSpec;
+
+            if(nComponent == 1)
+            {
+                // Drop the Chrominance tables.
+                specs = new huffmanSpec[] { theHuffmanSpec[0], theHuffmanSpec[1] };
+            }
+
+            foreach(var s in specs)
+                markerlen += 1 + 16 + s.values.Length;
+
+            writeMarkerHeader(dhtMarker, markerlen);
+            for(int i = 0; i < specs.Length; i++)
+            {
+                var s = specs[i];
+
+                writeByte(headers[i]);
+                w.Write(s.count, 0, s.count.Length);
+                w.Write(s.values, 0, s.values.Length);
+            }
+        }
+
+        // writeBlock writes a block of pixel data using the given quantization table,
+        // returning the post-quantized DC value of the DCT-transformed block. b is in
+        // natural (not zig-zag) order.
+        private int writeBlock(Block b, quantIndex q, int prevDC)
+        {
+            FDCT(b);
+
+            // Emit the DC delta.
+            int dc = div(b[0], 8*quant[(int)q][0]);
+            emitHuffRLE((huffIndex)(2*(int)q+0), 0, dc-prevDC);
+
+            // Emit the AC components.
+            var h = (huffIndex)(2*(int)q+1);
+            int runLength = 0;
+            
+            for(int zig = 1; zig < Block.blockSize; zig++)
+            {
+                int ac = div(b[unzig[zig]], 8*quant[(int)q][zig]);
+
+                if(ac == 0)
+                {
+                    runLength++;
+                }
+                else
+                {
+                    while(runLength > 15)
+                    {
+                        emitHuff(h, 0xf0);
+                        runLength -= 16;
+                    }
+
+                    emitHuffRLE(h, runLength, ac);
+                    runLength = 0;
+                }
+            }
+            if(runLength > 0)
+                emitHuff(h, 0x00);
+            return dc;
+        }
+
+        // toYCbCr converts the 8x8 region of m whose top-left corner is p to its
+        // YCbCr values.
+        private void toYCbCr(ImageBase m, int x, int y, Block yBlock, Block cbBlock, Block crBlock)
+        {
+            int xmax = m.Width - 1;
+            int ymax = m.Height - 1;
+            for(int j = 0; j < 8; j++)
+            {
+                for(int i = 0; i < 8; i++)
+                {
+                    byte yy, cb, cr;
+
+                    var c = m[Math.Min(x+i, xmax), Math.Min(y+j, ymax)];
+                    Colors.RGBToYCbCr((byte)(c.R*255), (byte)(c.G*255), (byte)(c.B*255), out yy, out cb, out cr);
+                    yBlock[8*j+i] = yy;
+                    cbBlock[8*j+i] = cb;
+                    crBlock[8*j+i] = cr;
+                }
+            }
+        }
+
+        // grayToY stores the 8x8 region of m whose top-left corner is p in yBlock.
+        /*func grayToY(m *image.Gray, p image.Point, yBlock *block) {
+            b := m.Bounds()
+            xmax := b.Max.X - 1
+            ymax := b.Max.Y - 1
+            pix := m.Pix
+            for j := 0; j < 8; j++ {
+                for i := 0; i < 8; i++ {
+                    idx := m.PixOffset(min(p.X+i, xmax), min(p.Y+j, ymax))
+                    yBlock[8*j+i] = int32(pix[idx])
+                }
+            }
+        }
+
+        // rgbaToYCbCr is a specialized version of toYCbCr for image.RGBA images.
+        func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block) {
+            b := m.Bounds()
+            xmax := b.Max.X - 1
+            ymax := b.Max.Y - 1
+            for j := 0; j < 8; j++ {
+                sj := p.Y + j
+                if sj > ymax {
+                    sj = ymax
+                }
+                offset := (sj-b.Min.Y)*m.Stride - b.Min.X*4
+                for i := 0; i < 8; i++ {
+                    sx := p.X + i
+                    if sx > xmax {
+                        sx = xmax
+                    }
+                    pix := m.Pix[offset+sx*4:]
+                    yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
+                    yBlock[8*j+i] = int32(yy)
+                    cbBlock[8*j+i] = int32(cb)
+                    crBlock[8*j+i] = int32(cr)
+                }
+            }
+        }*/
+
+        // scale scales the 16x16 region represented by the 4 src blocks to the 8x8
+        // dst block.
+        private void scale(Block dst, Block[] src)
+        {
+            for(int i = 0; i < 4; i++)
+            {
+                int dstOff = ((i&2)<<4) | ((i&1)<<2);
+                for(int y = 0; y < 4; y++)
+                {
+                    for(int x = 0; x < 4; x++)
+                    {
+                        int j = 16*y + 2*x;
+                        int sum = src[i][j] + src[i][j+1] + src[i][j+8] + src[i][j+9];
+                        dst[8*y+x+dstOff] = (sum + 2) >> 2;
+                    }
+                }
+            }
+        }
+
+        // sosHeaderY is the SOS marker "\xff\xda" followed by 8 bytes:
+        //  - the marker length "\x00\x08",
+        //  - the number of components "\x01",
+        //  - component 1 uses DC table 0 and AC table 0 "\x01\x00",
+        //  - the bytes "\x00\x3f\x00". Section B.2.3 of the spec says that for
+        //    sequential DCTs, those bytes (8-bit Ss, 8-bit Se, 4-bit Ah, 4-bit Al)
+        //    should be 0x00, 0x3f, 0x00<<4 | 0x00.
+        private readonly byte[] sosHeaderY = new byte[] {
+            0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00,
+        };
+
+        // sosHeaderYCbCr is the SOS marker "\xff\xda" followed by 12 bytes:
+        //  - the marker length "\x00\x0c",
+        //  - the number of components "\x03",
+        //  - component 1 uses DC table 0 and AC table 0 "\x01\x00",
+        //  - component 2 uses DC table 1 and AC table 1 "\x02\x11",
+        //  - component 3 uses DC table 1 and AC table 1 "\x03\x11",
+        //  - the bytes "\x00\x3f\x00". Section B.2.3 of the spec says that for
+        //    sequential DCTs, those bytes (8-bit Ss, 8-bit Se, 4-bit Ah, 4-bit Al)
+        //    should be 0x00, 0x3f, 0x00<<4 | 0x00.
+        private readonly byte[] sosHeaderYCbCr = new byte[] {
+            0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
+            0x11, 0x03, 0x11, 0x00, 0x3f, 0x00,
+        };
+
+        // writeSOS writes the StartOfScan marker.
+        private void writeSOS(ImageBase m)
+        {
+            w.Write(sosHeaderYCbCr, 0, sosHeaderYCbCr.Length);
+
+            Block b = new Block();
+            Block[] cb = new Block[4];
+            Block[] cr = new Block[4];
+            int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
+
+            for(int i = 0; i < 4; i++) cb[i] = new Block();
+            for(int i = 0; i < 4; i++) cr[i] = new Block();
+
+            for(int y = 0; y < m.Height; y += 16)
+            {
+                for(int x = 0; x < m.Width; x += 16)
+                {
+                    for(int i = 0; i < 4; i++)
+                    {
+                        int xOff = (i & 1) * 8;
+                        int yOff = (i & 2) * 4;
+
+                        toYCbCr(m, x+xOff, y+yOff, b, cb[i], cr[i]);
+                        prevDCY = writeBlock(b, 0, prevDCY);
+                    }
+                    scale(b, cb);
+                    prevDCCb = writeBlock(b, (quantIndex)1, prevDCCb);
+                    scale(b, cr);
+                    prevDCCr = writeBlock(b, (quantIndex)1, prevDCCr);
+                }
+            }
+
+            // Pad the last byte with 1's.
+            emit(0x7f, 7);
+        }
+
+        // Encode writes the Image m to w in JPEG 4:2:0 baseline format with the given
+        // options. Default parameters are used if a nil *Options is passed.
+        public void Encode(Stream w, ImageBase m, int quality)
+        {
+            this.w = w;
+
+            for(int i = 0; i < theHuffmanSpec.Length; i++)
+                theHuffmanLUT[i] = new huffmanLUT(theHuffmanSpec[i]);
+
+            for(int i = 0; i < nQuantIndex; i++)
+                quant[i] = new byte[Block.blockSize];
+
+            if(m.Width >= (1<<16) || m.Height >= (1<<16))
+                throw new Exception("jpeg: image is too large to encode");
+
+            if(quality < 1) quality = 1;
+            if(quality > 100) quality = 100;
+
+            // Convert from a quality rating to a scaling factor.
+            int scale;
+            if(quality < 50)
+                scale = 5000 / quality;
+            else
+                scale = 200 - quality*2;
+
+            // Initialize the quantization tables.
+            for(int i = 0; i < nQuantIndex; i++)
+            {
+                for(int j = 0; j < Block.blockSize; j++)
+                {
+                    int x = unscaledQuant[i,j];
+                    x = (x*scale + 50) / 100;
+                    if(x < 1) x = 1;
+                    if(x > 255) x = 255;
+                    quant[i][j] = (byte)x;
+                }
+            }
+
+            // Compute number of components based on input image type.
+            int nComponent = 3;
+
+            // Write the Start Of Image marker.
+            buf[0] = 0xff;
+            buf[1] = 0xd8;
+            w.Write(buf, 0, 2);
+
+            // Write the quantization tables.
+            writeDQT();
+
+            // Write the image dimensions.
+            writeSOF0(m.Width, m.Height, nComponent);
+
+            // Write the Huffman tables.
+            writeDHT(nComponent);
+
+            // Write the image data.
+            writeSOS(m);
+
+            // Write the End Of Image marker.
+            buf[0] = 0xff;
+            buf[1] = 0xd9;
+            w.Write(buf, 0, 2);
+            w.Flush();
+        }
+
+        // div returns a/b rounded to the nearest integer, instead of rounded to zero.
+        private static int div(int a, int b)
+        {
+            if(a >= 0)
+                return (a + (b >> 1)) / b;
+            else
+                return -((-a + (b >> 1)) / b);
+        }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/FDCT.cs b/src/ImageProcessorCore/Formats/Jpg/FDCT.cs
index fa46c68c77..c706e6f67d 100644
--- a/src/ImageProcessorCore/Formats/Jpg/FDCT.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/FDCT.cs
@@ -3,135 +3,135 @@ namespace ImageProcessorCore.Formats.Jpg
     using System;
     using System.IO;
 
-	internal partial class Encoder
-	{
-		// Trigonometric constants in 13-bit fixed point format.
-		private const int fix_0_298631336 = 2446;
-		private const int fix_0_390180644 = 3196;
-		private const int fix_0_541196100 = 4433;
-		private const int fix_0_765366865 = 6270;
-		private const int fix_0_899976223 = 7373;
-		private const int fix_1_175875602 = 9633;
-		private const int fix_1_501321110 = 12299;
-		private const int fix_1_847759065 = 15137;
-		private const int fix_1_961570560 = 16069;
-		private const int fix_2_053119869 = 16819;
-		private const int fix_2_562915447 = 20995;
-		private const int fix_3_072711026 = 25172;
-		private const int constBits = 13;
-		private const int pass1Bits = 2;
-		private const int centerJSample = 128;
+    internal partial class Encoder
+    {
+        // Trigonometric constants in 13-bit fixed point format.
+        private const int fix_0_298631336 = 2446;
+        private const int fix_0_390180644 = 3196;
+        private const int fix_0_541196100 = 4433;
+        private const int fix_0_765366865 = 6270;
+        private const int fix_0_899976223 = 7373;
+        private const int fix_1_175875602 = 9633;
+        private const int fix_1_501321110 = 12299;
+        private const int fix_1_847759065 = 15137;
+        private const int fix_1_961570560 = 16069;
+        private const int fix_2_053119869 = 16819;
+        private const int fix_2_562915447 = 20995;
+        private const int fix_3_072711026 = 25172;
+        private const int constBits = 13;
+        private const int pass1Bits = 2;
+        private const int centerJSample = 128;
 
-		// fdct performs a forward DCT on an 8x8 block of coefficients, including a
-		// level shift.
-		private static void FDCT(Block b)
-		{
-			// Pass 1: process rows.
-			for(int y = 0; y < 8; y++)
-			{
-				int x0 = b[y*8+0];
-				int x1 = b[y*8+1];
-				int x2 = b[y*8+2];
-				int x3 = b[y*8+3];
-				int x4 = b[y*8+4];
-				int x5 = b[y*8+5];
-				int x6 = b[y*8+6];
-				int x7 = b[y*8+7];
+        // fdct performs a forward DCT on an 8x8 block of coefficients, including a
+        // level shift.
+        private static void FDCT(Block b)
+        {
+            // Pass 1: process rows.
+            for(int y = 0; y < 8; y++)
+            {
+                int x0 = b[y*8+0];
+                int x1 = b[y*8+1];
+                int x2 = b[y*8+2];
+                int x3 = b[y*8+3];
+                int x4 = b[y*8+4];
+                int x5 = b[y*8+5];
+                int x6 = b[y*8+6];
+                int x7 = b[y*8+7];
 
-				int tmp0 = x0 + x7;
-				int tmp1 = x1 + x6;
-				int tmp2 = x2 + x5;
-				int tmp3 = x3 + x4;
+                int tmp0 = x0 + x7;
+                int tmp1 = x1 + x6;
+                int tmp2 = x2 + x5;
+                int tmp3 = x3 + x4;
 
-				int tmp10 = tmp0 + tmp3;
-				int tmp12 = tmp0 - tmp3;
-				int tmp11 = tmp1 + tmp2;
-				int tmp13 = tmp1 - tmp2;
+                int tmp10 = tmp0 + tmp3;
+                int tmp12 = tmp0 - tmp3;
+                int tmp11 = tmp1 + tmp2;
+                int tmp13 = tmp1 - tmp2;
 
-				tmp0 = x0 - x7;
-				tmp1 = x1 - x6;
-				tmp2 = x2 - x5;
-				tmp3 = x3 - x4;
+                tmp0 = x0 - x7;
+                tmp1 = x1 - x6;
+                tmp2 = x2 - x5;
+                tmp3 = x3 - x4;
 
-				b[y*8+0] = (tmp10 + tmp11 - 8*centerJSample) << pass1Bits;
-				b[y*8+4] = (tmp10 - tmp11) << pass1Bits;
-				int z1 = (tmp12 + tmp13) * fix_0_541196100;
-				z1 += 1 << (constBits - pass1Bits - 1);
-				b[y*8+2] = (z1 + tmp12*fix_0_765366865) >> (constBits - pass1Bits);
-				b[y*8+6] = (z1 - tmp13*fix_1_847759065) >> (constBits - pass1Bits);
+                b[y*8+0] = (tmp10 + tmp11 - 8*centerJSample) << pass1Bits;
+                b[y*8+4] = (tmp10 - tmp11) << pass1Bits;
+                int z1 = (tmp12 + tmp13) * fix_0_541196100;
+                z1 += 1 << (constBits - pass1Bits - 1);
+                b[y*8+2] = (z1 + tmp12*fix_0_765366865) >> (constBits - pass1Bits);
+                b[y*8+6] = (z1 - tmp13*fix_1_847759065) >> (constBits - pass1Bits);
 
-				tmp10 = tmp0 + tmp3;
-				tmp11 = tmp1 + tmp2;
-				tmp12 = tmp0 + tmp2;
-				tmp13 = tmp1 + tmp3;
-				z1 = (tmp12 + tmp13) * fix_1_175875602;
-				z1 += 1 << (constBits - pass1Bits - 1);
-				tmp0 = tmp0 * fix_1_501321110;
-				tmp1 = tmp1 * fix_3_072711026;
-				tmp2 = tmp2 * fix_2_053119869;
-				tmp3 = tmp3 * fix_0_298631336;
-				tmp10 = tmp10 * -fix_0_899976223;
-				tmp11 = tmp11 * -fix_2_562915447;
-				tmp12 = tmp12 * -fix_0_390180644;
-				tmp13 = tmp13 * -fix_1_961570560;
+                tmp10 = tmp0 + tmp3;
+                tmp11 = tmp1 + tmp2;
+                tmp12 = tmp0 + tmp2;
+                tmp13 = tmp1 + tmp3;
+                z1 = (tmp12 + tmp13) * fix_1_175875602;
+                z1 += 1 << (constBits - pass1Bits - 1);
+                tmp0 = tmp0 * fix_1_501321110;
+                tmp1 = tmp1 * fix_3_072711026;
+                tmp2 = tmp2 * fix_2_053119869;
+                tmp3 = tmp3 * fix_0_298631336;
+                tmp10 = tmp10 * -fix_0_899976223;
+                tmp11 = tmp11 * -fix_2_562915447;
+                tmp12 = tmp12 * -fix_0_390180644;
+                tmp13 = tmp13 * -fix_1_961570560;
 
-				tmp12 += z1;
-				tmp13 += z1;
-				b[y*8+1] = (tmp0 + tmp10 + tmp12) >> (constBits - pass1Bits);
-				b[y*8+3] = (tmp1 + tmp11 + tmp13) >> (constBits - pass1Bits);
-				b[y*8+5] = (tmp2 + tmp11 + tmp12) >> (constBits - pass1Bits);
-				b[y*8+7] = (tmp3 + tmp10 + tmp13) >> (constBits - pass1Bits);
-			}
+                tmp12 += z1;
+                tmp13 += z1;
+                b[y*8+1] = (tmp0 + tmp10 + tmp12) >> (constBits - pass1Bits);
+                b[y*8+3] = (tmp1 + tmp11 + tmp13) >> (constBits - pass1Bits);
+                b[y*8+5] = (tmp2 + tmp11 + tmp12) >> (constBits - pass1Bits);
+                b[y*8+7] = (tmp3 + tmp10 + tmp13) >> (constBits - pass1Bits);
+            }
 
-			// Pass 2: process columns.
-			// We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8.
-			for(int x = 0; x < 8; x++)
-			{
-				int tmp0 = b[0*8+x] + b[7*8+x];
-				int tmp1 = b[1*8+x] + b[6*8+x];
-				int tmp2 = b[2*8+x] + b[5*8+x];
-				int tmp3 = b[3*8+x] + b[4*8+x];
+            // Pass 2: process columns.
+            // We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8.
+            for(int x = 0; x < 8; x++)
+            {
+                int tmp0 = b[0*8+x] + b[7*8+x];
+                int tmp1 = b[1*8+x] + b[6*8+x];
+                int tmp2 = b[2*8+x] + b[5*8+x];
+                int tmp3 = b[3*8+x] + b[4*8+x];
 
-				int tmp10 = tmp0 + tmp3 + (1<<(pass1Bits-1));
-				int tmp12 = tmp0 - tmp3;
-				int tmp11 = tmp1 + tmp2;
-				int tmp13 = tmp1 - tmp2;
+                int tmp10 = tmp0 + tmp3 + (1<<(pass1Bits-1));
+                int tmp12 = tmp0 - tmp3;
+                int tmp11 = tmp1 + tmp2;
+                int tmp13 = tmp1 - tmp2;
 
-				tmp0 = b[0*8+x] - b[7*8+x];
-				tmp1 = b[1*8+x] - b[6*8+x];
-				tmp2 = b[2*8+x] - b[5*8+x];
-				tmp3 = b[3*8+x] - b[4*8+x];
+                tmp0 = b[0*8+x] - b[7*8+x];
+                tmp1 = b[1*8+x] - b[6*8+x];
+                tmp2 = b[2*8+x] - b[5*8+x];
+                tmp3 = b[3*8+x] - b[4*8+x];
 
-				b[0*8+x] = (tmp10 + tmp11) >> pass1Bits;
-				b[4*8+x] = (tmp10 - tmp11) >> pass1Bits;
+                b[0*8+x] = (tmp10 + tmp11) >> pass1Bits;
+                b[4*8+x] = (tmp10 - tmp11) >> pass1Bits;
 
-				int z1 = (tmp12 + tmp13) * fix_0_541196100;
-				z1 += 1 << (constBits + pass1Bits - 1);
-				b[2*8+x] = (z1 + tmp12*fix_0_765366865) >> (constBits + pass1Bits);
-				b[6*8+x] = (z1 - tmp13*fix_1_847759065) >> (constBits + pass1Bits);
+                int z1 = (tmp12 + tmp13) * fix_0_541196100;
+                z1 += 1 << (constBits + pass1Bits - 1);
+                b[2*8+x] = (z1 + tmp12*fix_0_765366865) >> (constBits + pass1Bits);
+                b[6*8+x] = (z1 - tmp13*fix_1_847759065) >> (constBits + pass1Bits);
 
-				tmp10 = tmp0 + tmp3;
-				tmp11 = tmp1 + tmp2;
-				tmp12 = tmp0 + tmp2;
-				tmp13 = tmp1 + tmp3;
-				z1 = (tmp12 + tmp13) * fix_1_175875602;
-				z1 += 1 << (constBits + pass1Bits - 1);
-				tmp0 = tmp0 * fix_1_501321110;
-				tmp1 = tmp1 * fix_3_072711026;
-				tmp2 = tmp2 * fix_2_053119869;
-				tmp3 = tmp3 * fix_0_298631336;
-				tmp10 = tmp10 * -fix_0_899976223;
-				tmp11 = tmp11 * -fix_2_562915447;
-				tmp12 = tmp12 * -fix_0_390180644;
-				tmp13 = tmp13 * -fix_1_961570560;
+                tmp10 = tmp0 + tmp3;
+                tmp11 = tmp1 + tmp2;
+                tmp12 = tmp0 + tmp2;
+                tmp13 = tmp1 + tmp3;
+                z1 = (tmp12 + tmp13) * fix_1_175875602;
+                z1 += 1 << (constBits + pass1Bits - 1);
+                tmp0 = tmp0 * fix_1_501321110;
+                tmp1 = tmp1 * fix_3_072711026;
+                tmp2 = tmp2 * fix_2_053119869;
+                tmp3 = tmp3 * fix_0_298631336;
+                tmp10 = tmp10 * -fix_0_899976223;
+                tmp11 = tmp11 * -fix_2_562915447;
+                tmp12 = tmp12 * -fix_0_390180644;
+                tmp13 = tmp13 * -fix_1_961570560;
 
-				tmp12 += z1;
-				tmp13 += z1;
-				b[1*8+x] = (tmp0 + tmp10 + tmp12) >> (constBits + pass1Bits);
-				b[3*8+x] = (tmp1 + tmp11 + tmp13) >> (constBits + pass1Bits);
-				b[5*8+x] = (tmp2 + tmp11 + tmp12) >> (constBits + pass1Bits);
-				b[7*8+x] = (tmp3 + tmp10 + tmp13) >> (constBits + pass1Bits);
-			}
-		}
-	}
+                tmp12 += z1;
+                tmp13 += z1;
+                b[1*8+x] = (tmp0 + tmp10 + tmp12) >> (constBits + pass1Bits);
+                b[3*8+x] = (tmp1 + tmp11 + tmp13) >> (constBits + pass1Bits);
+                b[5*8+x] = (tmp2 + tmp11 + tmp12) >> (constBits + pass1Bits);
+                b[7*8+x] = (tmp3 + tmp10 + tmp13) >> (constBits + pass1Bits);
+            }
+        }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/IDCT.cs b/src/ImageProcessorCore/Formats/Jpg/IDCT.cs
index 596beddf72..c29e7d0694 100644
--- a/src/ImageProcessorCore/Formats/Jpg/IDCT.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/IDCT.cs
@@ -3,159 +3,159 @@ namespace ImageProcessorCore.Formats.Jpg
     using System;
     using System.IO;
 
-	internal partial class Decoder
-	{
-		private const int w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16)
-		private const int w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16)
-		private const int w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16)
-		private const int w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16)
-		private const int w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16)
-		private const int w7 = 565;  // 2048*sqrt(2)*cos(7*pi/16)
-
-		private const int w1pw7 = w1 + w7;
-		private const int w1mw7 = w1 - w7;
-		private const int w2pw6 = w2 + w6;
-		private const int w2mw6 = w2 - w6;
-		private const int w3pw5 = w3 + w5;
-		private const int w3mw5 = w3 - w5;
-
-		private const int r2 = 181; // 256/sqrt(2)
-
-		// idct performs a 2-D Inverse Discrete Cosine Transformation.
-		//
-		// The input coefficients should already have been multiplied by the
-		// appropriate quantization table. We use fixed-point computation, with the
-		// number of bits for the fractional component varying over the intermediate
-		// stages.
-		//
-		// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
-		// discrete W transform and for the discrete Fourier transform", IEEE Trans. on
-		// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
-		private static void IDCT(Block src)
-		{
-			// Horizontal 1-D IDCT.
-			for(int y = 0; y < 8; y++)
-			{
-				int y8 = y * 8;
-
-				// If all the AC components are zero, then the IDCT is trivial.
-				if(src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 &&
-					src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0)
-				{
-					int dc = src[y8+0] << 3;
-					src[y8+0] = dc;
-					src[y8+1] = dc;
-					src[y8+2] = dc;
-					src[y8+3] = dc;
-					src[y8+4] = dc;
-					src[y8+5] = dc;
-					src[y8+6] = dc;
-					src[y8+7] = dc;
-					continue;
-				}
-
-				// Prescale.
-				int x0 = (src[y8+0] << 11) + 128;
-				int x1 = src[y8+4] << 11;
-				int x2 = src[y8+6];
-				int x3 = src[y8+2];
-				int x4 = src[y8+1];
-				int x5 = src[y8+7];
-				int x6 = src[y8+5];
-				int x7 = src[y8+3];
-
-				// Stage 1.
-				int x8 = w7 * (x4 + x5);
-				x4 = x8 + w1mw7*x4;
-				x5 = x8 - w1pw7*x5;
-				x8 = w3 * (x6 + x7);
-				x6 = x8 - w3mw5*x6;
-				x7 = x8 - w3pw5*x7;
-
-				// Stage 2.
-				x8 = x0 + x1;
-				x0 -= x1;
-				x1 = w6 * (x3 + x2);
-				x2 = x1 - w2pw6*x2;
-				x3 = x1 + w2mw6*x3;
-				x1 = x4 + x6;
-				x4 -= x6;
-				x6 = x5 + x7;
-				x5 -= x7;
-
-				// Stage 3.
-				x7 = x8 + x3;
-				x8 -= x3;
-				x3 = x0 + x2;
-				x0 -= x2;
-				x2 = (r2*(x4+x5) + 128) >> 8;
-				x4 = (r2*(x4-x5) + 128) >> 8;
-
-				// Stage 4.
-				src[y8+0] = (x7 + x1) >> 8;
-				src[y8+1] = (x3 + x2) >> 8;
-				src[y8+2] = (x0 + x4) >> 8;
-				src[y8+3] = (x8 + x6) >> 8;
-				src[y8+4] = (x8 - x6) >> 8;
-				src[y8+5] = (x0 - x4) >> 8;
-				src[y8+6] = (x3 - x2) >> 8;
-				src[y8+7] = (x7 - x1) >> 8;
-			}
-
-			// Vertical 1-D IDCT.
-			for(int x = 0; x < 8; x++)
-			{
-				// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
-				// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
-				// we do not bother to check for the all-zero case.
-
-				// Prescale.
-				int y0 = (src[8*0+x] << 8) + 8192;
-				int y1 = src[8*4+x] << 8;
-				int y2 = src[8*6+x];
-				int y3 = src[8*2+x];
-				int y4 = src[8*1+x];
-				int y5 = src[8*7+x];
-				int y6 = src[8*5+x];
-				int y7 = src[8*3+x];
-
-				// Stage 1.
-				int y8 = w7*(y4+y5) + 4;
-				y4 = (y8 + w1mw7*y4) >> 3;
-				y5 = (y8 - w1pw7*y5) >> 3;
-				y8 = w3*(y6+y7) + 4;
-				y6 = (y8 - w3mw5*y6) >> 3;
-				y7 = (y8 - w3pw5*y7) >> 3;
-
-				// Stage 2.
-				y8 = y0 + y1;
-				y0 -= y1;
-				y1 = w6*(y3+y2) + 4;
-				y2 = (y1 - w2pw6*y2) >> 3;
-				y3 = (y1 + w2mw6*y3) >> 3;
-				y1 = y4 + y6;
-				y4 -= y6;
-				y6 = y5 + y7;
-				y5 -= y7;
-
-				// Stage 3.
-				y7 = y8 + y3;
-				y8 -= y3;
-				y3 = y0 + y2;
-				y0 -= y2;
-				y2 = (r2*(y4+y5) + 128) >> 8;
-				y4 = (r2*(y4-y5) + 128) >> 8;
-
-				// Stage 4.
-				src[8*0+x] = (y7 + y1) >> 14;
-				src[8*1+x] = (y3 + y2) >> 14;
-				src[8*2+x] = (y0 + y4) >> 14;
-				src[8*3+x] = (y8 + y6) >> 14;
-				src[8*4+x] = (y8 - y6) >> 14;
-				src[8*5+x] = (y0 - y4) >> 14;
-				src[8*6+x] = (y3 - y2) >> 14;
-				src[8*7+x] = (y7 - y1) >> 14;
-			}
-		}
-	}
+    internal partial class Decoder
+    {
+        private const int w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16)
+        private const int w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16)
+        private const int w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16)
+        private const int w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16)
+        private const int w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16)
+        private const int w7 = 565;  // 2048*sqrt(2)*cos(7*pi/16)
+
+        private const int w1pw7 = w1 + w7;
+        private const int w1mw7 = w1 - w7;
+        private const int w2pw6 = w2 + w6;
+        private const int w2mw6 = w2 - w6;
+        private const int w3pw5 = w3 + w5;
+        private const int w3mw5 = w3 - w5;
+
+        private const int r2 = 181; // 256/sqrt(2)
+
+        // idct performs a 2-D Inverse Discrete Cosine Transformation.
+        //
+        // The input coefficients should already have been multiplied by the
+        // appropriate quantization table. We use fixed-point computation, with the
+        // number of bits for the fractional component varying over the intermediate
+        // stages.
+        //
+        // For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
+        // discrete W transform and for the discrete Fourier transform", IEEE Trans. on
+        // ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
+        private static void IDCT(Block src)
+        {
+            // Horizontal 1-D IDCT.
+            for(int y = 0; y < 8; y++)
+            {
+                int y8 = y * 8;
+
+                // If all the AC components are zero, then the IDCT is trivial.
+                if(src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 &&
+                    src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0)
+                {
+                    int dc = src[y8+0] << 3;
+                    src[y8+0] = dc;
+                    src[y8+1] = dc;
+                    src[y8+2] = dc;
+                    src[y8+3] = dc;
+                    src[y8+4] = dc;
+                    src[y8+5] = dc;
+                    src[y8+6] = dc;
+                    src[y8+7] = dc;
+                    continue;
+                }
+
+                // Prescale.
+                int x0 = (src[y8+0] << 11) + 128;
+                int x1 = src[y8+4] << 11;
+                int x2 = src[y8+6];
+                int x3 = src[y8+2];
+                int x4 = src[y8+1];
+                int x5 = src[y8+7];
+                int x6 = src[y8+5];
+                int x7 = src[y8+3];
+
+                // Stage 1.
+                int x8 = w7 * (x4 + x5);
+                x4 = x8 + w1mw7*x4;
+                x5 = x8 - w1pw7*x5;
+                x8 = w3 * (x6 + x7);
+                x6 = x8 - w3mw5*x6;
+                x7 = x8 - w3pw5*x7;
+
+                // Stage 2.
+                x8 = x0 + x1;
+                x0 -= x1;
+                x1 = w6 * (x3 + x2);
+                x2 = x1 - w2pw6*x2;
+                x3 = x1 + w2mw6*x3;
+                x1 = x4 + x6;
+                x4 -= x6;
+                x6 = x5 + x7;
+                x5 -= x7;
+
+                // Stage 3.
+                x7 = x8 + x3;
+                x8 -= x3;
+                x3 = x0 + x2;
+                x0 -= x2;
+                x2 = (r2*(x4+x5) + 128) >> 8;
+                x4 = (r2*(x4-x5) + 128) >> 8;
+
+                // Stage 4.
+                src[y8+0] = (x7 + x1) >> 8;
+                src[y8+1] = (x3 + x2) >> 8;
+                src[y8+2] = (x0 + x4) >> 8;
+                src[y8+3] = (x8 + x6) >> 8;
+                src[y8+4] = (x8 - x6) >> 8;
+                src[y8+5] = (x0 - x4) >> 8;
+                src[y8+6] = (x3 - x2) >> 8;
+                src[y8+7] = (x7 - x1) >> 8;
+            }
+
+            // Vertical 1-D IDCT.
+            for(int x = 0; x < 8; x++)
+            {
+                // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
+                // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
+                // we do not bother to check for the all-zero case.
+
+                // Prescale.
+                int y0 = (src[8*0+x] << 8) + 8192;
+                int y1 = src[8*4+x] << 8;
+                int y2 = src[8*6+x];
+                int y3 = src[8*2+x];
+                int y4 = src[8*1+x];
+                int y5 = src[8*7+x];
+                int y6 = src[8*5+x];
+                int y7 = src[8*3+x];
+
+                // Stage 1.
+                int y8 = w7*(y4+y5) + 4;
+                y4 = (y8 + w1mw7*y4) >> 3;
+                y5 = (y8 - w1pw7*y5) >> 3;
+                y8 = w3*(y6+y7) + 4;
+                y6 = (y8 - w3mw5*y6) >> 3;
+                y7 = (y8 - w3pw5*y7) >> 3;
+
+                // Stage 2.
+                y8 = y0 + y1;
+                y0 -= y1;
+                y1 = w6*(y3+y2) + 4;
+                y2 = (y1 - w2pw6*y2) >> 3;
+                y3 = (y1 + w2mw6*y3) >> 3;
+                y1 = y4 + y6;
+                y4 -= y6;
+                y6 = y5 + y7;
+                y5 -= y7;
+
+                // Stage 3.
+                y7 = y8 + y3;
+                y8 -= y3;
+                y3 = y0 + y2;
+                y0 -= y2;
+                y2 = (r2*(y4+y5) + 128) >> 8;
+                y4 = (r2*(y4-y5) + 128) >> 8;
+
+                // Stage 4.
+                src[8*0+x] = (y7 + y1) >> 14;
+                src[8*1+x] = (y3 + y2) >> 14;
+                src[8*2+x] = (y0 + y4) >> 14;
+                src[8*3+x] = (y8 + y6) >> 14;
+                src[8*4+x] = (y8 - y6) >> 14;
+                src[8*5+x] = (y0 - y4) >> 14;
+                src[8*6+x] = (y3 - y2) >> 14;
+                src[8*7+x] = (y7 - y1) >> 14;
+            }
+        }
+    }
 }
diff --git a/src/ImageProcessorCore/Formats/Jpg/JpegDecoder.cs b/src/ImageProcessorCore/Formats/Jpg/JpegDecoder.cs
index 9ca19c475f..8fa13d2e36 100644
--- a/src/ImageProcessorCore/Formats/Jpg/JpegDecoder.cs
+++ b/src/ImageProcessorCore/Formats/Jpg/JpegDecoder.cs
@@ -8,7 +8,7 @@ namespace ImageProcessorCore.Formats
     using System;
     using System.IO;
     using System.Threading.Tasks;
-	using ImageProcessorCore.Formats.Jpg;
+    using ImageProcessorCore.Formats.Jpg;
 
     /// <summary>
     /// Image decoder for generating an image out of a jpg stream.
@@ -95,22 +95,22 @@ namespace ImageProcessorCore.Formats
             Guard.NotNull(image, "image");
             Guard.NotNull(stream, "stream");
 
-			Decoder decoder = new Decoder();
-			decoder.decode(stream, false);
+            Decoder decoder = new Decoder();
+            decoder.decode(stream, false);
 
             int pixelWidth = decoder.width;
             int pixelHeight = decoder.height;
 
             float[] pixels = new float[pixelWidth * pixelHeight * 4];
 
-			if(decoder.nComp == 1)
-			{
+            if(decoder.nComp == 1)
+            {
                 Parallel.For(
                     0,
                     pixelHeight,
                     y =>
                     {
-						var yoff = decoder.img1.get_row_offset(y);
+                        var yoff = decoder.img1.get_row_offset(y);
                         for (int x = 0; x < pixelWidth; x++)
                         {
                             int offset = ((y * pixelWidth) + x) * 4;
@@ -121,15 +121,15 @@ namespace ImageProcessorCore.Formats
                             pixels[offset + 3] = 1;
                         }
                     });
-			}
-			else if(decoder.nComp == 3)
+            }
+            else if(decoder.nComp == 3)
             {
                 Parallel.For(
                     0,
                     pixelHeight,
                     y =>
                         {
-							var yoff = decoder.imgrgb.get_row_offset(y);
+                            var yoff = decoder.imgrgb.get_row_offset(y);
                             for (int x = 0; x < pixelWidth; x++)
                             {
                                 int offset = ((y * pixelWidth) + x) * 4;