View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package ca.uhn.hl7v2.hoh.util.repackage;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.math.BigInteger;
22  import java.nio.charset.Charset;
23  import java.nio.charset.UnsupportedCharsetException;
24  
25  /**
26   * Provides Base64 encoding and decoding as defined by <a
27   * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
28   * 
29   * <p>
30   * This class implements section <cite>6.8. Base64
31   * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
32   * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
33   * Freed and Borenstein.
34   * </p>
35   * <p>
36   * The class can be parameterized in the following manner with various
37   * constructors:
38   * <ul>
39   * <li>URL-safe mode: Default off.</li>
40   * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will
41   * still essentially end up being multiples of 4 in the encoded data.</li>
42   * <li>Line separator: Default is CRLF ("\r\n")</li>
43   * </ul>
44   * </p>
45   * <p>
46   * Since this class operates directly on byte streams, and not character
47   * streams, it is hard-coded to only encode/decode character encodings which are
48   * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8,
49   * etc).
50   * </p>
51   * <p>
52   * This class is thread-safe.
53   * </p>
54   * 
55   * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
56   * @author Note that this class has been repackaged from Apache Commons-Codec
57   *         and is distributed under the terms of the Apache Software License,
58   *         version 2.0
59   */
60  public class Base64 {
61  
62  	public static void main(String[] args) {
63  		
64  		System.out.println("basic " + encodeBase64String("cgta:d@3r$@TTg2446yhhh2h4".getBytes()));
65  		
66  	}
67  	
68  	/**
69  	 * BASE64 characters are 6 bits in length. They are formed by taking a block
70  	 * of 3 octets to form a 24-bit string, which is converted into 4 BASE64
71  	 * characters.
72  	 */
73  	private static final int BITS_PER_ENCODED_BYTE = 6; // bits carried by one encoded character
74  	private static final int BYTES_PER_UNENCODED_BLOCK = 3; // raw octets per block
75  	private static final int BYTES_PER_ENCODED_BLOCK = 4; // encoded characters per block
76  
77  	/**
78  	 * Chunk separator (CR followed by LF) per RFC 2045 section 2.1.
79  	 * 
80  	 * <p>
81  	 * N.B. The next major release may break compatibility and make this field
82  	 * private.
83  	 * </p>
84  	 * 
85  	 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
86  	 *      2.1</a>
87  	 */
88  	static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };
89  
90  	/**
91  	 * Lookup table that translates a 6-bit value (0-63), used as the array
92  	 * index, into its "Base64 Alphabet" character as specified in Table 1 of
93  	 * RFC 2045. Index 62 maps to '+' and index 63 maps to '/'.
94  	 * 
95  	 * Thanks to "commons" project in ws.apache.org for this code.
96  	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
97  	 */
98  	private static final byte[] STANDARD_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
99  			's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
100 
101 	/**
102 	 * Copy of STANDARD_ENCODE_TABLE with the last two entries changed: index 62
103 	 * is '-' instead of '+' and index 63 is '_' instead of '/', so the encoded
104 	 * output is URL-safe. Only used when the codec is in URL-safe mode.
105 	 */
106 	private static final byte[] URL_SAFE_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
107 			's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' };
108 
109 	/**
110 	 * This array is a lookup table that translates Unicode characters drawn
111 	 * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into
112 	 * their 6-bit positive integer equivalents. Characters that are not in the
113 	 * Base64 alphabet but fall within the bounds of the array are translated to
114 	 * -1. Code in this class checks an index against the table length before
115 	 * reading from it.
116 	 * 
117 	 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This
118 	 * means decoder seamlessly handles both URL_SAFE and STANDARD base64. (The
119 	 * encoder, on the other hand, needs to know ahead of time what to emit).
120 	 * 
121 	 * Thanks to "commons" project in ws.apache.org for this code.
122 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
123 	 */
123 	private static final byte[] DECODE_TABLE = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
124 			61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
125 
126 	/*
127 	 * Base64 uses 6-bit fields.
128 	 */
129 	/** Mask used to extract the low 6 bits of a value; used when encoding. */
130 	private static final int MASK_6BITS = 0x3f;
131 
132 	// The static final fields above are used for the original static byte[]
133 	// methods on Base64.
134 	// The private member fields below are used with the new streaming approach,
135 	// which requires some state to be preserved between calls of encode() and
136 	// decode().
137 
138 	/**
139 	 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE
140 	 * above remains static because it is able to decode both STANDARD and
141 	 * URL_SAFE streams, but the encodeTable must be a member variable so we can
142 	 * switch between the two modes. Assigned in the three-argument constructor.
143 	 */
144 	private byte[] encodeTable;
145 
146 	// Only one decode table currently; keep for consistency with Base32 code
147 	private final byte[] decodeTable = DECODE_TABLE;
148 
149 	/**
150 	 * Line separator for encoding. Not used when decoding. Only used if
151 	 * lineLength > 0; the constructor leaves it null otherwise.
152 	 */
153 	private byte[] lineSeparator;
154 
155 	/**
156 	 * Convenience variable to help us determine when our buffer is going to run
157 	 * out of room and needs resizing. The constructor sets it to
158 	 * <code>encodeSize - 1</code>, i.e. <code>3 + lineSeparator.length</code>.
159 	 */
160 	private int decodeSize;
161 
162 	/**
163 	 * Convenience variable to help us determine when our buffer is going to run
164 	 * out of room and needs resizing.
165 	 * <code>encodeSize = 4 + lineSeparator.length;</code> (4 when not chunking)
166 	 */
167 	private int encodeSize;
168 
169 	/**
170 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
171 	 * URL-unsafe mode.
172 	 * <p>
173 	 * When encoding the line length is 0 (no chunking), and the encoding table
174 	 * is STANDARD_ENCODE_TABLE.
175 	 * </p>
176 	 * 
177 	 * <p>
178 	 * When decoding all variants are supported.
179 	 * </p>
180 	 */
181 	public Base64() {
182 		this(0);
183 	}
184 
185 	/**
186 	 * Creates a Base64 codec used for decoding (all modes) and encoding in the
187 	 * given URL-safe mode.
188 	 * <p>
189 	 * When encoding the line length is 76, the line separator is CRLF, and the
190 	 * encoding table is STANDARD_ENCODE_TABLE.
191 	 * </p>
192 	 * 
193 	 * <p>
194 	 * When decoding all variants are supported.
195 	 * </p>
196 	 * 
197 	 * @param urlSafe
198 	 *            if {@code true}, URL-safe encoding is used. In most cases this
199 	 *            should be set to {@code false}.
200 	 * @since 1.4
201 	 */
202 	public Base64(boolean urlSafe) {
203 		this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
204 	}
205 
206 	/**
207 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
208 	 * URL-unsafe mode.
209 	 * <p>
210 	 * When encoding the line length is given in the constructor, the line
211 	 * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
212 	 * </p>
213 	 * <p>
214 	 * Line lengths that aren't multiples of 4 will still essentially end up
215 	 * being multiples of 4 in the encoded data.
216 	 * </p>
217 	 * <p>
218 	 * When decoding all variants are supported.
219 	 * </p>
220 	 * 
221 	 * @param lineLength
222 	 *            Each line of encoded data will be at most of the given length
223 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
224 	 *            then the output will not be divided into lines (chunks).
225 	 *            Ignored when decoding.
226 	 * @since 1.4
227 	 */
228 	public Base64(int lineLength) {
229 		this(lineLength, CHUNK_SEPARATOR);
230 	}
231 
232 	/**
233 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
234 	 * URL-unsafe mode.
235 	 * <p>
236 	 * When encoding the line length and line separator are given in the
237 	 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
238 	 * </p>
239 	 * <p>
240 	 * Line lengths that aren't multiples of 4 will still essentially end up
241 	 * being multiples of 4 in the encoded data.
242 	 * </p>
243 	 * <p>
244 	 * When decoding all variants are supported.
245 	 * </p>
246 	 * 
247 	 * @param lineLength
248 	 *            Each line of encoded data will be at most of the given length
249 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
250 	 *            then the output will not be divided into lines (chunks).
251 	 *            Ignored when decoding.
252 	 * @param lineSeparator
253 	 *            Each line of encoded data will end with this sequence of
254 	 *            bytes.
255 	 * @throws IllegalArgumentException
256 	 *             Thrown when the provided lineSeparator included some base64
257 	 *             characters.
258 	 * @since 1.4
259 	 */
260 	public Base64(int lineLength, byte[] lineSeparator) {
261 		this(lineLength, lineSeparator, false);
262 	}
263 
264 	/**
265 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
266 	 * URL-unsafe mode.
267 	 * <p>
268 	 * When encoding the line length and line separator are given in the
269 	 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
270 	 * </p>
271 	 * <p>
272 	 * Line lengths that aren't multiples of 4 will still essentially end up
273 	 * being multiples of 4 in the encoded data.
274 	 * </p>
275 	 * <p>
276 	 * When decoding all variants are supported.
277 	 * </p>
278 	 * 
279 	 * @param lineLength
280 	 *            Each line of encoded data will be at most of the given length
281 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
282 	 *            then the output will not be divided into lines (chunks).
283 	 *            Ignored when decoding.
284 	 * @param lineSeparator
285 	 *            Each line of encoded data will end with this sequence of
286 	 *            bytes.
287 	 * @param urlSafe
288 	 *            Instead of emitting '+' and '/' we emit '-' and '_'
289 	 *            respectively. urlSafe is only applied to encode operations.
290 	 *            Decoding seamlessly handles both modes.
291 	 * @throws IllegalArgumentException
292 	 *             The provided lineSeparator included some base64 characters.
293 	 *             That's not going to work!
294 	 * @since 1.4
295 	 */
296 	public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
297 		this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length);
298 		// TODO could be simplified if there is no requirement to reject invalid
299 		// line sep when length <=0
300 		// @see test case Base64Test.testConstructors()
301 		if (lineSeparator != null) {
302 			if (containsAlphabetOrPad(lineSeparator)) {
303 				String sep = StringUtils.newStringUtf8(lineSeparator);
304 				throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
305 			}
306 			if (lineLength > 0) { // null line-sep forces no chunking rather
307 									// than throwing IAE
308 				this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
309 				this.lineSeparator = new byte[lineSeparator.length];
310 				System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
311 			} else {
312 				this.encodeSize = BYTES_PER_ENCODED_BLOCK;
313 				this.lineSeparator = null;
314 			}
315 		} else {
316 			this.encodeSize = BYTES_PER_ENCODED_BLOCK;
317 			this.lineSeparator = null;
318 		}
319 		this.decodeSize = this.encodeSize - 1;
320 		this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
321 	}
322 
323 	/**
324 	 * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
325 	 * 
326 	 * @return true if we're in URL-SAFE mode, false otherwise.
327 	 * @since 1.4
328 	 */
329 	public boolean isUrlSafe() {
330 		return this.encodeTable == URL_SAFE_ENCODE_TABLE;
331 	}
332 
333 	/**
334 	 * <p>
335 	 * Encodes all of the provided data, starting at inPos, for inAvail bytes.
336 	 * Must be called at least twice: once with the data to encode, and once
337 	 * with inAvail set to "-1" to alert encoder that EOF has been reached, so
338 	 * flush last remaining bytes (if not multiple of 3).
339 	 * </p>
340 	 * <p>
341 	 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
342 	 * and general approach.
343 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
344 	 * </p>
345 	 * 
346 	 * @param in
347 	 *            byte[] array of binary data to base64 encode.
348 	 * @param inPos
349 	 *            Position to start reading data from.
350 	 * @param inAvail
351 	 *            Amount of bytes available from input for encoding.
352 	 * @param context
353 	 *            the context to be used
354 	 */
355 	void encode(byte[] in, int inPos, int inAvail, Context context) {
356 		if (context.eof) {
357 			return;
358 		}
359 		// inAvail < 0 is how we're informed of EOF in the underlying data we're
360 		// encoding.
361 		if (inAvail < 0) {
362 			context.eof = true;
363 			if (0 == context.modulus && lineLength == 0) {
364 				return; // no leftovers to process and not using chunking
365 			}
366 			ensureBufferSize(encodeSize, context);
367 			int savedPos = context.pos;
368 			switch (context.modulus) { // 0-2
369 			case 1: // 8 bits = 6 + 2
370 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; // top
371 																										// 6
372 																										// bits
373 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; // remaining
374 																										// 2
375 				// URL-SAFE skips the padding to further reduce size.
376 				if (encodeTable == STANDARD_ENCODE_TABLE) {
377 					context.buffer[context.pos++] = PAD;
378 					context.buffer[context.pos++] = PAD;
379 				}
380 				break;
381 
382 			case 2: // 16 bits = 6 + 6 + 4
383 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
384 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
385 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
386 				// URL-SAFE skips the padding to further reduce size.
387 				if (encodeTable == STANDARD_ENCODE_TABLE) {
388 					context.buffer[context.pos++] = PAD;
389 				}
390 				break;
391 			}
392 			context.currentLinePos += context.pos - savedPos; // keep track of
393 																// current line
394 																// position
395 			// if currentPos == 0 we are at the start of a line, so don't add
396 			// CRLF
397 			if (lineLength > 0 && context.currentLinePos > 0) {
398 				System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
399 				context.pos += lineSeparator.length;
400 			}
401 		} else {
402 			for (int i = 0; i < inAvail; i++) {
403 				ensureBufferSize(encodeSize, context);
404 				context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
405 				int b = in[inPos++];
406 				if (b < 0) {
407 					b += 256;
408 				}
409 				context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
410 				if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to
411 											// extract
412 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
413 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
414 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
415 					context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
416 					context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
417 					if (lineLength > 0 && lineLength <= context.currentLinePos) {
418 						System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
419 						context.pos += lineSeparator.length;
420 						context.currentLinePos = 0;
421 					}
422 				}
423 			}
424 		}
425 	}
426 
427 	/**
428 	 * <p>
429 	 * Decodes all of the provided data, starting at inPos, for inAvail bytes.
430 	 * Should be called at least twice: once with the data to decode, and once
431 	 * with inAvail set to "-1" to alert decoder that EOF has been reached. The
432 	 * "-1" call is not necessary when decoding, but it doesn't hurt, either.
433 	 * </p>
434 	 * <p>
435 	 * Ignores all non-base64 characters. This is how chunked (e.g. 76
436 	 * character) data is handled, since CR and LF are silently ignored, but has
437 	 * implications for other bytes, too. This method subscribes to the
438 	 * garbage-in, garbage-out philosophy: it will not check the provided data
439 	 * for validity.
440 	 * </p>
441 	 * <p>
442 	 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
443 	 * and general approach.
444 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
445 	 * </p>
446 	 * 
447 	 * @param in
448 	 *            byte[] array of ascii data to base64 decode.
449 	 * @param inPos
450 	 *            Position to start reading data from.
451 	 * @param inAvail
452 	 *            Amount of bytes available from input for encoding.
453 	 * @param context
454 	 *            the context to be used
455 	 */
456 	void decode(byte[] in, int inPos, int inAvail, Context context) {
457 		if (context.eof) {
458 			return;
459 		}
460 		if (inAvail < 0) {
461 			context.eof = true;
462 		}
463 		for (int i = 0; i < inAvail; i++) {
464 			ensureBufferSize(decodeSize, context);
465 			byte b = in[inPos++];
466 			if (b == PAD) {
467 				// We're done.
468 				context.eof = true;
469 				break;
470 			} else {
471 				if (b >= 0 && b < DECODE_TABLE.length) {
472 					int result = DECODE_TABLE[b];
473 					if (result >= 0) {
474 						context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
475 						context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
476 						if (context.modulus == 0) {
477 							context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
478 							context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
479 							context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
480 						}
481 					}
482 				}
483 			}
484 		}
485 
486 		// Two forms of EOF as far as base64 decoder is concerned: actual
487 		// EOF (-1) and first time '=' character is encountered in stream.
488 		// This approach makes the '=' padding characters completely optional.
489 		if (context.eof && context.modulus != 0) {
490 			ensureBufferSize(decodeSize, context);
491 
492 			// We have some spare bits remaining
493 			// Output all whole multiples of 8 bits and ignore the rest
494 			switch (context.modulus) {
495 			// case 1: // 6 bits - ignore entirely
496 			// break;
497 			case 2: // 12 bits = 8 + 4
498 				context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the
499 																	// extra 4
500 																	// bits
501 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
502 				break;
503 			case 3: // 18 bits = 8 + 8 + 2
504 				context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
505 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
506 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
507 				break;
508 			}
509 		}
510 	}
511 
512 	/**
513 	 * Tests a given byte array to see if it contains only valid characters
514 	 * within the Base64 alphabet. Currently the method treats whitespace as
515 	 * valid.
516 	 * 
517 	 * @param arrayOctet
518 	 *            byte array to test
519 	 * @return {@code true} if all bytes are valid characters in the Base64
520 	 *         alphabet or if the byte array is empty; {@code false}, otherwise
521 	 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
522 	 */
523 	@Deprecated
524 	public static boolean isArrayByteBase64(byte[] arrayOctet) {
525 		return isBase64(arrayOctet);
526 	}
527 
528 	/**
529 	 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
530 	 * 
531 	 * @param octet
532 	 *            The value to test
533 	 * @return {@code true} if the value is defined in the the base 64 alphabet,
534 	 *         {@code false} otherwise.
535 	 * @since 1.4
536 	 */
537 	public static boolean isBase64(byte octet) {
538 		return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
539 	}
540 
541 	/**
542 	 * Tests a given String to see if it contains only valid characters within
543 	 * the Base64 alphabet. Currently the method treats whitespace as valid.
544 	 * 
545 	 * @param base64
546 	 *            String to test
547 	 * @return {@code true} if all characters in the String are valid characters
548 	 *         in the Base64 alphabet or if the String is empty; {@code false},
549 	 *         otherwise
550 	 * @since 1.5
551 	 */
552 	public static boolean isBase64(String base64) {
553 		return isBase64(StringUtils.getBytesUtf8(base64));
554 	}
555 
556 	/**
557 	 * Tests a given byte array to see if it contains only valid characters
558 	 * within the Base64 alphabet. Currently the method treats whitespace as
559 	 * valid.
560 	 * 
561 	 * @param arrayOctet
562 	 *            byte array to test
563 	 * @return {@code true} if all bytes are valid characters in the Base64
564 	 *         alphabet or if the byte array is empty; {@code false}, otherwise
565 	 * @since 1.5
566 	 */
567 	public static boolean isBase64(byte[] arrayOctet) {
568 		for (int i = 0; i < arrayOctet.length; i++) {
569 			if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
570 				return false;
571 			}
572 		}
573 		return true;
574 	}
575 
576 	/**
577 	 * Encodes binary data using the base64 algorithm but does not chunk the
578 	 * output.
579 	 * 
580 	 * @param binaryData
581 	 *            binary data to encode
582 	 * @return byte[] containing Base64 characters in their UTF-8
583 	 *         representation.
584 	 */
585 	public static byte[] encodeBase64(byte[] binaryData) {
586 		return encodeBase64(binaryData, false);
587 	}
588 
589 	/**
590 	 * Encodes binary data using the base64 algorithm but does not chunk the
591 	 * output.
592 	 * 
593 	 * NOTE: We changed the behaviour of this method from multi-line chunking
594 	 * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5).
595 	 * 
596 	 * @param binaryData
597 	 *            binary data to encode
598 	 * @return String containing Base64 characters.
599 	 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
600 	 */
601 	public static String encodeBase64String(byte[] binaryData) {
602 		return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
603 	}
604 
605 	/**
606 	 * Encodes binary data using a URL-safe variation of the base64 algorithm
607 	 * but does not chunk the output. The url-safe variation emits - and _
608 	 * instead of + and / characters.
609 	 * 
610 	 * @param binaryData
611 	 *            binary data to encode
612 	 * @return byte[] containing Base64 characters in their UTF-8
613 	 *         representation.
614 	 * @since 1.4
615 	 */
616 	public static byte[] encodeBase64URLSafe(byte[] binaryData) {
617 		return encodeBase64(binaryData, false, true);
618 	}
619 
620 	/**
621 	 * Encodes binary data using a URL-safe variation of the base64 algorithm
622 	 * but does not chunk the output. The url-safe variation emits - and _
623 	 * instead of + and / characters.
624 	 * 
625 	 * @param binaryData
626 	 *            binary data to encode
627 	 * @return String containing Base64 characters
628 	 * @since 1.4
629 	 */
630 	public static String encodeBase64URLSafeString(byte[] binaryData) {
631 		return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
632 	}
633 
634 	/**
635 	 * Encodes binary data using the base64 algorithm and chunks the encoded
636 	 * output into 76 character blocks
637 	 * 
638 	 * @param binaryData
639 	 *            binary data to encode
640 	 * @return Base64 characters chunked in 76 character blocks
641 	 */
642 	public static byte[] encodeBase64Chunked(byte[] binaryData) {
643 		return encodeBase64(binaryData, true);
644 	}
645 
646 	/**
647 	 * Encodes binary data using the base64 algorithm, optionally chunking the
648 	 * output into 76 character blocks.
649 	 * 
650 	 * @param binaryData
651 	 *            Array containing binary data to encode.
652 	 * @param isChunked
653 	 *            if {@code true} this encoder will chunk the base64 output into
654 	 *            76 character blocks
655 	 * @return Base64-encoded data.
656 	 * @throws IllegalArgumentException
657 	 *             Thrown when the input array needs an output array bigger than
658 	 *             {@link Integer#MAX_VALUE}
659 	 */
660 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
661 		return encodeBase64(binaryData, isChunked, false);
662 	}
663 
664 	/**
665 	 * Encodes binary data using the base64 algorithm, optionally chunking the
666 	 * output into 76 character blocks.
667 	 * 
668 	 * @param binaryData
669 	 *            Array containing binary data to encode.
670 	 * @param isChunked
671 	 *            if {@code true} this encoder will chunk the base64 output into
672 	 *            76 character blocks
673 	 * @param urlSafe
674 	 *            if {@code true} this encoder will emit - and _ instead of the
675 	 *            usual + and / characters.
676 	 * @return Base64-encoded data.
677 	 * @throws IllegalArgumentException
678 	 *             Thrown when the input array needs an output array bigger than
679 	 *             {@link Integer#MAX_VALUE}
680 	 * @since 1.4
681 	 */
682 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
683 		return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
684 	}
685 
686 	/**
687 	 * Encodes binary data using the base64 algorithm, optionally chunking the
688 	 * output into 76 character blocks.
689 	 * 
690 	 * @param binaryData
691 	 *            Array containing binary data to encode.
692 	 * @param isChunked
693 	 *            if {@code true} this encoder will chunk the base64 output into
694 	 *            76 character blocks
695 	 * @param urlSafe
696 	 *            if {@code true} this encoder will emit - and _ instead of the
697 	 *            usual + and / characters.
698 	 * @param maxResultSize
699 	 *            The maximum result size to accept.
700 	 * @return Base64-encoded data.
701 	 * @throws IllegalArgumentException
702 	 *             Thrown when the input array needs an output array bigger than
703 	 *             maxResultSize
704 	 * @since 1.4
705 	 */
706 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
707 		if (binaryData == null || binaryData.length == 0) {
708 			return binaryData;
709 		}
710 
711 		// Create this so can use the super-class method
712 		// Also ensures that the same roundings are performed by the ctor and
713 		// the code
714 		Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
715 		long len = b64.getEncodedLength(binaryData);
716 		if (len > maxResultSize) {
717 			throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
718 		}
719 
720 		return b64.encode(binaryData);
721 	}
722 
723 	/**
724 	 * Decodes a Base64 String into octets
725 	 * 
726 	 * @param base64String
727 	 *            String containing Base64 data
728 	 * @return Array containing decoded data.
729 	 * @since 1.4
730 	 */
731 	public static byte[] decodeBase64(String base64String) {
732 		return new Base64().decode(base64String);
733 	}
734 
735 	/**
736 	 * Decodes Base64 data into octets
737 	 * 
738 	 * @param base64Data
739 	 *            Byte array containing Base64 data
740 	 * @return Array containing decoded data.
741 	 */
742 	public static byte[] decodeBase64(byte[] base64Data) {
743 		return new Base64().decode(base64Data);
744 	}
745 
746 	// Implementation of the Encoder Interface
747 
748 	// Implementation of integer encoding used for crypto
749 	/**
750 	 * Decodes a byte64-encoded integer according to crypto standards such as
751 	 * W3C's XML-Signature
752 	 * 
753 	 * @param pArray
754 	 *            a byte array containing base64 character data
755 	 * @return A BigInteger
756 	 * @since 1.4
757 	 */
758 	public static BigInteger decodeInteger(byte[] pArray) {
759 		return new BigInteger(1, decodeBase64(pArray));
760 	}
761 
762 	/**
763 	 * Encodes to a byte64-encoded integer according to crypto standards such as
764 	 * W3C's XML-Signature
765 	 * 
766 	 * @param bigInt
767 	 *            a BigInteger
768 	 * @return A byte array containing base64 character data
769 	 * @throws NullPointerException
770 	 *             if null is passed in
771 	 * @since 1.4
772 	 */
773 	public static byte[] encodeInteger(BigInteger bigInt) {
774 		if (bigInt == null) {
775 			throw new NullPointerException("encodeInteger called with null parameter");
776 		}
777 		return encodeBase64(toIntegerBytes(bigInt), false);
778 	}
779 
780 	/**
781 	 * Returns a byte-array representation of a <code>BigInteger</code> without
782 	 * sign bit.
783 	 * 
784 	 * @param bigInt
785 	 *            <code>BigInteger</code> to be converted
786 	 * @return a byte array representation of the BigInteger parameter
787 	 */
788 	static byte[] toIntegerBytes(BigInteger bigInt) {
789 		int bitlen = bigInt.bitLength();
790 		// round bitlen
791 		bitlen = ((bitlen + 7) >> 3) << 3;
792 		byte[] bigBytes = bigInt.toByteArray();
793 
794 		if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
795 			return bigBytes;
796 		}
797 		// set up params for copying everything but sign bit
798 		int startSrc = 0;
799 		int len = bigBytes.length;
800 
801 		// if bigInt is exactly byte-aligned, just skip signbit in copy
802 		if ((bigInt.bitLength() % 8) == 0) {
803 			startSrc = 1;
804 			len--;
805 		}
806 		int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
807 		byte[] resizedBytes = new byte[bitlen / 8];
808 		System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
809 		return resizedBytes;
810 	}
811 
812 	/**
813 	 * Returns whether or not the <code>octet</code> is in the Base32 alphabet.
814 	 * 
815 	 * @param octet
816 	 *            The value to test
817 	 * @return {@code true} if the value is defined in the the Base32 alphabet
818 	 *         {@code false} otherwise.
819 	 */
820 	protected boolean isInAlphabet(byte octet) {
821 		return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
822 	}
823 
	/**
	 * Holds the mutable state of a single encode or decode operation so that
	 * the codec object itself does not have to store it.
	 * 
	 * This class is not itself thread-safe; each thread must allocate its own
	 * copy.
	 * 
	 * @since 1.7
	 */
	static class Context {

		/**
		 * Place holder for the bytes we're dealing with for our based logic.
		 * Bitwise operations store and extract the encoding or decoding from
		 * this variable.
		 * 
		 * NOTE(review): presumably the int-sized work area is the one used by
		 * Base64 - confirm against the encode/decode implementation.
		 */
		int ibitWorkArea;

		/**
		 * Place holder for the bytes we're dealing with for our based logic.
		 * Bitwise operations store and extract the encoding or decoding from
		 * this variable.
		 * 
		 * NOTE(review): presumably the long-sized work area is meant for
		 * codecs with larger blocks (e.g. Base32) - confirm.
		 */
		long lbitWorkArea;

		/**
		 * Buffer for streaming. Allocated and grown on demand; set back to
		 * {@code null} once all buffered bytes have been read.
		 */
		byte[] buffer;

		/**
		 * Position where next character should be written in the buffer.
		 */
		int pos;

		/**
		 * Position where next character should be read from the buffer.
		 */
		int readPos;

		/**
		 * Boolean flag to indicate the EOF has been reached. Once EOF has been
		 * reached, this object becomes useless, and must be thrown away.
		 */
		boolean eof;

		/**
		 * Variable tracks how many characters have been written to the current
		 * line. Only used when encoding. We use it to make sure each encoded
		 * line never goes beyond lineLength (if lineLength > 0).
		 */
		int currentLinePos;

		/**
		 * Writes to the buffer only occur after every 3/5 reads when encoding,
		 * and every 4/8 reads when decoding. This variable helps track that.
		 */
		int modulus;

		/** Creates an empty context; all fields keep their Java default values. */
		Context() {
		}
	}
885 
	/**
	 * Marker value for end-of-file: passed as the "available" argument to
	 * signal the end of input and returned by the read method when no more
	 * data will follow.
	 * 
	 * @since 1.7
	 */
	static final int EOF = -1;

	/**
	 * MIME chunk size per RFC 2045 section 6.8.
	 * 
	 * <p>
	 * The {@value} character limit does not count the trailing CRLF, but counts
	 * all other characters, including any equal signs.
	 * </p>
	 * 
	 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
	 *      6.8</a>
	 */
	public static final int MIME_CHUNK_SIZE = 76;

	/**
	 * PEM chunk size per RFC 1421 section 4.3.2.4.
	 * 
	 * <p>
	 * The {@value} character limit does not count the trailing CRLF, but counts
	 * all other characters, including any equal signs.
	 * </p>
	 * 
	 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
	 *      4.3.2.4</a>
	 */
	public static final int PEM_CHUNK_SIZE = 64;

	/**
	 * Factor by which an already allocated buffer is enlarged each time it is
	 * resized.
	 */
	private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

	/**
	 * Defines the default buffer size - currently {@value} - must be large
	 * enough for at least one encoded block+separator
	 */
	private static final int DEFAULT_BUFFER_SIZE = 8192;

	/** Mask used to extract 8 bits, used in decoding bytes */
	protected static final int MASK_8BITS = 0xff;

	/**
	 * Byte used to pad output. Declared static so that the default pad can be
	 * accessed without an instance.
	 */
	protected static final byte PAD_DEFAULT = '=';

	/**
	 * Pad byte used by this instance. Kept as an instance variable just in
	 * case it needs to vary later; currently always {@link #PAD_DEFAULT}.
	 */
	protected final byte PAD = PAD_DEFAULT;

	/**
	 * Number of bytes in each full block of unencoded data, e.g. 3 for Base64
	 * and 5 for Base32
	 */
	private final int unencodedBlockSize;

	/**
	 * Number of bytes in each full block of encoded data, e.g. 4 for Base64 and
	 * 8 for Base32
	 */
	private final int encodedBlockSize;

	/**
	 * Chunksize for encoding. Not used when decoding. A value of zero or less
	 * implies no chunking of the encoded data. Rounded down to nearest multiple
	 * of encodedBlockSize.
	 */
	protected int lineLength;

	/**
	 * Size of chunk separator. Not used unless {@link #lineLength} > 0.
	 */
	private int chunkSeparatorLength;
962 
963 	/**
964 	 * Note <code>lineLength</code> is rounded down to the nearest multiple of
965 	 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
966 	 * then chunking is disabled.
967 	 * 
968 	 * @param unencodedBlockSize
969 	 *            the size of an unencoded block (e.g. Base64 = 3)
970 	 * @param encodedBlockSize
971 	 *            the size of an encoded block (e.g. Base64 = 4)
972 	 * @param lineLength
973 	 *            if &gt; 0, use chunking with a length <code>lineLength</code>
974 	 * @param chunkSeparatorLength
975 	 *            the chunk separator length, if relevant
976 	 */
977 	protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
978 		this.unencodedBlockSize = unencodedBlockSize;
979 		this.encodedBlockSize = encodedBlockSize;
980 		this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
981 		this.chunkSeparatorLength = chunkSeparatorLength;
982 	}
983 
984 	/**
985 	 * Returns true if this object has buffered data for reading.
986 	 * 
987 	 * @param context
988 	 *            the context to be used
989 	 * @return true if there is data still available for reading.
990 	 */
991 	boolean hasData(Context context) { // package protected for access from I/O
992 										// streams
993 		return context.buffer != null;
994 	}
995 
996 	/**
997 	 * Returns the amount of buffered data available for reading.
998 	 * 
999 	 * @param context
1000 	 *            the context to be used
1001 	 * @return The amount of buffered data available for reading.
1002 	 */
1003 	int available(Context context) { // package protected for access from I/O
1004 										// streams
1005 		return context.buffer != null ? context.pos - context.readPos : 0;
1006 	}
1007 
	/**
	 * Get the default buffer size, i.e. the size used when a context's buffer
	 * is allocated for the first time. Can be overridden.
	 * 
	 * @return {@link #DEFAULT_BUFFER_SIZE}
	 */
	protected int getDefaultBufferSize() {
		return DEFAULT_BUFFER_SIZE;
	}
1016 
1017 	/**
1018 	 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
1019 	 * 
1020 	 * @param context
1021 	 *            the context to be used
1022 	 */
1023 	private void resizeBuffer(Context context) {
1024 		if (context.buffer == null) {
1025 			context.buffer = new byte[getDefaultBufferSize()];
1026 			context.pos = 0;
1027 			context.readPos = 0;
1028 		} else {
1029 			byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
1030 			System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
1031 			context.buffer = b;
1032 		}
1033 	}
1034 
1035 	/**
1036 	 * Ensure that the buffer has room for <code>size</code> bytes
1037 	 * 
1038 	 * @param size
1039 	 *            minimum spare space required
1040 	 * @param context
1041 	 *            the context to be used
1042 	 */
1043 	protected void ensureBufferSize(int size, Context context) {
1044 		if ((context.buffer == null) || (context.buffer.length < context.pos + size)) {
1045 			resizeBuffer(context);
1046 		}
1047 	}
1048 
1049 	/**
1050 	 * Extracts buffered data into the provided byte[] array, starting at
1051 	 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
1052 	 * were actually extracted.
1053 	 * 
1054 	 * @param b
1055 	 *            byte[] array to extract the buffered data into.
1056 	 * @param bPos
1057 	 *            position in byte[] array to start extraction at.
1058 	 * @param bAvail
1059 	 *            amount of bytes we're allowed to extract. We may extract fewer
1060 	 *            (if fewer are available).
1061 	 * @param context
1062 	 *            the context to be used
1063 	 * @return The number of bytes successfully extracted into the provided
1064 	 *         byte[] array.
1065 	 */
1066 	int readResults(byte[] b, int bPos, int bAvail, Context context) { // package
1067 																		// protected
1068 																		// for
1069 																		// access
1070 																		// from
1071 																		// I/O
1072 																		// streams
1073 		if (context.buffer != null) {
1074 			int len = Math.min(available(context), bAvail);
1075 			System.arraycopy(context.buffer, context.readPos, b, bPos, len);
1076 			context.readPos += len;
1077 			if (context.readPos >= context.pos) {
1078 				context.buffer = null; // so hasData() will return false, and
1079 										// this method can return -1
1080 			}
1081 			return len;
1082 		}
1083 		return context.eof ? EOF : 0;
1084 	}
1085 
1086 	/**
1087 	 * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
1088 	 * space, tab, CR, LF
1089 	 * 
1090 	 * @param byteToCheck
1091 	 *            the byte to check
1092 	 * @return true if byte is whitespace, false otherwise
1093 	 */
1094 	protected static boolean isWhiteSpace(byte byteToCheck) {
1095 		switch (byteToCheck) {
1096 		case ' ':
1097 		case '\n':
1098 		case '\r':
1099 		case '\t':
1100 			return true;
1101 		default:
1102 			return false;
1103 		}
1104 	}
1105 
1106 	/**
1107 	 * Encodes an Object using the Base-N algorithm. This method is provided in
1108 	 * order to satisfy the requirements of the Encoder interface, and will
1109 	 * throw an EncoderException if the supplied object is not of type byte[].
1110 	 * 
1111 	 * @param obj
1112 	 *            Object to encode
1113 	 * @return An object (of type byte[]) containing the Base-N encoded data
1114 	 *         which corresponds to the byte[] supplied.
1115 	 * @throws EncoderException
1116 	 *             if the parameter supplied is not of type byte[]
1117 	 */
1118 	public Object encode(Object obj) throws Exception {
1119 		if (!(obj instanceof byte[])) {
1120 			throw new Exception("Parameter supplied to Base-N encode is not a byte[]");
1121 		}
1122 		return encode((byte[]) obj);
1123 	}
1124 
1125 	/**
1126 	 * Encodes a byte[] containing binary data, into a String containing
1127 	 * characters in the Base-N alphabet. Uses UTF8 encoding.
1128 	 * 
1129 	 * @param pArray
1130 	 *            a byte array containing binary data
1131 	 * @return A String containing only Base-N character data
1132 	 */
1133 	public String encodeToString(byte[] pArray) {
1134 		return StringUtils.newStringUtf8(encode(pArray));
1135 	}
1136 
1137 	/**
1138 	 * Encodes a byte[] containing binary data, into a String containing
1139 	 * characters in the appropriate alphabet. Uses UTF8 encoding.
1140 	 * 
1141 	 * @param pArray
1142 	 *            a byte array containing binary data
1143 	 * @return String containing only character data in the appropriate
1144 	 *         alphabet.
1145 	 */
1146 	public String encodeAsString(byte[] pArray) {
1147 		return StringUtils.newStringUtf8(encode(pArray));
1148 	}
1149 
1150 	/**
1151 	 * Decodes an Object using the Base-N algorithm. This method is provided in
1152 	 * order to satisfy the requirements of the Decoder interface, and will
1153 	 * throw a DecoderException if the supplied object is not of type byte[] or
1154 	 * String.
1155 	 * 
1156 	 * @param obj
1157 	 *            Object to decode
1158 	 * @return An object (of type byte[]) containing the binary data which
1159 	 *         corresponds to the byte[] or String supplied.
1160 	 * @throws DecoderException
1161 	 *             if the parameter supplied is not of type byte[]
1162 	 */
1163 	public Object decode(Object obj) throws Exception {
1164 		if (obj instanceof byte[]) {
1165 			return decode((byte[]) obj);
1166 		} else if (obj instanceof String) {
1167 			return decode((String) obj);
1168 		} else {
1169 			throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String");
1170 		}
1171 	}
1172 
1173 	/**
1174 	 * Decodes a String containing characters in the Base-N alphabet.
1175 	 * 
1176 	 * @param pArray
1177 	 *            A String containing Base-N character data
1178 	 * @return a byte array containing binary data
1179 	 */
1180 	public byte[] decode(String pArray) {
1181 		return decode(StringUtils.getBytesUtf8(pArray));
1182 	}
1183 
1184 	/**
1185 	 * Decodes a byte[] containing characters in the Base-N alphabet.
1186 	 * 
1187 	 * @param pArray
1188 	 *            A byte array containing Base-N character data
1189 	 * @return a byte array containing binary data
1190 	 */
1191 	public byte[] decode(byte[] pArray) {
1192 		Context context = new Context();
1193 		if (pArray == null || pArray.length == 0) {
1194 			return pArray;
1195 		}
1196 		decode(pArray, 0, pArray.length, context);
1197 		decode(pArray, 0, EOF, context); // Notify decoder of EOF.
1198 		byte[] result = new byte[context.pos];
1199 		readResults(result, 0, result.length, context);
1200 		return result;
1201 	}
1202 
1203 	/**
1204 	 * Encodes a byte[] containing binary data, into a byte[] containing
1205 	 * characters in the alphabet.
1206 	 * 
1207 	 * @param pArray
1208 	 *            a byte array containing binary data
1209 	 * @return A byte array containing only the basen alphabetic character data
1210 	 */
1211 	public byte[] encode(byte[] pArray) {
1212 		Context context = new Context();
1213 		if (pArray == null || pArray.length == 0) {
1214 			return pArray;
1215 		}
1216 		encode(pArray, 0, pArray.length, context);
1217 		encode(pArray, 0, EOF, context); // Notify encoder of EOF.
1218 		byte[] buf = new byte[context.pos - context.readPos];
1219 		readResults(buf, 0, buf.length, context);
1220 		return buf;
1221 	}
1222 
1223 	/**
1224 	 * Tests a given byte array to see if it contains only valid characters
1225 	 * within the alphabet. The method optionally treats whitespace and pad as
1226 	 * valid.
1227 	 * 
1228 	 * @param arrayOctet
1229 	 *            byte array to test
1230 	 * @param allowWSPad
1231 	 *            if {@code true}, then whitespace and PAD are also allowed
1232 	 * 
1233 	 * @return {@code true} if all bytes are valid characters in the alphabet or
1234 	 *         if the byte array is empty; {@code false}, otherwise
1235 	 */
1236 	public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
1237 		for (int i = 0; i < arrayOctet.length; i++) {
1238 			if (!isInAlphabet(arrayOctet[i]) && (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
1239 				return false;
1240 			}
1241 		}
1242 		return true;
1243 	}
1244 
1245 	/**
1246 	 * Tests a given String to see if it contains only valid characters within
1247 	 * the alphabet. The method treats whitespace and PAD as valid.
1248 	 * 
1249 	 * @param basen
1250 	 *            String to test
1251 	 * @return {@code true} if all characters in the String are valid characters
1252 	 *         in the alphabet or if the String is empty; {@code false},
1253 	 *         otherwise
1254 	 * @see #isInAlphabet(byte[], boolean)
1255 	 */
1256 	public boolean isInAlphabet(String basen) {
1257 		return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
1258 	}
1259 
1260 	/**
1261 	 * Tests a given byte array to see if it contains any characters within the
1262 	 * alphabet or PAD.
1263 	 * 
1264 	 * Intended for use in checking line-ending arrays
1265 	 * 
1266 	 * @param arrayOctet
1267 	 *            byte array to test
1268 	 * @return {@code true} if any byte is a valid character in the alphabet or
1269 	 *         PAD; {@code false} otherwise
1270 	 */
1271 	protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
1272 		if (arrayOctet == null) {
1273 			return false;
1274 		}
1275 		for (byte element : arrayOctet) {
1276 			if (PAD == element || isInAlphabet(element)) {
1277 				return true;
1278 			}
1279 		}
1280 		return false;
1281 	}
1282 
1283 	/**
1284 	 * Calculates the amount of space needed to encode the supplied array.
1285 	 * 
1286 	 * @param pArray
1287 	 *            byte[] array which will later be encoded
1288 	 * 
1289 	 * @return amount of space needed to encoded the supplied array. Returns a
1290 	 *         long since a max-len array will require > Integer.MAX_VALUE
1291 	 */
1292 	public long getEncodedLength(byte[] pArray) {
1293 		// Calculate non-chunked size - rounded up to allow for padding
1294 		// cast to long is needed to avoid possibility of overflow
1295 		long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize;
1296 		if (lineLength > 0) { // We're using chunking
1297 			// Round up to nearest multiple
1298 			len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
1299 		}
1300 		return len;
1301 	}
1302 
	/**
	 * Character encoding names required of every implementation of the Java
	 * platform.
	 * 
	 * From the Java documentation <a href=
	 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
	 * >Standard charsets</a>:
	 * <p>
	 * <cite>Every implementation of the Java platform is required to support
	 * the following character encodings. Consult the release documentation for
	 * your implementation to see if any other encodings are supported.</cite>
	 * </p>
	 * 
	 * <ul>
	 * <li><code>US-ASCII</code><br/>
	 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
	 * Unicode character set.</li>
	 * <li><code>ISO-8859-1</code><br/>
	 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
	 * <li><code>UTF-8</code><br/>
	 * Eight-bit Unicode Transformation Format.</li>
	 * <li><code>UTF-16BE</code><br/>
	 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
	 * <li><code>UTF-16LE</code><br/>
	 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
	 * <li><code>UTF-16</code><br/>
	 * Sixteen-bit Unicode Transformation Format, byte order specified by a
	 * mandatory initial byte-order mark (either order accepted on input,
	 * big-endian used on output.)</li>
	 * </ul>
	 * 
	 * This perhaps would best belong in the [lang] project. Even if a similar
	 * interface is defined in [lang], it is not foreseen that [codec] would be
	 * made to depend on [lang].
	 * 
	 * <p>
	 * This class only declares String constants.
	 * </p>
	 * 
	 * NOTE(review): unlike the sibling nested classes Charsets and StringUtils,
	 * this class is declared without <code>static</code>, although it holds
	 * nothing but constants. It could presumably be made a static nested class -
	 * confirm that no caller instantiates it through an outer instance first.
	 * 
	 * @see <a
	 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
	 *      charsets</a>
	 * @since 1.4
	 * @version $Id$
	 */
	public class CharEncoding {
		/**
		 * <p>
		 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String ISO_8859_1 = "ISO-8859-1";

		/**
		 * <p>
		 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
		 * Latin block of the Unicode character set.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String US_ASCII = "US-ASCII";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, the byte order specified
		 * by a mandatory initial byte-order mark (either order accepted on
		 * input, big-endian used on output)
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16 = "UTF-16";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16BE = "UTF-16BE";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16LE = "UTF-16LE";

		/**
		 * <p>
		 * Eight-bit Unicode Transformation Format.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_8 = "UTF-8";
	}
1442 
1443 	/**
1444 	 * Charsets required of every implementation of the Java platform.
1445 	 * 
1446 	 * From the Java documentation <a href=
1447 	 * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1448 	 * >Standard charsets</a>:
1449 	 * <p>
1450 	 * <cite>Every implementation of the Java platform is required to support
1451 	 * the following character encodings. Consult the release documentation for
1452 	 * your implementation to see if any other encodings are supported. Consult
1453 	 * the release documentation for your implementation to see if any other
1454 	 * encodings are supported. </cite>
1455 	 * </p>
1456 	 * 
1457 	 * <ul>
1458 	 * <li><code>US-ASCII</code><br/>
1459 	 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
1460 	 * Unicode character set.</li>
1461 	 * <li><code>ISO-8859-1</code><br/>
1462 	 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
1463 	 * <li><code>UTF-8</code><br/>
1464 	 * Eight-bit Unicode Transformation Format.</li>
1465 	 * <li><code>UTF-16BE</code><br/>
1466 	 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
1467 	 * <li><code>UTF-16LE</code><br/>
1468 	 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
1469 	 * <li><code>UTF-16</code><br/>
1470 	 * Sixteen-bit Unicode Transformation Format, byte order specified by a
1471 	 * mandatory initial byte-order mark (either order accepted on input,
1472 	 * big-endian used on output.)</li>
1473 	 * </ul>
1474 	 * 
1475 	 * This perhaps would best belong in the Commons Lang project. Even if a
1476 	 * similar class is defined in Commons Lang, it is not foreseen that Commons
1477 	 * Codec would be made to depend on Commons Lang.
1478 	 * 
1479 	 * <p>
1480 	 * This class is immutable and thread-safe.
1481 	 * </p>
1482 	 * 
1483 	 * @see <a
1484 	 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1485 	 *      charsets</a>
1486 	 * @since 1.7
1487 	 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
1488 	 */
1489 	public static class Charsets {
1490 
1491 		//
1492 		// This class should only contain Charset instances for required
1493 		// encodings. This guarantees that it will load correctly and
1494 		// without delay on all Java platforms.
1495 		//
1496 
1497 		/**
1498 		 * Returns the given Charset or the default Charset if the given Charset
1499 		 * is null.
1500 		 * 
1501 		 * @param charset
1502 		 *            A charset or null.
1503 		 * @return the given Charset or the default Charset if the given Charset
1504 		 *         is null
1505 		 */
1506 		public static Charset toCharset(Charset charset) {
1507 			return charset == null ? Charset.defaultCharset() : charset;
1508 		}
1509 
1510 		/**
1511 		 * Returns a Charset for the named charset. If the name is null, return
1512 		 * the default Charset.
1513 		 * 
1514 		 * @param charset
1515 		 *            The name of the requested charset, may be null.
1516 		 * @return a Charset for the named charset
1517 		 * @throws UnsupportedCharsetException
1518 		 *             If the named charset is unavailable
1519 		 */
1520 		public static Charset toCharset(String charset) {
1521 			return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
1522 		}
1523 
1524 		/**
1525 		 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
1526 		 * <p>
1527 		 * Every implementation of the Java platform is required to support this
1528 		 * character encoding.
1529 		 * </p>
1530 		 * 
1531 		 * @see <a
1532 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1533 		 *      charsets</a>
1534 		 */
1535 		public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);
1536 
1537 		/**
1538 		 * <p>
1539 		 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
1540 		 * Latin block of the Unicode character set.
1541 		 * </p>
1542 		 * <p>
1543 		 * Every implementation of the Java platform is required to support this
1544 		 * character encoding.
1545 		 * </p>
1546 		 * 
1547 		 * @see <a
1548 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1549 		 *      charsets</a>
1550 		 */
1551 		public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);
1552 
1553 		/**
1554 		 * <p>
1555 		 * Sixteen-bit Unicode Transformation Format, The byte order specified
1556 		 * by a mandatory initial byte-order mark (either order accepted on
1557 		 * input, big-endian used on output)
1558 		 * </p>
1559 		 * <p>
1560 		 * Every implementation of the Java platform is required to support this
1561 		 * character encoding.
1562 		 * </p>
1563 		 * 
1564 		 * @see <a
1565 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1566 		 *      charsets</a>
1567 		 */
1568 		public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);
1569 
1570 		/**
1571 		 * <p>
1572 		 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
1573 		 * </p>
1574 		 * <p>
1575 		 * Every implementation of the Java platform is required to support this
1576 		 * character encoding.
1577 		 * </p>
1578 		 * 
1579 		 * @see <a
1580 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1581 		 *      charsets</a>
1582 		 */
1583 		public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);
1584 
1585 		/**
1586 		 * <p>
1587 		 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
1588 		 * </p>
1589 		 * <p>
1590 		 * Every implementation of the Java platform is required to support this
1591 		 * character encoding.
1592 		 * </p>
1593 		 * 
1594 		 * @see <a
1595 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1596 		 *      charsets</a>
1597 		 */
1598 		public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);
1599 
1600 		/**
1601 		 * <p>
1602 		 * Eight-bit Unicode Transformation Format.
1603 		 * </p>
1604 		 * <p>
1605 		 * Every implementation of the Java platform is required to support this
1606 		 * character encoding.
1607 		 * </p>
1608 		 * 
1609 		 * @see <a
1610 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1611 		 *      charsets</a>
1612 		 */
1613 		public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
1614 	}
1615 
1616 	/**
1617 	 * Converts String to and from bytes using the encodings required by the
1618 	 * Java specification. These encodings are specified in <a href=
1619 	 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1620 	 * >Standard charsets</a>
1621 	 * 
1622 	 * <p>
1623 	 * This class is immutable and thread-safe.
1624 	 * </p>
1625 	 * 
1626 	 * @see CharEncoding
1627 	 * @see <a
1628 	 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1629 	 *      charsets</a>
1630 	 * @version $Id$
1631 	 * @since 1.4
1632 	 */
1633 	public static class StringUtils {
1634 
1635 		/**
1636 		 * Calls {@link String#getBytes(Charset)}
1637 		 * 
1638 		 * @param string
1639 		 *            The string to encode (if null, return null).
1640 		 * @param charset
1641 		 *            The {@link Charset} to encode the {@code String}
1642 		 * @return the encoded bytes
1643 		 */
1644 		private static byte[] getBytes(String string, Charset charset) {
1645 			if (string == null) {
1646 				return null;
1647 			}
1648 			return string.getBytes(charset);
1649 		}
1650 
1651 		/**
1652 		 * Encodes the given string into a sequence of bytes using the
1653 		 * ISO-8859-1 charset, storing the result into a new byte array.
1654 		 * 
1655 		 * @param string
1656 		 *            the String to encode, may be {@code null}
1657 		 * @return encoded bytes, or {@code null} if the input string was
1658 		 *         {@code null}
1659 		 * @throws NullPointerException
1660 		 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1661 		 *             which should never happen since it is required by the
1662 		 *             Java platform specification.
1663 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1664 		 *        UnsupportedEncodingException
1665 		 * @see <a
1666 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1667 		 *      charsets</a>
1668 		 * @see #getBytesUnchecked(String, String)
1669 		 */
1670 		public static byte[] getBytesIso8859_1(String string) {
1671 			return getBytes(string, Charsets.ISO_8859_1);
1672 		}
1673 
1674 		/**
1675 		 * Encodes the given string into a sequence of bytes using the named
1676 		 * charset, storing the result into a new byte array.
1677 		 * <p>
1678 		 * This method catches {@link UnsupportedEncodingException} and rethrows
1679 		 * it as {@link IllegalStateException}, which should never happen for a
1680 		 * required charset name. Use this method when the encoding is required
1681 		 * to be in the JRE.
1682 		 * </p>
1683 		 * 
1684 		 * @param string
1685 		 *            the String to encode, may be {@code null}
1686 		 * @param charsetName
1687 		 *            The name of a required {@link java.nio.charset.Charset}
1688 		 * @return encoded bytes, or {@code null} if the input string was
1689 		 *         {@code null}
1690 		 * @throws IllegalStateException
1691 		 *             Thrown when a {@link UnsupportedEncodingException} is
1692 		 *             caught, which should never happen for a required charset
1693 		 *             name.
1694 		 * @see CharEncoding
1695 		 * @see String#getBytes(String)
1696 		 */
1697 		public static byte[] getBytesUnchecked(String string, String charsetName) {
1698 			if (string == null) {
1699 				return null;
1700 			}
1701 			try {
1702 				return string.getBytes(charsetName);
1703 			} catch (UnsupportedEncodingException e) {
1704 				throw StringUtils.newIllegalStateException(charsetName, e);
1705 			}
1706 		}
1707 
1708 		/**
1709 		 * Encodes the given string into a sequence of bytes using the US-ASCII
1710 		 * charset, storing the result into a new byte array.
1711 		 * 
1712 		 * @param string
1713 		 *            the String to encode, may be {@code null}
1714 		 * @return encoded bytes, or {@code null} if the input string was
1715 		 *         {@code null}
1716 		 * @throws NullPointerException
1717 		 *             Thrown if {@link Charsets#US_ASCII} is not initialized,
1718 		 *             which should never happen since it is required by the
1719 		 *             Java platform specification.
1720 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1721 		 *        UnsupportedEncodingException
1722 		 * @see <a
1723 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1724 		 *      charsets</a>
1725 		 * @see #getBytesUnchecked(String, String)
1726 		 */
1727 		public static byte[] getBytesUsAscii(String string) {
1728 			return getBytes(string, Charsets.US_ASCII);
1729 		}
1730 
1731 		/**
1732 		 * Encodes the given string into a sequence of bytes using the UTF-16
1733 		 * charset, storing the result into a new byte array.
1734 		 * 
1735 		 * @param string
1736 		 *            the String to encode, may be {@code null}
1737 		 * @return encoded bytes, or {@code null} if the input string was
1738 		 *         {@code null}
1739 		 * @throws NullPointerException
1740 		 *             Thrown if {@link Charsets#UTF_16} is not initialized,
1741 		 *             which should never happen since it is required by the
1742 		 *             Java platform specification.
1743 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1744 		 *        UnsupportedEncodingException
1745 		 * @see <a
1746 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1747 		 *      charsets</a>
1748 		 * @see #getBytesUnchecked(String, String)
1749 		 */
1750 		public static byte[] getBytesUtf16(String string) {
1751 			return getBytes(string, Charsets.UTF_16);
1752 		}
1753 
1754 		/**
1755 		 * Encodes the given string into a sequence of bytes using the UTF-16BE
1756 		 * charset, storing the result into a new byte array.
1757 		 * 
1758 		 * @param string
1759 		 *            the String to encode, may be {@code null}
1760 		 * @return encoded bytes, or {@code null} if the input string was
1761 		 *         {@code null}
1762 		 * @throws NullPointerException
1763 		 *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
1764 		 *             which should never happen since it is required by the
1765 		 *             Java platform specification.
1766 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1767 		 *        UnsupportedEncodingException
1768 		 * @see <a
1769 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1770 		 *      charsets</a>
1771 		 * @see #getBytesUnchecked(String, String)
1772 		 */
1773 		public static byte[] getBytesUtf16Be(String string) {
1774 			return getBytes(string, Charsets.UTF_16BE);
1775 		}
1776 
1777 		/**
1778 		 * Encodes the given string into a sequence of bytes using the UTF-16LE
1779 		 * charset, storing the result into a new byte array.
1780 		 * 
1781 		 * @param string
1782 		 *            the String to encode, may be {@code null}
1783 		 * @return encoded bytes, or {@code null} if the input string was
1784 		 *         {@code null}
1785 		 * @throws NullPointerException
1786 		 *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
1787 		 *             which should never happen since it is required by the
1788 		 *             Java platform specification.
1789 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1790 		 *        UnsupportedEncodingException
1791 		 * @see <a
1792 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1793 		 *      charsets</a>
1794 		 * @see #getBytesUnchecked(String, String)
1795 		 */
1796 		public static byte[] getBytesUtf16Le(String string) {
1797 			return getBytes(string, Charsets.UTF_16LE);
1798 		}
1799 
1800 		/**
1801 		 * Encodes the given string into a sequence of bytes using the UTF-8
1802 		 * charset, storing the result into a new byte array.
1803 		 * 
1804 		 * @param string
1805 		 *            the String to encode, may be {@code null}
1806 		 * @return encoded bytes, or {@code null} if the input string was
1807 		 *         {@code null}
1808 		 * @throws NullPointerException
1809 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1810 		 *             which should never happen since it is required by the
1811 		 *             Java platform specification.
1812 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1813 		 *        UnsupportedEncodingException
1814 		 * @see <a
1815 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1816 		 *      charsets</a>
1817 		 * @see #getBytesUnchecked(String, String)
1818 		 */
1819 		public static byte[] getBytesUtf8(String string) {
1820 			return getBytes(string, Charsets.UTF_8);
1821 		}
1822 
1823 		private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
1824 			return new IllegalStateException(charsetName + ": " + e);
1825 		}
1826 
1827 		/**
1828 		 * Constructs a new <code>String</code> by decoding the specified array
1829 		 * of bytes using the given charset.
1830 		 * 
1831 		 * @param bytes
1832 		 *            The bytes to be decoded into characters
1833 		 * @param charset
1834 		 *            The {@link Charset} to encode the {@code String}
1835 		 * @return A new <code>String</code> decoded from the specified array of
1836 		 *         bytes using the given charset, or {@code null} if the input
1837 		 *         byte array was {@code null}.
1838 		 * @throws NullPointerException
1839 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1840 		 *             which should never happen since it is required by the
1841 		 *             Java platform specification.
1842 		 */
1843 		private static String newString(byte[] bytes, Charset charset) {
1844 			return bytes == null ? null : new String(bytes, charset);
1845 		}
1846 
1847 		/**
1848 		 * Constructs a new <code>String</code> by decoding the specified array
1849 		 * of bytes using the given charset.
1850 		 * <p>
1851 		 * This method catches {@link UnsupportedEncodingException} and
1852 		 * re-throws it as {@link IllegalStateException}, which should never
1853 		 * happen for a required charset name. Use this method when the encoding
1854 		 * is required to be in the JRE.
1855 		 * </p>
1856 		 * 
1857 		 * @param bytes
1858 		 *            The bytes to be decoded into characters, may be
1859 		 *            {@code null}
1860 		 * @param charsetName
1861 		 *            The name of a required {@link java.nio.charset.Charset}
1862 		 * @return A new <code>String</code> decoded from the specified array of
1863 		 *         bytes using the given charset, or {@code null} if the input
1864 		 *         byte array was {@code null}.
1865 		 * @throws IllegalStateException
1866 		 *             Thrown when a {@link UnsupportedEncodingException} is
1867 		 *             caught, which should never happen for a required charset
1868 		 *             name.
1869 		 * @see CharEncoding
1870 		 * @see String#String(byte[], String)
1871 		 */
1872 		public static String newString(byte[] bytes, String charsetName) {
1873 			if (bytes == null) {
1874 				return null;
1875 			}
1876 			try {
1877 				return new String(bytes, charsetName);
1878 			} catch (UnsupportedEncodingException e) {
1879 				throw StringUtils.newIllegalStateException(charsetName, e);
1880 			}
1881 		}
1882 
1883 		/**
1884 		 * Constructs a new <code>String</code> by decoding the specified array
1885 		 * of bytes using the ISO-8859-1 charset.
1886 		 * 
1887 		 * @param bytes
1888 		 *            The bytes to be decoded into characters, may be
1889 		 *            {@code null}
1890 		 * @return A new <code>String</code> decoded from the specified array of
1891 		 *         bytes using the ISO-8859-1 charset, or {@code null} if the
1892 		 *         input byte array was {@code null}.
1893 		 * @throws NullPointerException
1894 		 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1895 		 *             which should never happen since it is required by the
1896 		 *             Java platform specification.
1897 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1898 		 *        UnsupportedEncodingException
1899 		 */
1900 		public static String newStringIso8859_1(byte[] bytes) {
1901 			return new String(bytes, Charsets.ISO_8859_1);
1902 		}
1903 
1904 		/**
1905 		 * Constructs a new <code>String</code> by decoding the specified array
1906 		 * of bytes using the US-ASCII charset.
1907 		 * 
1908 		 * @param bytes
1909 		 *            The bytes to be decoded into characters
1910 		 * @return A new <code>String</code> decoded from the specified array of
1911 		 *         bytes using the US-ASCII charset, or {@code null} if the
1912 		 *         input byte array was {@code null}.
1913 		 * @throws NullPointerException
1914 		 *             Thrown if {@link Charsets#US_ASCII} is not initialized,
1915 		 *             which should never happen since it is required by the
1916 		 *             Java platform specification.
1917 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1918 		 *        UnsupportedEncodingException
1919 		 */
1920 		public static String newStringUsAscii(byte[] bytes) {
1921 			return new String(bytes, Charsets.US_ASCII);
1922 		}
1923 
1924 		/**
1925 		 * Constructs a new <code>String</code> by decoding the specified array
1926 		 * of bytes using the UTF-16 charset.
1927 		 * 
1928 		 * @param bytes
1929 		 *            The bytes to be decoded into characters
1930 		 * @return A new <code>String</code> decoded from the specified array of
1931 		 *         bytes using the UTF-16 charset or {@code null} if the input
1932 		 *         byte array was {@code null}.
1933 		 * @throws NullPointerException
1934 		 *             Thrown if {@link Charsets#UTF_16} is not initialized,
1935 		 *             which should never happen since it is required by the
1936 		 *             Java platform specification.
1937 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1938 		 *        UnsupportedEncodingException
1939 		 */
1940 		public static String newStringUtf16(byte[] bytes) {
1941 			return new String(bytes, Charsets.UTF_16);
1942 		}
1943 
1944 		/**
1945 		 * Constructs a new <code>String</code> by decoding the specified array
1946 		 * of bytes using the UTF-16BE charset.
1947 		 * 
1948 		 * @param bytes
1949 		 *            The bytes to be decoded into characters
1950 		 * @return A new <code>String</code> decoded from the specified array of
1951 		 *         bytes using the UTF-16BE charset, or {@code null} if the
1952 		 *         input byte array was {@code null}.
1953 		 * @throws NullPointerException
1954 		 *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
1955 		 *             which should never happen since it is required by the
1956 		 *             Java platform specification.
1957 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1958 		 *        UnsupportedEncodingException
1959 		 */
1960 		public static String newStringUtf16Be(byte[] bytes) {
1961 			return new String(bytes, Charsets.UTF_16BE);
1962 		}
1963 
1964 		/**
1965 		 * Constructs a new <code>String</code> by decoding the specified array
1966 		 * of bytes using the UTF-16LE charset.
1967 		 * 
1968 		 * @param bytes
1969 		 *            The bytes to be decoded into characters
1970 		 * @return A new <code>String</code> decoded from the specified array of
1971 		 *         bytes using the UTF-16LE charset, or {@code null} if the
1972 		 *         input byte array was {@code null}.
1973 		 * @throws NullPointerException
1974 		 *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
1975 		 *             which should never happen since it is required by the
1976 		 *             Java platform specification.
1977 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1978 		 *        UnsupportedEncodingException
1979 		 */
1980 		public static String newStringUtf16Le(byte[] bytes) {
1981 			return new String(bytes, Charsets.UTF_16LE);
1982 		}
1983 
1984 		/**
1985 		 * Constructs a new <code>String</code> by decoding the specified array
1986 		 * of bytes using the UTF-8 charset.
1987 		 * 
1988 		 * @param bytes
1989 		 *            The bytes to be decoded into characters
1990 		 * @return A new <code>String</code> decoded from the specified array of
1991 		 *         bytes using the UTF-8 charset, or {@code null} if the input
1992 		 *         byte array was {@code null}.
1993 		 * @throws NullPointerException
1994 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1995 		 *             which should never happen since it is required by the
1996 		 *             Java platform specification.
1997 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1998 		 *        UnsupportedEncodingException
1999 		 */
2000 		public static String newStringUtf8(byte[] bytes) {
2001 			return newString(bytes, Charsets.UTF_8);
2002 		}
2003 
2004 	}
2005 
2006 }