1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package ca.uhn.hl7v2.hoh.util.repackage;
19
20 import java.io.UnsupportedEncodingException;
21 import java.math.BigInteger;
22 import java.nio.charset.Charset;
23 import java.nio.charset.UnsupportedCharsetException;
24
25 /**
26 * Provides Base64 encoding and decoding as defined by <a
27 * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
28 *
29 * <p>
30 * This class implements section <cite>6.8. Base64
31 * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
32 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
33 * Freed and Borenstein.
34 * </p>
35 * <p>
36 * The class can be parameterized in the following manner with various
37 * constructors:
38 * <ul>
39 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will
 * still essentially end up being multiples of 4 in the encoded data.</li>
42 * <li>Line separator: Default is CRLF ("\r\n")</li>
43 * </ul>
44 * </p>
45 * <p>
46 * Since this class operates directly on byte streams, and not character
47 * streams, it is hard-coded to only encode/decode character encodings which are
48 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8,
49 * etc).
50 * </p>
51 * <p>
52 * This class is thread-safe.
53 * </p>
54 *
55 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
56 * @author Note that this class has been repackaged from Apache Commons-Codec
57 * and is distributed under the terms of the Apache Software License,
58 * version 2.0
59 */
60 public class Base64 {
61
62 public static void main(String[] args) {
63
64 System.out.println("basic " + encodeBase64String("cgta:d@3r$@TTg2446yhhh2h4".getBytes()));
65
66 }
67
/**
 * BASE64 characters are 6 bits in length. They are formed by taking a block
 * of 3 octets to form a 24-bit string, which is converted into 4 BASE64
 * characters.
 */
private static final int BITS_PER_ENCODED_BYTE = 6;
/** Number of raw octets that make up one complete unencoded block. */
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
/** Number of Base64 characters emitted for one complete unencoded block. */
private static final int BYTES_PER_ENCODED_BLOCK = 4;
76
/**
 * Chunk separator per RFC 2045 section 2.1: a carriage return followed by a
 * line feed.
 *
 * <p>
 * This is the separator the constructors without an explicit
 * {@code lineSeparator} argument pass along.
 * </p>
 *
 * <p>
 * N.B. The next major release may break compatibility and make this field
 * private.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 *      2.1</a>
 */
static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };
89
/**
 * This array is a lookup table that translates 6-bit positive integer index
 * values into their "Base64 Alphabet" equivalents as specified in Table 1
 * of RFC 2045. Indexes 62 and 63 map to '+' and '/'.
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] STANDARD_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
        's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };

/**
 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
 * changed to - and _ to make the encoded Base64 results more URL-SAFE. This
 * table is only used when the Base64's mode is set to URL-SAFE.
 */
private static final byte[] URL_SAFE_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
        's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' };
108
/**
 * This array is a lookup table that translates Unicode characters drawn
 * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into
 * their 6-bit positive integer equivalents. Characters that are not in the
 * Base64 alphabet but fall within the bounds of the array are translated to
 * -1.
 *
 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This
 * means decoder seamlessly handles both URL_SAFE and STANDARD base64. (The
 * encoder, on the other hand, needs to know ahead of time what to emit).
 *
 * The table is indexed by the byte value itself and ends at 'z' (index
 * 122), so callers bounds-check the byte before indexing.
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] DECODE_TABLE = {
        // indexes 0-15
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // indexes 16-31
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // indexes 32-47: '+' (43) and '-' (45) are 62, '/' (47) is 63
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
        // indexes 48-57: digits '0'-'9'
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        // indexes 58-64: punctuation between '9' and 'A'
        -1, -1, -1, -1, -1, -1, -1,
        // indexes 65-90: 'A'-'Z'
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        // indexes 91-96: only '_' (95) is valid and is 63
        -1, -1, -1, -1, 63, -1,
        // indexes 97-122: 'a'-'z'
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
125
/**
 * Mask used to extract 6 bits, used when encoding. Base64 uses 6-bit
 * fields.
 */
private static final int MASK_6BITS = 0x3f;
131
132 // The static final fields above are used for the original static byte[]
133 // methods on Base64.
134 // The private member fields below are used with the new streaming approach,
135 // which requires
136 // some state be preserved between calls of encode() and decode().
137
/**
 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE
 * above remains static because it is able to decode both STANDARD and
 * URL_SAFE streams, but the encodeTable must be a member variable so we can
 * switch between the two modes.
 */
private byte[] encodeTable;

// Only one decode table currently; keep for consistency with Base32 code
private final byte[] decodeTable = DECODE_TABLE;

/**
 * Line separator for encoding. Not used when decoding. Only used if
 * lineLength > 0; the three-argument constructor leaves it {@code null}
 * when chunking is disabled.
 */
private byte[] lineSeparator;

/**
 * Convenience variable to help us determine when our buffer is going to run
 * out of room and needs resizing. The three-argument constructor sets it
 * to {@code encodeSize - 1}, i.e.
 * <code>decodeSize = 3 + lineSeparator.length;</code> when chunking and 3
 * otherwise.
 */
private int decodeSize;

/**
 * Convenience variable to help us determine when our buffer is going to run
 * out of room and needs resizing.
 * <code>encodeSize = 4 + lineSeparator.length;</code> when chunking and 4
 * otherwise.
 */
private int encodeSize;
168
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length is 0 (no chunking), and the encoding table
 * is STANDARD_ENCODE_TABLE.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 */
public Base64() {
    // A line length of 0 switches chunking off.
    this(0);
}
184
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the
 * given URL-safe mode.
 * <p>
 * When encoding the line length is 76 ({@link #MIME_CHUNK_SIZE}), the line
 * separator is CRLF, and the encoding table is URL_SAFE_ENCODE_TABLE when
 * {@code urlSafe} is {@code true} and STANDARD_ENCODE_TABLE otherwise.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe
 *            if {@code true}, URL-safe encoding is used. In most cases this
 *            should be set to {@code false}.
 * @since 1.4
 */
public Base64(boolean urlSafe) {
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
205
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length is given in the constructor, the line
 * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up
 * being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length
 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
 *            then the output will not be divided into lines (chunks).
 *            Ignored when decoding.
 * @since 1.4
 */
public Base64(int lineLength) {
    this(lineLength, CHUNK_SEPARATOR);
}
231
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length and line separator are given in the
 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up
 * being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length
 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
 *            then the output will not be divided into lines (chunks).
 *            Ignored when decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of
 *            bytes.
 * @throws IllegalArgumentException
 *             Thrown when the provided lineSeparator included some base64
 *             characters.
 * @since 1.4
 */
public Base64(int lineLength, byte[] lineSeparator) {
    // URL-safe mode is off for this overload.
    this(lineLength, lineSeparator, false);
}
263
264 /**
265 * Creates a Base64 codec used for decoding (all modes) and encoding in
266 * URL-unsafe mode.
267 * <p>
268 * When encoding the line length and line separator are given in the
269 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
270 * </p>
271 * <p>
272 * Line lengths that aren't multiples of 4 will still essentially end up
273 * being multiples of 4 in the encoded data.
274 * </p>
275 * <p>
276 * When decoding all variants are supported.
277 * </p>
278 *
279 * @param lineLength
280 * Each line of encoded data will be at most of the given length
281 * (rounded down to nearest multiple of 4). If lineLength <= 0,
282 * then the output will not be divided into lines (chunks).
283 * Ignored when decoding.
284 * @param lineSeparator
285 * Each line of encoded data will end with this sequence of
286 * bytes.
287 * @param urlSafe
288 * Instead of emitting '+' and '/' we emit '-' and '_'
289 * respectively. urlSafe is only applied to encode operations.
290 * Decoding seamlessly handles both modes.
291 * @throws IllegalArgumentException
292 * The provided lineSeparator included some base64 characters.
293 * That's not going to work!
294 * @since 1.4
295 */
296 public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
297 this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length);
298 // TODO could be simplified if there is no requirement to reject invalid
299 // line sep when length <=0
300 // @see test case Base64Test.testConstructors()
301 if (lineSeparator != null) {
302 if (containsAlphabetOrPad(lineSeparator)) {
303 String sep = StringUtils.newStringUtf8(lineSeparator);
304 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
305 }
306 if (lineLength > 0) { // null line-sep forces no chunking rather
307 // than throwing IAE
308 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
309 this.lineSeparator = new byte[lineSeparator.length];
310 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
311 } else {
312 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
313 this.lineSeparator = null;
314 }
315 } else {
316 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
317 this.lineSeparator = null;
318 }
319 this.decodeSize = this.encodeSize - 1;
320 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
321 }
322
/**
 * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
 *
 * @return true if we're in URL-SAFE mode, false otherwise.
 * @since 1.4
 */
public boolean isUrlSafe() {
    // Identity comparison: the constructor stores one of the two shared
    // table instances, never a copy.
    return this.encodeTable == URL_SAFE_ENCODE_TABLE;
}
332
/**
 * <p>
 * Encodes all of the provided data, starting at inPos, for inAvail bytes.
 * Must be called at least twice: once with the data to encode, and once
 * with inAvail set to "-1" to alert encoder that EOF has been reached, so
 * flush last remaining bytes (if not multiple of 3).
 * </p>
 * <p>
 * Output is appended to {@code context.buffer} at {@code context.pos}.
 * Once the context has seen EOF, further calls return without doing
 * anything.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
 * and general approach.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 *
 * @param in
 *            byte[] array of binary data to base64 encode.
 * @param inPos
 *            Position to start reading data from.
 * @param inAvail
 *            Amount of bytes available from input for encoding; a negative
 *            value signals EOF.
 * @param context
 *            the context to be used
 */
void encode(byte[] in, int inPos, int inAvail, Context context) {
    if (context.eof) {
        return;
    }
    // inAvail < 0 is how we're informed of EOF in the underlying data we're
    // encoding.
    if (inAvail < 0) {
        context.eof = true;
        if (0 == context.modulus && lineLength == 0) {
            return; // no leftovers to process and not using chunking
        }
        ensureBufferSize(encodeSize, context);
        // Remember where the flush started so the line position can be
        // advanced by the number of bytes actually written.
        int savedPos = context.pos;
        switch (context.modulus) { // 0-2
        case 1: // 8 bits = 6 + 2
            // top 6 bits
            context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
            // remaining 2 bits, shifted up into the high end of a 6-bit field
            context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
            // URL-SAFE skips the padding to further reduce size.
            if (encodeTable == STANDARD_ENCODE_TABLE) {
                context.buffer[context.pos++] = PAD;
                context.buffer[context.pos++] = PAD;
            }
            break;

        case 2: // 16 bits = 6 + 6 + 4
            context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
            context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
            // remaining 4 bits, shifted up into the high end of a 6-bit field
            context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
            // URL-SAFE skips the padding to further reduce size.
            if (encodeTable == STANDARD_ENCODE_TABLE) {
                context.buffer[context.pos++] = PAD;
            }
            break;
        }
        // keep track of current line position
        context.currentLinePos += context.pos - savedPos;
        // if currentLinePos == 0 we are at the start of a line, so don't add
        // CRLF
        if (lineLength > 0 && context.currentLinePos > 0) {
            System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
            context.pos += lineSeparator.length;
        }
    } else {
        for (int i = 0; i < inAvail; i++) {
            ensureBufferSize(encodeSize, context);
            context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
            // Widen the signed byte to its unsigned value (0-255).
            int b = in[inPos++];
            if (b < 0) {
                b += 256;
            }
            // Shift the accumulated bits up by one byte and append the new one.
            context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
            // 3 bytes = 24 bits = 4 * 6 bits to extract
            if (0 == context.modulus) {
                context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
                context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
                context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
                context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
                context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
                // Line is full: emit the separator and start a new line.
                if (lineLength > 0 && lineLength <= context.currentLinePos) {
                    System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
                    context.pos += lineSeparator.length;
                    context.currentLinePos = 0;
                }
            }
        }
    }
}
426
/**
 * <p>
 * Decodes all of the provided data, starting at inPos, for inAvail bytes.
 * Should be called at least twice: once with the data to decode, and once
 * with inAvail set to "-1" to alert decoder that EOF has been reached. The
 * "-1" call is not necessary when decoding, but it doesn't hurt, either.
 * </p>
 * <p>
 * Ignores all non-base64 characters. This is how chunked (e.g. 76
 * character) data is handled, since CR and LF are silently ignored, but has
 * implications for other bytes, too. This method subscribes to the
 * garbage-in, garbage-out philosophy: it will not check the provided data
 * for validity.
 * </p>
 * <p>
 * The first pad byte ('=') is treated as EOF: decoding stops there and
 * later calls with the same context return immediately.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
 * and general approach.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 *
 * @param in
 *            byte[] array of ascii data to base64 decode.
 * @param inPos
 *            Position to start reading data from.
 * @param inAvail
 *            Amount of bytes available from input for decoding; a negative
 *            value signals EOF.
 * @param context
 *            the context to be used
 */
void decode(byte[] in, int inPos, int inAvail, Context context) {
    if (context.eof) {
        return;
    }
    if (inAvail < 0) {
        context.eof = true;
    }
    // With a negative inAvail this loop body never runs.
    for (int i = 0; i < inAvail; i++) {
        ensureBufferSize(decodeSize, context);
        byte b = in[inPos++];
        if (b == PAD) {
            // We're done.
            context.eof = true;
            break;
        } else {
            // Negative bytes and bytes beyond the table are skipped.
            if (b >= 0 && b < DECODE_TABLE.length) {
                int result = DECODE_TABLE[b];
                // A table value of -1 marks a byte outside the alphabet; skip it.
                if (result >= 0) {
                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
                    context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
                    // 4 characters = 24 bits = 3 output bytes
                    if (context.modulus == 0) {
                        context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
                        context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
                        context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
                    }
                }
            }
        }
    }

    // Two forms of EOF as far as base64 decoder is concerned: actual
    // EOF (-1) and first time '=' character is encountered in stream.
    // This approach makes the '=' padding characters completely optional.
    if (context.eof && context.modulus != 0) {
        ensureBufferSize(decodeSize, context);

        // We have some spare bits remaining
        // Output all whole multiples of 8 bits and ignore the rest
        switch (context.modulus) {
        // case 1: // 6 bits - ignore entirely
        // break;
        case 2: // 12 bits = 8 + 4
            // dump the extra 4 bits
            context.ibitWorkArea = context.ibitWorkArea >> 4;
            context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
            break;
        case 3: // 18 bits = 8 + 8 + 2
            // dump 2 bits
            context.ibitWorkArea = context.ibitWorkArea >> 2;
            context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
            context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
            break;
        }
    }
}
511
/**
 * Tests a given byte array to see if it contains only valid characters
 * within the Base64 alphabet. Currently the method treats whitespace as
 * valid. Simply forwards to {@link #isBase64(byte[])}.
 *
 * @param arrayOctet
 *            byte array to test
 * @return {@code true} if all bytes are valid characters in the Base64
 *         alphabet or if the byte array is empty; {@code false}, otherwise
 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
 */
@Deprecated
public static boolean isArrayByteBase64(byte[] arrayOctet) {
    return isBase64(arrayOctet);
}
527
528 /**
529 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
530 *
531 * @param octet
532 * The value to test
533 * @return {@code true} if the value is defined in the the base 64 alphabet,
534 * {@code false} otherwise.
535 * @since 1.4
536 */
537 public static boolean isBase64(byte octet) {
538 return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
539 }
540
/**
 * Tests a given String to see if it contains only valid characters within
 * the Base64 alphabet. Currently the method treats whitespace as valid.
 * The String is converted to bytes with
 * {@code StringUtils.getBytesUtf8} and then checked by
 * {@link #isBase64(byte[])}.
 *
 * @param base64
 *            String to test
 * @return {@code true} if all characters in the String are valid characters
 *         in the Base64 alphabet or if the String is empty; {@code false},
 *         otherwise
 * @since 1.5
 */
public static boolean isBase64(String base64) {
    return isBase64(StringUtils.getBytesUtf8(base64));
}
555
556 /**
557 * Tests a given byte array to see if it contains only valid characters
558 * within the Base64 alphabet. Currently the method treats whitespace as
559 * valid.
560 *
561 * @param arrayOctet
562 * byte array to test
563 * @return {@code true} if all bytes are valid characters in the Base64
564 * alphabet or if the byte array is empty; {@code false}, otherwise
565 * @since 1.5
566 */
567 public static boolean isBase64(byte[] arrayOctet) {
568 for (int i = 0; i < arrayOctet.length; i++) {
569 if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
570 return false;
571 }
572 }
573 return true;
574 }
575
/**
 * Encodes binary data using the base64 algorithm but does not chunk the
 * output. Equivalent to {@link #encodeBase64(byte[], boolean)} with
 * {@code isChunked} set to {@code false}.
 *
 * @param binaryData
 *            binary data to encode
 * @return byte[] containing Base64 characters in their UTF-8
 *         representation.
 */
public static byte[] encodeBase64(byte[] binaryData) {
    return encodeBase64(binaryData, false);
}
588
/**
 * Encodes binary data using the base64 algorithm but does not chunk the
 * output, and returns the result as a String built with
 * {@code StringUtils.newStringUtf8}.
 *
 * NOTE: We changed the behaviour of this method from multi-line chunking
 * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5).
 *
 * @param binaryData
 *            binary data to encode
 * @return String containing Base64 characters.
 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
 */
public static String encodeBase64String(byte[] binaryData) {
    return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
}
604
/**
 * Encodes binary data using a URL-safe variation of the base64 algorithm
 * but does not chunk the output. The url-safe variation emits - and _
 * instead of + and / characters.
 *
 * @param binaryData
 *            binary data to encode
 * @return byte[] containing Base64 characters in their UTF-8
 *         representation.
 * @since 1.4
 */
public static byte[] encodeBase64URLSafe(byte[] binaryData) {
    // isChunked = false, urlSafe = true
    return encodeBase64(binaryData, false, true);
}
619
/**
 * Encodes binary data using a URL-safe variation of the base64 algorithm
 * but does not chunk the output. The url-safe variation emits - and _
 * instead of + and / characters. The result is returned as a String built
 * with {@code StringUtils.newStringUtf8}.
 *
 * @param binaryData
 *            binary data to encode
 * @return String containing Base64 characters
 * @since 1.4
 */
public static String encodeBase64URLSafeString(byte[] binaryData) {
    // isChunked = false, urlSafe = true
    return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
}
633
/**
 * Encodes binary data using the base64 algorithm and chunks the encoded
 * output into 76 character blocks. Equivalent to
 * {@link #encodeBase64(byte[], boolean)} with {@code isChunked} set to
 * {@code true}.
 *
 * @param binaryData
 *            binary data to encode
 * @return Base64 characters chunked in 76 character blocks
 */
public static byte[] encodeBase64Chunked(byte[] binaryData) {
    return encodeBase64(binaryData, true);
}
645
/**
 * Encodes binary data using the base64 algorithm, optionally chunking the
 * output into 76 character blocks. Always uses the standard (not
 * URL-safe) alphabet.
 *
 * @param binaryData
 *            Array containing binary data to encode.
 * @param isChunked
 *            if {@code true} this encoder will chunk the base64 output into
 *            76 character blocks
 * @return Base64-encoded data.
 * @throws IllegalArgumentException
 *             Thrown when the input array needs an output array bigger than
 *             {@link Integer#MAX_VALUE}
 */
public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
    return encodeBase64(binaryData, isChunked, false);
}
663
/**
 * Encodes binary data using the base64 algorithm, optionally chunking the
 * output into 76 character blocks. The result size is capped at
 * {@link Integer#MAX_VALUE}.
 *
 * @param binaryData
 *            Array containing binary data to encode.
 * @param isChunked
 *            if {@code true} this encoder will chunk the base64 output into
 *            76 character blocks
 * @param urlSafe
 *            if {@code true} this encoder will emit - and _ instead of the
 *            usual + and / characters.
 * @return Base64-encoded data.
 * @throws IllegalArgumentException
 *             Thrown when the input array needs an output array bigger than
 *             {@link Integer#MAX_VALUE}
 * @since 1.4
 */
public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
    return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
}
685
686 /**
687 * Encodes binary data using the base64 algorithm, optionally chunking the
688 * output into 76 character blocks.
689 *
690 * @param binaryData
691 * Array containing binary data to encode.
692 * @param isChunked
693 * if {@code true} this encoder will chunk the base64 output into
694 * 76 character blocks
695 * @param urlSafe
696 * if {@code true} this encoder will emit - and _ instead of the
697 * usual + and / characters.
698 * @param maxResultSize
699 * The maximum result size to accept.
700 * @return Base64-encoded data.
701 * @throws IllegalArgumentException
702 * Thrown when the input array needs an output array bigger than
703 * maxResultSize
704 * @since 1.4
705 */
706 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
707 if (binaryData == null || binaryData.length == 0) {
708 return binaryData;
709 }
710
711 // Create this so can use the super-class method
712 // Also ensures that the same roundings are performed by the ctor and
713 // the code
714 Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
715 long len = b64.getEncodedLength(binaryData);
716 if (len > maxResultSize) {
717 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
718 }
719
720 return b64.encode(binaryData);
721 }
722
/**
 * Decodes a Base64 String into octets, using a codec created with the
 * no-argument constructor.
 *
 * @param base64String
 *            String containing Base64 data
 * @return Array containing decoded data.
 * @since 1.4
 */
public static byte[] decodeBase64(String base64String) {
    return new Base64().decode(base64String);
}
734
/**
 * Decodes Base64 data into octets, using a codec created with the
 * no-argument constructor.
 *
 * @param base64Data
 *            Byte array containing Base64 data
 * @return Array containing decoded data.
 */
public static byte[] decodeBase64(byte[] base64Data) {
    return new Base64().decode(base64Data);
}
745
746 // Implementation of the Encoder Interface
747
748 // Implementation of integer encoding used for crypto
/**
 * Decodes a base64-encoded integer according to crypto standards such as
 * W3C's XML-Signature
 *
 * @param pArray
 *            a byte array containing base64 character data
 * @return A BigInteger
 * @since 1.4
 */
public static BigInteger decodeInteger(byte[] pArray) {
    // Signum 1: the decoded bytes are read as an unsigned magnitude.
    return new BigInteger(1, decodeBase64(pArray));
}
761
/**
 * Encodes to a base64-encoded integer according to crypto standards such as
 * W3C's XML-Signature. The integer is first reduced to its magnitude bytes
 * by {@link #toIntegerBytes(BigInteger)} and then encoded without chunking.
 *
 * @param bigInt
 *            a BigInteger
 * @return A byte array containing base64 character data
 * @throws NullPointerException
 *             if null is passed in
 * @since 1.4
 */
public static byte[] encodeInteger(BigInteger bigInt) {
    if (bigInt == null) {
        throw new NullPointerException("encodeInteger called with null parameter");
    }
    return encodeBase64(toIntegerBytes(bigInt), false);
}
779
780 /**
781 * Returns a byte-array representation of a <code>BigInteger</code> without
782 * sign bit.
783 *
784 * @param bigInt
785 * <code>BigInteger</code> to be converted
786 * @return a byte array representation of the BigInteger parameter
787 */
788 static byte[] toIntegerBytes(BigInteger bigInt) {
789 int bitlen = bigInt.bitLength();
790 // round bitlen
791 bitlen = ((bitlen + 7) >> 3) << 3;
792 byte[] bigBytes = bigInt.toByteArray();
793
794 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
795 return bigBytes;
796 }
797 // set up params for copying everything but sign bit
798 int startSrc = 0;
799 int len = bigBytes.length;
800
801 // if bigInt is exactly byte-aligned, just skip signbit in copy
802 if ((bigInt.bitLength() % 8) == 0) {
803 startSrc = 1;
804 len--;
805 }
806 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
807 byte[] resizedBytes = new byte[bitlen / 8];
808 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
809 return resizedBytes;
810 }
811
/**
 * Returns whether or not the <code>octet</code> is in the Base64 alphabet.
 * Unlike {@link #isBase64(byte)}, the pad character is not accepted here.
 *
 * @param octet
 *            The value to test
 * @return {@code true} if the value is defined in the Base64 alphabet
 *         {@code false} otherwise.
 */
protected boolean isInAlphabet(byte octet) {
    return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
}
823
/**
 * Holds thread context so classes can be thread-safe.
 *
 * This class is not itself thread-safe; each thread must allocate its own
 * copy.
 *
 * @since 1.7
 */
static class Context {

    /**
     * Place holder for the bytes we're dealing with for our base logic.
     * Bitwise operations store and extract the encoding or decoding from
     * this variable.
     */
    int ibitWorkArea;

    /**
     * Place holder for the bytes we're dealing with for our base logic.
     * Bitwise operations store and extract the encoding or decoding from
     * this variable. The Base64 encode and decode methods use
     * {@link #ibitWorkArea} rather than this field.
     */
    long lbitWorkArea;

    /**
     * Buffer for streaming.
     */
    byte[] buffer;

    /**
     * Position where next character should be written in the buffer.
     */
    int pos;

    /**
     * Position where next character should be read from the buffer.
     */
    int readPos;

    /**
     * Boolean flag to indicate the EOF has been reached. Once EOF has been
     * reached, this object becomes useless, and must be thrown away.
     */
    boolean eof;

    /**
     * Variable tracks how many characters have been written to the current
     * line. Only used when encoding. We use it to make sure each encoded
     * line never goes beyond lineLength (if lineLength > 0).
     */
    int currentLinePos;

    /**
     * Writes to the buffer only occur after every 3/5 reads when encoding,
     * and every 4/8 reads when decoding. This variable helps track that.
     */
    int modulus;

    /** Creates an empty context; every field starts at its Java default. */
    Context() {
    }
}
885
/**
 * EOF marker value. The encode and decode methods treat any negative
 * available-byte count as end of input.
 *
 * @since 1.7
 */
static final int EOF = -1;
892
/**
 * MIME chunk size per RFC 2045 section 6.8. This is the line length used
 * by the {@code Base64(boolean)} constructor.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts
 * all other characters, including any equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 *      6.8</a>
 */
public static final int MIME_CHUNK_SIZE = 76;

/**
 * PEM chunk size per RFC 1421 section 4.3.2.4.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts
 * all other characters, including any equal signs.
 * </p>
 *
 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
 *      4.3.2.4</a>
 */
public static final int PEM_CHUNK_SIZE = 64;
918
919 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
920
921 /**
922 * Defines the default buffer size - currently {@value} - must be large
923 * enough for at least one encoded block+separator
924 */
925 private static final int DEFAULT_BUFFER_SIZE = 8192;
926
927 /** Mask used to extract 8 bits, used in decoding bytes */
928 protected static final int MASK_8BITS = 0xff;
929
930 /**
931 * Byte used to pad output.
932 */
933 protected static final byte PAD_DEFAULT = '='; // Allow static access to
934 // default
935
936 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it
937 // needs to vary later
938
939 /**
 * Number of bytes in each full block of unencoded data, e.g. 3 for Base64
 * and 5 for Base32
942 */
943 private final int unencodedBlockSize;
944
945 /**
 * Number of bytes in each full block of encoded data, e.g. 4 for Base64 and
 * 8 for Base32
948 */
949 private final int encodedBlockSize;
950
951 /**
952 * Chunksize for encoding. Not used when decoding. A value of zero or less
953 * implies no chunking of the encoded data. Rounded down to nearest multiple
954 * of encodedBlockSize.
955 */
956 protected int lineLength;
957
958 /**
959 * Size of chunk separator. Not used unless {@link #lineLength} > 0.
960 */
961 private int chunkSeparatorLength;
962
963 /**
964 * Note <code>lineLength</code> is rounded down to the nearest multiple of
965 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
966 * then chunking is disabled.
967 *
968 * @param unencodedBlockSize
969 * the size of an unencoded block (e.g. Base64 = 3)
970 * @param encodedBlockSize
971 * the size of an encoded block (e.g. Base64 = 4)
972 * @param lineLength
973 * if > 0, use chunking with a length <code>lineLength</code>
974 * @param chunkSeparatorLength
975 * the chunk separator length, if relevant
976 */
977 protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
978 this.unencodedBlockSize = unencodedBlockSize;
979 this.encodedBlockSize = encodedBlockSize;
980 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
981 this.chunkSeparatorLength = chunkSeparatorLength;
982 }
983
984 /**
985 * Returns true if this object has buffered data for reading.
986 *
987 * @param context
988 * the context to be used
989 * @return true if there is data still available for reading.
990 */
991 boolean hasData(Context context) { // package protected for access from I/O
992 // streams
993 return context.buffer != null;
994 }
995
996 /**
997 * Returns the amount of buffered data available for reading.
998 *
999 * @param context
1000 * the context to be used
1001 * @return The amount of buffered data available for reading.
1002 */
1003 int available(Context context) { // package protected for access from I/O
1004 // streams
1005 return context.buffer != null ? context.pos - context.readPos : 0;
1006 }
1007
1008 /**
1009 * Get the default buffer size. Can be overridden.
1010 *
1011 * @return {@link #DEFAULT_BUFFER_SIZE}
1012 */
1013 protected int getDefaultBufferSize() {
1014 return DEFAULT_BUFFER_SIZE;
1015 }
1016
1017 /**
1018 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
1019 *
1020 * @param context
1021 * the context to be used
1022 */
1023 private void resizeBuffer(Context context) {
1024 if (context.buffer == null) {
1025 context.buffer = new byte[getDefaultBufferSize()];
1026 context.pos = 0;
1027 context.readPos = 0;
1028 } else {
1029 byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
1030 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
1031 context.buffer = b;
1032 }
1033 }
1034
1035 /**
1036 * Ensure that the buffer has room for <code>size</code> bytes
1037 *
1038 * @param size
1039 * minimum spare space required
1040 * @param context
1041 * the context to be used
1042 */
1043 protected void ensureBufferSize(int size, Context context) {
1044 if ((context.buffer == null) || (context.buffer.length < context.pos + size)) {
1045 resizeBuffer(context);
1046 }
1047 }
1048
1049 /**
1050 * Extracts buffered data into the provided byte[] array, starting at
1051 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
1052 * were actually extracted.
1053 *
1054 * @param b
1055 * byte[] array to extract the buffered data into.
1056 * @param bPos
1057 * position in byte[] array to start extraction at.
1058 * @param bAvail
1059 * amount of bytes we're allowed to extract. We may extract fewer
1060 * (if fewer are available).
1061 * @param context
1062 * the context to be used
1063 * @return The number of bytes successfully extracted into the provided
1064 * byte[] array.
1065 */
1066 int readResults(byte[] b, int bPos, int bAvail, Context context) { // package
1067 // protected
1068 // for
1069 // access
1070 // from
1071 // I/O
1072 // streams
1073 if (context.buffer != null) {
1074 int len = Math.min(available(context), bAvail);
1075 System.arraycopy(context.buffer, context.readPos, b, bPos, len);
1076 context.readPos += len;
1077 if (context.readPos >= context.pos) {
1078 context.buffer = null; // so hasData() will return false, and
1079 // this method can return -1
1080 }
1081 return len;
1082 }
1083 return context.eof ? EOF : 0;
1084 }
1085
1086 /**
1087 * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
1088 * space, tab, CR, LF
1089 *
1090 * @param byteToCheck
1091 * the byte to check
1092 * @return true if byte is whitespace, false otherwise
1093 */
1094 protected static boolean isWhiteSpace(byte byteToCheck) {
1095 switch (byteToCheck) {
1096 case ' ':
1097 case '\n':
1098 case '\r':
1099 case '\t':
1100 return true;
1101 default:
1102 return false;
1103 }
1104 }
1105
1106 /**
1107 * Encodes an Object using the Base-N algorithm. This method is provided in
1108 * order to satisfy the requirements of the Encoder interface, and will
1109 * throw an EncoderException if the supplied object is not of type byte[].
1110 *
1111 * @param obj
1112 * Object to encode
1113 * @return An object (of type byte[]) containing the Base-N encoded data
1114 * which corresponds to the byte[] supplied.
1115 * @throws EncoderException
1116 * if the parameter supplied is not of type byte[]
1117 */
1118 public Object encode(Object obj) throws Exception {
1119 if (!(obj instanceof byte[])) {
1120 throw new Exception("Parameter supplied to Base-N encode is not a byte[]");
1121 }
1122 return encode((byte[]) obj);
1123 }
1124
1125 /**
1126 * Encodes a byte[] containing binary data, into a String containing
1127 * characters in the Base-N alphabet. Uses UTF8 encoding.
1128 *
1129 * @param pArray
1130 * a byte array containing binary data
1131 * @return A String containing only Base-N character data
1132 */
1133 public String encodeToString(byte[] pArray) {
1134 return StringUtils.newStringUtf8(encode(pArray));
1135 }
1136
1137 /**
1138 * Encodes a byte[] containing binary data, into a String containing
1139 * characters in the appropriate alphabet. Uses UTF8 encoding.
1140 *
1141 * @param pArray
1142 * a byte array containing binary data
1143 * @return String containing only character data in the appropriate
1144 * alphabet.
1145 */
1146 public String encodeAsString(byte[] pArray) {
1147 return StringUtils.newStringUtf8(encode(pArray));
1148 }
1149
1150 /**
1151 * Decodes an Object using the Base-N algorithm. This method is provided in
1152 * order to satisfy the requirements of the Decoder interface, and will
1153 * throw a DecoderException if the supplied object is not of type byte[] or
1154 * String.
1155 *
1156 * @param obj
1157 * Object to decode
1158 * @return An object (of type byte[]) containing the binary data which
1159 * corresponds to the byte[] or String supplied.
1160 * @throws DecoderException
1161 * if the parameter supplied is not of type byte[]
1162 */
1163 public Object decode(Object obj) throws Exception {
1164 if (obj instanceof byte[]) {
1165 return decode((byte[]) obj);
1166 } else if (obj instanceof String) {
1167 return decode((String) obj);
1168 } else {
1169 throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String");
1170 }
1171 }
1172
1173 /**
1174 * Decodes a String containing characters in the Base-N alphabet.
1175 *
1176 * @param pArray
1177 * A String containing Base-N character data
1178 * @return a byte array containing binary data
1179 */
1180 public byte[] decode(String pArray) {
1181 return decode(StringUtils.getBytesUtf8(pArray));
1182 }
1183
1184 /**
1185 * Decodes a byte[] containing characters in the Base-N alphabet.
1186 *
1187 * @param pArray
1188 * A byte array containing Base-N character data
1189 * @return a byte array containing binary data
1190 */
1191 public byte[] decode(byte[] pArray) {
1192 Context context = new Context();
1193 if (pArray == null || pArray.length == 0) {
1194 return pArray;
1195 }
1196 decode(pArray, 0, pArray.length, context);
1197 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
1198 byte[] result = new byte[context.pos];
1199 readResults(result, 0, result.length, context);
1200 return result;
1201 }
1202
1203 /**
1204 * Encodes a byte[] containing binary data, into a byte[] containing
1205 * characters in the alphabet.
1206 *
1207 * @param pArray
1208 * a byte array containing binary data
1209 * @return A byte array containing only the basen alphabetic character data
1210 */
1211 public byte[] encode(byte[] pArray) {
1212 Context context = new Context();
1213 if (pArray == null || pArray.length == 0) {
1214 return pArray;
1215 }
1216 encode(pArray, 0, pArray.length, context);
1217 encode(pArray, 0, EOF, context); // Notify encoder of EOF.
1218 byte[] buf = new byte[context.pos - context.readPos];
1219 readResults(buf, 0, buf.length, context);
1220 return buf;
1221 }
1222
1223 /**
1224 * Tests a given byte array to see if it contains only valid characters
1225 * within the alphabet. The method optionally treats whitespace and pad as
1226 * valid.
1227 *
1228 * @param arrayOctet
1229 * byte array to test
1230 * @param allowWSPad
1231 * if {@code true}, then whitespace and PAD are also allowed
1232 *
1233 * @return {@code true} if all bytes are valid characters in the alphabet or
1234 * if the byte array is empty; {@code false}, otherwise
1235 */
1236 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
1237 for (int i = 0; i < arrayOctet.length; i++) {
1238 if (!isInAlphabet(arrayOctet[i]) && (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
1239 return false;
1240 }
1241 }
1242 return true;
1243 }
1244
1245 /**
1246 * Tests a given String to see if it contains only valid characters within
1247 * the alphabet. The method treats whitespace and PAD as valid.
1248 *
1249 * @param basen
1250 * String to test
1251 * @return {@code true} if all characters in the String are valid characters
1252 * in the alphabet or if the String is empty; {@code false},
1253 * otherwise
1254 * @see #isInAlphabet(byte[], boolean)
1255 */
1256 public boolean isInAlphabet(String basen) {
1257 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
1258 }
1259
1260 /**
1261 * Tests a given byte array to see if it contains any characters within the
1262 * alphabet or PAD.
1263 *
1264 * Intended for use in checking line-ending arrays
1265 *
1266 * @param arrayOctet
1267 * byte array to test
1268 * @return {@code true} if any byte is a valid character in the alphabet or
1269 * PAD; {@code false} otherwise
1270 */
1271 protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
1272 if (arrayOctet == null) {
1273 return false;
1274 }
1275 for (byte element : arrayOctet) {
1276 if (PAD == element || isInAlphabet(element)) {
1277 return true;
1278 }
1279 }
1280 return false;
1281 }
1282
1283 /**
1284 * Calculates the amount of space needed to encode the supplied array.
1285 *
1286 * @param pArray
1287 * byte[] array which will later be encoded
1288 *
1289 * @return amount of space needed to encoded the supplied array. Returns a
1290 * long since a max-len array will require > Integer.MAX_VALUE
1291 */
1292 public long getEncodedLength(byte[] pArray) {
1293 // Calculate non-chunked size - rounded up to allow for padding
1294 // cast to long is needed to avoid possibility of overflow
1295 long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize;
1296 if (lineLength > 0) { // We're using chunking
1297 // Round up to nearest multiple
1298 len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
1299 }
1300 return len;
1301 }
1302
/**
 * Names of the character encodings that every implementation of the Java
 * platform is required to support.
 *
 * From the Java documentation <a href=
 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
 * >Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support
 * the following character encodings. Consult the release documentation for
 * your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br/>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
 * Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br/>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br/>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br/>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br/>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br/>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a
 * mandatory initial byte-order mark (either order accepted on input,
 * big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the [lang] project. Even if a similar
 * interface is defined in [lang], it is not foreseen that [codec] would be
 * made to depend on [lang].
 *
 * <p>
 * This class only declares String constants.
 * </p>
 *
 * @see <a
 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 *      charsets</a>
 * @since 1.4
 * @version $Id$
 */
public class CharEncoding {

    /**
     * <p>
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
     * Latin block of the Unicode character set.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * <p>
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * <p>
     * Eight-bit Unicode Transformation Format.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String UTF_8 = "UTF-8";

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, with the byte order
     * specified by a mandatory initial byte-order mark (either order
     * accepted on input, big-endian used on output).
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String UTF_16 = "UTF-16";

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this
     * character encoding.
     * </p>
     */
    public static final String UTF_16LE = "UTF-16LE";
}
1442
1443 /**
1444 * Charsets required of every implementation of the Java platform.
1445 *
1446 * From the Java documentation <a href=
1447 * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1448 * >Standard charsets</a>:
1449 * <p>
1450 * <cite>Every implementation of the Java platform is required to support
1451 * the following character encodings. Consult the release documentation for
1452 * your implementation to see if any other encodings are supported. Consult
1453 * the release documentation for your implementation to see if any other
1454 * encodings are supported. </cite>
1455 * </p>
1456 *
1457 * <ul>
1458 * <li><code>US-ASCII</code><br/>
1459 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
1460 * Unicode character set.</li>
1461 * <li><code>ISO-8859-1</code><br/>
1462 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
1463 * <li><code>UTF-8</code><br/>
1464 * Eight-bit Unicode Transformation Format.</li>
1465 * <li><code>UTF-16BE</code><br/>
1466 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
1467 * <li><code>UTF-16LE</code><br/>
1468 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
1469 * <li><code>UTF-16</code><br/>
1470 * Sixteen-bit Unicode Transformation Format, byte order specified by a
1471 * mandatory initial byte-order mark (either order accepted on input,
1472 * big-endian used on output.)</li>
1473 * </ul>
1474 *
1475 * This perhaps would best belong in the Commons Lang project. Even if a
1476 * similar class is defined in Commons Lang, it is not foreseen that Commons
1477 * Codec would be made to depend on Commons Lang.
1478 *
1479 * <p>
1480 * This class is immutable and thread-safe.
1481 * </p>
1482 *
1483 * @see <a
1484 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1485 * charsets</a>
1486 * @since 1.7
1487 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
1488 */
1489 public static class Charsets {
1490
1491 //
1492 // This class should only contain Charset instances for required
1493 // encodings. This guarantees that it will load correctly and
1494 // without delay on all Java platforms.
1495 //
1496
1497 /**
1498 * Returns the given Charset or the default Charset if the given Charset
1499 * is null.
1500 *
1501 * @param charset
1502 * A charset or null.
1503 * @return the given Charset or the default Charset if the given Charset
1504 * is null
1505 */
1506 public static Charset toCharset(Charset charset) {
1507 return charset == null ? Charset.defaultCharset() : charset;
1508 }
1509
1510 /**
1511 * Returns a Charset for the named charset. If the name is null, return
1512 * the default Charset.
1513 *
1514 * @param charset
1515 * The name of the requested charset, may be null.
1516 * @return a Charset for the named charset
1517 * @throws UnsupportedCharsetException
1518 * If the named charset is unavailable
1519 */
1520 public static Charset toCharset(String charset) {
1521 return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
1522 }
1523
1524 /**
1525 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
1526 * <p>
1527 * Every implementation of the Java platform is required to support this
1528 * character encoding.
1529 * </p>
1530 *
1531 * @see <a
1532 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1533 * charsets</a>
1534 */
1535 public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);
1536
1537 /**
1538 * <p>
1539 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
1540 * Latin block of the Unicode character set.
1541 * </p>
1542 * <p>
1543 * Every implementation of the Java platform is required to support this
1544 * character encoding.
1545 * </p>
1546 *
1547 * @see <a
1548 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1549 * charsets</a>
1550 */
1551 public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);
1552
1553 /**
1554 * <p>
1555 * Sixteen-bit Unicode Transformation Format, The byte order specified
1556 * by a mandatory initial byte-order mark (either order accepted on
1557 * input, big-endian used on output)
1558 * </p>
1559 * <p>
1560 * Every implementation of the Java platform is required to support this
1561 * character encoding.
1562 * </p>
1563 *
1564 * @see <a
1565 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1566 * charsets</a>
1567 */
1568 public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);
1569
1570 /**
1571 * <p>
1572 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
1573 * </p>
1574 * <p>
1575 * Every implementation of the Java platform is required to support this
1576 * character encoding.
1577 * </p>
1578 *
1579 * @see <a
1580 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1581 * charsets</a>
1582 */
1583 public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);
1584
1585 /**
1586 * <p>
1587 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
1588 * </p>
1589 * <p>
1590 * Every implementation of the Java platform is required to support this
1591 * character encoding.
1592 * </p>
1593 *
1594 * @see <a
1595 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1596 * charsets</a>
1597 */
1598 public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);
1599
1600 /**
1601 * <p>
1602 * Eight-bit Unicode Transformation Format.
1603 * </p>
1604 * <p>
1605 * Every implementation of the Java platform is required to support this
1606 * character encoding.
1607 * </p>
1608 *
1609 * @see <a
1610 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1611 * charsets</a>
1612 */
1613 public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
1614 }
1615
1616 /**
1617 * Converts String to and from bytes using the encodings required by the
1618 * Java specification. These encodings are specified in <a href=
1619 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1620 * >Standard charsets</a>
1621 *
1622 * <p>
1623 * This class is immutable and thread-safe.
1624 * </p>
1625 *
1626 * @see CharEncoding
1627 * @see <a
1628 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1629 * charsets</a>
1630 * @version $Id$
1631 * @since 1.4
1632 */
1633 public static class StringUtils {
1634
1635 /**
1636 * Calls {@link String#getBytes(Charset)}
1637 *
1638 * @param string
1639 * The string to encode (if null, return null).
1640 * @param charset
1641 * The {@link Charset} to encode the {@code String}
1642 * @return the encoded bytes
1643 */
1644 private static byte[] getBytes(String string, Charset charset) {
1645 if (string == null) {
1646 return null;
1647 }
1648 return string.getBytes(charset);
1649 }
1650
1651 /**
1652 * Encodes the given string into a sequence of bytes using the
1653 * ISO-8859-1 charset, storing the result into a new byte array.
1654 *
1655 * @param string
1656 * the String to encode, may be {@code null}
1657 * @return encoded bytes, or {@code null} if the input string was
1658 * {@code null}
1659 * @throws NullPointerException
1660 * Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1661 * which should never happen since it is required by the
1662 * Java platform specification.
1663 * @since As of 1.7, throws {@link NullPointerException} instead of
1664 * UnsupportedEncodingException
1665 * @see <a
1666 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1667 * charsets</a>
1668 * @see #getBytesUnchecked(String, String)
1669 */
1670 public static byte[] getBytesIso8859_1(String string) {
1671 return getBytes(string, Charsets.ISO_8859_1);
1672 }
1673
1674 /**
1675 * Encodes the given string into a sequence of bytes using the named
1676 * charset, storing the result into a new byte array.
1677 * <p>
1678 * This method catches {@link UnsupportedEncodingException} and rethrows
1679 * it as {@link IllegalStateException}, which should never happen for a
1680 * required charset name. Use this method when the encoding is required
1681 * to be in the JRE.
1682 * </p>
1683 *
1684 * @param string
1685 * the String to encode, may be {@code null}
1686 * @param charsetName
1687 * The name of a required {@link java.nio.charset.Charset}
1688 * @return encoded bytes, or {@code null} if the input string was
1689 * {@code null}
1690 * @throws IllegalStateException
1691 * Thrown when a {@link UnsupportedEncodingException} is
1692 * caught, which should never happen for a required charset
1693 * name.
1694 * @see CharEncoding
1695 * @see String#getBytes(String)
1696 */
1697 public static byte[] getBytesUnchecked(String string, String charsetName) {
1698 if (string == null) {
1699 return null;
1700 }
1701 try {
1702 return string.getBytes(charsetName);
1703 } catch (UnsupportedEncodingException e) {
1704 throw StringUtils.newIllegalStateException(charsetName, e);
1705 }
1706 }
1707
1708 /**
1709 * Encodes the given string into a sequence of bytes using the US-ASCII
1710 * charset, storing the result into a new byte array.
1711 *
1712 * @param string
1713 * the String to encode, may be {@code null}
1714 * @return encoded bytes, or {@code null} if the input string was
1715 * {@code null}
1716 * @throws NullPointerException
1717 * Thrown if {@link Charsets#US_ASCII} is not initialized,
1718 * which should never happen since it is required by the
1719 * Java platform specification.
1720 * @since As of 1.7, throws {@link NullPointerException} instead of
1721 * UnsupportedEncodingException
1722 * @see <a
1723 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1724 * charsets</a>
1725 * @see #getBytesUnchecked(String, String)
1726 */
1727 public static byte[] getBytesUsAscii(String string) {
1728 return getBytes(string, Charsets.US_ASCII);
1729 }
1730
1731 /**
1732 * Encodes the given string into a sequence of bytes using the UTF-16
1733 * charset, storing the result into a new byte array.
1734 *
1735 * @param string
1736 * the String to encode, may be {@code null}
1737 * @return encoded bytes, or {@code null} if the input string was
1738 * {@code null}
1739 * @throws NullPointerException
1740 * Thrown if {@link Charsets#UTF_16} is not initialized,
1741 * which should never happen since it is required by the
1742 * Java platform specification.
1743 * @since As of 1.7, throws {@link NullPointerException} instead of
1744 * UnsupportedEncodingException
1745 * @see <a
1746 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1747 * charsets</a>
1748 * @see #getBytesUnchecked(String, String)
1749 */
1750 public static byte[] getBytesUtf16(String string) {
1751 return getBytes(string, Charsets.UTF_16);
1752 }
1753
1754 /**
1755 * Encodes the given string into a sequence of bytes using the UTF-16BE
1756 * charset, storing the result into a new byte array.
1757 *
1758 * @param string
1759 * the String to encode, may be {@code null}
1760 * @return encoded bytes, or {@code null} if the input string was
1761 * {@code null}
1762 * @throws NullPointerException
1763 * Thrown if {@link Charsets#UTF_16BE} is not initialized,
1764 * which should never happen since it is required by the
1765 * Java platform specification.
1766 * @since As of 1.7, throws {@link NullPointerException} instead of
1767 * UnsupportedEncodingException
1768 * @see <a
1769 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1770 * charsets</a>
1771 * @see #getBytesUnchecked(String, String)
1772 */
1773 public static byte[] getBytesUtf16Be(String string) {
1774 return getBytes(string, Charsets.UTF_16BE);
1775 }
1776
1777 /**
1778 * Encodes the given string into a sequence of bytes using the UTF-16LE
1779 * charset, storing the result into a new byte array.
1780 *
1781 * @param string
1782 * the String to encode, may be {@code null}
1783 * @return encoded bytes, or {@code null} if the input string was
1784 * {@code null}
1785 * @throws NullPointerException
1786 * Thrown if {@link Charsets#UTF_16LE} is not initialized,
1787 * which should never happen since it is required by the
1788 * Java platform specification.
1789 * @since As of 1.7, throws {@link NullPointerException} instead of
1790 * UnsupportedEncodingException
1791 * @see <a
1792 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1793 * charsets</a>
1794 * @see #getBytesUnchecked(String, String)
1795 */
1796 public static byte[] getBytesUtf16Le(String string) {
1797 return getBytes(string, Charsets.UTF_16LE);
1798 }
1799
1800 /**
1801 * Encodes the given string into a sequence of bytes using the UTF-8
1802 * charset, storing the result into a new byte array.
1803 *
1804 * @param string
1805 * the String to encode, may be {@code null}
1806 * @return encoded bytes, or {@code null} if the input string was
1807 * {@code null}
1808 * @throws NullPointerException
1809 * Thrown if {@link Charsets#UTF_8} is not initialized,
1810 * which should never happen since it is required by the
1811 * Java platform specification.
1812 * @since As of 1.7, throws {@link NullPointerException} instead of
1813 * UnsupportedEncodingException
1814 * @see <a
1815 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1816 * charsets</a>
1817 * @see #getBytesUnchecked(String, String)
1818 */
1819 public static byte[] getBytesUtf8(String string) {
1820 return getBytes(string, Charsets.UTF_8);
1821 }
1822
1823 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
1824 return new IllegalStateException(charsetName + ": " + e);
1825 }
1826
1827 /**
1828 * Constructs a new <code>String</code> by decoding the specified array
1829 * of bytes using the given charset.
1830 *
1831 * @param bytes
1832 * The bytes to be decoded into characters
1833 * @param charset
1834 * The {@link Charset} to encode the {@code String}
1835 * @return A new <code>String</code> decoded from the specified array of
1836 * bytes using the given charset, or {@code null} if the input
1837 * byte array was {@code null}.
1838 * @throws NullPointerException
1839 * Thrown if {@link Charsets#UTF_8} is not initialized,
1840 * which should never happen since it is required by the
1841 * Java platform specification.
1842 */
1843 private static String newString(byte[] bytes, Charset charset) {
1844 return bytes == null ? null : new String(bytes, charset);
1845 }
1846
1847 /**
1848 * Constructs a new <code>String</code> by decoding the specified array
1849 * of bytes using the given charset.
1850 * <p>
1851 * This method catches {@link UnsupportedEncodingException} and
1852 * re-throws it as {@link IllegalStateException}, which should never
1853 * happen for a required charset name. Use this method when the encoding
1854 * is required to be in the JRE.
1855 * </p>
1856 *
1857 * @param bytes
1858 * The bytes to be decoded into characters, may be
1859 * {@code null}
1860 * @param charsetName
1861 * The name of a required {@link java.nio.charset.Charset}
1862 * @return A new <code>String</code> decoded from the specified array of
1863 * bytes using the given charset, or {@code null} if the input
1864 * byte array was {@code null}.
1865 * @throws IllegalStateException
1866 * Thrown when a {@link UnsupportedEncodingException} is
1867 * caught, which should never happen for a required charset
1868 * name.
1869 * @see CharEncoding
1870 * @see String#String(byte[], String)
1871 */
1872 public static String newString(byte[] bytes, String charsetName) {
1873 if (bytes == null) {
1874 return null;
1875 }
1876 try {
1877 return new String(bytes, charsetName);
1878 } catch (UnsupportedEncodingException e) {
1879 throw StringUtils.newIllegalStateException(charsetName, e);
1880 }
1881 }
1882
1883 /**
1884 * Constructs a new <code>String</code> by decoding the specified array
1885 * of bytes using the ISO-8859-1 charset.
1886 *
1887 * @param bytes
1888 * The bytes to be decoded into characters, may be
1889 * {@code null}
1890 * @return A new <code>String</code> decoded from the specified array of
1891 * bytes using the ISO-8859-1 charset, or {@code null} if the
1892 * input byte array was {@code null}.
1893 * @throws NullPointerException
1894 * Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1895 * which should never happen since it is required by the
1896 * Java platform specification.
1897 * @since As of 1.7, throws {@link NullPointerException} instead of
1898 * UnsupportedEncodingException
1899 */
1900 public static String newStringIso8859_1(byte[] bytes) {
1901 return new String(bytes, Charsets.ISO_8859_1);
1902 }
1903
1904 /**
1905 * Constructs a new <code>String</code> by decoding the specified array
1906 * of bytes using the US-ASCII charset.
1907 *
1908 * @param bytes
1909 * The bytes to be decoded into characters
1910 * @return A new <code>String</code> decoded from the specified array of
1911 * bytes using the US-ASCII charset, or {@code null} if the
1912 * input byte array was {@code null}.
1913 * @throws NullPointerException
1914 * Thrown if {@link Charsets#US_ASCII} is not initialized,
1915 * which should never happen since it is required by the
1916 * Java platform specification.
1917 * @since As of 1.7, throws {@link NullPointerException} instead of
1918 * UnsupportedEncodingException
1919 */
1920 public static String newStringUsAscii(byte[] bytes) {
1921 return new String(bytes, Charsets.US_ASCII);
1922 }
1923
1924 /**
1925 * Constructs a new <code>String</code> by decoding the specified array
1926 * of bytes using the UTF-16 charset.
1927 *
1928 * @param bytes
1929 * The bytes to be decoded into characters
1930 * @return A new <code>String</code> decoded from the specified array of
1931 * bytes using the UTF-16 charset or {@code null} if the input
1932 * byte array was {@code null}.
1933 * @throws NullPointerException
1934 * Thrown if {@link Charsets#UTF_16} is not initialized,
1935 * which should never happen since it is required by the
1936 * Java platform specification.
1937 * @since As of 1.7, throws {@link NullPointerException} instead of
1938 * UnsupportedEncodingException
1939 */
1940 public static String newStringUtf16(byte[] bytes) {
1941 return new String(bytes, Charsets.UTF_16);
1942 }
1943
1944 /**
1945 * Constructs a new <code>String</code> by decoding the specified array
1946 * of bytes using the UTF-16BE charset.
1947 *
1948 * @param bytes
1949 * The bytes to be decoded into characters
1950 * @return A new <code>String</code> decoded from the specified array of
1951 * bytes using the UTF-16BE charset, or {@code null} if the
1952 * input byte array was {@code null}.
1953 * @throws NullPointerException
1954 * Thrown if {@link Charsets#UTF_16BE} is not initialized,
1955 * which should never happen since it is required by the
1956 * Java platform specification.
1957 * @since As of 1.7, throws {@link NullPointerException} instead of
1958 * UnsupportedEncodingException
1959 */
1960 public static String newStringUtf16Be(byte[] bytes) {
1961 return new String(bytes, Charsets.UTF_16BE);
1962 }
1963
1964 /**
1965 * Constructs a new <code>String</code> by decoding the specified array
1966 * of bytes using the UTF-16LE charset.
1967 *
1968 * @param bytes
1969 * The bytes to be decoded into characters
1970 * @return A new <code>String</code> decoded from the specified array of
1971 * bytes using the UTF-16LE charset, or {@code null} if the
1972 * input byte array was {@code null}.
1973 * @throws NullPointerException
1974 * Thrown if {@link Charsets#UTF_16LE} is not initialized,
1975 * which should never happen since it is required by the
1976 * Java platform specification.
1977 * @since As of 1.7, throws {@link NullPointerException} instead of
1978 * UnsupportedEncodingException
1979 */
1980 public static String newStringUtf16Le(byte[] bytes) {
1981 return new String(bytes, Charsets.UTF_16LE);
1982 }
1983
1984 /**
1985 * Constructs a new <code>String</code> by decoding the specified array
1986 * of bytes using the UTF-8 charset.
1987 *
1988 * @param bytes
1989 * The bytes to be decoded into characters
1990 * @return A new <code>String</code> decoded from the specified array of
1991 * bytes using the UTF-8 charset, or {@code null} if the input
1992 * byte array was {@code null}.
1993 * @throws NullPointerException
1994 * Thrown if {@link Charsets#UTF_8} is not initialized,
1995 * which should never happen since it is required by the
1996 * Java platform specification.
1997 * @since As of 1.7, throws {@link NullPointerException} instead of
1998 * UnsupportedEncodingException
1999 */
2000 public static String newStringUtf8(byte[] bytes) {
2001 return newString(bytes, Charsets.UTF_8);
2002 }
2003
2004 }
2005
2006 }