1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package ca.uhn.hl7v2.hoh.util.repackage;
19
20 import java.io.UnsupportedEncodingException;
21 import java.math.BigInteger;
22 import java.nio.charset.Charset;
23 import java.nio.charset.StandardCharsets;
24 import java.nio.charset.UnsupportedCharsetException;
25
26 /**
27 * Provides Base64 encoding and decoding as defined by <a
28 * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
29 *
30 * <p>
31 * This class implements section <cite>6.8. Base64
32 * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
33 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
34 * Freed and Borenstein.
35 * </p>
36 * <p>
37 * The class can be parameterized in the following manner with various
38 * constructors:
39 * <ul>
40 * <li>URL-safe mode: Default off.</li>
41 * <li>Line length: Default 76. Line length that aren't multiples of 4 will
42 * still essentially end up being multiples of 4 in the encoded data.
43 * <li>Line separator: Default is CRLF ("\r\n")</li>
44 * </ul>
45 * </p>
46 * <p>
47 * Since this class operates directly on byte streams, and not character
48 * streams, it is hard-coded to only encode/decode character encodings which are
49 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8,
50 * etc).
51 * </p>
52 * <p>
53 * This class is thread-safe.
54 * </p>
55 *
56 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
57 * @author Note that this class has been repackaged from Apache Commons-Codec
58 * and is distributed under the terms of the Apache Software License,
59 * version 2.0
60 */
61 public class Base64 {
62
63 public static void main(String[] args) {
64
65 System.out.println("basic " + encodeBase64String("cgta:d@3r$@TTg2446yhhh2h4".getBytes()));
66
67 }
68
/**
 * BASE64 characters are 6 bits in length. They are formed by taking a block
 * of 3 octets to form a 24-bit string, which is converted into 4 BASE64
 * characters.
 */
private static final int BITS_PER_ENCODED_BYTE = 6;
/** Number of raw octets in one full block (3 octets = 24 bits). */
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
/** Number of encoded characters in one full block (4 x 6 bits = 24 bits). */
private static final int BYTES_PER_ENCODED_BLOCK = 4;
77
/**
 * Chunk separator per RFC 2045 section 2.1: the two bytes CR and LF.
 *
 * <p>
 * N.B. The next major release may break compatibility and make this field
 * private.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 *      2.1</a>
 */
static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };
90
/**
 * Lookup table that translates 6-bit positive integer index values into
 * their "Base64 Alphabet" equivalents as specified in Table 1 of RFC 2045.
 * The last two entries (indices 62 and 63) are '+' and '/'.
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] STANDARD_ENCODE_TABLE = {
        // 0 - 25
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        // 26 - 51
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        // 52 - 61
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        // 62, 63
        '+', '/' };
101
/**
 * Copy of the standard Base64 alphabet with the entries at indices 62 and
 * 63 ('+' and '/') replaced by '-' and '_' so that encoded output is more
 * URL-safe. This table is only used when the codec is in URL-safe mode.
 */
private static final byte[] URL_SAFE_ENCODE_TABLE = {
        // 0 - 25
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        // 26 - 51
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        // 52 - 61
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        // 62, 63
        '-', '_' };
109
/**
 * Lookup table, indexed by ASCII code, that translates characters drawn
 * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into
 * their 6-bit positive integer equivalents. Characters that are not in the
 * Base64 alphabet but fall within the bounds of the array are translated to
 * -1. The table ends at 'z' (index 122); larger values must be range-checked
 * by the caller.
 *
 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This
 * means the decoder seamlessly handles both URL_SAFE and STANDARD base64.
 * (The encoder, on the other hand, needs to know ahead of time what to
 * emit).
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] DECODE_TABLE = {
        // 0 - 15
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 16 - 31
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 32 - 47: '+' (43) and '-' (45) map to 62, '/' (47) maps to 63
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
        // 48 - 63: digits '0'-'9'
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        // 64 - 79: '@', then 'A'-'O'
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        // 80 - 95: 'P'-'Z', then '_' (95) maps to 63
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
        // 96 - 111: '`', then 'a'-'o'
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        // 112 - 122: 'p'-'z'
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
126
/**
 * Mask used to extract the low 6 bits of a value when encoding (Base64 uses
 * 6-bit fields).
 */
private static final int MASK_6BITS = 0x3f;
132
// The static final fields above are used for the original static byte[]
// methods on Base64. The private member fields below are used with the
// streaming approach, which requires some state to be preserved between
// calls of encode() and decode().

/**
 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE
 * above remains static because it is able to decode both STANDARD and
 * URL_SAFE streams, but the encodeTable must be a member variable so we can
 * switch between the two modes.
 */
private byte[] encodeTable;

// Only one decode table currently; keep for consistency with Base32 code
private final byte[] decodeTable = DECODE_TABLE;

/**
 * Line separator for encoding. Not used when decoding. Only used if
 * lineLength &gt; 0; {@code null} when the output is not chunked.
 */
private byte[] lineSeparator;

/**
 * Convenience variable to help us determine when our buffer is going to run
 * out of room and needs resizing. Always one less than {@link #encodeSize},
 * i.e. <code>decodeSize = 3 + lineSeparator.length</code> when chunking.
 */
private int decodeSize;

/**
 * Convenience variable to help us determine when our buffer is going to run
 * out of room and needs resizing.
 * <code>encodeSize = 4 + lineSeparator.length</code> when chunking, and 4
 * otherwise.
 */
private int encodeSize;
169
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length is 0 (no chunking), and the encoding table
 * is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 */
public Base64() {
    // A line length of 0 disables chunking.
    this(0);
}
185
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the
 * given URL-safe mode.
 * <p>
 * When encoding the line length is 76 and the line separator is CRLF. The
 * encoding table is URL_SAFE_ENCODE_TABLE when {@code urlSafe} is
 * {@code true} and STANDARD_ENCODE_TABLE otherwise.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe
 *            if {@code true}, URL-safe encoding is used. In most cases this
 *            should be set to {@code false}.
 * @since 1.4
 */
public Base64(boolean urlSafe) {
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
206
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length is given in the constructor, the line
 * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up
 * being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length
 *            (rounded down to nearest multiple of 4). If
 *            {@code lineLength <= 0}, then the output will not be divided
 *            into lines (chunks). Ignored when decoding.
 * @since 1.4
 */
public Base64(int lineLength) {
    this(lineLength, CHUNK_SEPARATOR);
}
232
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in
 * URL-unsafe mode.
 * <p>
 * When encoding the line length and line separator are given in the
 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up
 * being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length
 *            (rounded down to nearest multiple of 4). If
 *            {@code lineLength <= 0}, then the output will not be divided
 *            into lines (chunks). Ignored when decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of
 *            bytes.
 * @throws IllegalArgumentException
 *             Thrown when the provided lineSeparator included some base64
 *             characters.
 * @since 1.4
 */
public Base64(int lineLength, byte[] lineSeparator) {
    // false = standard (URL-unsafe) alphabet
    this(lineLength, lineSeparator, false);
}
264
265 /**
266 * Creates a Base64 codec used for decoding (all modes) and encoding in
267 * URL-unsafe mode.
268 * <p>
269 * When encoding the line length and line separator are given in the
270 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
271 * </p>
272 * <p>
273 * Line lengths that aren't multiples of 4 will still essentially end up
274 * being multiples of 4 in the encoded data.
275 * </p>
276 * <p>
277 * When decoding all variants are supported.
278 * </p>
279 *
280 * @param lineLength
281 * Each line of encoded data will be at most of the given length
282 * (rounded down to nearest multiple of 4). If lineLength <= 0,
283 * then the output will not be divided into lines (chunks).
284 * Ignored when decoding.
285 * @param lineSeparator
286 * Each line of encoded data will end with this sequence of
287 * bytes.
288 * @param urlSafe
289 * Instead of emitting '+' and '/' we emit '-' and '_'
290 * respectively. urlSafe is only applied to encode operations.
291 * Decoding seamlessly handles both modes.
292 * @throws IllegalArgumentException
293 * The provided lineSeparator included some base64 characters.
294 * That's not going to work!
295 * @since 1.4
296 */
297 public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
298 this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length);
299 // TODO could be simplified if there is no requirement to reject invalid
300 // line sep when length <=0
301 // @see test case Base64Test.testConstructors()
302 if (lineSeparator != null) {
303 if (containsAlphabetOrPad(lineSeparator)) {
304 String sep = StringUtils.newStringUtf8(lineSeparator);
305 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
306 }
307 if (lineLength > 0) { // null line-sep forces no chunking rather
308 // than throwing IAE
309 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
310 this.lineSeparator = new byte[lineSeparator.length];
311 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
312 } else {
313 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
314 this.lineSeparator = null;
315 }
316 } else {
317 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
318 this.lineSeparator = null;
319 }
320 this.decodeSize = this.encodeSize - 1;
321 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
322 }
323
324 /**
325 * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
326 *
327 * @return true if we're in URL-SAFE mode, false otherwise.
328 * @since 1.4
329 */
330 public boolean isUrlSafe() {
331 return this.encodeTable == URL_SAFE_ENCODE_TABLE;
332 }
333
334 /**
335 * <p>
336 * Encodes all of the provided data, starting at inPos, for inAvail bytes.
337 * Must be called at least twice: once with the data to encode, and once
338 * with inAvail set to "-1" to alert encoder that EOF has been reached, so
339 * flush last remaining bytes (if not multiple of 3).
340 * </p>
341 * <p>
342 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
343 * and general approach.
344 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
345 * </p>
346 *
347 * @param in
348 * byte[] array of binary data to base64 encode.
349 * @param inPos
350 * Position to start reading data from.
351 * @param inAvail
352 * Amount of bytes available from input for encoding.
353 * @param context
354 * the context to be used
355 */
356 void encode(byte[] in, int inPos, int inAvail, Context context) {
357 if (context.eof) {
358 return;
359 }
360 // inAvail < 0 is how we're informed of EOF in the underlying data we're
361 // encoding.
362 if (inAvail < 0) {
363 context.eof = true;
364 if (0 == context.modulus && lineLength == 0) {
365 return; // no leftovers to process and not using chunking
366 }
367 ensureBufferSize(encodeSize, context);
368 int savedPos = context.pos;
369 switch (context.modulus) { // 0-2
370 case 1: // 8 bits = 6 + 2
371 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; // top
372 // 6
373 // bits
374 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; // remaining
375 // 2
376 // URL-SAFE skips the padding to further reduce size.
377 if (encodeTable == STANDARD_ENCODE_TABLE) {
378 context.buffer[context.pos++] = PAD;
379 context.buffer[context.pos++] = PAD;
380 }
381 break;
382
383 case 2: // 16 bits = 6 + 6 + 4
384 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
385 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
386 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
387 // URL-SAFE skips the padding to further reduce size.
388 if (encodeTable == STANDARD_ENCODE_TABLE) {
389 context.buffer[context.pos++] = PAD;
390 }
391 break;
392 }
393 context.currentLinePos += context.pos - savedPos; // keep track of
394 // current line
395 // position
396 // if currentPos == 0 we are at the start of a line, so don't add
397 // CRLF
398 if (lineLength > 0 && context.currentLinePos > 0) {
399 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
400 context.pos += lineSeparator.length;
401 }
402 } else {
403 for (int i = 0; i < inAvail; i++) {
404 ensureBufferSize(encodeSize, context);
405 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
406 int b = in[inPos++];
407 if (b < 0) {
408 b += 256;
409 }
410 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
411 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to
412 // extract
413 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
414 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
415 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
416 context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
417 context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
418 if (lineLength > 0 && lineLength <= context.currentLinePos) {
419 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
420 context.pos += lineSeparator.length;
421 context.currentLinePos = 0;
422 }
423 }
424 }
425 }
426 }
427
428 /**
429 * <p>
430 * Decodes all of the provided data, starting at inPos, for inAvail bytes.
431 * Should be called at least twice: once with the data to decode, and once
432 * with inAvail set to "-1" to alert decoder that EOF has been reached. The
433 * "-1" call is not necessary when decoding, but it doesn't hurt, either.
434 * </p>
435 * <p>
436 * Ignores all non-base64 characters. This is how chunked (e.g. 76
437 * character) data is handled, since CR and LF are silently ignored, but has
438 * implications for other bytes, too. This method subscribes to the
439 * garbage-in, garbage-out philosophy: it will not check the provided data
440 * for validity.
441 * </p>
442 * <p>
443 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
444 * and general approach.
445 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
446 * </p>
447 *
448 * @param in
449 * byte[] array of ascii data to base64 decode.
450 * @param inPos
451 * Position to start reading data from.
452 * @param inAvail
453 * Amount of bytes available from input for encoding.
454 * @param context
455 * the context to be used
456 */
457 void decode(byte[] in, int inPos, int inAvail, Context context) {
458 if (context.eof) {
459 return;
460 }
461 if (inAvail < 0) {
462 context.eof = true;
463 }
464 for (int i = 0; i < inAvail; i++) {
465 ensureBufferSize(decodeSize, context);
466 byte b = in[inPos++];
467 if (b == PAD) {
468 // We're done.
469 context.eof = true;
470 break;
471 } else {
472 if (b >= 0 && b < DECODE_TABLE.length) {
473 int result = DECODE_TABLE[b];
474 if (result >= 0) {
475 context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
476 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
477 if (context.modulus == 0) {
478 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
479 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
480 context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
481 }
482 }
483 }
484 }
485 }
486
487 // Two forms of EOF as far as base64 decoder is concerned: actual
488 // EOF (-1) and first time '=' character is encountered in stream.
489 // This approach makes the '=' padding characters completely optional.
490 if (context.eof && context.modulus != 0) {
491 ensureBufferSize(decodeSize, context);
492
493 // We have some spare bits remaining
494 // Output all whole multiples of 8 bits and ignore the rest
495 switch (context.modulus) {
496 // case 1: // 6 bits - ignore entirely
497 // break;
498 case 2: // 12 bits = 8 + 4
499 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the
500 // extra 4
501 // bits
502 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
503 break;
504 case 3: // 18 bits = 8 + 8 + 2
505 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
506 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
507 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
508 break;
509 }
510 }
511 }
512
513 /**
514 * Tests a given byte array to see if it contains only valid characters
515 * within the Base64 alphabet. Currently the method treats whitespace as
516 * valid.
517 *
518 * @param arrayOctet
519 * byte array to test
520 * @return {@code true} if all bytes are valid characters in the Base64
521 * alphabet or if the byte array is empty; {@code false}, otherwise
522 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
523 */
524 @Deprecated
525 public static boolean isArrayByteBase64(byte[] arrayOctet) {
526 return isBase64(arrayOctet);
527 }
528
529 /**
530 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
531 *
532 * @param octet
533 * The value to test
534 * @return {@code true} if the value is defined in the the base 64 alphabet,
535 * {@code false} otherwise.
536 * @since 1.4
537 */
538 public static boolean isBase64(byte octet) {
539 return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
540 }
541
542 /**
543 * Tests a given String to see if it contains only valid characters within
544 * the Base64 alphabet. Currently the method treats whitespace as valid.
545 *
546 * @param base64
547 * String to test
548 * @return {@code true} if all characters in the String are valid characters
549 * in the Base64 alphabet or if the String is empty; {@code false},
550 * otherwise
551 * @since 1.5
552 */
553 public static boolean isBase64(String base64) {
554 return isBase64(StringUtils.getBytesUtf8(base64));
555 }
556
557 /**
558 * Tests a given byte array to see if it contains only valid characters
559 * within the Base64 alphabet. Currently the method treats whitespace as
560 * valid.
561 *
562 * @param arrayOctet
563 * byte array to test
564 * @return {@code true} if all bytes are valid characters in the Base64
565 * alphabet or if the byte array is empty; {@code false}, otherwise
566 * @since 1.5
567 */
568 public static boolean isBase64(byte[] arrayOctet) {
569 for (byte b : arrayOctet) {
570 if (!isBase64(b) && !isWhiteSpace(b)) {
571 return false;
572 }
573 }
574 return true;
575 }
576
577 /**
578 * Encodes binary data using the base64 algorithm but does not chunk the
579 * output.
580 *
581 * @param binaryData
582 * binary data to encode
583 * @return byte[] containing Base64 characters in their UTF-8
584 * representation.
585 */
586 public static byte[] encodeBase64(byte[] binaryData) {
587 return encodeBase64(binaryData, false);
588 }
589
590 /**
591 * Encodes binary data using the base64 algorithm but does not chunk the
592 * output.
593 *
594 * NOTE: We changed the behaviour of this method from multi-line chunking
595 * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5).
596 *
597 * @param binaryData
598 * binary data to encode
599 * @return String containing Base64 characters.
600 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
601 */
602 public static String encodeBase64String(byte[] binaryData) {
603 return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
604 }
605
606 /**
607 * Encodes binary data using a URL-safe variation of the base64 algorithm
608 * but does not chunk the output. The url-safe variation emits - and _
609 * instead of + and / characters.
610 *
611 * @param binaryData
612 * binary data to encode
613 * @return byte[] containing Base64 characters in their UTF-8
614 * representation.
615 * @since 1.4
616 */
617 public static byte[] encodeBase64URLSafe(byte[] binaryData) {
618 return encodeBase64(binaryData, false, true);
619 }
620
621 /**
622 * Encodes binary data using a URL-safe variation of the base64 algorithm
623 * but does not chunk the output. The url-safe variation emits - and _
624 * instead of + and / characters.
625 *
626 * @param binaryData
627 * binary data to encode
628 * @return String containing Base64 characters
629 * @since 1.4
630 */
631 public static String encodeBase64URLSafeString(byte[] binaryData) {
632 return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
633 }
634
635 /**
636 * Encodes binary data using the base64 algorithm and chunks the encoded
637 * output into 76 character blocks
638 *
639 * @param binaryData
640 * binary data to encode
641 * @return Base64 characters chunked in 76 character blocks
642 */
643 public static byte[] encodeBase64Chunked(byte[] binaryData) {
644 return encodeBase64(binaryData, true);
645 }
646
647 /**
648 * Encodes binary data using the base64 algorithm, optionally chunking the
649 * output into 76 character blocks.
650 *
651 * @param binaryData
652 * Array containing binary data to encode.
653 * @param isChunked
654 * if {@code true} this encoder will chunk the base64 output into
655 * 76 character blocks
656 * @return Base64-encoded data.
657 * @throws IllegalArgumentException
658 * Thrown when the input array needs an output array bigger than
659 * {@link Integer#MAX_VALUE}
660 */
661 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
662 return encodeBase64(binaryData, isChunked, false);
663 }
664
665 /**
666 * Encodes binary data using the base64 algorithm, optionally chunking the
667 * output into 76 character blocks.
668 *
669 * @param binaryData
670 * Array containing binary data to encode.
671 * @param isChunked
672 * if {@code true} this encoder will chunk the base64 output into
673 * 76 character blocks
674 * @param urlSafe
675 * if {@code true} this encoder will emit - and _ instead of the
676 * usual + and / characters.
677 * @return Base64-encoded data.
678 * @throws IllegalArgumentException
679 * Thrown when the input array needs an output array bigger than
680 * {@link Integer#MAX_VALUE}
681 * @since 1.4
682 */
683 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
684 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
685 }
686
687 /**
688 * Encodes binary data using the base64 algorithm, optionally chunking the
689 * output into 76 character blocks.
690 *
691 * @param binaryData
692 * Array containing binary data to encode.
693 * @param isChunked
694 * if {@code true} this encoder will chunk the base64 output into
695 * 76 character blocks
696 * @param urlSafe
697 * if {@code true} this encoder will emit - and _ instead of the
698 * usual + and / characters.
699 * @param maxResultSize
700 * The maximum result size to accept.
701 * @return Base64-encoded data.
702 * @throws IllegalArgumentException
703 * Thrown when the input array needs an output array bigger than
704 * maxResultSize
705 * @since 1.4
706 */
707 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
708 if (binaryData == null || binaryData.length == 0) {
709 return binaryData;
710 }
711
712 // Create this so can use the super-class method
713 // Also ensures that the same roundings are performed by the ctor and
714 // the code
715 Base64 b64 = isChunked ? new Base64package/Base64.html#Base64">Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
716 long len = b64.getEncodedLength(binaryData);
717 if (len > maxResultSize) {
718 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
719 }
720
721 return b64.encode(binaryData);
722 }
723
724 /**
725 * Decodes a Base64 String into octets
726 *
727 * @param base64String
728 * String containing Base64 data
729 * @return Array containing decoded data.
730 * @since 1.4
731 */
732 public static byte[] decodeBase64(String base64String) {
733 return new Base64().decode(base64String);
734 }
735
736 /**
737 * Decodes Base64 data into octets
738 *
739 * @param base64Data
740 * Byte array containing Base64 data
741 * @return Array containing decoded data.
742 */
743 public static byte[] decodeBase64(byte[] base64Data) {
744 return new Base64().decode(base64Data);
745 }
746
747 // Implementation of the Encoder Interface
748
749 // Implementation of integer encoding used for crypto
750 /**
751 * Decodes a byte64-encoded integer according to crypto standards such as
752 * W3C's XML-Signature
753 *
754 * @param pArray
755 * a byte array containing base64 character data
756 * @return A BigInteger
757 * @since 1.4
758 */
759 public static BigInteger decodeInteger(byte[] pArray) {
760 return new BigInteger(1, decodeBase64(pArray));
761 }
762
763 /**
764 * Encodes to a byte64-encoded integer according to crypto standards such as
765 * W3C's XML-Signature
766 *
767 * @param bigInt
768 * a BigInteger
769 * @return A byte array containing base64 character data
770 * @throws NullPointerException
771 * if null is passed in
772 * @since 1.4
773 */
774 public static byte[] encodeInteger(BigInteger bigInt) {
775 if (bigInt == null) {
776 throw new NullPointerException("encodeInteger called with null parameter");
777 }
778 return encodeBase64(toIntegerBytes(bigInt), false);
779 }
780
781 /**
782 * Returns a byte-array representation of a <code>BigInteger</code> without
783 * sign bit.
784 *
785 * @param bigInt
786 * <code>BigInteger</code> to be converted
787 * @return a byte array representation of the BigInteger parameter
788 */
789 static byte[] toIntegerBytes(BigInteger bigInt) {
790 int bitlen = bigInt.bitLength();
791 // round bitlen
792 bitlen = ((bitlen + 7) >> 3) << 3;
793 byte[] bigBytes = bigInt.toByteArray();
794
795 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
796 return bigBytes;
797 }
798 // set up params for copying everything but sign bit
799 int startSrc = 0;
800 int len = bigBytes.length;
801
802 // if bigInt is exactly byte-aligned, just skip signbit in copy
803 if ((bigInt.bitLength() % 8) == 0) {
804 startSrc = 1;
805 len--;
806 }
807 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
808 byte[] resizedBytes = new byte[bitlen / 8];
809 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
810 return resizedBytes;
811 }
812
813 /**
814 * Returns whether or not the <code>octet</code> is in the Base32 alphabet.
815 *
816 * @param octet
817 * The value to test
818 * @return {@code true} if the value is defined in the the Base32 alphabet
819 * {@code false} otherwise.
820 */
821 protected boolean isInAlphabet(byte octet) {
822 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
823 }
824
/**
 * Holds the mutable state of one encode or decode operation, so that the
 * codec itself does not have to keep it and classes can be thread-safe.
 *
 * This class is not itself thread-safe; each thread must allocate its own
 * copy.
 *
 * @since 1.7
 */
static class Context {

    /**
     * Integer work area. Base64 encoding and decoding shift incoming bits
     * into this variable and extract the output bytes from it.
     */
    int ibitWorkArea;

    /**
     * Long work area, the 64-bit counterpart of {@link #ibitWorkArea}.
     * NOTE(review): not referenced by the Base64 code visible here;
     * presumably kept for codecs with wider blocks (e.g. Base32) - confirm.
     */
    long lbitWorkArea;

    /**
     * Buffer for streaming.
     */
    byte[] buffer;

    /**
     * Position where next character should be written in the buffer.
     */
    int pos;

    /**
     * Position where next character should be read from the buffer.
     */
    int readPos;

    /**
     * Boolean flag to indicate the EOF has been reached. Once EOF has been
     * reached, this object becomes useless, and must be thrown away.
     */
    boolean eof;

    /**
     * Variable tracks how many characters have been written to the current
     * line. Only used when encoding. We use it to make sure each encoded
     * line never goes beyond lineLength (if lineLength &gt; 0).
     */
    int currentLinePos;

    /**
     * Writes to the buffer only occur after every 3/5 reads when encoding,
     * and every 4/8 reads when decoding. This variable helps track that.
     */
    int modulus;

    /** Creates an empty context; all fields start at their Java defaults. */
    Context() {
    }
}
886
/**
 * End-of-input marker value.
 *
 * @since 1.7
 */
static final int EOF = -1;

/**
 * MIME chunk size per RFC 2045 section 6.8.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts
 * all other characters, including any equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 *      6.8</a>
 */
public static final int MIME_CHUNK_SIZE = 76;

/**
 * PEM chunk size per RFC 1421 section 4.3.2.4.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts
 * all other characters, including any equal signs.
 * </p>
 *
 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
 *      4.3.2.4</a>
 */
public static final int PEM_CHUNK_SIZE = 64;
919
920 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
921
922 /**
923 * Defines the default buffer size - currently {@value} - must be large
924 * enough for at least one encoded block+separator
925 */
926 private static final int DEFAULT_BUFFER_SIZE = 8192;
927
928 /** Mask used to extract 8 bits, used in decoding bytes */
929 protected static final int MASK_8BITS = 0xff;
930
931 /**
932 * Byte used to pad output.
933 */
934 protected static final byte PAD_DEFAULT = '='; // Allow static access to
935 // default
936
937 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it
938 // needs to vary later
939
940 /**
941 * Number of bytes in each full block of unencoded data, e.g. 4 for Base64
942 * and 5 for Base32
943 */
944 private final int unencodedBlockSize;
945
946 /**
947 * Number of bytes in each full block of encoded data, e.g. 3 for Base64 and
948 * 8 for Base32
949 */
950 private final int encodedBlockSize;
951
952 /**
953 * Chunksize for encoding. Not used when decoding. A value of zero or less
954 * implies no chunking of the encoded data. Rounded down to nearest multiple
955 * of encodedBlockSize.
956 */
957 protected final int lineLength;
958
959 /**
960 * Size of chunk separator. Not used unless {@link #lineLength} > 0.
961 */
962 private final int chunkSeparatorLength;
963
964 /**
965 * Note <code>lineLength</code> is rounded down to the nearest multiple of
966 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
967 * then chunking is disabled.
968 *
969 * @param unencodedBlockSize
970 * the size of an unencoded block (e.g. Base64 = 3)
971 * @param encodedBlockSize
972 * the size of an encoded block (e.g. Base64 = 4)
973 * @param lineLength
974 * if > 0, use chunking with a length <code>lineLength</code>
975 * @param chunkSeparatorLength
976 * the chunk separator length, if relevant
977 */
978 protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
979 this.unencodedBlockSize = unencodedBlockSize;
980 this.encodedBlockSize = encodedBlockSize;
981 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
982 this.chunkSeparatorLength = chunkSeparatorLength;
983 }
984
985 /**
986 * Returns true if this object has buffered data for reading.
987 *
988 * @param context
989 * the context to be used
990 * @return true if there is data still available for reading.
991 */
992 boolean hasData(Context context) { // package protected for access from I/O
993 // streams
994 return context.buffer != null;
995 }
996
997 /**
998 * Returns the amount of buffered data available for reading.
999 *
1000 * @param context
1001 * the context to be used
1002 * @return The amount of buffered data available for reading.
1003 */
1004 int available(Context context) { // package protected for access from I/O
1005 // streams
1006 return context.buffer != null ? context.pos - context.readPos : 0;
1007 }
1008
1009 /**
1010 * Get the default buffer size. Can be overridden.
1011 *
1012 * @return {@link #DEFAULT_BUFFER_SIZE}
1013 */
1014 protected int getDefaultBufferSize() {
1015 return DEFAULT_BUFFER_SIZE;
1016 }
1017
1018 /**
1019 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
1020 *
1021 * @param context
1022 * the context to be used
1023 */
1024 private void resizeBuffer(Context context) {
1025 if (context.buffer == null) {
1026 context.buffer = new byte[getDefaultBufferSize()];
1027 context.pos = 0;
1028 context.readPos = 0;
1029 } else {
1030 byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
1031 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
1032 context.buffer = b;
1033 }
1034 }
1035
1036 /**
1037 * Ensure that the buffer has room for <code>size</code> bytes
1038 *
1039 * @param size
1040 * minimum spare space required
1041 * @param context
1042 * the context to be used
1043 */
1044 protected void ensureBufferSize(int size, Context context) {
1045 if ((context.buffer == null) || (context.buffer.length < context.pos + size)) {
1046 resizeBuffer(context);
1047 }
1048 }
1049
1050 /**
1051 * Extracts buffered data into the provided byte[] array, starting at
1052 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
1053 * were actually extracted.
1054 *
1055 * @param b
1056 * byte[] array to extract the buffered data into.
1057 * @param bPos
1058 * position in byte[] array to start extraction at.
1059 * @param bAvail
1060 * amount of bytes we're allowed to extract. We may extract fewer
1061 * (if fewer are available).
1062 * @param context
1063 * the context to be used
1064 * @return The number of bytes successfully extracted into the provided
1065 * byte[] array.
1066 */
1067 int readResults(byte[] b, int bPos, int bAvail, Context context) { // package
1068 // protected
1069 // for
1070 // access
1071 // from
1072 // I/O
1073 // streams
1074 if (context.buffer != null) {
1075 int len = Math.min(available(context), bAvail);
1076 System.arraycopy(context.buffer, context.readPos, b, bPos, len);
1077 context.readPos += len;
1078 if (context.readPos >= context.pos) {
1079 context.buffer = null; // so hasData() will return false, and
1080 // this method can return -1
1081 }
1082 return len;
1083 }
1084 return context.eof ? EOF : 0;
1085 }
1086
1087 /**
1088 * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
1089 * space, tab, CR, LF
1090 *
1091 * @param byteToCheck
1092 * the byte to check
1093 * @return true if byte is whitespace, false otherwise
1094 */
1095 protected static boolean isWhiteSpace(byte byteToCheck) {
1096 switch (byteToCheck) {
1097 case ' ':
1098 case '\n':
1099 case '\r':
1100 case '\t':
1101 return true;
1102 default:
1103 return false;
1104 }
1105 }
1106
1107 /**
1108 * Encodes an Object using the Base-N algorithm. This method is provided in
1109 * order to satisfy the requirements of the Encoder interface, and will
1110 * throw an EncoderException if the supplied object is not of type byte[].
1111 *
1112 * @param obj
1113 * Object to encode
1114 * @return An object (of type byte[]) containing the Base-N encoded data
1115 * which corresponds to the byte[] supplied.
1116 * @throws Exception
1117 * if the parameter supplied is not of type byte[]
1118 */
1119 public Object encode(Object obj) throws Exception {
1120 if (!(obj instanceof byte[])) {
1121 throw new Exception("Parameter supplied to Base-N encode is not a byte[]");
1122 }
1123 return encode((byte[]) obj);
1124 }
1125
1126 /**
1127 * Encodes a byte[] containing binary data, into a String containing
1128 * characters in the Base-N alphabet. Uses UTF8 encoding.
1129 *
1130 * @param pArray
1131 * a byte array containing binary data
1132 * @return A String containing only Base-N character data
1133 */
1134 public String encodeToString(byte[] pArray) {
1135 return StringUtils.newStringUtf8(encode(pArray));
1136 }
1137
1138 /**
1139 * Encodes a byte[] containing binary data, into a String containing
1140 * characters in the appropriate alphabet. Uses UTF8 encoding.
1141 *
1142 * @param pArray
1143 * a byte array containing binary data
1144 * @return String containing only character data in the appropriate
1145 * alphabet.
1146 */
1147 public String encodeAsString(byte[] pArray) {
1148 return StringUtils.newStringUtf8(encode(pArray));
1149 }
1150
1151 /**
1152 * Decodes an Object using the Base-N algorithm. This method is provided in
1153 * order to satisfy the requirements of the Decoder interface, and will
1154 * throw a DecoderException if the supplied object is not of type byte[] or
1155 * String.
1156 *
1157 * @param obj
1158 * Object to decode
1159 * @return An object (of type byte[]) containing the binary data which
1160 * corresponds to the byte[] or String supplied.
1161 * @throws Exception
1162 * if the parameter supplied is not of type byte[]
1163 */
1164 public Object decode(Object obj) throws Exception {
1165 if (obj instanceof byte[]) {
1166 return decode((byte[]) obj);
1167 } else if (obj instanceof String) {
1168 return decode((String) obj);
1169 } else {
1170 throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String");
1171 }
1172 }
1173
1174 /**
1175 * Decodes a String containing characters in the Base-N alphabet.
1176 *
1177 * @param pArray
1178 * A String containing Base-N character data
1179 * @return a byte array containing binary data
1180 */
1181 public byte[] decode(String pArray) {
1182 return decode(StringUtils.getBytesUtf8(pArray));
1183 }
1184
1185 /**
1186 * Decodes a byte[] containing characters in the Base-N alphabet.
1187 *
1188 * @param pArray
1189 * A byte array containing Base-N character data
1190 * @return a byte array containing binary data
1191 */
1192 public byte[] decode(byte[] pArray) {
1193 Context context = new Context();
1194 if (pArray == null || pArray.length == 0) {
1195 return pArray;
1196 }
1197 decode(pArray, 0, pArray.length, context);
1198 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
1199 byte[] result = new byte[context.pos];
1200 readResults(result, 0, result.length, context);
1201 return result;
1202 }
1203
1204 /**
1205 * Encodes a byte[] containing binary data, into a byte[] containing
1206 * characters in the alphabet.
1207 *
1208 * @param pArray
1209 * a byte array containing binary data
1210 * @return A byte array containing only the basen alphabetic character data
1211 */
1212 public byte[] encode(byte[] pArray) {
1213 Context context = new Context();
1214 if (pArray == null || pArray.length == 0) {
1215 return pArray;
1216 }
1217 encode(pArray, 0, pArray.length, context);
1218 encode(pArray, 0, EOF, context); // Notify encoder of EOF.
1219 byte[] buf = new byte[context.pos - context.readPos];
1220 readResults(buf, 0, buf.length, context);
1221 return buf;
1222 }
1223
1224 /**
1225 * Tests a given byte array to see if it contains only valid characters
1226 * within the alphabet. The method optionally treats whitespace and pad as
1227 * valid.
1228 *
1229 * @param arrayOctet
1230 * byte array to test
1231 * @param allowWSPad
1232 * if {@code true}, then whitespace and PAD are also allowed
1233 *
1234 * @return {@code true} if all bytes are valid characters in the alphabet or
1235 * if the byte array is empty; {@code false}, otherwise
1236 */
1237 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
1238 for (byte b : arrayOctet) {
1239 if (!isInAlphabet(b) && (!allowWSPad || (b != PAD) && !isWhiteSpace(b))) {
1240 return false;
1241 }
1242 }
1243 return true;
1244 }
1245
1246 /**
1247 * Tests a given String to see if it contains only valid characters within
1248 * the alphabet. The method treats whitespace and PAD as valid.
1249 *
1250 * @param basen
1251 * String to test
1252 * @return {@code true} if all characters in the String are valid characters
1253 * in the alphabet or if the String is empty; {@code false},
1254 * otherwise
1255 * @see #isInAlphabet(byte[], boolean)
1256 */
1257 public boolean isInAlphabet(String basen) {
1258 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
1259 }
1260
1261 /**
1262 * Tests a given byte array to see if it contains any characters within the
1263 * alphabet or PAD.
1264 *
1265 * Intended for use in checking line-ending arrays
1266 *
1267 * @param arrayOctet
1268 * byte array to test
1269 * @return {@code true} if any byte is a valid character in the alphabet or
1270 * PAD; {@code false} otherwise
1271 */
1272 protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
1273 if (arrayOctet == null) {
1274 return false;
1275 }
1276 for (byte element : arrayOctet) {
1277 if (PAD == element || isInAlphabet(element)) {
1278 return true;
1279 }
1280 }
1281 return false;
1282 }
1283
1284 /**
1285 * Calculates the amount of space needed to encode the supplied array.
1286 *
1287 * @param pArray
1288 * byte[] array which will later be encoded
1289 *
1290 * @return amount of space needed to encoded the supplied array. Returns a
1291 * long since a max-len array will require > Integer.MAX_VALUE
1292 */
1293 public long getEncodedLength(byte[] pArray) {
1294 // Calculate non-chunked size - rounded up to allow for padding
1295 // cast to long is needed to avoid possibility of overflow
1296 long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize;
1297 if (lineLength > 0) { // We're using chunking
1298 // Round up to nearest multiple
1299 len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
1300 }
1301 return len;
1302 }
1303
1304 /**
1305 * Character encoding names required of every implementation of the Java
1306 * platform.
1307 *
1308 * From the Java documentation <a href=
1309 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1310 * >Standard charsets</a>:
1311 * <p>
1312 * <cite>Every implementation of the Java platform is required to support
1313 * the following character encodings. Consult the release documentation for
1314 * your implementation to see if any other encodings are supported. Consult
1315 * the release documentation for your implementation to see if any other
1316 * encodings are supported. </cite>
1317 * </p>
1318 *
1319 * <ul>
1320 * <li><code>US-ASCII</code><br/>
1321 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
1322 * Unicode character set.</li>
1323 * <li><code>ISO-8859-1</code><br/>
1324 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
1325 * <li><code>UTF-8</code><br/>
1326 * Eight-bit Unicode Transformation Format.</li>
1327 * <li><code>UTF-16BE</code><br/>
1328 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
1329 * <li><code>UTF-16LE</code><br/>
1330 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
1331 * <li><code>UTF-16</code><br/>
1332 * Sixteen-bit Unicode Transformation Format, byte order specified by a
1333 * mandatory initial byte-order mark (either order accepted on input,
1334 * big-endian used on output.)</li>
1335 * </ul>
1336 *
1337 * This perhaps would best belong in the [lang] project. Even if a similar
1338 * interface is defined in [lang], it is not foreseen that [codec] would be
1339 * made to depend on [lang].
1340 *
1341 * <p>
1342 * This class is immutable and thread-safe.
1343 * </p>
1344 *
1345 * @see <a
1346 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1347 * charsets</a>
1348 * @since 1.4
1349 * @version $Id$
1350 */
1351 public static class CharEncoding {
1352 /**
1353 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
1354 * <p>
1355 * Every implementation of the Java platform is required to support this
1356 * character encoding.
1357 * </p>
1358 *
1359 * @see <a
1360 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1361 * charsets</a>
1362 */
1363 public static final String ISO_8859_1 = "ISO-8859-1";
1364
1365 /**
1366 * <p>
1367 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
1368 * Latin block of the Unicode character set.
1369 * </p>
1370 * <p>
1371 * Every implementation of the Java platform is required to support this
1372 * character encoding.
1373 * </p>
1374 *
1375 * @see <a
1376 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1377 * charsets</a>
1378 */
1379 public static final String US_ASCII = "US-ASCII";
1380
1381 /**
1382 * <p>
1383 * Sixteen-bit Unicode Transformation Format, The byte order specified
1384 * by a mandatory initial byte-order mark (either order accepted on
1385 * input, big-endian used on output)
1386 * </p>
1387 * <p>
1388 * Every implementation of the Java platform is required to support this
1389 * character encoding.
1390 * </p>
1391 *
1392 * @see <a
1393 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1394 * charsets</a>
1395 */
1396 public static final String UTF_16 = "UTF-16";
1397
1398 /**
1399 * <p>
1400 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
1401 * </p>
1402 * <p>
1403 * Every implementation of the Java platform is required to support this
1404 * character encoding.
1405 * </p>
1406 *
1407 * @see <a
1408 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1409 * charsets</a>
1410 */
1411 public static final String UTF_16BE = "UTF-16BE";
1412
1413 /**
1414 * <p>
1415 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
1416 * </p>
1417 * <p>
1418 * Every implementation of the Java platform is required to support this
1419 * character encoding.
1420 * </p>
1421 *
1422 * @see <a
1423 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1424 * charsets</a>
1425 */
1426 public static final String UTF_16LE = "UTF-16LE";
1427
1428 /**
1429 * <p>
1430 * Eight-bit Unicode Transformation Format.
1431 * </p>
1432 * <p>
1433 * Every implementation of the Java platform is required to support this
1434 * character encoding.
1435 * </p>
1436 *
1437 * @see <a
1438 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1439 * charsets</a>
1440 */
1441 public static final String UTF_8 = "UTF-8";
1442 }
1443
1444 /**
1445 * Charsets required of every implementation of the Java platform.
1446 *
1447 * From the Java documentation <a href=
1448 * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1449 * >Standard charsets</a>:
1450 * <p>
1451 * <cite>Every implementation of the Java platform is required to support
1452 * the following character encodings. Consult the release documentation for
1453 * your implementation to see if any other encodings are supported. Consult
1454 * the release documentation for your implementation to see if any other
1455 * encodings are supported. </cite>
1456 * </p>
1457 *
1458 * <ul>
1459 * <li><code>US-ASCII</code><br/>
1460 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
1461 * Unicode character set.</li>
1462 * <li><code>ISO-8859-1</code><br/>
1463 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
1464 * <li><code>UTF-8</code><br/>
1465 * Eight-bit Unicode Transformation Format.</li>
1466 * <li><code>UTF-16BE</code><br/>
1467 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
1468 * <li><code>UTF-16LE</code><br/>
1469 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
1470 * <li><code>UTF-16</code><br/>
1471 * Sixteen-bit Unicode Transformation Format, byte order specified by a
1472 * mandatory initial byte-order mark (either order accepted on input,
1473 * big-endian used on output.)</li>
1474 * </ul>
1475 *
1476 * This perhaps would best belong in the Commons Lang project. Even if a
1477 * similar class is defined in Commons Lang, it is not foreseen that Commons
1478 * Codec would be made to depend on Commons Lang.
1479 *
1480 * <p>
1481 * This class is immutable and thread-safe.
1482 * </p>
1483 *
1484 * @see <a
1485 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1486 * charsets</a>
1487 * @since 1.7
1488 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
1489 */
1490 public static class Charsets {
1491
1492 //
1493 // This class should only contain Charset instances for required
1494 // encodings. This guarantees that it will load correctly and
1495 // without delay on all Java platforms.
1496 //
1497
1498 /**
1499 * Returns the given Charset or the default Charset if the given Charset
1500 * is null.
1501 *
1502 * @param charset
1503 * A charset or null.
1504 * @return the given Charset or the default Charset if the given Charset
1505 * is null
1506 */
1507 public static Charset toCharset(Charset charset) {
1508 return charset == null ? Charset.defaultCharset() : charset;
1509 }
1510
1511 /**
1512 * Returns a Charset for the named charset. If the name is null, return
1513 * the default Charset.
1514 *
1515 * @param charset
1516 * The name of the requested charset, may be null.
1517 * @return a Charset for the named charset
1518 * @throws UnsupportedCharsetException
1519 * If the named charset is unavailable
1520 */
1521 public static Charset toCharset(String charset) {
1522 return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
1523 }
1524
1525 /**
1526 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
1527 * <p>
1528 * Every implementation of the Java platform is required to support this
1529 * character encoding.
1530 * </p>
1531 *
1532 * @see <a
1533 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1534 * charsets</a>
1535 */
1536 public static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;
1537
1538 /**
1539 * <p>
1540 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
1541 * Latin block of the Unicode character set.
1542 * </p>
1543 * <p>
1544 * Every implementation of the Java platform is required to support this
1545 * character encoding.
1546 * </p>
1547 *
1548 * @see <a
1549 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1550 * charsets</a>
1551 */
1552 public static final Charset US_ASCII = StandardCharsets.US_ASCII;
1553
1554 /**
1555 * <p>
1556 * Sixteen-bit Unicode Transformation Format, The byte order specified
1557 * by a mandatory initial byte-order mark (either order accepted on
1558 * input, big-endian used on output)
1559 * </p>
1560 * <p>
1561 * Every implementation of the Java platform is required to support this
1562 * character encoding.
1563 * </p>
1564 *
1565 * @see <a
1566 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1567 * charsets</a>
1568 */
1569 public static final Charset UTF_16 = StandardCharsets.UTF_16;
1570
1571 /**
1572 * <p>
1573 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
1574 * </p>
1575 * <p>
1576 * Every implementation of the Java platform is required to support this
1577 * character encoding.
1578 * </p>
1579 *
1580 * @see <a
1581 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1582 * charsets</a>
1583 */
1584 public static final Charset UTF_16BE = StandardCharsets.UTF_16BE;
1585
1586 /**
1587 * <p>
1588 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
1589 * </p>
1590 * <p>
1591 * Every implementation of the Java platform is required to support this
1592 * character encoding.
1593 * </p>
1594 *
1595 * @see <a
1596 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1597 * charsets</a>
1598 */
1599 public static final Charset UTF_16LE = StandardCharsets.UTF_16LE;
1600
1601 /**
1602 * <p>
1603 * Eight-bit Unicode Transformation Format.
1604 * </p>
1605 * <p>
1606 * Every implementation of the Java platform is required to support this
1607 * character encoding.
1608 * </p>
1609 *
1610 * @see <a
1611 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1612 * charsets</a>
1613 */
1614 public static final Charset UTF_8 = StandardCharsets.UTF_8;
1615 }
1616
1617 /**
1618 * Converts String to and from bytes using the encodings required by the
1619 * Java specification. These encodings are specified in <a href=
1620 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1621 * >Standard charsets</a>
1622 *
1623 * <p>
1624 * This class is immutable and thread-safe.
1625 * </p>
1626 *
1627 * @see CharEncoding
1628 * @see <a
1629 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1630 * charsets</a>
1631 * @version $Id$
1632 * @since 1.4
1633 */
1634 public static class StringUtils {
1635
1636 /**
1637 * Calls {@link String#getBytes(Charset)}
1638 *
1639 * @param string
1640 * The string to encode (if null, return null).
1641 * @param charset
1642 * The {@link Charset} to encode the {@code String}
1643 * @return the encoded bytes
1644 */
1645 private static byte[] getBytes(String string, Charset charset) {
1646 if (string == null) {
1647 return null;
1648 }
1649 return string.getBytes(charset);
1650 }
1651
1652 /**
1653 * Encodes the given string into a sequence of bytes using the
1654 * ISO-8859-1 charset, storing the result into a new byte array.
1655 *
1656 * @param string
1657 * the String to encode, may be {@code null}
1658 * @return encoded bytes, or {@code null} if the input string was
1659 * {@code null}
1660 * @throws NullPointerException
1661 * Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1662 * which should never happen since it is required by the
1663 * Java platform specification.
1664 * @since As of 1.7, throws {@link NullPointerException} instead of
1665 * UnsupportedEncodingException
1666 * @see <a
1667 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1668 * charsets</a>
1669 * @see #getBytesUnchecked(String, String)
1670 */
1671 public static byte[] getBytesIso8859_1(String string) {
1672 return getBytes(string, Charsets.ISO_8859_1);
1673 }
1674
1675 /**
1676 * Encodes the given string into a sequence of bytes using the named
1677 * charset, storing the result into a new byte array.
1678 * <p>
1679 * This method catches {@link UnsupportedEncodingException} and rethrows
1680 * it as {@link IllegalStateException}, which should never happen for a
1681 * required charset name. Use this method when the encoding is required
1682 * to be in the JRE.
1683 * </p>
1684 *
1685 * @param string
1686 * the String to encode, may be {@code null}
1687 * @param charsetName
1688 * The name of a required {@link java.nio.charset.Charset}
1689 * @return encoded bytes, or {@code null} if the input string was
1690 * {@code null}
1691 * @throws IllegalStateException
1692 * Thrown when a {@link UnsupportedEncodingException} is
1693 * caught, which should never happen for a required charset
1694 * name.
1695 * @see CharEncoding
1696 * @see String#getBytes(String)
1697 */
1698 public static byte[] getBytesUnchecked(String string, String charsetName) {
1699 if (string == null) {
1700 return null;
1701 }
1702 try {
1703 return string.getBytes(charsetName);
1704 } catch (UnsupportedEncodingException e) {
1705 throw StringUtils.newIllegalStateException(charsetName, e);
1706 }
1707 }
1708
1709 /**
1710 * Encodes the given string into a sequence of bytes using the US-ASCII
1711 * charset, storing the result into a new byte array.
1712 *
1713 * @param string
1714 * the String to encode, may be {@code null}
1715 * @return encoded bytes, or {@code null} if the input string was
1716 * {@code null}
1717 * @throws NullPointerException
1718 * Thrown if {@link Charsets#US_ASCII} is not initialized,
1719 * which should never happen since it is required by the
1720 * Java platform specification.
1721 * @since As of 1.7, throws {@link NullPointerException} instead of
1722 * UnsupportedEncodingException
1723 * @see <a
1724 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1725 * charsets</a>
1726 * @see #getBytesUnchecked(String, String)
1727 */
1728 public static byte[] getBytesUsAscii(String string) {
1729 return getBytes(string, Charsets.US_ASCII);
1730 }
1731
1732 /**
1733 * Encodes the given string into a sequence of bytes using the UTF-16
1734 * charset, storing the result into a new byte array.
1735 *
1736 * @param string
1737 * the String to encode, may be {@code null}
1738 * @return encoded bytes, or {@code null} if the input string was
1739 * {@code null}
1740 * @throws NullPointerException
1741 * Thrown if {@link Charsets#UTF_16} is not initialized,
1742 * which should never happen since it is required by the
1743 * Java platform specification.
1744 * @since As of 1.7, throws {@link NullPointerException} instead of
1745 * UnsupportedEncodingException
1746 * @see <a
1747 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1748 * charsets</a>
1749 * @see #getBytesUnchecked(String, String)
1750 */
1751 public static byte[] getBytesUtf16(String string) {
1752 return getBytes(string, Charsets.UTF_16);
1753 }
1754
1755 /**
1756 * Encodes the given string into a sequence of bytes using the UTF-16BE
1757 * charset, storing the result into a new byte array.
1758 *
1759 * @param string
1760 * the String to encode, may be {@code null}
1761 * @return encoded bytes, or {@code null} if the input string was
1762 * {@code null}
1763 * @throws NullPointerException
1764 * Thrown if {@link Charsets#UTF_16BE} is not initialized,
1765 * which should never happen since it is required by the
1766 * Java platform specification.
1767 * @since As of 1.7, throws {@link NullPointerException} instead of
1768 * UnsupportedEncodingException
1769 * @see <a
1770 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1771 * charsets</a>
1772 * @see #getBytesUnchecked(String, String)
1773 */
1774 public static byte[] getBytesUtf16Be(String string) {
1775 return getBytes(string, Charsets.UTF_16BE);
1776 }
1777
1778 /**
1779 * Encodes the given string into a sequence of bytes using the UTF-16LE
1780 * charset, storing the result into a new byte array.
1781 *
1782 * @param string
1783 * the String to encode, may be {@code null}
1784 * @return encoded bytes, or {@code null} if the input string was
1785 * {@code null}
1786 * @throws NullPointerException
1787 * Thrown if {@link Charsets#UTF_16LE} is not initialized,
1788 * which should never happen since it is required by the
1789 * Java platform specification.
1790 * @since As of 1.7, throws {@link NullPointerException} instead of
1791 * UnsupportedEncodingException
1792 * @see <a
1793 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1794 * charsets</a>
1795 * @see #getBytesUnchecked(String, String)
1796 */
1797 public static byte[] getBytesUtf16Le(String string) {
1798 return getBytes(string, Charsets.UTF_16LE);
1799 }
1800
1801 /**
1802 * Encodes the given string into a sequence of bytes using the UTF-8
1803 * charset, storing the result into a new byte array.
1804 *
1805 * @param string
1806 * the String to encode, may be {@code null}
1807 * @return encoded bytes, or {@code null} if the input string was
1808 * {@code null}
1809 * @throws NullPointerException
1810 * Thrown if {@link Charsets#UTF_8} is not initialized,
1811 * which should never happen since it is required by the
1812 * Java platform specification.
1813 * @since As of 1.7, throws {@link NullPointerException} instead of
1814 * UnsupportedEncodingException
1815 * @see <a
1816 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1817 * charsets</a>
1818 * @see #getBytesUnchecked(String, String)
1819 */
1820 public static byte[] getBytesUtf8(String string) {
1821 return getBytes(string, Charsets.UTF_8);
1822 }
1823
1824 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
1825 return new IllegalStateException(charsetName + ": " + e);
1826 }
1827
1828 /**
1829 * Constructs a new <code>String</code> by decoding the specified array
1830 * of bytes using the given charset.
1831 *
1832 * @param bytes
1833 * The bytes to be decoded into characters
1834 * @param charset
1835 * The {@link Charset} to encode the {@code String}
1836 * @return A new <code>String</code> decoded from the specified array of
1837 * bytes using the given charset, or {@code null} if the input
1838 * byte array was {@code null}.
1839 * @throws NullPointerException
1840 * Thrown if {@link Charsets#UTF_8} is not initialized,
1841 * which should never happen since it is required by the
1842 * Java platform specification.
1843 */
1844 private static String newString(byte[] bytes, Charset charset) {
1845 return bytes == null ? null : new String(bytes, charset);
1846 }
1847
1848 /**
1849 * Constructs a new <code>String</code> by decoding the specified array
1850 * of bytes using the given charset.
1851 * <p>
1852 * This method catches {@link UnsupportedEncodingException} and
1853 * re-throws it as {@link IllegalStateException}, which should never
1854 * happen for a required charset name. Use this method when the encoding
1855 * is required to be in the JRE.
1856 * </p>
1857 *
1858 * @param bytes
1859 * The bytes to be decoded into characters, may be
1860 * {@code null}
1861 * @param charsetName
1862 * The name of a required {@link java.nio.charset.Charset}
1863 * @return A new <code>String</code> decoded from the specified array of
1864 * bytes using the given charset, or {@code null} if the input
1865 * byte array was {@code null}.
1866 * @throws IllegalStateException
1867 * Thrown when a {@link UnsupportedEncodingException} is
1868 * caught, which should never happen for a required charset
1869 * name.
1870 * @see CharEncoding
1871 * @see String#String(byte[], String)
1872 */
1873 public static String newString(byte[] bytes, String charsetName) {
1874 if (bytes == null) {
1875 return null;
1876 }
1877 try {
1878 return new String(bytes, charsetName);
1879 } catch (UnsupportedEncodingException e) {
1880 throw StringUtils.newIllegalStateException(charsetName, e);
1881 }
1882 }
1883
1884 /**
1885 * Constructs a new <code>String</code> by decoding the specified array
1886 * of bytes using the ISO-8859-1 charset.
1887 *
1888 * @param bytes
1889 * The bytes to be decoded into characters, may be
1890 * {@code null}
1891 * @return A new <code>String</code> decoded from the specified array of
1892 * bytes using the ISO-8859-1 charset, or {@code null} if the
1893 * input byte array was {@code null}.
1894 * @throws NullPointerException
1895 * Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1896 * which should never happen since it is required by the
1897 * Java platform specification.
1898 * @since As of 1.7, throws {@link NullPointerException} instead of
1899 * UnsupportedEncodingException
1900 */
1901 public static String newStringIso8859_1(byte[] bytes) {
1902 return new String(bytes, Charsets.ISO_8859_1);
1903 }
1904
1905 /**
1906 * Constructs a new <code>String</code> by decoding the specified array
1907 * of bytes using the US-ASCII charset.
1908 *
1909 * @param bytes
1910 * The bytes to be decoded into characters
1911 * @return A new <code>String</code> decoded from the specified array of
1912 * bytes using the US-ASCII charset, or {@code null} if the
1913 * input byte array was {@code null}.
1914 * @throws NullPointerException
1915 * Thrown if {@link Charsets#US_ASCII} is not initialized,
1916 * which should never happen since it is required by the
1917 * Java platform specification.
1918 * @since As of 1.7, throws {@link NullPointerException} instead of
1919 * UnsupportedEncodingException
1920 */
1921 public static String newStringUsAscii(byte[] bytes) {
1922 return new String(bytes, Charsets.US_ASCII);
1923 }
1924
1925 /**
1926 * Constructs a new <code>String</code> by decoding the specified array
1927 * of bytes using the UTF-16 charset.
1928 *
1929 * @param bytes
1930 * The bytes to be decoded into characters
1931 * @return A new <code>String</code> decoded from the specified array of
1932 * bytes using the UTF-16 charset or {@code null} if the input
1933 * byte array was {@code null}.
1934 * @throws NullPointerException
1935 * Thrown if {@link Charsets#UTF_16} is not initialized,
1936 * which should never happen since it is required by the
1937 * Java platform specification.
1938 * @since As of 1.7, throws {@link NullPointerException} instead of
1939 * UnsupportedEncodingException
1940 */
1941 public static String newStringUtf16(byte[] bytes) {
1942 return new String(bytes, Charsets.UTF_16);
1943 }
1944
1945 /**
1946 * Constructs a new <code>String</code> by decoding the specified array
1947 * of bytes using the UTF-16BE charset.
1948 *
1949 * @param bytes
1950 * The bytes to be decoded into characters
1951 * @return A new <code>String</code> decoded from the specified array of
1952 * bytes using the UTF-16BE charset, or {@code null} if the
1953 * input byte array was {@code null}.
1954 * @throws NullPointerException
1955 * Thrown if {@link Charsets#UTF_16BE} is not initialized,
1956 * which should never happen since it is required by the
1957 * Java platform specification.
1958 * @since As of 1.7, throws {@link NullPointerException} instead of
1959 * UnsupportedEncodingException
1960 */
1961 public static String newStringUtf16Be(byte[] bytes) {
1962 return new String(bytes, Charsets.UTF_16BE);
1963 }
1964
1965 /**
1966 * Constructs a new <code>String</code> by decoding the specified array
1967 * of bytes using the UTF-16LE charset.
1968 *
1969 * @param bytes
1970 * The bytes to be decoded into characters
1971 * @return A new <code>String</code> decoded from the specified array of
1972 * bytes using the UTF-16LE charset, or {@code null} if the
1973 * input byte array was {@code null}.
1974 * @throws NullPointerException
1975 * Thrown if {@link Charsets#UTF_16LE} is not initialized,
1976 * which should never happen since it is required by the
1977 * Java platform specification.
1978 * @since As of 1.7, throws {@link NullPointerException} instead of
1979 * UnsupportedEncodingException
1980 */
1981 public static String newStringUtf16Le(byte[] bytes) {
1982 return new String(bytes, Charsets.UTF_16LE);
1983 }
1984
1985 /**
1986 * Constructs a new <code>String</code> by decoding the specified array
1987 * of bytes using the UTF-8 charset.
1988 *
1989 * @param bytes
1990 * The bytes to be decoded into characters
1991 * @return A new <code>String</code> decoded from the specified array of
1992 * bytes using the UTF-8 charset, or {@code null} if the input
1993 * byte array was {@code null}.
1994 * @throws NullPointerException
1995 * Thrown if {@link Charsets#UTF_8} is not initialized,
1996 * which should never happen since it is required by the
1997 * Java platform specification.
1998 * @since As of 1.7, throws {@link NullPointerException} instead of
1999 * UnsupportedEncodingException
2000 */
2001 public static String newStringUtf8(byte[] bytes) {
2002 return newString(bytes, Charsets.UTF_8);
2003 }
2004
2005 }
2006
2007 }