1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package ca.uhn.hl7v2.hoh.util.repackage; 19 20 import java.io.UnsupportedEncodingException; 21 import java.math.BigInteger; 22 import java.nio.charset.Charset; 23 import java.nio.charset.UnsupportedCharsetException; 24 25 /** 26 * Provides Base64 encoding and decoding as defined by <a 27 * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>. 28 * 29 * <p> 30 * This class implements section <cite>6.8. Base64 31 * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet 32 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by 33 * Freed and Borenstein. 34 * </p> 35 * <p> 36 * The class can be parameterized in the following manner with various 37 * constructors: 38 * <ul> 39 * <li>URL-safe mode: Default off.</li> 40 * <li>Line length: Default 76. Line length that aren't multiples of 4 will 41 * still essentially end up being multiples of 4 in the encoded data. 
42 * <li>Line separator: Default is CRLF ("\r\n")</li> 43 * </ul> 44 * </p> 45 * <p> 46 * Since this class operates directly on byte streams, and not character 47 * streams, it is hard-coded to only encode/decode character encodings which are 48 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, 49 * etc). 50 * </p> 51 * <p> 52 * This class is thread-safe. 53 * </p> 54 * 55 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 56 * @author Note that this class has been repackaged from Apache Commons-Codec 57 * and is distributed under the terms of the Apache Software License, 58 * version 2.0 59 */ 60 public class Base64 { 61 62 public static void main(String[] args) { 63 64 System.out.println("basic " + encodeBase64String("cgta:d@3r$@TTg2446yhhh2h4".getBytes())); 65 66 } 67 68 /** 69 * BASE32 characters are 6 bits in length. They are formed by taking a block 70 * of 3 octets to form a 24-bit string, which is converted into 4 BASE64 71 * characters. 72 */ 73 private static final int BITS_PER_ENCODED_BYTE = 6; 74 private static final int BYTES_PER_UNENCODED_BLOCK = 3; 75 private static final int BYTES_PER_ENCODED_BLOCK = 4; 76 77 /** 78 * Chunk separator per RFC 2045 section 2.1. 79 * 80 * <p> 81 * N.B. The next major release may break compatibility and make this field 82 * private. 83 * </p> 84 * 85 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 86 * 2.1</a> 87 */ 88 static final byte[] CHUNK_SEPARATOR = { '\r', '\n' }; 89 90 /** 91 * This array is a lookup table that translates 6-bit positive integer index 92 * values into their "Base64 Alphabet" equivalents as specified in Table 1 93 * of RFC 2045. 94 * 95 * Thanks to "commons" project in ws.apache.org for this code. 
96 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 97 */ 98 private static final byte[] STANDARD_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 99 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; 100 101 /** 102 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / 103 * changed to - and _ to make the encoded Base64 results more URL-SAFE. This 104 * table is only used when the Base64's mode is set to URL-SAFE. 105 */ 106 private static final byte[] URL_SAFE_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 107 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' }; 108 109 /** 110 * This array is a lookup table that translates Unicode characters drawn 111 * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into 112 * their 6-bit positive integer equivalents. Characters that are not in the 113 * Base64 alphabet but fall within the bounds of the array are translated to 114 * -1. 115 * 116 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This 117 * means decoder seamlessly handles both URL_SAFE and STANDARD base64. (The 118 * encoder, on the other hand, needs to know ahead of time what to emit). 119 * 120 * Thanks to "commons" project in ws.apache.org for this code. 
121 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 122 */ 123 private static final byte[] DECODE_TABLE = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 124 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 }; 125 126 /** 127 * Base64 uses 6-bit fields. 128 */ 129 /** Mask used to extract 6 bits, used when encoding */ 130 private static final int MASK_6BITS = 0x3f; 131 132 // The static final fields above are used for the original static byte[] 133 // methods on Base64. 134 // The private member fields below are used with the new streaming approach, 135 // which requires 136 // some state be preserved between calls of encode() and decode(). 137 138 /** 139 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE 140 * above remains static because it is able to decode both STANDARD and 141 * URL_SAFE streams, but the encodeTable must be a member variable so we can 142 * switch between the two modes. 143 */ 144 private byte[] encodeTable; 145 146 // Only one decode table currently; keep for consistency with Base32 code 147 private final byte[] decodeTable = DECODE_TABLE; 148 149 /** 150 * Line separator for encoding. Not used when decoding. Only used if 151 * lineLength > 0. 152 */ 153 private byte[] lineSeparator; 154 155 /** 156 * Convenience variable to help us determine when our buffer is going to run 157 * out of room and needs resizing. 
158 * <code>decodeSize = 3 + lineSeparator.length;</code> 159 */ 160 private int decodeSize; 161 162 /** 163 * Convenience variable to help us determine when our buffer is going to run 164 * out of room and needs resizing. 165 * <code>encodeSize = 4 + lineSeparator.length;</code> 166 */ 167 private int encodeSize; 168 169 /** 170 * Creates a Base64 codec used for decoding (all modes) and encoding in 171 * URL-unsafe mode. 172 * <p> 173 * When encoding the line length is 0 (no chunking), and the encoding table 174 * is STANDARD_ENCODE_TABLE. 175 * </p> 176 * 177 * <p> 178 * When decoding all variants are supported. 179 * </p> 180 */ 181 public Base64() { 182 this(0); 183 } 184 185 /** 186 * Creates a Base64 codec used for decoding (all modes) and encoding in the 187 * given URL-safe mode. 188 * <p> 189 * When encoding the line length is 76, the line separator is CRLF, and the 190 * encoding table is STANDARD_ENCODE_TABLE. 191 * </p> 192 * 193 * <p> 194 * When decoding all variants are supported. 195 * </p> 196 * 197 * @param urlSafe 198 * if {@code true}, URL-safe encoding is used. In most cases this 199 * should be set to {@code false}. 200 * @since 1.4 201 */ 202 public Base64(boolean urlSafe) { 203 this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); 204 } 205 206 /** 207 * Creates a Base64 codec used for decoding (all modes) and encoding in 208 * URL-unsafe mode. 209 * <p> 210 * When encoding the line length is given in the constructor, the line 211 * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 212 * </p> 213 * <p> 214 * Line lengths that aren't multiples of 4 will still essentially end up 215 * being multiples of 4 in the encoded data. 216 * </p> 217 * <p> 218 * When decoding all variants are supported. 219 * </p> 220 * 221 * @param lineLength 222 * Each line of encoded data will be at most of the given length 223 * (rounded down to nearest multiple of 4). If lineLength <= 0, 224 * then the output will not be divided into lines (chunks). 
225 * Ignored when decoding. 226 * @since 1.4 227 */ 228 public Base64(int lineLength) { 229 this(lineLength, CHUNK_SEPARATOR); 230 } 231 232 /** 233 * Creates a Base64 codec used for decoding (all modes) and encoding in 234 * URL-unsafe mode. 235 * <p> 236 * When encoding the line length and line separator are given in the 237 * constructor, and the encoding table is STANDARD_ENCODE_TABLE. 238 * </p> 239 * <p> 240 * Line lengths that aren't multiples of 4 will still essentially end up 241 * being multiples of 4 in the encoded data. 242 * </p> 243 * <p> 244 * When decoding all variants are supported. 245 * </p> 246 * 247 * @param lineLength 248 * Each line of encoded data will be at most of the given length 249 * (rounded down to nearest multiple of 4). If lineLength <= 0, 250 * then the output will not be divided into lines (chunks). 251 * Ignored when decoding. 252 * @param lineSeparator 253 * Each line of encoded data will end with this sequence of 254 * bytes. 255 * @throws IllegalArgumentException 256 * Thrown when the provided lineSeparator included some base64 257 * characters. 258 * @since 1.4 259 */ 260 public Base64(int lineLength, byte[] lineSeparator) { 261 this(lineLength, lineSeparator, false); 262 } 263 264 /** 265 * Creates a Base64 codec used for decoding (all modes) and encoding in 266 * URL-unsafe mode. 267 * <p> 268 * When encoding the line length and line separator are given in the 269 * constructor, and the encoding table is STANDARD_ENCODE_TABLE. 270 * </p> 271 * <p> 272 * Line lengths that aren't multiples of 4 will still essentially end up 273 * being multiples of 4 in the encoded data. 274 * </p> 275 * <p> 276 * When decoding all variants are supported. 277 * </p> 278 * 279 * @param lineLength 280 * Each line of encoded data will be at most of the given length 281 * (rounded down to nearest multiple of 4). If lineLength <= 0, 282 * then the output will not be divided into lines (chunks). 283 * Ignored when decoding. 
284 * @param lineSeparator 285 * Each line of encoded data will end with this sequence of 286 * bytes. 287 * @param urlSafe 288 * Instead of emitting '+' and '/' we emit '-' and '_' 289 * respectively. urlSafe is only applied to encode operations. 290 * Decoding seamlessly handles both modes. 291 * @throws IllegalArgumentException 292 * The provided lineSeparator included some base64 characters. 293 * That's not going to work! 294 * @since 1.4 295 */ 296 public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) { 297 this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length); 298 // TODO could be simplified if there is no requirement to reject invalid 299 // line sep when length <=0 300 // @see test case Base64Test.testConstructors() 301 if (lineSeparator != null) { 302 if (containsAlphabetOrPad(lineSeparator)) { 303 String sep = StringUtils.newStringUtf8(lineSeparator); 304 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]"); 305 } 306 if (lineLength > 0) { // null line-sep forces no chunking rather 307 // than throwing IAE 308 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 309 this.lineSeparator = new byte[lineSeparator.length]; 310 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 311 } else { 312 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 313 this.lineSeparator = null; 314 } 315 } else { 316 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 317 this.lineSeparator = null; 318 } 319 this.decodeSize = this.encodeSize - 1; 320 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; 321 } 322 323 /** 324 * Returns our current encode mode. True if we're URL-SAFE, false otherwise. 325 * 326 * @return true if we're in URL-SAFE mode, false otherwise. 
327 * @since 1.4 328 */ 329 public boolean isUrlSafe() { 330 return this.encodeTable == URL_SAFE_ENCODE_TABLE; 331 } 332 333 /** 334 * <p> 335 * Encodes all of the provided data, starting at inPos, for inAvail bytes. 336 * Must be called at least twice: once with the data to encode, and once 337 * with inAvail set to "-1" to alert encoder that EOF has been reached, so 338 * flush last remaining bytes (if not multiple of 3). 339 * </p> 340 * <p> 341 * Thanks to "commons" project in ws.apache.org for the bitwise operations, 342 * and general approach. 343 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 344 * </p> 345 * 346 * @param in 347 * byte[] array of binary data to base64 encode. 348 * @param inPos 349 * Position to start reading data from. 350 * @param inAvail 351 * Amount of bytes available from input for encoding. 352 * @param context 353 * the context to be used 354 */ 355 void encode(byte[] in, int inPos, int inAvail, Context context) { 356 if (context.eof) { 357 return; 358 } 359 // inAvail < 0 is how we're informed of EOF in the underlying data we're 360 // encoding. 361 if (inAvail < 0) { 362 context.eof = true; 363 if (0 == context.modulus && lineLength == 0) { 364 return; // no leftovers to process and not using chunking 365 } 366 ensureBufferSize(encodeSize, context); 367 int savedPos = context.pos; 368 switch (context.modulus) { // 0-2 369 case 1: // 8 bits = 6 + 2 370 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; // top 371 // 6 372 // bits 373 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; // remaining 374 // 2 375 // URL-SAFE skips the padding to further reduce size. 
376 if (encodeTable == STANDARD_ENCODE_TABLE) { 377 context.buffer[context.pos++] = PAD; 378 context.buffer[context.pos++] = PAD; 379 } 380 break; 381 382 case 2: // 16 bits = 6 + 6 + 4 383 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS]; 384 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS]; 385 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS]; 386 // URL-SAFE skips the padding to further reduce size. 387 if (encodeTable == STANDARD_ENCODE_TABLE) { 388 context.buffer[context.pos++] = PAD; 389 } 390 break; 391 } 392 context.currentLinePos += context.pos - savedPos; // keep track of 393 // current line 394 // position 395 // if currentPos == 0 we are at the start of a line, so don't add 396 // CRLF 397 if (lineLength > 0 && context.currentLinePos > 0) { 398 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length); 399 context.pos += lineSeparator.length; 400 } 401 } else { 402 for (int i = 0; i < inAvail; i++) { 403 ensureBufferSize(encodeSize, context); 404 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK; 405 int b = in[inPos++]; 406 if (b < 0) { 407 b += 256; 408 } 409 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE 410 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to 411 // extract 412 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS]; 413 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS]; 414 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS]; 415 context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; 416 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 417 if (lineLength > 0 && lineLength <= context.currentLinePos) { 418 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length); 419 context.pos += 
lineSeparator.length; 420 context.currentLinePos = 0; 421 } 422 } 423 } 424 } 425 } 426 427 /** 428 * <p> 429 * Decodes all of the provided data, starting at inPos, for inAvail bytes. 430 * Should be called at least twice: once with the data to decode, and once 431 * with inAvail set to "-1" to alert decoder that EOF has been reached. The 432 * "-1" call is not necessary when decoding, but it doesn't hurt, either. 433 * </p> 434 * <p> 435 * Ignores all non-base64 characters. This is how chunked (e.g. 76 436 * character) data is handled, since CR and LF are silently ignored, but has 437 * implications for other bytes, too. This method subscribes to the 438 * garbage-in, garbage-out philosophy: it will not check the provided data 439 * for validity. 440 * </p> 441 * <p> 442 * Thanks to "commons" project in ws.apache.org for the bitwise operations, 443 * and general approach. 444 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 445 * </p> 446 * 447 * @param in 448 * byte[] array of ascii data to base64 decode. 449 * @param inPos 450 * Position to start reading data from. 451 * @param inAvail 452 * Amount of bytes available from input for encoding. 453 * @param context 454 * the context to be used 455 */ 456 void decode(byte[] in, int inPos, int inAvail, Context context) { 457 if (context.eof) { 458 return; 459 } 460 if (inAvail < 0) { 461 context.eof = true; 462 } 463 for (int i = 0; i < inAvail; i++) { 464 ensureBufferSize(decodeSize, context); 465 byte b = in[inPos++]; 466 if (b == PAD) { 467 // We're done. 
468 context.eof = true; 469 break; 470 } else { 471 if (b >= 0 && b < DECODE_TABLE.length) { 472 int result = DECODE_TABLE[b]; 473 if (result >= 0) { 474 context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; 475 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; 476 if (context.modulus == 0) { 477 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS); 478 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); 479 context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 480 } 481 } 482 } 483 } 484 } 485 486 // Two forms of EOF as far as base64 decoder is concerned: actual 487 // EOF (-1) and first time '=' character is encountered in stream. 488 // This approach makes the '=' padding characters completely optional. 489 if (context.eof && context.modulus != 0) { 490 ensureBufferSize(decodeSize, context); 491 492 // We have some spare bits remaining 493 // Output all whole multiples of 8 bits and ignore the rest 494 switch (context.modulus) { 495 // case 1: // 6 bits - ignore entirely 496 // break; 497 case 2: // 12 bits = 8 + 4 498 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the 499 // extra 4 500 // bits 501 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); 502 break; 503 case 3: // 18 bits = 8 + 8 + 2 504 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits 505 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); 506 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); 507 break; 508 } 509 } 510 } 511 512 /** 513 * Tests a given byte array to see if it contains only valid characters 514 * within the Base64 alphabet. Currently the method treats whitespace as 515 * valid. 
516 * 517 * @param arrayOctet 518 * byte array to test 519 * @return {@code true} if all bytes are valid characters in the Base64 520 * alphabet or if the byte array is empty; {@code false}, otherwise 521 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0. 522 */ 523 @Deprecated 524 public static boolean isArrayByteBase64(byte[] arrayOctet) { 525 return isBase64(arrayOctet); 526 } 527 528 /** 529 * Returns whether or not the <code>octet</code> is in the base 64 alphabet. 530 * 531 * @param octet 532 * The value to test 533 * @return {@code true} if the value is defined in the the base 64 alphabet, 534 * {@code false} otherwise. 535 * @since 1.4 536 */ 537 public static boolean isBase64(byte octet) { 538 return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1); 539 } 540 541 /** 542 * Tests a given String to see if it contains only valid characters within 543 * the Base64 alphabet. Currently the method treats whitespace as valid. 544 * 545 * @param base64 546 * String to test 547 * @return {@code true} if all characters in the String are valid characters 548 * in the Base64 alphabet or if the String is empty; {@code false}, 549 * otherwise 550 * @since 1.5 551 */ 552 public static boolean isBase64(String base64) { 553 return isBase64(StringUtils.getBytesUtf8(base64)); 554 } 555 556 /** 557 * Tests a given byte array to see if it contains only valid characters 558 * within the Base64 alphabet. Currently the method treats whitespace as 559 * valid. 
560 * 561 * @param arrayOctet 562 * byte array to test 563 * @return {@code true} if all bytes are valid characters in the Base64 564 * alphabet or if the byte array is empty; {@code false}, otherwise 565 * @since 1.5 566 */ 567 public static boolean isBase64(byte[] arrayOctet) { 568 for (int i = 0; i < arrayOctet.length; i++) { 569 if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) { 570 return false; 571 } 572 } 573 return true; 574 } 575 576 /** 577 * Encodes binary data using the base64 algorithm but does not chunk the 578 * output. 579 * 580 * @param binaryData 581 * binary data to encode 582 * @return byte[] containing Base64 characters in their UTF-8 583 * representation. 584 */ 585 public static byte[] encodeBase64(byte[] binaryData) { 586 return encodeBase64(binaryData, false); 587 } 588 589 /** 590 * Encodes binary data using the base64 algorithm but does not chunk the 591 * output. 592 * 593 * NOTE: We changed the behaviour of this method from multi-line chunking 594 * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5). 595 * 596 * @param binaryData 597 * binary data to encode 598 * @return String containing Base64 characters. 599 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not). 600 */ 601 public static String encodeBase64String(byte[] binaryData) { 602 return StringUtils.newStringUtf8(encodeBase64(binaryData, false)); 603 } 604 605 /** 606 * Encodes binary data using a URL-safe variation of the base64 algorithm 607 * but does not chunk the output. The url-safe variation emits - and _ 608 * instead of + and / characters. 609 * 610 * @param binaryData 611 * binary data to encode 612 * @return byte[] containing Base64 characters in their UTF-8 613 * representation. 
614 * @since 1.4 615 */ 616 public static byte[] encodeBase64URLSafe(byte[] binaryData) { 617 return encodeBase64(binaryData, false, true); 618 } 619 620 /** 621 * Encodes binary data using a URL-safe variation of the base64 algorithm 622 * but does not chunk the output. The url-safe variation emits - and _ 623 * instead of + and / characters. 624 * 625 * @param binaryData 626 * binary data to encode 627 * @return String containing Base64 characters 628 * @since 1.4 629 */ 630 public static String encodeBase64URLSafeString(byte[] binaryData) { 631 return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true)); 632 } 633 634 /** 635 * Encodes binary data using the base64 algorithm and chunks the encoded 636 * output into 76 character blocks 637 * 638 * @param binaryData 639 * binary data to encode 640 * @return Base64 characters chunked in 76 character blocks 641 */ 642 public static byte[] encodeBase64Chunked(byte[] binaryData) { 643 return encodeBase64(binaryData, true); 644 } 645 646 /** 647 * Encodes binary data using the base64 algorithm, optionally chunking the 648 * output into 76 character blocks. 649 * 650 * @param binaryData 651 * Array containing binary data to encode. 652 * @param isChunked 653 * if {@code true} this encoder will chunk the base64 output into 654 * 76 character blocks 655 * @return Base64-encoded data. 656 * @throws IllegalArgumentException 657 * Thrown when the input array needs an output array bigger than 658 * {@link Integer#MAX_VALUE} 659 */ 660 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) { 661 return encodeBase64(binaryData, isChunked, false); 662 } 663 664 /** 665 * Encodes binary data using the base64 algorithm, optionally chunking the 666 * output into 76 character blocks. 667 * 668 * @param binaryData 669 * Array containing binary data to encode. 
670 * @param isChunked 671 * if {@code true} this encoder will chunk the base64 output into 672 * 76 character blocks 673 * @param urlSafe 674 * if {@code true} this encoder will emit - and _ instead of the 675 * usual + and / characters. 676 * @return Base64-encoded data. 677 * @throws IllegalArgumentException 678 * Thrown when the input array needs an output array bigger than 679 * {@link Integer#MAX_VALUE} 680 * @since 1.4 681 */ 682 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) { 683 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); 684 } 685 686 /** 687 * Encodes binary data using the base64 algorithm, optionally chunking the 688 * output into 76 character blocks. 689 * 690 * @param binaryData 691 * Array containing binary data to encode. 692 * @param isChunked 693 * if {@code true} this encoder will chunk the base64 output into 694 * 76 character blocks 695 * @param urlSafe 696 * if {@code true} this encoder will emit - and _ instead of the 697 * usual + and / characters. 698 * @param maxResultSize 699 * The maximum result size to accept. 700 * @return Base64-encoded data. 701 * @throws IllegalArgumentException 702 * Thrown when the input array needs an output array bigger than 703 * maxResultSize 704 * @since 1.4 705 */ 706 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) { 707 if (binaryData == null || binaryData.length == 0) { 708 return binaryData; 709 } 710 711 // Create this so can use the super-class method 712 // Also ensures that the same roundings are performed by the ctor and 713 // the code 714 Base64 b64 = isChunked ? 
new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); 715 long len = b64.getEncodedLength(binaryData); 716 if (len > maxResultSize) { 717 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize); 718 } 719 720 return b64.encode(binaryData); 721 } 722 723 /** 724 * Decodes a Base64 String into octets 725 * 726 * @param base64String 727 * String containing Base64 data 728 * @return Array containing decoded data. 729 * @since 1.4 730 */ 731 public static byte[] decodeBase64(String base64String) { 732 return new Base64().decode(base64String); 733 } 734 735 /** 736 * Decodes Base64 data into octets 737 * 738 * @param base64Data 739 * Byte array containing Base64 data 740 * @return Array containing decoded data. 741 */ 742 public static byte[] decodeBase64(byte[] base64Data) { 743 return new Base64().decode(base64Data); 744 } 745 746 // Implementation of the Encoder Interface 747 748 // Implementation of integer encoding used for crypto 749 /** 750 * Decodes a byte64-encoded integer according to crypto standards such as 751 * W3C's XML-Signature 752 * 753 * @param pArray 754 * a byte array containing base64 character data 755 * @return A BigInteger 756 * @since 1.4 757 */ 758 public static BigInteger decodeInteger(byte[] pArray) { 759 return new BigInteger(1, decodeBase64(pArray)); 760 } 761 762 /** 763 * Encodes to a byte64-encoded integer according to crypto standards such as 764 * W3C's XML-Signature 765 * 766 * @param bigInt 767 * a BigInteger 768 * @return A byte array containing base64 character data 769 * @throws NullPointerException 770 * if null is passed in 771 * @since 1.4 772 */ 773 public static byte[] encodeInteger(BigInteger bigInt) { 774 if (bigInt == null) { 775 throw new NullPointerException("encodeInteger called with null parameter"); 776 } 777 return encodeBase64(toIntegerBytes(bigInt), false); 778 } 779 780 /** 781 * Returns a byte-array 
representation of a <code>BigInteger</code> without 782 * sign bit. 783 * 784 * @param bigInt 785 * <code>BigInteger</code> to be converted 786 * @return a byte array representation of the BigInteger parameter 787 */ 788 static byte[] toIntegerBytes(BigInteger bigInt) { 789 int bitlen = bigInt.bitLength(); 790 // round bitlen 791 bitlen = ((bitlen + 7) >> 3) << 3; 792 byte[] bigBytes = bigInt.toByteArray(); 793 794 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) { 795 return bigBytes; 796 } 797 // set up params for copying everything but sign bit 798 int startSrc = 0; 799 int len = bigBytes.length; 800 801 // if bigInt is exactly byte-aligned, just skip signbit in copy 802 if ((bigInt.bitLength() % 8) == 0) { 803 startSrc = 1; 804 len--; 805 } 806 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec 807 byte[] resizedBytes = new byte[bitlen / 8]; 808 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); 809 return resizedBytes; 810 } 811 812 /** 813 * Returns whether or not the <code>octet</code> is in the Base32 alphabet. 814 * 815 * @param octet 816 * The value to test 817 * @return {@code true} if the value is defined in the the Base32 alphabet 818 * {@code false} otherwise. 819 */ 820 protected boolean isInAlphabet(byte octet) { 821 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 822 } 823 824 /** 825 * Holds thread context so classes can be thread-safe. 826 * 827 * This class is not itself thread-safe; each thread must allocate its own 828 * copy. 829 * 830 * @since 1.7 831 */ 832 static class Context { 833 834 /** 835 * Place holder for the bytes we're dealing with for our based logic. 836 * Bitwise operations store and extract the encoding or decoding from 837 * this variable. 838 */ 839 int ibitWorkArea; 840 841 /** 842 * Place holder for the bytes we're dealing with for our based logic. 
843 * Bitwise operations store and extract the encoding or decoding from 844 * this variable. 845 */ 846 long lbitWorkArea; 847 848 /** 849 * Buffer for streaming. 850 */ 851 byte[] buffer; 852 853 /** 854 * Position where next character should be written in the buffer. 855 */ 856 int pos; 857 858 /** 859 * Position where next character should be read from the buffer. 860 */ 861 int readPos; 862 863 /** 864 * Boolean flag to indicate the EOF has been reached. Once EOF has been 865 * reached, this object becomes useless, and must be thrown away. 866 */ 867 boolean eof; 868 869 /** 870 * Variable tracks how many characters have been written to the current 871 * line. Only used when encoding. We use it to make sure each encoded 872 * line never goes beyond lineLength (if lineLength > 0). 873 */ 874 int currentLinePos; 875 876 /** 877 * Writes to the buffer only occur after every 3/5 reads when encoding, 878 * and every 4/8 reads when decoding. This variable helps track that. 879 */ 880 int modulus; 881 882 Context() { 883 } 884 } 885 886 /** 887 * EOF 888 * 889 * @since 1.7 890 */ 891 static final int EOF = -1; 892 893 /** 894 * MIME chunk size per RFC 2045 section 6.8. 895 * 896 * <p> 897 * The {@value} character limit does not count the trailing CRLF, but counts 898 * all other characters, including any equal signs. 899 * </p> 900 * 901 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 902 * 6.8</a> 903 */ 904 public static final int MIME_CHUNK_SIZE = 76; 905 906 /** 907 * PEM chunk size per RFC 1421 section 4.3.2.4. 908 * 909 * <p> 910 * The {@value} character limit does not count the trailing CRLF, but counts 911 * all other characters, including any equal signs. 
912 * </p> 913 * 914 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 915 * 4.3.2.4</a> 916 */ 917 public static final int PEM_CHUNK_SIZE = 64; 918 919 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 920 921 /** 922 * Defines the default buffer size - currently {@value} - must be large 923 * enough for at least one encoded block+separator 924 */ 925 private static final int DEFAULT_BUFFER_SIZE = 8192; 926 927 /** Mask used to extract 8 bits, used in decoding bytes */ 928 protected static final int MASK_8BITS = 0xff; 929 930 /** 931 * Byte used to pad output. 932 */ 933 protected static final byte PAD_DEFAULT = '='; // Allow static access to 934 // default 935 936 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it 937 // needs to vary later 938 939 /** 940 * Number of bytes in each full block of unencoded data, e.g. 4 for Base64 941 * and 5 for Base32 942 */ 943 private final int unencodedBlockSize; 944 945 /** 946 * Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 947 * 8 for Base32 948 */ 949 private final int encodedBlockSize; 950 951 /** 952 * Chunksize for encoding. Not used when decoding. A value of zero or less 953 * implies no chunking of the encoded data. Rounded down to nearest multiple 954 * of encodedBlockSize. 955 */ 956 protected int lineLength; 957 958 /** 959 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 960 */ 961 private int chunkSeparatorLength; 962 963 /** 964 * Note <code>lineLength</code> is rounded down to the nearest multiple of 965 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero, 966 * then chunking is disabled. 967 * 968 * @param unencodedBlockSize 969 * the size of an unencoded block (e.g. Base64 = 3) 970 * @param encodedBlockSize 971 * the size of an encoded block (e.g. 
Base64 = 4) 972 * @param lineLength 973 * if > 0, use chunking with a length <code>lineLength</code> 974 * @param chunkSeparatorLength 975 * the chunk separator length, if relevant 976 */ 977 protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) { 978 this.unencodedBlockSize = unencodedBlockSize; 979 this.encodedBlockSize = encodedBlockSize; 980 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 981 this.chunkSeparatorLength = chunkSeparatorLength; 982 } 983 984 /** 985 * Returns true if this object has buffered data for reading. 986 * 987 * @param context 988 * the context to be used 989 * @return true if there is data still available for reading. 990 */ 991 boolean hasData(Context context) { // package protected for access from I/O 992 // streams 993 return context.buffer != null; 994 } 995 996 /** 997 * Returns the amount of buffered data available for reading. 998 * 999 * @param context 1000 * the context to be used 1001 * @return The amount of buffered data available for reading. 1002 */ 1003 int available(Context context) { // package protected for access from I/O 1004 // streams 1005 return context.buffer != null ? context.pos - context.readPos : 0; 1006 } 1007 1008 /** 1009 * Get the default buffer size. Can be overridden. 1010 * 1011 * @return {@link #DEFAULT_BUFFER_SIZE} 1012 */ 1013 protected int getDefaultBufferSize() { 1014 return DEFAULT_BUFFER_SIZE; 1015 } 1016 1017 /** 1018 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. 
1019 * 1020 * @param context 1021 * the context to be used 1022 */ 1023 private void resizeBuffer(Context context) { 1024 if (context.buffer == null) { 1025 context.buffer = new byte[getDefaultBufferSize()]; 1026 context.pos = 0; 1027 context.readPos = 0; 1028 } else { 1029 byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 1030 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); 1031 context.buffer = b; 1032 } 1033 } 1034 1035 /** 1036 * Ensure that the buffer has room for <code>size</code> bytes 1037 * 1038 * @param size 1039 * minimum spare space required 1040 * @param context 1041 * the context to be used 1042 */ 1043 protected void ensureBufferSize(int size, Context context) { 1044 if ((context.buffer == null) || (context.buffer.length < context.pos + size)) { 1045 resizeBuffer(context); 1046 } 1047 } 1048 1049 /** 1050 * Extracts buffered data into the provided byte[] array, starting at 1051 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes 1052 * were actually extracted. 1053 * 1054 * @param b 1055 * byte[] array to extract the buffered data into. 1056 * @param bPos 1057 * position in byte[] array to start extraction at. 1058 * @param bAvail 1059 * amount of bytes we're allowed to extract. We may extract fewer 1060 * (if fewer are available). 1061 * @param context 1062 * the context to be used 1063 * @return The number of bytes successfully extracted into the provided 1064 * byte[] array. 
1065 */ 1066 int readResults(byte[] b, int bPos, int bAvail, Context context) { // package 1067 // protected 1068 // for 1069 // access 1070 // from 1071 // I/O 1072 // streams 1073 if (context.buffer != null) { 1074 int len = Math.min(available(context), bAvail); 1075 System.arraycopy(context.buffer, context.readPos, b, bPos, len); 1076 context.readPos += len; 1077 if (context.readPos >= context.pos) { 1078 context.buffer = null; // so hasData() will return false, and 1079 // this method can return -1 1080 } 1081 return len; 1082 } 1083 return context.eof ? EOF : 0; 1084 } 1085 1086 /** 1087 * Checks if a byte value is whitespace or not. Whitespace is taken to mean: 1088 * space, tab, CR, LF 1089 * 1090 * @param byteToCheck 1091 * the byte to check 1092 * @return true if byte is whitespace, false otherwise 1093 */ 1094 protected static boolean isWhiteSpace(byte byteToCheck) { 1095 switch (byteToCheck) { 1096 case ' ': 1097 case '\n': 1098 case '\r': 1099 case '\t': 1100 return true; 1101 default: 1102 return false; 1103 } 1104 } 1105 1106 /** 1107 * Encodes an Object using the Base-N algorithm. This method is provided in 1108 * order to satisfy the requirements of the Encoder interface, and will 1109 * throw an EncoderException if the supplied object is not of type byte[]. 1110 * 1111 * @param obj 1112 * Object to encode 1113 * @return An object (of type byte[]) containing the Base-N encoded data 1114 * which corresponds to the byte[] supplied. 1115 * @throws EncoderException 1116 * if the parameter supplied is not of type byte[] 1117 */ 1118 public Object encode(Object obj) throws Exception { 1119 if (!(obj instanceof byte[])) { 1120 throw new Exception("Parameter supplied to Base-N encode is not a byte[]"); 1121 } 1122 return encode((byte[]) obj); 1123 } 1124 1125 /** 1126 * Encodes a byte[] containing binary data, into a String containing 1127 * characters in the Base-N alphabet. Uses UTF8 encoding. 
1128 * 1129 * @param pArray 1130 * a byte array containing binary data 1131 * @return A String containing only Base-N character data 1132 */ 1133 public String encodeToString(byte[] pArray) { 1134 return StringUtils.newStringUtf8(encode(pArray)); 1135 } 1136 1137 /** 1138 * Encodes a byte[] containing binary data, into a String containing 1139 * characters in the appropriate alphabet. Uses UTF8 encoding. 1140 * 1141 * @param pArray 1142 * a byte array containing binary data 1143 * @return String containing only character data in the appropriate 1144 * alphabet. 1145 */ 1146 public String encodeAsString(byte[] pArray) { 1147 return StringUtils.newStringUtf8(encode(pArray)); 1148 } 1149 1150 /** 1151 * Decodes an Object using the Base-N algorithm. This method is provided in 1152 * order to satisfy the requirements of the Decoder interface, and will 1153 * throw a DecoderException if the supplied object is not of type byte[] or 1154 * String. 1155 * 1156 * @param obj 1157 * Object to decode 1158 * @return An object (of type byte[]) containing the binary data which 1159 * corresponds to the byte[] or String supplied. 1160 * @throws DecoderException 1161 * if the parameter supplied is not of type byte[] 1162 */ 1163 public Object decode(Object obj) throws Exception { 1164 if (obj instanceof byte[]) { 1165 return decode((byte[]) obj); 1166 } else if (obj instanceof String) { 1167 return decode((String) obj); 1168 } else { 1169 throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String"); 1170 } 1171 } 1172 1173 /** 1174 * Decodes a String containing characters in the Base-N alphabet. 1175 * 1176 * @param pArray 1177 * A String containing Base-N character data 1178 * @return a byte array containing binary data 1179 */ 1180 public byte[] decode(String pArray) { 1181 return decode(StringUtils.getBytesUtf8(pArray)); 1182 } 1183 1184 /** 1185 * Decodes a byte[] containing characters in the Base-N alphabet. 
1186 * 1187 * @param pArray 1188 * A byte array containing Base-N character data 1189 * @return a byte array containing binary data 1190 */ 1191 public byte[] decode(byte[] pArray) { 1192 Context context = new Context(); 1193 if (pArray == null || pArray.length == 0) { 1194 return pArray; 1195 } 1196 decode(pArray, 0, pArray.length, context); 1197 decode(pArray, 0, EOF, context); // Notify decoder of EOF. 1198 byte[] result = new byte[context.pos]; 1199 readResults(result, 0, result.length, context); 1200 return result; 1201 } 1202 1203 /** 1204 * Encodes a byte[] containing binary data, into a byte[] containing 1205 * characters in the alphabet. 1206 * 1207 * @param pArray 1208 * a byte array containing binary data 1209 * @return A byte array containing only the basen alphabetic character data 1210 */ 1211 public byte[] encode(byte[] pArray) { 1212 Context context = new Context(); 1213 if (pArray == null || pArray.length == 0) { 1214 return pArray; 1215 } 1216 encode(pArray, 0, pArray.length, context); 1217 encode(pArray, 0, EOF, context); // Notify encoder of EOF. 1218 byte[] buf = new byte[context.pos - context.readPos]; 1219 readResults(buf, 0, buf.length, context); 1220 return buf; 1221 } 1222 1223 /** 1224 * Tests a given byte array to see if it contains only valid characters 1225 * within the alphabet. The method optionally treats whitespace and pad as 1226 * valid. 
1227 * 1228 * @param arrayOctet 1229 * byte array to test 1230 * @param allowWSPad 1231 * if {@code true}, then whitespace and PAD are also allowed 1232 * 1233 * @return {@code true} if all bytes are valid characters in the alphabet or 1234 * if the byte array is empty; {@code false}, otherwise 1235 */ 1236 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) { 1237 for (int i = 0; i < arrayOctet.length; i++) { 1238 if (!isInAlphabet(arrayOctet[i]) && (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { 1239 return false; 1240 } 1241 } 1242 return true; 1243 } 1244 1245 /** 1246 * Tests a given String to see if it contains only valid characters within 1247 * the alphabet. The method treats whitespace and PAD as valid. 1248 * 1249 * @param basen 1250 * String to test 1251 * @return {@code true} if all characters in the String are valid characters 1252 * in the alphabet or if the String is empty; {@code false}, 1253 * otherwise 1254 * @see #isInAlphabet(byte[], boolean) 1255 */ 1256 public boolean isInAlphabet(String basen) { 1257 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 1258 } 1259 1260 /** 1261 * Tests a given byte array to see if it contains any characters within the 1262 * alphabet or PAD. 1263 * 1264 * Intended for use in checking line-ending arrays 1265 * 1266 * @param arrayOctet 1267 * byte array to test 1268 * @return {@code true} if any byte is a valid character in the alphabet or 1269 * PAD; {@code false} otherwise 1270 */ 1271 protected boolean containsAlphabetOrPad(byte[] arrayOctet) { 1272 if (arrayOctet == null) { 1273 return false; 1274 } 1275 for (byte element : arrayOctet) { 1276 if (PAD == element || isInAlphabet(element)) { 1277 return true; 1278 } 1279 } 1280 return false; 1281 } 1282 1283 /** 1284 * Calculates the amount of space needed to encode the supplied array. 
1285 * 1286 * @param pArray 1287 * byte[] array which will later be encoded 1288 * 1289 * @return amount of space needed to encoded the supplied array. Returns a 1290 * long since a max-len array will require > Integer.MAX_VALUE 1291 */ 1292 public long getEncodedLength(byte[] pArray) { 1293 // Calculate non-chunked size - rounded up to allow for padding 1294 // cast to long is needed to avoid possibility of overflow 1295 long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize; 1296 if (lineLength > 0) { // We're using chunking 1297 // Round up to nearest multiple 1298 len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength; 1299 } 1300 return len; 1301 } 1302 1303 /** 1304 * Character encoding names required of every implementation of the Java 1305 * platform. 1306 * 1307 * From the Java documentation <a href= 1308 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html" 1309 * >Standard charsets</a>: 1310 * <p> 1311 * <cite>Every implementation of the Java platform is required to support 1312 * the following character encodings. Consult the release documentation for 1313 * your implementation to see if any other encodings are supported. Consult 1314 * the release documentation for your implementation to see if any other 1315 * encodings are supported. </cite> 1316 * </p> 1317 * 1318 * <ul> 1319 * <li><code>US-ASCII</code><br/> 1320 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the 1321 * Unicode character set.</li> 1322 * <li><code>ISO-8859-1</code><br/> 1323 * ISO Latin Alphabet No. 1, a.k.a. 
ISO-LATIN-1.</li> 1324 * <li><code>UTF-8</code><br/> 1325 * Eight-bit Unicode Transformation Format.</li> 1326 * <li><code>UTF-16BE</code><br/> 1327 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li> 1328 * <li><code>UTF-16LE</code><br/> 1329 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li> 1330 * <li><code>UTF-16</code><br/> 1331 * Sixteen-bit Unicode Transformation Format, byte order specified by a 1332 * mandatory initial byte-order mark (either order accepted on input, 1333 * big-endian used on output.)</li> 1334 * </ul> 1335 * 1336 * This perhaps would best belong in the [lang] project. Even if a similar 1337 * interface is defined in [lang], it is not foreseen that [codec] would be 1338 * made to depend on [lang]. 1339 * 1340 * <p> 1341 * This class is immutable and thread-safe. 1342 * </p> 1343 * 1344 * @see <a 1345 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1346 * charsets</a> 1347 * @since 1.4 1348 * @version $Id$ 1349 */ 1350 public class CharEncoding { 1351 /** 1352 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p> 1353 * <p> 1354 * Every implementation of the Java platform is required to support this 1355 * character encoding. 1356 * </p> 1357 * 1358 * @see <a 1359 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1360 * charsets</a> 1361 */ 1362 public static final String ISO_8859_1 = "ISO-8859-1"; 1363 1364 /** 1365 * <p> 1366 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic 1367 * Latin block of the Unicode character set. 1368 * </p> 1369 * <p> 1370 * Every implementation of the Java platform is required to support this 1371 * character encoding. 
1372 * </p> 1373 * 1374 * @see <a 1375 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1376 * charsets</a> 1377 */ 1378 public static final String US_ASCII = "US-ASCII"; 1379 1380 /** 1381 * <p> 1382 * Sixteen-bit Unicode Transformation Format, The byte order specified 1383 * by a mandatory initial byte-order mark (either order accepted on 1384 * input, big-endian used on output) 1385 * </p> 1386 * <p> 1387 * Every implementation of the Java platform is required to support this 1388 * character encoding. 1389 * </p> 1390 * 1391 * @see <a 1392 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1393 * charsets</a> 1394 */ 1395 public static final String UTF_16 = "UTF-16"; 1396 1397 /** 1398 * <p> 1399 * Sixteen-bit Unicode Transformation Format, big-endian byte order. 1400 * </p> 1401 * <p> 1402 * Every implementation of the Java platform is required to support this 1403 * character encoding. 1404 * </p> 1405 * 1406 * @see <a 1407 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1408 * charsets</a> 1409 */ 1410 public static final String UTF_16BE = "UTF-16BE"; 1411 1412 /** 1413 * <p> 1414 * Sixteen-bit Unicode Transformation Format, little-endian byte order. 1415 * </p> 1416 * <p> 1417 * Every implementation of the Java platform is required to support this 1418 * character encoding. 1419 * </p> 1420 * 1421 * @see <a 1422 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1423 * charsets</a> 1424 */ 1425 public static final String UTF_16LE = "UTF-16LE"; 1426 1427 /** 1428 * <p> 1429 * Eight-bit Unicode Transformation Format. 1430 * </p> 1431 * <p> 1432 * Every implementation of the Java platform is required to support this 1433 * character encoding. 
1434 * </p> 1435 * 1436 * @see <a 1437 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1438 * charsets</a> 1439 */ 1440 public static final String UTF_8 = "UTF-8"; 1441 } 1442 1443 /** 1444 * Charsets required of every implementation of the Java platform. 1445 * 1446 * From the Java documentation <a href= 1447 * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html" 1448 * >Standard charsets</a>: 1449 * <p> 1450 * <cite>Every implementation of the Java platform is required to support 1451 * the following character encodings. Consult the release documentation for 1452 * your implementation to see if any other encodings are supported. Consult 1453 * the release documentation for your implementation to see if any other 1454 * encodings are supported. </cite> 1455 * </p> 1456 * 1457 * <ul> 1458 * <li><code>US-ASCII</code><br/> 1459 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the 1460 * Unicode character set.</li> 1461 * <li><code>ISO-8859-1</code><br/> 1462 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li> 1463 * <li><code>UTF-8</code><br/> 1464 * Eight-bit Unicode Transformation Format.</li> 1465 * <li><code>UTF-16BE</code><br/> 1466 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li> 1467 * <li><code>UTF-16LE</code><br/> 1468 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li> 1469 * <li><code>UTF-16</code><br/> 1470 * Sixteen-bit Unicode Transformation Format, byte order specified by a 1471 * mandatory initial byte-order mark (either order accepted on input, 1472 * big-endian used on output.)</li> 1473 * </ul> 1474 * 1475 * This perhaps would best belong in the Commons Lang project. Even if a 1476 * similar class is defined in Commons Lang, it is not foreseen that Commons 1477 * Codec would be made to depend on Commons Lang. 1478 * 1479 * <p> 1480 * This class is immutable and thread-safe. 
1481 * </p> 1482 * 1483 * @see <a 1484 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1485 * charsets</a> 1486 * @since 1.7 1487 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $ 1488 */ 1489 public static class Charsets { 1490 1491 // 1492 // This class should only contain Charset instances for required 1493 // encodings. This guarantees that it will load correctly and 1494 // without delay on all Java platforms. 1495 // 1496 1497 /** 1498 * Returns the given Charset or the default Charset if the given Charset 1499 * is null. 1500 * 1501 * @param charset 1502 * A charset or null. 1503 * @return the given Charset or the default Charset if the given Charset 1504 * is null 1505 */ 1506 public static Charset toCharset(Charset charset) { 1507 return charset == null ? Charset.defaultCharset() : charset; 1508 } 1509 1510 /** 1511 * Returns a Charset for the named charset. If the name is null, return 1512 * the default Charset. 1513 * 1514 * @param charset 1515 * The name of the requested charset, may be null. 1516 * @return a Charset for the named charset 1517 * @throws UnsupportedCharsetException 1518 * If the named charset is unavailable 1519 */ 1520 public static Charset toCharset(String charset) { 1521 return charset == null ? Charset.defaultCharset() : Charset.forName(charset); 1522 } 1523 1524 /** 1525 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p> 1526 * <p> 1527 * Every implementation of the Java platform is required to support this 1528 * character encoding. 1529 * </p> 1530 * 1531 * @see <a 1532 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1533 * charsets</a> 1534 */ 1535 public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1); 1536 1537 /** 1538 * <p> 1539 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic 1540 * Latin block of the Unicode character set. 
1541 * </p> 1542 * <p> 1543 * Every implementation of the Java platform is required to support this 1544 * character encoding. 1545 * </p> 1546 * 1547 * @see <a 1548 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1549 * charsets</a> 1550 */ 1551 public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII); 1552 1553 /** 1554 * <p> 1555 * Sixteen-bit Unicode Transformation Format, The byte order specified 1556 * by a mandatory initial byte-order mark (either order accepted on 1557 * input, big-endian used on output) 1558 * </p> 1559 * <p> 1560 * Every implementation of the Java platform is required to support this 1561 * character encoding. 1562 * </p> 1563 * 1564 * @see <a 1565 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1566 * charsets</a> 1567 */ 1568 public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16); 1569 1570 /** 1571 * <p> 1572 * Sixteen-bit Unicode Transformation Format, big-endian byte order. 1573 * </p> 1574 * <p> 1575 * Every implementation of the Java platform is required to support this 1576 * character encoding. 1577 * </p> 1578 * 1579 * @see <a 1580 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1581 * charsets</a> 1582 */ 1583 public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE); 1584 1585 /** 1586 * <p> 1587 * Sixteen-bit Unicode Transformation Format, little-endian byte order. 1588 * </p> 1589 * <p> 1590 * Every implementation of the Java platform is required to support this 1591 * character encoding. 1592 * </p> 1593 * 1594 * @see <a 1595 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1596 * charsets</a> 1597 */ 1598 public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE); 1599 1600 /** 1601 * <p> 1602 * Eight-bit Unicode Transformation Format. 
1603 * </p> 1604 * <p> 1605 * Every implementation of the Java platform is required to support this 1606 * character encoding. 1607 * </p> 1608 * 1609 * @see <a 1610 * href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1611 * charsets</a> 1612 */ 1613 public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8); 1614 } 1615 1616 /** 1617 * Converts String to and from bytes using the encodings required by the 1618 * Java specification. These encodings are specified in <a href= 1619 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html" 1620 * >Standard charsets</a> 1621 * 1622 * <p> 1623 * This class is immutable and thread-safe. 1624 * </p> 1625 * 1626 * @see CharEncoding 1627 * @see <a 1628 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1629 * charsets</a> 1630 * @version $Id$ 1631 * @since 1.4 1632 */ 1633 public static class StringUtils { 1634 1635 /** 1636 * Calls {@link String#getBytes(Charset)} 1637 * 1638 * @param string 1639 * The string to encode (if null, return null). 1640 * @param charset 1641 * The {@link Charset} to encode the {@code String} 1642 * @return the encoded bytes 1643 */ 1644 private static byte[] getBytes(String string, Charset charset) { 1645 if (string == null) { 1646 return null; 1647 } 1648 return string.getBytes(charset); 1649 } 1650 1651 /** 1652 * Encodes the given string into a sequence of bytes using the 1653 * ISO-8859-1 charset, storing the result into a new byte array. 1654 * 1655 * @param string 1656 * the String to encode, may be {@code null} 1657 * @return encoded bytes, or {@code null} if the input string was 1658 * {@code null} 1659 * @throws NullPointerException 1660 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, 1661 * which should never happen since it is required by the 1662 * Java platform specification. 
1663 * @since As of 1.7, throws {@link NullPointerException} instead of 1664 * UnsupportedEncodingException 1665 * @see <a 1666 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1667 * charsets</a> 1668 * @see #getBytesUnchecked(String, String) 1669 */ 1670 public static byte[] getBytesIso8859_1(String string) { 1671 return getBytes(string, Charsets.ISO_8859_1); 1672 } 1673 1674 /** 1675 * Encodes the given string into a sequence of bytes using the named 1676 * charset, storing the result into a new byte array. 1677 * <p> 1678 * This method catches {@link UnsupportedEncodingException} and rethrows 1679 * it as {@link IllegalStateException}, which should never happen for a 1680 * required charset name. Use this method when the encoding is required 1681 * to be in the JRE. 1682 * </p> 1683 * 1684 * @param string 1685 * the String to encode, may be {@code null} 1686 * @param charsetName 1687 * The name of a required {@link java.nio.charset.Charset} 1688 * @return encoded bytes, or {@code null} if the input string was 1689 * {@code null} 1690 * @throws IllegalStateException 1691 * Thrown when a {@link UnsupportedEncodingException} is 1692 * caught, which should never happen for a required charset 1693 * name. 1694 * @see CharEncoding 1695 * @see String#getBytes(String) 1696 */ 1697 public static byte[] getBytesUnchecked(String string, String charsetName) { 1698 if (string == null) { 1699 return null; 1700 } 1701 try { 1702 return string.getBytes(charsetName); 1703 } catch (UnsupportedEncodingException e) { 1704 throw StringUtils.newIllegalStateException(charsetName, e); 1705 } 1706 } 1707 1708 /** 1709 * Encodes the given string into a sequence of bytes using the US-ASCII 1710 * charset, storing the result into a new byte array. 
1711 * 1712 * @param string 1713 * the String to encode, may be {@code null} 1714 * @return encoded bytes, or {@code null} if the input string was 1715 * {@code null} 1716 * @throws NullPointerException 1717 * Thrown if {@link Charsets#US_ASCII} is not initialized, 1718 * which should never happen since it is required by the 1719 * Java platform specification. 1720 * @since As of 1.7, throws {@link NullPointerException} instead of 1721 * UnsupportedEncodingException 1722 * @see <a 1723 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1724 * charsets</a> 1725 * @see #getBytesUnchecked(String, String) 1726 */ 1727 public static byte[] getBytesUsAscii(String string) { 1728 return getBytes(string, Charsets.US_ASCII); 1729 } 1730 1731 /** 1732 * Encodes the given string into a sequence of bytes using the UTF-16 1733 * charset, storing the result into a new byte array. 1734 * 1735 * @param string 1736 * the String to encode, may be {@code null} 1737 * @return encoded bytes, or {@code null} if the input string was 1738 * {@code null} 1739 * @throws NullPointerException 1740 * Thrown if {@link Charsets#UTF_16} is not initialized, 1741 * which should never happen since it is required by the 1742 * Java platform specification. 1743 * @since As of 1.7, throws {@link NullPointerException} instead of 1744 * UnsupportedEncodingException 1745 * @see <a 1746 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1747 * charsets</a> 1748 * @see #getBytesUnchecked(String, String) 1749 */ 1750 public static byte[] getBytesUtf16(String string) { 1751 return getBytes(string, Charsets.UTF_16); 1752 } 1753 1754 /** 1755 * Encodes the given string into a sequence of bytes using the UTF-16BE 1756 * charset, storing the result into a new byte array. 
1757 * 1758 * @param string 1759 * the String to encode, may be {@code null} 1760 * @return encoded bytes, or {@code null} if the input string was 1761 * {@code null} 1762 * @throws NullPointerException 1763 * Thrown if {@link Charsets#UTF_16BE} is not initialized, 1764 * which should never happen since it is required by the 1765 * Java platform specification. 1766 * @since As of 1.7, throws {@link NullPointerException} instead of 1767 * UnsupportedEncodingException 1768 * @see <a 1769 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1770 * charsets</a> 1771 * @see #getBytesUnchecked(String, String) 1772 */ 1773 public static byte[] getBytesUtf16Be(String string) { 1774 return getBytes(string, Charsets.UTF_16BE); 1775 } 1776 1777 /** 1778 * Encodes the given string into a sequence of bytes using the UTF-16LE 1779 * charset, storing the result into a new byte array. 1780 * 1781 * @param string 1782 * the String to encode, may be {@code null} 1783 * @return encoded bytes, or {@code null} if the input string was 1784 * {@code null} 1785 * @throws NullPointerException 1786 * Thrown if {@link Charsets#UTF_16LE} is not initialized, 1787 * which should never happen since it is required by the 1788 * Java platform specification. 1789 * @since As of 1.7, throws {@link NullPointerException} instead of 1790 * UnsupportedEncodingException 1791 * @see <a 1792 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1793 * charsets</a> 1794 * @see #getBytesUnchecked(String, String) 1795 */ 1796 public static byte[] getBytesUtf16Le(String string) { 1797 return getBytes(string, Charsets.UTF_16LE); 1798 } 1799 1800 /** 1801 * Encodes the given string into a sequence of bytes using the UTF-8 1802 * charset, storing the result into a new byte array. 
1803 * 1804 * @param string 1805 * the String to encode, may be {@code null} 1806 * @return encoded bytes, or {@code null} if the input string was 1807 * {@code null} 1808 * @throws NullPointerException 1809 * Thrown if {@link Charsets#UTF_8} is not initialized, 1810 * which should never happen since it is required by the 1811 * Java platform specification. 1812 * @since As of 1.7, throws {@link NullPointerException} instead of 1813 * UnsupportedEncodingException 1814 * @see <a 1815 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard 1816 * charsets</a> 1817 * @see #getBytesUnchecked(String, String) 1818 */ 1819 public static byte[] getBytesUtf8(String string) { 1820 return getBytes(string, Charsets.UTF_8); 1821 } 1822 1823 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) { 1824 return new IllegalStateException(charsetName + ": " + e); 1825 } 1826 1827 /** 1828 * Constructs a new <code>String</code> by decoding the specified array 1829 * of bytes using the given charset. 1830 * 1831 * @param bytes 1832 * The bytes to be decoded into characters 1833 * @param charset 1834 * The {@link Charset} to encode the {@code String} 1835 * @return A new <code>String</code> decoded from the specified array of 1836 * bytes using the given charset, or {@code null} if the input 1837 * byte array was {@code null}. 1838 * @throws NullPointerException 1839 * Thrown if {@link Charsets#UTF_8} is not initialized, 1840 * which should never happen since it is required by the 1841 * Java platform specification. 1842 */ 1843 private static String newString(byte[] bytes, Charset charset) { 1844 return bytes == null ? null : new String(bytes, charset); 1845 } 1846 1847 /** 1848 * Constructs a new <code>String</code> by decoding the specified array 1849 * of bytes using the given charset. 
1850 * <p> 1851 * This method catches {@link UnsupportedEncodingException} and 1852 * re-throws it as {@link IllegalStateException}, which should never 1853 * happen for a required charset name. Use this method when the encoding 1854 * is required to be in the JRE. 1855 * </p> 1856 * 1857 * @param bytes 1858 * The bytes to be decoded into characters, may be 1859 * {@code null} 1860 * @param charsetName 1861 * The name of a required {@link java.nio.charset.Charset} 1862 * @return A new <code>String</code> decoded from the specified array of 1863 * bytes using the given charset, or {@code null} if the input 1864 * byte array was {@code null}. 1865 * @throws IllegalStateException 1866 * Thrown when a {@link UnsupportedEncodingException} is 1867 * caught, which should never happen for a required charset 1868 * name. 1869 * @see CharEncoding 1870 * @see String#String(byte[], String) 1871 */ 1872 public static String newString(byte[] bytes, String charsetName) { 1873 if (bytes == null) { 1874 return null; 1875 } 1876 try { 1877 return new String(bytes, charsetName); 1878 } catch (UnsupportedEncodingException e) { 1879 throw StringUtils.newIllegalStateException(charsetName, e); 1880 } 1881 } 1882 1883 /** 1884 * Constructs a new <code>String</code> by decoding the specified array 1885 * of bytes using the ISO-8859-1 charset. 1886 * 1887 * @param bytes 1888 * The bytes to be decoded into characters, may be 1889 * {@code null} 1890 * @return A new <code>String</code> decoded from the specified array of 1891 * bytes using the ISO-8859-1 charset, or {@code null} if the 1892 * input byte array was {@code null}. 1893 * @throws NullPointerException 1894 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, 1895 * which should never happen since it is required by the 1896 * Java platform specification. 
1897 * @since As of 1.7, throws {@link NullPointerException} instead of 1898 * UnsupportedEncodingException 1899 */ 1900 public static String newStringIso8859_1(byte[] bytes) { 1901 return new String(bytes, Charsets.ISO_8859_1); 1902 } 1903 1904 /** 1905 * Constructs a new <code>String</code> by decoding the specified array 1906 * of bytes using the US-ASCII charset. 1907 * 1908 * @param bytes 1909 * The bytes to be decoded into characters 1910 * @return A new <code>String</code> decoded from the specified array of 1911 * bytes using the US-ASCII charset, or {@code null} if the 1912 * input byte array was {@code null}. 1913 * @throws NullPointerException 1914 * Thrown if {@link Charsets#US_ASCII} is not initialized, 1915 * which should never happen since it is required by the 1916 * Java platform specification. 1917 * @since As of 1.7, throws {@link NullPointerException} instead of 1918 * UnsupportedEncodingException 1919 */ 1920 public static String newStringUsAscii(byte[] bytes) { 1921 return new String(bytes, Charsets.US_ASCII); 1922 } 1923 1924 /** 1925 * Constructs a new <code>String</code> by decoding the specified array 1926 * of bytes using the UTF-16 charset. 1927 * 1928 * @param bytes 1929 * The bytes to be decoded into characters 1930 * @return A new <code>String</code> decoded from the specified array of 1931 * bytes using the UTF-16 charset or {@code null} if the input 1932 * byte array was {@code null}. 1933 * @throws NullPointerException 1934 * Thrown if {@link Charsets#UTF_16} is not initialized, 1935 * which should never happen since it is required by the 1936 * Java platform specification. 1937 * @since As of 1.7, throws {@link NullPointerException} instead of 1938 * UnsupportedEncodingException 1939 */ 1940 public static String newStringUtf16(byte[] bytes) { 1941 return new String(bytes, Charsets.UTF_16); 1942 } 1943 1944 /** 1945 * Constructs a new <code>String</code> by decoding the specified array 1946 * of bytes using the UTF-16BE charset. 
1947 * 1948 * @param bytes 1949 * The bytes to be decoded into characters 1950 * @return A new <code>String</code> decoded from the specified array of 1951 * bytes using the UTF-16BE charset, or {@code null} if the 1952 * input byte array was {@code null}. 1953 * @throws NullPointerException 1954 * Thrown if {@link Charsets#UTF_16BE} is not initialized, 1955 * which should never happen since it is required by the 1956 * Java platform specification. 1957 * @since As of 1.7, throws {@link NullPointerException} instead of 1958 * UnsupportedEncodingException 1959 */ 1960 public static String newStringUtf16Be(byte[] bytes) { 1961 return new String(bytes, Charsets.UTF_16BE); 1962 } 1963 1964 /** 1965 * Constructs a new <code>String</code> by decoding the specified array 1966 * of bytes using the UTF-16LE charset. 1967 * 1968 * @param bytes 1969 * The bytes to be decoded into characters 1970 * @return A new <code>String</code> decoded from the specified array of 1971 * bytes using the UTF-16LE charset, or {@code null} if the 1972 * input byte array was {@code null}. 1973 * @throws NullPointerException 1974 * Thrown if {@link Charsets#UTF_16LE} is not initialized, 1975 * which should never happen since it is required by the 1976 * Java platform specification. 1977 * @since As of 1.7, throws {@link NullPointerException} instead of 1978 * UnsupportedEncodingException 1979 */ 1980 public static String newStringUtf16Le(byte[] bytes) { 1981 return new String(bytes, Charsets.UTF_16LE); 1982 } 1983 1984 /** 1985 * Constructs a new <code>String</code> by decoding the specified array 1986 * of bytes using the UTF-8 charset. 1987 * 1988 * @param bytes 1989 * The bytes to be decoded into characters 1990 * @return A new <code>String</code> decoded from the specified array of 1991 * bytes using the UTF-8 charset, or {@code null} if the input 1992 * byte array was {@code null}. 
* @throws NullPointerException
         *             Thrown if {@link Charsets#UTF_8} is not initialized,
         *             which should never happen since it is required by the
         *             Java platform specification.
         * @since As of 1.7, throws {@link NullPointerException} instead of
         *        UnsupportedEncodingException
         */
        public static String newStringUtf8(byte[] bytes) {
            // Decoding is handed off to the shared newString helper.
            final String decoded = newString(bytes, Charsets.UTF_8);
            return decoded;
        }

    }

}