| 1 | |
|
| 2 | |
|
| 3 | |
|
| 4 | |
|
| 5 | |
|
| 6 | |
|
| 7 | |
|
| 8 | |
|
| 9 | |
|
| 10 | |
|
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | |
|
| 17 | |
|
| 18 | |
|
| 19 | |
|
| 20 | |
|
| 21 | |
|
| 22 | |
|
| 23 | |
|
| 24 | |
|
| 25 | |
|
| 26 | |
package ca.uhn.hl7v2.llp; |
| 27 | |
|
| 28 | |
import java.io.UnsupportedEncodingException; |
| 29 | |
import java.nio.charset.Charset; |
| 30 | |
import java.util.Arrays; |
| 31 | |
|
| 32 | |
import ca.uhn.hl7v2.HL7Exception; |
| 33 | |
import ca.uhn.hl7v2.parser.EncodingNotSupportedException; |
| 34 | |
import ca.uhn.hl7v2.preparser.PreParser; |
| 35 | |
import org.slf4j.Logger; |
| 36 | |
import org.slf4j.LoggerFactory; |
| 37 | |
|
| 38 | |
|
| 39 | |
|
| 40 | |
|
| 41 | |
|
| 42 | |
|
| 43 | |
|
| 44 | 0 | public class CharSetUtil { |
| 45 | |
|
| 46 | 5 | private static final Logger LOG = LoggerFactory.getLogger(CharSetUtil.class); |
| 47 | |
|
| 48 | |
static Charset checkCharset(String message, Charset defaultCharset) { |
| 49 | 185 | Charset charset = defaultCharset; |
| 50 | |
try { |
| 51 | 185 | String[] fields = PreParser.getFields(message, "MSH-18(0)"); |
| 52 | 180 | String hl7CharsetName = stripNonLowAscii(fields[0]); |
| 53 | 180 | if (hl7CharsetName != null && hl7CharsetName.length() > 0) |
| 54 | 65 | charset = HL7Charsets.getCharsetForHL7Encoding(hl7CharsetName); |
| 55 | 175 | LOG.trace("Detected MSH-18 value \"{}\" so using charset {}", hl7CharsetName, charset.displayName()); |
| 56 | 5 | } catch (EncodingNotSupportedException e) { |
| 57 | 5 | LOG.warn("Invalid or unsupported charset in MSH-18. Defaulting to {}", charset.displayName()); |
| 58 | 5 | } catch (HL7Exception e) { |
| 59 | 5 | LOG.warn("Failed to parse MSH segment. Defaulting to {}", charset.displayName(), e); |
| 60 | 180 | } |
| 61 | 185 | return charset; |
| 62 | |
} |
| 63 | |
|
| 64 | |
static Charset checkCharset(byte[] message, Charset defaultCharset) { |
| 65 | 100 | String guessMessage = BOM.skipBOM(message); |
| 66 | 100 | return checkCharset(guessMessage, defaultCharset); |
| 67 | |
} |
| 68 | |
|
| 69 | |
private static String stripNonLowAscii(String theString) { |
| 70 | 180 | if (theString == null) return ""; |
| 71 | 65 | StringBuilder b = new StringBuilder(); |
| 72 | |
|
| 73 | 665 | for (int i = 0; i < theString.length(); i++) { |
| 74 | 600 | char next = theString.charAt(i); |
| 75 | 600 | if (next > 0 && next < 127) { |
| 76 | 600 | b.append(next); |
| 77 | |
} |
| 78 | |
} |
| 79 | |
|
| 80 | 65 | return b.toString(); |
| 81 | |
} |
| 82 | |
|
| 83 | |
public static byte[] withoutBOM(byte[] bytes) { |
| 84 | 5 | BOM bom = BOM.getBOM(bytes); |
| 85 | 5 | byte[] withoutBOM = new byte[bytes.length - bom.bytes.length]; |
| 86 | 5 | System.arraycopy(bytes, bom.bytes.length, withoutBOM, 0, bytes.length - bom.bytes.length); |
| 87 | 5 | return withoutBOM; |
| 88 | |
} |
| 89 | |
|
| 90 | 125 | private enum BOM { |
| 91 | |
|
| 92 | 5 | UTF_8(new byte[]{ |
| 93 | |
(byte) 0xEF, |
| 94 | |
(byte) 0xBB, |
| 95 | |
(byte) 0xBF}, "UTF-8"), |
| 96 | 5 | UTF_16_LE(new byte[]{ |
| 97 | |
(byte) 0xFF, |
| 98 | |
(byte) 0xFE}, "UTF-16LE"), |
| 99 | 5 | UTF_16_BE(new byte[]{ |
| 100 | |
(byte) 0xFE, |
| 101 | |
(byte) 0xFF}, "UTF-16BE"), |
| 102 | 5 | UTF_32_LE(new byte[]{ |
| 103 | |
(byte) 0xFF, |
| 104 | |
(byte) 0xFE, |
| 105 | |
(byte) 0x00, |
| 106 | |
(byte) 0x00}, "UTF-32LE"), |
| 107 | 5 | UTF_32_BE(new byte[]{ |
| 108 | |
(byte) 0x00, |
| 109 | |
(byte) 0x00, |
| 110 | |
(byte) 0xFE, |
| 111 | |
(byte) 0xFF}, "UTF-32BE"), |
| 112 | 5 | NONE(new byte[]{}, "US-ASCII"); |
| 113 | |
|
| 114 | |
private byte[] bytes; |
| 115 | |
private Charset charset; |
| 116 | |
|
| 117 | 30 | BOM(byte[] bytes, String charset) { |
| 118 | 30 | this.bytes = bytes; |
| 119 | 30 | this.charset = Charset.forName(charset); |
| 120 | 30 | } |
| 121 | |
|
| 122 | |
public static BOM getBOM(byte[] bytes) { |
| 123 | 585 | for (BOM bom : BOM.values()) { |
| 124 | 585 | byte[] bytesToCompare = new byte[bom.bytes.length]; |
| 125 | 585 | System.arraycopy(bytes, 0, bytesToCompare, 0, bom.bytes.length); |
| 126 | 585 | if (Arrays.equals(bom.bytes, bytesToCompare)) return bom; |
| 127 | |
} |
| 128 | 0 | return BOM.NONE; |
| 129 | |
} |
| 130 | |
|
| 131 | |
public static String skipBOM(byte[] bytes) { |
| 132 | |
try { |
| 133 | 100 | BOM bom = getBOM(bytes); |
| 134 | 100 | return new String(bytes, bom.bytes.length, bytes.length - bom.bytes.length, bom.charset.toString()); |
| 135 | 0 | } catch (UnsupportedEncodingException e) { |
| 136 | |
|
| 137 | 0 | return null; |
| 138 | |
} |
| 139 | |
} |
| 140 | |
|
| 141 | |
|
| 142 | |
|
| 143 | |
|
| 144 | |
} |
| 145 | |
} |