1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package ca.uhn.hl7v2.llp;
27
28 import java.io.UnsupportedEncodingException;
29 import java.nio.charset.Charset;
30 import java.util.Arrays;
31
32 import ca.uhn.hl7v2.HL7Exception;
33 import ca.uhn.hl7v2.parser.EncodingNotSupportedException;
34 import ca.uhn.hl7v2.preparser.PreParser;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
37
38
39
40
41
42
43
44 public class CharSetUtil {
45
46 private static final Logger LOG = LoggerFactory.getLogger(CharSetUtil.class);
47
48 static Charset checkCharset(String message, Charset defaultCharset) {
49 Charset charset = defaultCharset;
50 try {
51 String[] fields = PreParser.getFields(message, "MSH-18(0)");
52 String hl7CharsetName = stripNonLowAscii(fields[0]);
53 if (hl7CharsetName.length() > 0)
54 charset = HL7Charsets.getCharsetForHL7Encoding(hl7CharsetName);
55 LOG.trace("Detected MSH-18 value \"{}\" so using charset {}", hl7CharsetName, charset.displayName());
56 } catch (EncodingNotSupportedException e) {
57 LOG.warn("Invalid or unsupported charset in MSH-18. Defaulting to {}", charset.displayName());
58 } catch (HL7Exception e) {
59 LOG.warn("Failed to parse MSH segment. Defaulting to {}", charset.displayName(), e);
60 }
61 return charset;
62 }
63
64 static Charset checkCharset(byte[] message, Charset defaultCharset) {
65 String guessMessage = BOM.skipBOM(message);
66 return checkCharset(guessMessage, defaultCharset);
67 }
68
69 private static String stripNonLowAscii(String theString) {
70 if (theString == null) return "";
71 StringBuilder b = new StringBuilder();
72
73 for (int i = 0; i < theString.length(); i++) {
74 char next = theString.charAt(i);
75 if (next > 0 && next < 127) {
76 b.append(next);
77 }
78 }
79
80 return b.toString();
81 }
82
83 public static byte[] withoutBOM(byte[] bytes) {
84 BOM bom = BOM.getBOM(bytes);
85 byte[] withoutBOM = new byte[bytes.length - bom.bytes.length];
86 System.arraycopy(bytes, bom.bytes.length, withoutBOM, 0, bytes.length - bom.bytes.length);
87 return withoutBOM;
88 }
89
90 private enum BOM {
91
92 UTF_8(new byte[]{
93 (byte) 0xEF,
94 (byte) 0xBB,
95 (byte) 0xBF}, "UTF-8"),
96 UTF_16_LE(new byte[]{
97 (byte) 0xFF,
98 (byte) 0xFE}, "UTF-16LE"),
99 UTF_16_BE(new byte[]{
100 (byte) 0xFE,
101 (byte) 0xFF}, "UTF-16BE"),
102 UTF_32_LE(new byte[]{
103 (byte) 0xFF,
104 (byte) 0xFE,
105 (byte) 0x00,
106 (byte) 0x00}, "UTF-32LE"),
107 UTF_32_BE(new byte[]{
108 (byte) 0x00,
109 (byte) 0x00,
110 (byte) 0xFE,
111 (byte) 0xFF}, "UTF-32BE"),
112 NONE(new byte[]{}, "US-ASCII");
113
114 private final byte[] bytes;
115 private final Charset charset;
116
117 BOM(byte[] bytes, String charset) {
118 this.bytes = bytes;
119 this.charset = Charset.forName(charset);
120 }
121
122 public static BOM getBOM(byte[] bytes) {
123 for (BOM bom : BOM.values()) {
124 byte[] bytesToCompare = new byte[bom.bytes.length];
125 System.arraycopy(bytes, 0, bytesToCompare, 0, bom.bytes.length);
126 if (Arrays.equals(bom.bytes, bytesToCompare)) return bom;
127 }
128 return BOM.NONE;
129 }
130
131 public static String skipBOM(byte[] bytes) {
132 try {
133 BOM bom = getBOM(bytes);
134 return new String(bytes, bom.bytes.length, bytes.length - bom.bytes.length, bom.charset.toString());
135 } catch (UnsupportedEncodingException e) {
136
137 return null;
138 }
139 }
140
141
142
143
144 }
145 }