1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
|
23 | |
|
24 | |
|
25 | |
|
26 | |
package ca.uhn.hl7v2.llp; |
27 | |
|
28 | |
import java.io.UnsupportedEncodingException; |
29 | |
import java.nio.charset.Charset; |
30 | |
import java.util.Arrays; |
31 | |
|
32 | |
import ca.uhn.hl7v2.HL7Exception; |
33 | |
import ca.uhn.hl7v2.parser.EncodingNotSupportedException; |
34 | |
import ca.uhn.hl7v2.preparser.PreParser; |
35 | |
import org.slf4j.Logger; |
36 | |
import org.slf4j.LoggerFactory; |
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | 0 | public class CharSetUtil { |
45 | |
|
46 | 5 | private static final Logger LOG = LoggerFactory.getLogger(CharSetUtil.class); |
47 | |
|
48 | |
static Charset checkCharset(String message, Charset defaultCharset) { |
49 | 185 | Charset charset = defaultCharset; |
50 | |
try { |
51 | 185 | String[] fields = PreParser.getFields(message, "MSH-18(0)"); |
52 | 180 | String hl7CharsetName = stripNonLowAscii(fields[0]); |
53 | 180 | if (hl7CharsetName != null && hl7CharsetName.length() > 0) |
54 | 65 | charset = HL7Charsets.getCharsetForHL7Encoding(hl7CharsetName); |
55 | 175 | LOG.trace("Detected MSH-18 value \"{}\" so using charset {}", hl7CharsetName, charset.displayName()); |
56 | 5 | } catch (EncodingNotSupportedException e) { |
57 | 5 | LOG.warn("Invalid or unsupported charset in MSH-18. Defaulting to {}", charset.displayName()); |
58 | 5 | } catch (HL7Exception e) { |
59 | 5 | LOG.warn("Failed to parse MSH segment. Defaulting to {}", charset.displayName(), e); |
60 | 180 | } |
61 | 185 | return charset; |
62 | |
} |
63 | |
|
64 | |
static Charset checkCharset(byte[] message, Charset defaultCharset) { |
65 | 100 | String guessMessage = BOM.skipBOM(message); |
66 | 100 | return checkCharset(guessMessage, defaultCharset); |
67 | |
} |
68 | |
|
69 | |
private static String stripNonLowAscii(String theString) { |
70 | 180 | if (theString == null) return ""; |
71 | 65 | StringBuilder b = new StringBuilder(); |
72 | |
|
73 | 665 | for (int i = 0; i < theString.length(); i++) { |
74 | 600 | char next = theString.charAt(i); |
75 | 600 | if (next > 0 && next < 127) { |
76 | 600 | b.append(next); |
77 | |
} |
78 | |
} |
79 | |
|
80 | 65 | return b.toString(); |
81 | |
} |
82 | |
|
83 | |
public static byte[] withoutBOM(byte[] bytes) { |
84 | 5 | BOM bom = BOM.getBOM(bytes); |
85 | 5 | byte[] withoutBOM = new byte[bytes.length - bom.bytes.length]; |
86 | 5 | System.arraycopy(bytes, bom.bytes.length, withoutBOM, 0, bytes.length - bom.bytes.length); |
87 | 5 | return withoutBOM; |
88 | |
} |
89 | |
|
90 | 125 | private enum BOM { |
91 | |
|
92 | 5 | UTF_8(new byte[]{ |
93 | |
(byte) 0xEF, |
94 | |
(byte) 0xBB, |
95 | |
(byte) 0xBF}, "UTF-8"), |
96 | 5 | UTF_16_LE(new byte[]{ |
97 | |
(byte) 0xFF, |
98 | |
(byte) 0xFE}, "UTF-16LE"), |
99 | 5 | UTF_16_BE(new byte[]{ |
100 | |
(byte) 0xFE, |
101 | |
(byte) 0xFF}, "UTF-16BE"), |
102 | 5 | UTF_32_LE(new byte[]{ |
103 | |
(byte) 0xFF, |
104 | |
(byte) 0xFE, |
105 | |
(byte) 0x00, |
106 | |
(byte) 0x00}, "UTF-32LE"), |
107 | 5 | UTF_32_BE(new byte[]{ |
108 | |
(byte) 0x00, |
109 | |
(byte) 0x00, |
110 | |
(byte) 0xFE, |
111 | |
(byte) 0xFF}, "UTF-32BE"), |
112 | 5 | NONE(new byte[]{}, "US-ASCII"); |
113 | |
|
114 | |
private byte[] bytes; |
115 | |
private Charset charset; |
116 | |
|
117 | 30 | BOM(byte[] bytes, String charset) { |
118 | 30 | this.bytes = bytes; |
119 | 30 | this.charset = Charset.forName(charset); |
120 | 30 | } |
121 | |
|
122 | |
public static BOM getBOM(byte[] bytes) { |
123 | 585 | for (BOM bom : BOM.values()) { |
124 | 585 | byte[] bytesToCompare = new byte[bom.bytes.length]; |
125 | 585 | System.arraycopy(bytes, 0, bytesToCompare, 0, bom.bytes.length); |
126 | 585 | if (Arrays.equals(bom.bytes, bytesToCompare)) return bom; |
127 | |
} |
128 | 0 | return BOM.NONE; |
129 | |
} |
130 | |
|
131 | |
public static String skipBOM(byte[] bytes) { |
132 | |
try { |
133 | 100 | BOM bom = getBOM(bytes); |
134 | 100 | return new String(bytes, bom.bytes.length, bytes.length - bom.bytes.length, bom.charset.toString()); |
135 | 0 | } catch (UnsupportedEncodingException e) { |
136 | |
|
137 | 0 | return null; |
138 | |
} |
139 | |
} |
140 | |
|
141 | |
|
142 | |
|
143 | |
|
144 | |
} |
145 | |
} |