1 /**
2 The contents of this file are subject to the Mozilla Public License Version 1.1
3 (the "License"); you may not use this file except in compliance with the License.
4 You may obtain a copy of the License at http://www.mozilla.org/MPL/
5 Software distributed under the License is distributed on an "AS IS" basis,
6 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7 specific language governing rights and limitations under the License.
8
9 The Original Code is "Escape.java". Description:
10 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
11 defined in section 2.10 of the standard (version 2.4)"
12
13 The Initial Developer of the Original Code is University Health Network. Copyright (C)
14 2001. All Rights Reserved.
15
16 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz
17
18 Alternatively, the contents of this file may be used under the terms of the
19 GNU General Public License (the "GPL"), in which case the provisions of the GPL are
20 applicable instead of those above. If you wish to allow use of your version of this
21 file only under the terms of the GPL and not to allow others to use your version
22 of this file under the MPL, indicate your decision by deleting the provisions above
23 and replace them with the notice and other provisions required by the GPL License.
24 If you do not delete the provisions above, a recipient may use your version of
25 this file under either the MPL or the GPL.
26 */
27 package ca.uhn.hl7v2.parser;
28
29 import java.util.Collections;
30 import java.util.LinkedHashMap;
31 import java.util.Map;
32
33 /**
34 * Handles "escaping" and "unescaping" of text according to the HL7 escape
35 * sequence rules defined in section 2.10 of the standard (version 2.4).
36 * Currently, escape sequences for multiple character sets are unsupported. The
37 * highlighting, hexademical, and locally defined escape sequences are also
38 * unsupported.
39 *
40 * @author Bryan Tripp
41 * @author Mark Lee (Skeva Technologies)
42 * @author Elmar Hinz
43 * @author Christian Ohr
44 */
45 public class DefaultEscaping implements Escaping {
46
47 /**
48 * limits the size of variousEncChars to 1000, can be overridden by system property.
49 */
50 private static final Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap
51 <EncodingCharacters, EncLookup>(6, 0.75f, true) {
52
53 private static final long serialVersionUID = 1L;
54 final int maxSize = Integer.parseInt(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
55
56 @Override
57 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
58 return this.size() > maxSize;
59 }
60 });
61
62
63 /**
64 * @param text string to be escaped
65 * @param encChars encoding characters to be used
66 * @return the escaped string
67 */
68 public String escape(String text, EncodingCharacters encChars) {
69 EncLookup esc = getEscapeSequences(encChars);
70 int textLength = text.length();
71
72 StringBuilder result = new StringBuilder(textLength);
73 for (int i = 0; i < textLength; i++) {
74 boolean charReplaced = false;
75 char c = text.charAt(i);
76
77 FORENCCHARS:
78 for (int j = 0; j < esc.characters.length; j++) {
79 if (text.charAt(i) == esc.characters[j]) {
80
81 // Formatting escape sequences such as \.br\ should be left alone
82 if (j == 4) {
83
84 if (i+1 < textLength) {
85
86 // Check for \.br\
87 char nextChar = text.charAt(i + 1);
88 switch (nextChar) {
89 case '.':
90 case 'C':
91 case 'M':
92 case 'X':
93 case 'Z':
94 {
95 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
96 if (nextEscapeIndex > 0) {
97 result.append(text, i, nextEscapeIndex + 1);
98 charReplaced = true;
99 i = nextEscapeIndex;
100 break FORENCCHARS;
101 }
102 break;
103 }
104 case 'H':
105 case 'N':
106 {
107 if (i+2 < textLength && text.charAt(i+2) == '\\') {
108 int nextEscapeIndex = i + 2;
109 if (nextEscapeIndex > 0) {
110 result.append(text, i, nextEscapeIndex + 1);
111 charReplaced = true;
112 i = nextEscapeIndex;
113 break FORENCCHARS;
114 }
115 }
116 break;
117 }
118 }
119
120 }
121
122 }
123
124 result.append(esc.encodings[j]);
125 charReplaced = true;
126 break;
127 }
128 }
129 if (!charReplaced) {
130 result.append(c);
131 }
132 }
133 return result.toString();
134 }
135
136 /**
137 * @param text string to be unescaped
138 * @param encChars encoding characters to be used
139 * @return the unescaped string
140 */
141 public String unescape(String text, EncodingCharacters encChars) {
142
143 // If the escape char isn't found, we don't need to look for escape sequences
144 char escapeChar = encChars.getEscapeCharacter();
145 boolean foundEscapeChar = false;
146 for (int i = 0; i < text.length(); i++) {
147 if (text.charAt(i) == escapeChar) {
148 foundEscapeChar = true;
149 break;
150 }
151 }
152 if (!foundEscapeChar) {
153 return text;
154 }
155
156 int textLength = text.length();
157 StringBuilder result = new StringBuilder(textLength + 20);
158 EncLookup esc = getEscapeSequences(encChars);
159 char escape = esc.characters[4];
160 int encodingsCount = esc.characters.length;
161 int i = 0;
162 while (i < textLength) {
163 char c = text.charAt(i);
164 if (c != escape) {
165 result.append(c);
166 i++;
167 } else {
168 boolean foundEncoding = false;
169
170 // Test against the standard encodings
171 for (int j = 0; j < encodingsCount; j++) {
172 String encoding = esc.encodings[j];
173 int encodingLength = encoding.length();
174 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
175 .equals(encoding)) {
176 result.append(esc.characters[j]);
177 i += encodingLength;
178 foundEncoding = true;
179 break;
180 }
181 }
182
183 if (!foundEncoding) {
184
185 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
186 // formatting codes. They should be left intact
187 if (i + 1 < textLength) {
188 char nextChar = text.charAt(i + 1);
189 switch (nextChar) {
190 case '.':
191 case 'C':
192 case 'M':
193 case 'X':
194 case 'Z':
195 {
196 int closingEscape = text.indexOf(escape, i + 1);
197 if (closingEscape > 0) {
198 String substring = text.substring(i, closingEscape + 1);
199 result.append(substring);
200 i += substring.length();
201 } else {
202 i++;
203 }
204 break;
205 }
206 case 'H':
207 case 'N':
208 {
209 int closingEscape = text.indexOf(escape, i + 1);
210 if (closingEscape == i + 2) {
211 String substring = text.substring(i, closingEscape + 1);
212 result.append(substring);
213 i += substring.length();
214 } else {
215 i++;
216 }
217 break;
218 }
219 default:
220 {
221 i++;
222 }
223 }
224
225 } else {
226 i++;
227 }
228 }
229
230
231 }
232 }
233 return result.toString();
234 }
235
236 /**
237 * Returns a HashTable with escape sequences as keys, and corresponding
238 * Strings as values.
239 */
240 private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
241 EncLookup escapeSequences = variousEncChars.get(encChars);
242 if (escapeSequences == null) {
243 // this means we haven't got the sequences for these encoding
244 // characters yet - let's make them
245 escapeSequences = new EncLookup(encChars);
246 variousEncChars.put(encChars, escapeSequences);
247 }
248 return escapeSequences;
249 }
250
251
252
253
254 /**
255 * A performance-optimized replacement for using when
256 * mapping from HL7 special characters to their respective
257 * encodings
258 *
259 * @author Christian Ohr
260 */
261 private static class EncLookup {
262
263 private static final char[] CODES = {'F', 'S', 'T', 'R', 'E', 'L'};
264 private final char[] characters = new char[7];
265 final String[] encodings = new String[7];
266
267 EncLookup(EncodingCharacters ec) {
268 characters[0] = ec.getFieldSeparator();
269 characters[1] = ec.getComponentSeparator();
270 characters[2] = ec.getSubcomponentSeparator();
271 characters[3] = ec.getRepetitionSeparator();
272 characters[4] = ec.getEscapeCharacter();
273 characters[5] = ec.getTruncationCharacter();
274 characters[6] = '\r';
275
276 for (int i = 0; i < CODES.length; i++) {
277 String seq = String.valueOf(ec.getEscapeCharacter()) +
278 CODES[i] +
279 ec.getEscapeCharacter();
280 encodings[i] = seq;
281 }
282 // Escaping of truncation # is not implemented yet. It may only be escaped if it is the first character that
283 // exceeds the conformance length of the component (ch 2.5.5.2). As of now, this information is not
284 // available at this place.
285 encodings[5] = "#";
286 encodings[6] = "\\X000d\\";
287 }
288 }
289 }
290