View Javadoc
1   /**
2    The contents of this file are subject to the Mozilla Public License Version 1.1
3    (the "License"); you may not use this file except in compliance with the License.
4    You may obtain a copy of the License at http://www.mozilla.org/MPL/
5    Software distributed under the License is distributed on an "AS IS" basis,
6    WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7    specific language governing rights and limitations under the License.
8   
9    The Original Code is "Escape.java".  Description:
10   "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
11   defined in section 2.10 of the standard (version 2.4)"
12  
13   The Initial Developer of the Original Code is University Health Network. Copyright (C)
14   2001.  All Rights Reserved.
15  
16   Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz
17  
18   Alternatively, the contents of this file may be used under the terms of the
19   GNU General Public License (the  "GPL"), in which case the provisions of the GPL are
20   applicable instead of those above.  If you wish to allow use of your version of this
21   file only under the terms of the GPL and not to allow others to use your version
22   of this file under the MPL, indicate your decision by deleting  the provisions above
23   and replace  them with the notice and other provisions required by the GPL License.
24   If you do not delete the provisions above, a recipient may use your version of
25   this file under either the MPL or the GPL.
26   */
27  package ca.uhn.hl7v2.parser;
28  
29  import java.util.Collections;
30  import java.util.LinkedHashMap;
31  import java.util.Map;
32  
33  /**
34   * Handles "escaping" and "unescaping" of text according to the HL7 escape
35   * sequence rules defined in section 2.10 of the standard (version 2.4).
36   * Currently, escape sequences for multiple character sets are unsupported. The
37   * highlighting, hexademical, and locally defined escape sequences are also
38   * unsupported.
39   *
40   * @author Bryan Tripp
41   * @author Mark Lee (Skeva Technologies)
42   * @author Elmar Hinz
43   * @author Christian Ohr
44   */
45  public class DefaultEscaping implements Escaping {
46  
47      /**
48       * limits the size of variousEncChars to 1000, can be overridden by system property.
49       */
50      private static final Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap
51          <EncodingCharacters, EncLookup>(6, 0.75f, true) {
52  
53          private static final long serialVersionUID = 1L;
54          final int maxSize = Integer.parseInt(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
55  
56          @Override
57          protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
58              return this.size() > maxSize;
59          }
60      });
61  
62  
63      /**
64       * @param text string to be escaped
65       * @param encChars encoding characters to be used
66       * @return the escaped string
67       */
68      public String escape(String text, EncodingCharacters encChars) {
69          EncLookup esc = getEscapeSequences(encChars);
70          int textLength = text.length();
71  
72          StringBuilder result = new StringBuilder(textLength);
73          for (int i = 0; i < textLength; i++) {
74              boolean charReplaced = false;
75              char c = text.charAt(i);
76  
77              FORENCCHARS:
78              for (int j = 0; j < esc.characters.length; j++) {
79                  if (text.charAt(i) == esc.characters[j]) {
80  
81                      // Formatting escape sequences such as \.br\ should be left alone
82                      if (j == 4) {
83  
84                          if (i+1 < textLength) {
85  
86                              // Check for \.br\
87                              char nextChar = text.charAt(i + 1);
88                              switch (nextChar) {
89                                  case '.':
90                                  case 'C':
91                                  case 'M':
92                                  case 'X':
93                                  case 'Z':
94                                  {
95                                      int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
96                                      if (nextEscapeIndex > 0) {
97                                          result.append(text, i, nextEscapeIndex + 1);
98                                          charReplaced = true;
99                                          i = nextEscapeIndex;
100                                         break FORENCCHARS;
101                                     }
102                                     break;
103                                 }
104                                 case 'H':
105                                 case 'N':
106                                 {
107                                     if (i+2 < textLength && text.charAt(i+2) == '\\') {
108                                         int nextEscapeIndex = i + 2;
109                                         if (nextEscapeIndex > 0) {
110                                             result.append(text, i, nextEscapeIndex + 1);
111                                             charReplaced = true;
112                                             i = nextEscapeIndex;
113                                             break FORENCCHARS;
114                                         }
115                                     }
116                                     break;
117                                 }
118                             }
119 
120                         }
121 
122                     }
123 
124                     result.append(esc.encodings[j]);
125                     charReplaced = true;
126                     break;
127                 }
128             }
129             if (!charReplaced) {
130                 result.append(c);
131             }
132         }
133         return result.toString();
134     }
135 
136     /**
137      * @param text string to be unescaped
138      * @param encChars encoding characters to be used
139      * @return the unescaped string
140      */
141     public String unescape(String text, EncodingCharacters encChars) {
142 
143         // If the escape char isn't found, we don't need to look for escape sequences
144         char escapeChar = encChars.getEscapeCharacter();
145         boolean foundEscapeChar = false;
146         for (int i = 0; i < text.length(); i++) {
147             if (text.charAt(i) == escapeChar) {
148                 foundEscapeChar = true;
149                 break;
150             }
151         }
152         if (!foundEscapeChar) {
153             return text;
154         }
155 
156         int textLength = text.length();
157         StringBuilder result = new StringBuilder(textLength + 20);
158         EncLookup esc = getEscapeSequences(encChars);
159         char escape = esc.characters[4];
160         int encodingsCount = esc.characters.length;
161         int i = 0;
162         while (i < textLength) {
163             char c = text.charAt(i);
164             if (c != escape) {
165                 result.append(c);
166                 i++;
167             } else {
168                 boolean foundEncoding = false;
169 
170                 // Test against the standard encodings
171                 for (int j = 0; j < encodingsCount; j++) {
172                     String encoding = esc.encodings[j];
173                     int encodingLength = encoding.length();
174                     if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
175                         .equals(encoding)) {
176                         result.append(esc.characters[j]);
177                         i += encodingLength;
178                         foundEncoding = true;
179                         break;
180                     }
181                 }
182 
183                 if (!foundEncoding) {
184 
185                     // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
186                     // formatting codes. They should be left intact
187                     if (i + 1 < textLength) {
188                         char nextChar = text.charAt(i + 1);
189                         switch (nextChar) {
190                             case '.':
191                             case 'C':
192                             case 'M':
193                             case 'X':
194                             case 'Z':
195                             {
196                                 int closingEscape = text.indexOf(escape, i + 1);
197                                 if (closingEscape > 0) {
198                                     String substring = text.substring(i, closingEscape + 1);
199                                     result.append(substring);
200                                     i += substring.length();
201                                 } else {
202                                     i++;
203                                 }
204                                 break;
205                             }
206                             case 'H':
207                             case 'N':
208                             {
209                                 int closingEscape = text.indexOf(escape, i + 1);
210                                 if (closingEscape == i + 2) {
211                                     String substring = text.substring(i, closingEscape + 1);
212                                     result.append(substring);
213                                     i += substring.length();
214                                 } else {
215                                     i++;
216                                 }
217                                 break;
218                             }
219                             default:
220                             {
221                                 i++;
222                             }
223                         }
224 
225                     } else {
226                         i++;
227                     }
228                 }
229 
230 
231             }
232         }
233         return result.toString();
234     }
235 
236     /**
237      * Returns a HashTable with escape sequences as keys, and corresponding
238      * Strings as values.
239      */
240     private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
241         EncLookup escapeSequences = variousEncChars.get(encChars);
242         if (escapeSequences == null) {
243             // this means we haven't got the sequences for these encoding
244             // characters yet - let's make them
245             escapeSequences = new EncLookup(encChars);
246             variousEncChars.put(encChars, escapeSequences);
247         }
248         return escapeSequences;
249     }
250 
251 
252 
253 
254     /**
255      * A performance-optimized replacement for using when
256      * mapping from HL7 special characters to their respective
257      * encodings
258      *
259      * @author Christian Ohr
260      */
261     private static class EncLookup {
262 
263         private static final char[] CODES = {'F', 'S', 'T', 'R', 'E', 'L'};
264         private final char[] characters = new char[7];
265         final String[] encodings = new String[7];
266 
267         EncLookup(EncodingCharacters ec) {
268             characters[0] = ec.getFieldSeparator();
269             characters[1] = ec.getComponentSeparator();
270             characters[2] = ec.getSubcomponentSeparator();
271             characters[3] = ec.getRepetitionSeparator();
272             characters[4] = ec.getEscapeCharacter();
273             characters[5] = ec.getTruncationCharacter();
274             characters[6] = '\r';
275 
276             for (int i = 0; i < CODES.length; i++) {
277                 String seq = String.valueOf(ec.getEscapeCharacter()) +
278                         CODES[i] +
279                         ec.getEscapeCharacter();
280                 encodings[i] = seq;
281             }
282             // Escaping of truncation # is not implemented yet. It may only be escaped if it is the first character that
283             // exceeds the conformance length of the component (ch 2.5.5.2). As of now, this information is not
284             // available at this place.
285             encodings[5] = "#";
286             encodings[6] = "\\X000d\\";
287         }
288     }
289 }
290