001/** 002 The contents of this file are subject to the Mozilla Public License Version 1.1 003 (the "License"); you may not use this file except in compliance with the License. 004 You may obtain a copy of the License at http://www.mozilla.org/MPL/ 005 Software distributed under the License is distributed on an "AS IS" basis, 006 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 007 specific language governing rights and limitations under the License. 008 009 The Original Code is "Escape.java". Description: 010 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules 011 defined in section 2.10 of the standard (version 2.4)" 012 013 The Initial Developer of the Original Code is University Health Network. Copyright (C) 014 2001. All Rights Reserved. 015 016 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 017 018 Alternatively, the contents of this file may be used under the terms of the 019 GNU General Public License (the "GPL"), in which case the provisions of the GPL are 020 applicable instead of those above. If you wish to allow use of your version of this 021 file only under the terms of the GPL and not to allow others to use your version 022 of this file under the MPL, indicate your decision by deleting the provisions above 023 and replace them with the notice and other provisions required by the GPL License. 024 If you do not delete the provisions above, a recipient may use your version of 025 this file under either the MPL or the GPL. 026 */ 027package ca.uhn.hl7v2.parser; 028 029import java.util.Collections; 030import java.util.LinkedHashMap; 031import java.util.Map; 032 033/** 034 * Handles "escaping" and "unescaping" of text according to the HL7 escape 035 * sequence rules defined in section 2.10 of the standard (version 2.4). 036 * Currently, escape sequences for multiple character sets are unsupported. The 037 * highlighting, hexademical, and locally defined escape sequences are also 038 * unsupported. 039 * 040 * @author Bryan Tripp 041 * @author Mark Lee (Skeva Technologies) 042 * @author Elmar Hinz 043 * @author Christian Ohr 044 */ 045public class DefaultEscaping implements Escaping { 046 047 /** 048 * limits the size of variousEncChars to 1000, can be overridden by system property. 049 */ 050 private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap 051 <EncodingCharacters, EncLookup>(5, 0.75f, true) { 052 053 private static final long serialVersionUID = 1L; 054 final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000")); 055 056 @Override 057 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) { 058 return this.size() > maxSize; 059 } 060 }); 061 062 063 /** 064 * @param text string to be escaped 065 * @param encChars encoding characters to be used 066 * @return the escaped string 067 */ 068 public String escape(String text, EncodingCharacters encChars) { 069 EncLookup esc = getEscapeSequences(encChars); 070 int textLength = text.length(); 071 072 StringBuilder result = new StringBuilder(textLength); 073 for (int i = 0; i < textLength; i++) { 074 boolean charReplaced = false; 075 char c = text.charAt(i); 076 077 FORENCCHARS: 078 for (int j = 0; j < 6; j++) { 079 if (text.charAt(i) == esc.characters[j]) { 080 081 // Formatting escape sequences such as \.br\ should be left alone 082 if (j == 4) { 083 084 if (i+1 < textLength) { 085 086 // Check for \.br\ 087 char nextChar = text.charAt(i + 1); 088 switch (nextChar) { 089 case '.': 090 case 'C': 091 case 'M': 092 case 'X': 093 case 'Z': 094 { 095 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1); 096 if (nextEscapeIndex > 0) { 097 result.append(text.substring(i, nextEscapeIndex + 1)); 098 charReplaced = true; 099 i = nextEscapeIndex; 100 break FORENCCHARS; 101 } 102 break; 103 } 104 case 'H': 105 case 'N': 106 { 107 if (i+2 < textLength && text.charAt(i+2) == '\\') { 108 int nextEscapeIndex = i + 2; 109 if (nextEscapeIndex > 0) { 110 result.append(text.substring(i, nextEscapeIndex + 1)); 111 charReplaced = true; 112 i = nextEscapeIndex; 113 break FORENCCHARS; 114 } 115 } 116 break; 117 } 118 } 119 120 } 121 122 } 123 124 result.append(esc.encodings[j]); 125 charReplaced = true; 126 break; 127 } 128 } 129 if (!charReplaced) { 130 result.append(c); 131 } 132 } 133 return result.toString(); 134 } 135 136 /** 137 * @param text string to be unescaped 138 * @param encChars encoding characters to be used 139 * @return the unescaped string 140 */ 141 public String unescape(String text, EncodingCharacters encChars) { 142 143 // If the escape char isn't found, we don't need to look for escape sequences 144 char escapeChar = encChars.getEscapeCharacter(); 145 boolean foundEscapeChar = false; 146 for (int i = 0; i < text.length(); i++) { 147 if (text.charAt(i) == escapeChar) { 148 foundEscapeChar = true; 149 break; 150 } 151 } 152 if (!foundEscapeChar) { 153 return text; 154 } 155 156 int textLength = text.length(); 157 StringBuilder result = new StringBuilder(textLength + 20); 158 EncLookup esc = getEscapeSequences(encChars); 159 char escape = esc.characters[4]; 160 int encodingsCount = esc.characters.length; 161 int i = 0; 162 while (i < textLength) { 163 char c = text.charAt(i); 164 if (c != escape) { 165 result.append(c); 166 i++; 167 } else { 168 boolean foundEncoding = false; 169 170 // Test against the standard encodings 171 for (int j = 0; j < encodingsCount; j++) { 172 String encoding = esc.encodings[j]; 173 int encodingLength = encoding.length(); 174 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength) 175 .equals(encoding)) { 176 result.append(esc.characters[j]); 177 i += encodingLength; 178 foundEncoding = true; 179 break; 180 } 181 } 182 183 if (!foundEncoding) { 184 185 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are 186 // formatting codes. They should be left intact 187 if (i + 1 < textLength) { 188 char nextChar = text.charAt(i + 1); 189 switch (nextChar) { 190 case '.': 191 case 'C': 192 case 'M': 193 case 'X': 194 case 'Z': 195 { 196 int closingEscape = text.indexOf(escape, i + 1); 197 if (closingEscape > 0) { 198 String substring = text.substring(i, closingEscape + 1); 199 result.append(substring); 200 i += substring.length(); 201 } else { 202 i++; 203 } 204 break; 205 } 206 case 'H': 207 case 'N': 208 { 209 int closingEscape = text.indexOf(escape, i + 1); 210 if (closingEscape == i + 2) { 211 String substring = text.substring(i, closingEscape + 1); 212 result.append(substring); 213 i += substring.length(); 214 } else { 215 i++; 216 } 217 break; 218 } 219 default: 220 { 221 i++; 222 } 223 } 224 225 } else { 226 i++; 227 } 228 } 229 230 231 } 232 } 233 return result.toString(); 234 } 235 236 /** 237 * Returns a HashTable with escape sequences as keys, and corresponding 238 * Strings as values. 239 */ 240 private static EncLookup getEscapeSequences(EncodingCharacters encChars) { 241 EncLookup escapeSequences = variousEncChars.get(encChars); 242 if (escapeSequences == null) { 243 // this means we haven't got the sequences for these encoding 244 // characters yet - let's make them 245 escapeSequences = new EncLookup(encChars); 246 variousEncChars.put(encChars, escapeSequences); 247 } 248 return escapeSequences; 249 } 250 251 252 253 254 /** 255 * A performance-optimized replacement for using when 256 * mapping from HL7 special characters to their respective 257 * encodings 258 * 259 * @author Christian Ohr 260 */ 261 private static class EncLookup { 262 263 char[] characters = new char[7]; 264 String[] encodings = new String[7]; 265 266 EncLookup(EncodingCharacters ec) { 267 characters[0] = ec.getFieldSeparator(); 268 characters[1] = ec.getComponentSeparator(); 269 characters[2] = ec.getSubcomponentSeparator(); 270 characters[3] = ec.getRepetitionSeparator(); 271 characters[4] = ec.getEscapeCharacter(); 272 273 characters[5] = ec.getTruncationCharacter(); 274 characters[6] = '\r'; 275 char[] codes = {'F', 'S', 'T', 'R', 'E', 'L'}; 276 for (int i = 0; i < codes.length; i++) { 277 StringBuilder seq = new StringBuilder(); 278 seq.append(ec.getEscapeCharacter()); 279 seq.append(codes[i]); 280 seq.append(ec.getEscapeCharacter()); 281 encodings[i] = seq.toString(); 282 } 283 // Escaping of truncation # is not implemented yet. It may only be escaped if it is the first character that 284 // exceeds the conformance length of the component (ch 2.5.5.2). As of now, this information is not 285 // available at this place. 286 encodings[5] = "#"; 287 encodings[6] = "\\X000d\\"; 288 } 289 } 290} 291