Coverage Report - ca.uhn.hl7v2.parser.DefaultEscaping
 
Classes in this File Line Coverage Branch Coverage Complexity
DefaultEscaping
94%
87/92
70%
44/62
8.8
DefaultEscaping$1
100%
3/3
50%
1/2
8.8
DefaultEscaping$EncLookup
100%
20/20
100%
2/2
8.8
 
 1  
 /**
 2  
  The contents of this file are subject to the Mozilla Public License Version 1.1
 3  
  (the "License"); you may not use this file except in compliance with the License.
 4  
  You may obtain a copy of the License at http://www.mozilla.org/MPL/
 5  
  Software distributed under the License is distributed on an "AS IS" basis,
 6  
  WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
 7  
  specific language governing rights and limitations under the License.
 8  
 
 9  
  The Original Code is "Escape.java".  Description:
 10  
  "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
 11  
  defined in section 2.10 of the standard (version 2.4)"
 12  
 
 13  
  The Initial Developer of the Original Code is University Health Network. Copyright (C)
 14  
  2001.  All Rights Reserved.
 15  
 
 16  
  Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz
 17  
 
 18  
  Alternatively, the contents of this file may be used under the terms of the
 19  
  GNU General Public License (the  "GPL"), in which case the provisions of the GPL are
 20  
  applicable instead of those above.  If you wish to allow use of your version of this
 21  
  file only under the terms of the GPL and not to allow others to use your version
 22  
  of this file under the MPL, indicate your decision by deleting  the provisions above
 23  
  and replace  them with the notice and other provisions required by the GPL License.
 24  
  If you do not delete the provisions above, a recipient may use your version of
 25  
  this file under either the MPL or the GPL.
 26  
  */
 27  
 package ca.uhn.hl7v2.parser;
 28  
 
 29  
 import java.util.Collections;
 30  
 import java.util.LinkedHashMap;
 31  
 import java.util.Map;
 32  
 
 33  
 /**
 34  
  * Handles "escaping" and "unescaping" of text according to the HL7 escape
 35  
  * sequence rules defined in section 2.10 of the standard (version 2.4).
 36  
  * Currently, escape sequences for multiple character sets are unsupported. The
 37  
  * highlighting, hexadecimal, and locally defined escape sequences are also
 38  
  * unsupported.
 39  
  *
 40  
  * @author Bryan Tripp
 41  
  * @author Mark Lee (Skeva Technologies)
 42  
  * @author Elmar Hinz
 43  
  * @author Christian Ohr
 44  
  */
 45  5555
 public class DefaultEscaping implements Escaping {
 46  
 
 47  
     /**
 48  
      * limits the size of variousEncChars to 1000, can be overridden by system property.
 49  
      */
 50  5
     private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap
 51  5
         <EncodingCharacters, EncLookup>(5, 0.75f, true) {
 52  
 
 53  
         private static final long serialVersionUID = 1L;
 54  5
         final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
 55  
 
 56  
         @Override
 57  
         protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
 58  15
             return this.size() > maxSize;
 59  
         }
 60  
     });
 61  
 
 62  
 
 63  
     /**
 64  
      * @param text string to be escaped
 65  
      * @param encChars encoding characters to be used
 66  
      * @return the escaped string
 67  
      */
 68  
     public String escape(String text, EncodingCharacters encChars) {
 69  55591
         EncLookup esc = getEscapeSequences(encChars);
 70  55591
         int textLength = text.length();
 71  
 
 72  55591
         StringBuilder result = new StringBuilder(textLength);
 73  452324
         for (int i = 0; i < textLength; i++) {
 74  396733
             boolean charReplaced = false;
 75  396733
             char c = text.charAt(i);
 76  
 
 77  
             FORENCCHARS:
 78  2733747
             for (int j = 0; j < 6; j++) {
 79  2349408
                 if (text.charAt(i) == esc.characters[j]) {
 80  
 
 81  
                     // Formatting escape sequences such as \.br\ should be left alone
 82  12394
                     if (j == 4) {
 83  
 
 84  3376
                         if (i+1 < textLength) {
 85  
 
 86  
                             // Check for \.br\
 87  3376
                             char nextChar = text.charAt(i + 1);
 88  3376
                             switch (nextChar) {
 89  
                                 case '.':
 90  
                                 case 'C':
 91  
                                 case 'M':
 92  
                                 case 'X':
 93  
                                 case 'Z':
 94  
                                 {
 95  315
                                     int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
 96  315
                                     if (nextEscapeIndex > 0) {
 97  315
                                         result.append(text.substring(i, nextEscapeIndex + 1));
 98  315
                                         charReplaced = true;
 99  315
                                         i = nextEscapeIndex;
 100  315
                                         break FORENCCHARS;
 101  
                                     }
 102  
                                     break;
 103  
                                 }
 104  
                                 case 'H':
 105  
                                 case 'N':
 106  
                                 {
 107  150
                                     if (i+2 < textLength && text.charAt(i+2) == '\\') {
 108  150
                                         int nextEscapeIndex = i + 2;
 109  150
                                         if (nextEscapeIndex > 0) {
 110  150
                                             result.append(text.substring(i, nextEscapeIndex + 1));
 111  150
                                             charReplaced = true;
 112  150
                                             i = nextEscapeIndex;
 113  150
                                             break FORENCCHARS;
 114  
                                         }
 115  
                                     }
 116  
                                     break;
 117  
                                 }
 118  
                             }
 119  
 
 120  
                         }
 121  
 
 122  
                     }
 123  
 
 124  11929
                     result.append(esc.encodings[j]);
 125  11929
                     charReplaced = true;
 126  11929
                     break;
 127  
                 }
 128  
             }
 129  396733
             if (!charReplaced) {
 130  384339
                 result.append(c);
 131  
             }
 132  
         }
 133  55591
         return result.toString();
 134  
     }
 135  
 
 136  
     /**
 137  
      * @param text string to be unescaped
 138  
      * @param encChars encoding characters to be used
 139  
      * @return the unescaped string
 140  
      */
 141  
     public String unescape(String text, EncodingCharacters encChars) {
 142  
 
 143  
         // If the escape char isn't found, we don't need to look for escape sequences
 144  101054
         char escapeChar = encChars.getEscapeCharacter();
 145  101054
         boolean foundEscapeChar = false;
 146  740917
         for (int i = 0; i < text.length(); i++) {
 147  642874
             if (text.charAt(i) == escapeChar) {
 148  3011
                 foundEscapeChar = true;
 149  3011
                 break;
 150  
             }
 151  
         }
 152  101054
         if (!foundEscapeChar) {
 153  98043
             return text;
 154  
         }
 155  
 
 156  3011
         int textLength = text.length();
 157  3011
         StringBuilder result = new StringBuilder(textLength + 20);
 158  3011
         EncLookup esc = getEscapeSequences(encChars);
 159  3011
         char escape = esc.characters[4];
 160  3011
         int encodingsCount = esc.characters.length;
 161  3011
         int i = 0;
 162  379095
         while (i < textLength) {
 163  376084
             char c = text.charAt(i);
 164  376084
             if (c != escape) {
 165  336990
                 result.append(c);
 166  336990
                 i++;
 167  
             } else {
 168  39094
                 boolean foundEncoding = false;
 169  
 
 170  
                 // Test against the standard encodings
 171  185444
                 for (int j = 0; j < encodingsCount; j++) {
 172  173799
                     String encoding = esc.encodings[j];
 173  173799
                     int encodingLength = encoding.length();
 174  173799
                     if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
 175  173669
                         .equals(encoding)) {
 176  27449
                         result.append(esc.characters[j]);
 177  27449
                         i += encodingLength;
 178  27449
                         foundEncoding = true;
 179  27449
                         break;
 180  
                     }
 181  
                 }
 182  
 
 183  39094
                 if (!foundEncoding) {
 184  
 
 185  
                     // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
 186  
                     // formatting codes. They should be left intact
 187  11645
                     if (i + 1 < textLength) {
 188  11645
                         char nextChar = text.charAt(i + 1);
 189  11645
                         switch (nextChar) {
 190  
                             case '.':
 191  
                             case 'C':
 192  
                             case 'M':
 193  
                             case 'X':
 194  
                             case 'Z':
 195  
                             {
 196  11425
                                 int closingEscape = text.indexOf(escape, i + 1);
 197  11425
                                 if (closingEscape > 0) {
 198  11425
                                     String substring = text.substring(i, closingEscape + 1);
 199  11425
                                     result.append(substring);
 200  11425
                                     i += substring.length();
 201  11425
                                 } else {
 202  0
                                     i++;
 203  
                                 }
 204  0
                                 break;
 205  
                             }
 206  
                             case 'H':
 207  
                             case 'N':
 208  
                             {
 209  180
                                 int closingEscape = text.indexOf(escape, i + 1);
 210  180
                                 if (closingEscape == i + 2) {
 211  180
                                     String substring = text.substring(i, closingEscape + 1);
 212  180
                                     result.append(substring);
 213  180
                                     i += substring.length();
 214  180
                                 } else {
 215  0
                                     i++;
 216  
                                 }
 217  0
                                 break;
 218  
                             }
 219  
                             default:
 220  
                             {
 221  40
                                 i++;
 222  
                             }
 223  
                         }
 224  
 
 225  11645
                     } else {
 226  0
                         i++;
 227  
                     }
 228  
                 }
 229  
 
 230  
 
 231  
             }
 232  376084
         }
 233  3011
         return result.toString();
 234  
     }
 235  
 
 236  
     /**
 237  
      * Returns a HashTable with escape sequences as keys, and corresponding
 238  
      * Strings as values.
 239  
      */
 240  
     private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
 241  58602
         EncLookup escapeSequences = variousEncChars.get(encChars);
 242  58602
         if (escapeSequences == null) {
 243  
             // this means we haven't got the sequences for these encoding
 244  
             // characters yet - let's make them
 245  15
             escapeSequences = new EncLookup(encChars);
 246  15
             variousEncChars.put(encChars, escapeSequences);
 247  
         }
 248  58602
         return escapeSequences;
 249  
     }
 250  
 
 251  
 
 252  
 
 253  
 
 254  
     /**
 255  
      * A performance-optimized replacement for using when
 256  
      * mapping from HL7 special characters to their respective
 257  
      * encodings
 258  
      *
 259  
      * @author Christian Ohr
 260  
      */
 261  
     private static class EncLookup {
 262  
 
 263  15
         char[] characters = new char[7];
 264  15
         String[] encodings = new String[7];
 265  
 
 266  15
         EncLookup(EncodingCharacters ec) {
 267  15
             characters[0] = ec.getFieldSeparator();
 268  15
             characters[1] = ec.getComponentSeparator();
 269  15
             characters[2] = ec.getSubcomponentSeparator();
 270  15
             characters[3] = ec.getRepetitionSeparator();
 271  15
             characters[4] = ec.getEscapeCharacter();
 272  
 
 273  15
             characters[5] = ec.getTruncationCharacter();
 274  15
             characters[6] = '\r';
 275  15
             char[] codes = {'F', 'S', 'T', 'R', 'E', 'L'};
 276  105
             for (int i = 0; i < codes.length; i++) {
 277  90
                 StringBuilder seq = new StringBuilder();
 278  90
                 seq.append(ec.getEscapeCharacter());
 279  90
                 seq.append(codes[i]);
 280  90
                 seq.append(ec.getEscapeCharacter());
 281  90
                 encodings[i] = seq.toString();
 282  
             }
 283  
             // Escaping of truncation # is not implemented yet. It may only be escaped if it is the first character that
 284  
             // exceeds the conformance length of the component (ch 2.5.5.2). As of now, this information is not
 285  
             // available at this place.
 286  15
             encodings[5] = "#";
 287  15
             encodings[6] = "\\X000d\\";
 288  15
         }
 289  
     }
 290  
 }
 291