001/**
002 The contents of this file are subject to the Mozilla Public License Version 1.1
003 (the "License"); you may not use this file except in compliance with the License.
004 You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 Software distributed under the License is distributed on an "AS IS" basis,
006 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 specific language governing rights and limitations under the License.
008
009 The Original Code is "Escape.java".  Description:
010 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011 defined in section 2.10 of the standard (version 2.4)"
012
013 The Initial Developer of the Original Code is University Health Network. Copyright (C)
014 2001.  All Rights Reserved.
015
016 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz
017
018 Alternatively, the contents of this file may be used under the terms of the
019 GNU General Public License (the  "GPL"), in which case the provisions of the GPL are
020 applicable instead of those above.  If you wish to allow use of your version of this
021 file only under the terms of the GPL and not to allow others to use your version
022 of this file under the MPL, indicate your decision by deleting  the provisions above
023 and replace  them with the notice and other provisions required by the GPL License.
024 If you do not delete the provisions above, a recipient may use your version of
025 this file under either the MPL or the GPL.
026 */
027package ca.uhn.hl7v2.parser;
028
029import java.util.Collections;
030import java.util.LinkedHashMap;
031import java.util.Map;
032
033/**
034 * Handles "escaping" and "unescaping" of text according to the HL7 escape
035 * sequence rules defined in section 2.10 of the standard (version 2.4).
036 * Currently, escape sequences for multiple character sets are unsupported. The
037 * highlighting, hexadecimal, and locally defined escape sequences are also
038 * unsupported.
039 *
040 * @author Bryan Tripp
041 * @author Mark Lee (Skeva Technologies)
042 * @author Elmar Hinz
043 * @author Christian Ohr
044 */
045public class DefaultEscaping implements Escaping {
046
047    /**
048     * limits the size of variousEncChars to 1000, can be overridden by system property.
049     */
050    private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap
051        <EncodingCharacters, EncLookup>(5, 0.75f, true) {
052
053        private static final long serialVersionUID = 1L;
054        final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
055
056        @Override
057        protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
058            return this.size() > maxSize;
059        }
060    });
061
062
063    /**
064     * @param text string to be escaped
065     * @param encChars encoding characters to be used
066     * @return the escaped string
067     */
068    public String escape(String text, EncodingCharacters encChars) {
069        EncLookup esc = getEscapeSequences(encChars);
070        int textLength = text.length();
071
072        StringBuilder result = new StringBuilder(textLength);
073        for (int i = 0; i < textLength; i++) {
074            boolean charReplaced = false;
075            char c = text.charAt(i);
076
077            FORENCCHARS:
078            for (int j = 0; j < 6; j++) {
079                if (text.charAt(i) == esc.characters[j]) {
080
081                    // Formatting escape sequences such as \.br\ should be left alone
082                    if (j == 4) {
083
084                        if (i+1 < textLength) {
085
086                            // Check for \.br\
087                            char nextChar = text.charAt(i + 1);
088                            switch (nextChar) {
089                                case '.':
090                                case 'C':
091                                case 'M':
092                                case 'X':
093                                case 'Z':
094                                {
095                                    int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
096                                    if (nextEscapeIndex > 0) {
097                                        result.append(text.substring(i, nextEscapeIndex + 1));
098                                        charReplaced = true;
099                                        i = nextEscapeIndex;
100                                        break FORENCCHARS;
101                                    }
102                                    break;
103                                }
104                                case 'H':
105                                case 'N':
106                                {
107                                    if (i+2 < textLength && text.charAt(i+2) == '\\') {
108                                        int nextEscapeIndex = i + 2;
109                                        if (nextEscapeIndex > 0) {
110                                            result.append(text.substring(i, nextEscapeIndex + 1));
111                                            charReplaced = true;
112                                            i = nextEscapeIndex;
113                                            break FORENCCHARS;
114                                        }
115                                    }
116                                    break;
117                                }
118                            }
119
120                        }
121
122                    }
123
124                    result.append(esc.encodings[j]);
125                    charReplaced = true;
126                    break;
127                }
128            }
129            if (!charReplaced) {
130                result.append(c);
131            }
132        }
133        return result.toString();
134    }
135
136    /**
137     * @param text string to be unescaped
138     * @param encChars encoding characters to be used
139     * @return the unescaped string
140     */
141    public String unescape(String text, EncodingCharacters encChars) {
142
143        // If the escape char isn't found, we don't need to look for escape sequences
144        char escapeChar = encChars.getEscapeCharacter();
145        boolean foundEscapeChar = false;
146        for (int i = 0; i < text.length(); i++) {
147            if (text.charAt(i) == escapeChar) {
148                foundEscapeChar = true;
149                break;
150            }
151        }
152        if (!foundEscapeChar) {
153            return text;
154        }
155
156        int textLength = text.length();
157        StringBuilder result = new StringBuilder(textLength + 20);
158        EncLookup esc = getEscapeSequences(encChars);
159        char escape = esc.characters[4];
160        int encodingsCount = esc.characters.length;
161        int i = 0;
162        while (i < textLength) {
163            char c = text.charAt(i);
164            if (c != escape) {
165                result.append(c);
166                i++;
167            } else {
168                boolean foundEncoding = false;
169
170                // Test against the standard encodings
171                for (int j = 0; j < encodingsCount; j++) {
172                    String encoding = esc.encodings[j];
173                    int encodingLength = encoding.length();
174                    if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
175                        .equals(encoding)) {
176                        result.append(esc.characters[j]);
177                        i += encodingLength;
178                        foundEncoding = true;
179                        break;
180                    }
181                }
182
183                if (!foundEncoding) {
184
185                    // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
186                    // formatting codes. They should be left intact
187                    if (i + 1 < textLength) {
188                        char nextChar = text.charAt(i + 1);
189                        switch (nextChar) {
190                            case '.':
191                            case 'C':
192                            case 'M':
193                            case 'X':
194                            case 'Z':
195                            {
196                                int closingEscape = text.indexOf(escape, i + 1);
197                                if (closingEscape > 0) {
198                                    String substring = text.substring(i, closingEscape + 1);
199                                    result.append(substring);
200                                    i += substring.length();
201                                } else {
202                                    i++;
203                                }
204                                break;
205                            }
206                            case 'H':
207                            case 'N':
208                            {
209                                int closingEscape = text.indexOf(escape, i + 1);
210                                if (closingEscape == i + 2) {
211                                    String substring = text.substring(i, closingEscape + 1);
212                                    result.append(substring);
213                                    i += substring.length();
214                                } else {
215                                    i++;
216                                }
217                                break;
218                            }
219                            default:
220                            {
221                                i++;
222                            }
223                        }
224
225                    } else {
226                        i++;
227                    }
228                }
229
230
231            }
232        }
233        return result.toString();
234    }
235
236    /**
237     * Returns a HashTable with escape sequences as keys, and corresponding
238     * Strings as values.
239     */
240    private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
241        EncLookup escapeSequences = variousEncChars.get(encChars);
242        if (escapeSequences == null) {
243            // this means we haven't got the sequences for these encoding
244            // characters yet - let's make them
245            escapeSequences = new EncLookup(encChars);
246            variousEncChars.put(encChars, escapeSequences);
247        }
248        return escapeSequences;
249    }
250
251
252
253
254    /**
255     * A performance-optimized replacement for using when
256     * mapping from HL7 special characters to their respective
257     * encodings
258     *
259     * @author Christian Ohr
260     */
261    private static class EncLookup {
262
263        char[] characters = new char[7];
264        String[] encodings = new String[7];
265
266        EncLookup(EncodingCharacters ec) {
267            characters[0] = ec.getFieldSeparator();
268            characters[1] = ec.getComponentSeparator();
269            characters[2] = ec.getSubcomponentSeparator();
270            characters[3] = ec.getRepetitionSeparator();
271            characters[4] = ec.getEscapeCharacter();
272
273            characters[5] = ec.getTruncationCharacter();
274            characters[6] = '\r';
275            char[] codes = {'F', 'S', 'T', 'R', 'E', 'L'};
276            for (int i = 0; i < codes.length; i++) {
277                StringBuilder seq = new StringBuilder();
278                seq.append(ec.getEscapeCharacter());
279                seq.append(codes[i]);
280                seq.append(ec.getEscapeCharacter());
281                encodings[i] = seq.toString();
282            }
283            // Escaping of truncation # is not implemented yet. It may only be escaped if it is the first character that
284            // exceeds the conformance length of the component (ch 2.5.5.2). As of now, this information is not
285            // available at this place.
286            encodings[5] = "#";
287            encodings[6] = "\\X000d\\";
288        }
289    }
290}
291