001/**
002 * The contents of this file are subject to the Mozilla Public License Version 1.1
003 * (the "License"); you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 * Software distributed under the License is distributed on an "AS IS" basis,
006 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 * specific language governing rights and limitations under the License.
008 *
009 * The Original Code is "XMLParser.java".  Description:
010 * "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
011 * specification."
012 *
013 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
014 * 2002.  All Rights Reserved.
015 *
016 * Contributor(s): ______________________________________.
017 *
018 * Alternatively, the contents of this file may be used under the terms of the
019 * GNU General Public License (the  "GPL"), in which case the provisions of the GPL are
020 * applicable instead of those above.  If you wish to allow use of your version of this
021 * file only under the terms of the GPL and not to allow others to use your version
022 * of this file under the MPL, indicate your decision by deleting  the provisions above
023 * and replace  them with the notice and other provisions required by the GPL License.
024 * If you do not delete the provisions above, a recipient may use your version of
025 * this file under either the MPL or the GPL.
026 */
027
028package ca.uhn.hl7v2.parser;
029
030import java.util.HashSet;
031import java.util.Set;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
035import ca.uhn.hl7v2.Version;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038import org.w3c.dom.DOMException;
039import org.w3c.dom.Document;
040import org.w3c.dom.Element;
041import org.w3c.dom.Node;
042import org.w3c.dom.NodeList;
043
044import ca.uhn.hl7v2.ErrorCode;
045import ca.uhn.hl7v2.HL7Exception;
046import ca.uhn.hl7v2.HapiContext;
047import ca.uhn.hl7v2.model.Composite;
048import ca.uhn.hl7v2.model.DataTypeException;
049import ca.uhn.hl7v2.model.GenericComposite;
050import ca.uhn.hl7v2.model.GenericMessage;
051import ca.uhn.hl7v2.model.GenericPrimitive;
052import ca.uhn.hl7v2.model.Message;
053import ca.uhn.hl7v2.model.Primitive;
054import ca.uhn.hl7v2.model.Segment;
055import ca.uhn.hl7v2.model.Type;
056import ca.uhn.hl7v2.model.Varies;
057import ca.uhn.hl7v2.util.Terser;
058import ca.uhn.hl7v2.util.XMLUtils;
059
060/**
061 * Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
062 * specification. This is an abstract class that handles datatype and segment parsing/encoding, but
063 * not the parsing/encoding of entire messages. To use the XML parser, you should create a subclass
064 * for a certain message structure. This subclass must be able to identify the Segment objects that
065 * correspond to various Segment nodes in an XML document, and call the methods
066 * <code>parse(Segment segmentObject, Element segmentElement)</code> and
067 * <code>encode(Segment segmentObject, Element segmentElement)</code>
068 * as appropriate. XMLParser uses the Xerces parser, which must be installed in your
069 * classpath.
070 * 
071 * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
072 * @author Bryan Tripp, Shawn Bellina
073 */
074public abstract class XMLParser extends Parser {
075
076        private static final String ESCAPE_ATTRNAME = "V";
077        private static final String ESCAPE_NODENAME = "escape";
078        private static final Logger log = LoggerFactory.getLogger(XMLParser.class);
079    protected static final String NS = "urn:hl7-org:v2xml";
080    private static final Pattern NS_PATTERN = Pattern.compile("xmlns(.*)=\"" + NS + "\"");
081
082        private String textEncoding;
083
084
085
086
087        /** Constructor */
088        public XMLParser() {
089                super();
090        }
091
092    /**
093     *
094     * @param context the HAPI context
095     */
096        public XMLParser(HapiContext context) {
097                super(context);
098        }
099
100        /**
101         * Constructor
102         * 
103         * @param theFactory custom factory to use for model class lookup
104         */
105        public XMLParser(ModelClassFactory theFactory) {
106                super(theFactory);
107
108        }
109
110        /**
111         * Returns a String representing the encoding of the given message, if the encoding is
112         * recognized. For example if the given message appears to be encoded using HL7 2.x XML rules
113         * then "XML" would be returned. If the encoding is not recognized then null is returned. That
114         * this method returns a specific encoding does not guarantee that the message is correctly
115         * encoded (e.g. well formed XML) - just that it is not encoded using any other encoding than
116         * the one returned. Returns null if the encoding is not recognized.
117         */
118        public String getEncoding(String message) {
119                return EncodingDetector.isXmlEncoded(message) ? getDefaultEncoding() : null;
120        }
121
122        /**
123         * @return the preferred encoding of this Parser
124         */
125        public String getDefaultEncoding() {
126                return "XML";
127        }
128
129        /**
130         * Sets the <i>keepAsOriginalNodes<i>
131         * 
132         * The nodes whose names match the <i>keepAsOriginalNodes<i> will be kept as original, meaning
133         * that no white space treaming will occur on them
134     *
135     * @param keepAsOriginalNodes of the nodes to be kept as original
136     * @deprecated Use {@link ParserConfiguration#setXmlDisableWhitespaceTrimmingOnNodeNames(Set)} instead. That method works exactly the same as this one but has been renamed for a more clear meaning. 
137         */
138        @Deprecated()
139        public void setKeepAsOriginalNodes(String[] keepAsOriginalNodes) {
140                getParserConfiguration().setXmlDisableWhitespaceTrimmingOnNodeNames(keepAsOriginalNodes);
141        }
142
143        /**
144         * Sets the <i>keepAsOriginalNodes<i>
145         * 
146         * @deprecated Use {@link ParserConfiguration#getXmlDisableWhitespaceTrimmingOnNodeNames()} instead
147         */
148        @Deprecated
149        public String[] getKeepAsOriginalNodes() {
150                return getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().toArray(new String[getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().size()]);
151        }
152
153        /**
154         * <p>
155         * Creates and populates a Message object from an XML Document that contains an XML-encoded HL7
156         * message.
157         * </p>
158         * <p>
159         * The easiest way to implement this method for a particular message structure is as follows:
160         * <ol>
161         * <li>Create an instance of the Message type you are going to handle with your subclass of
162         * XMLParser</li>
163         * <li>Go through the given Document and find the Elements that represent the top level of each
164         * message segment.</li>
165         * <li>For each of these segments, call
166         * <code>parse(Segment segmentObject, Element segmentElement)</code>, providing the appropriate
167         * Segment from your Message object, and the corresponding Element.</li>
168         * </ol>
169         * At the end of this process, your Message object should be populated with data from the XML
170         * Document.
171         * </p>
172         *
173     * @param xmlMessage DOM message object to be parsed
174     * @param version HL7 version
175         * @throws HL7Exception if the message is not correctly formatted.
176         * @throws EncodingNotSupportedException if the message encoded is not supported by this parser.
177         */
178        public abstract Message parseDocument(Document xmlMessage, String version) throws HL7Exception;
179
180        /**
181         * <p>
182         * Parses a message string and returns the corresponding Message object. This method checks that
183         * the given message string is XML encoded, creates an XML Document object (using Xerces) from
184         * the given String, and calls the abstract method <code>parse(Document XMLMessage)</code>
185         * </p>
186         */
187        protected Message doParse(String message, String version) throws HL7Exception {
188                Message m;
189
190                // parse message string into a DOM document
191                Document doc;
192                doc = parseStringIntoDocument(message);
193                m = parseDocument(doc, version);
194
195                return m;
196        }
197
198        /**
199         * Parses a string containing an XML document into a Document object.
200         * 
201         * Note that this method is synchronized currently, as the XML parser is not thread safe
202         * 
203         * @throws HL7Exception
204         */
205        protected synchronized Document parseStringIntoDocument(String message) throws HL7Exception {
206                try {
207                        return XMLUtils.parse(message);
208                } catch (Exception e) {
209                        throw new HL7Exception("Exception parsing XML", e);
210                }
211        }
212
213        /**
214         * Formats a Message object into an HL7 message string using the given encoding.
215         * 
216         * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required
217         *             fields are null)
218         * @throws EncodingNotSupportedException if the requested encoding is not supported by this
219         *             parser.
220         */
221        protected String doEncode(Message source, String encoding) throws HL7Exception {
222                if (!encoding.equals("XML"))
223                        throw new EncodingNotSupportedException("XMLParser supports only XML encoding");
224                return encode(source);
225        }
226
227        /**
228         * Formats a Message object into an HL7 message string using this parser's default encoding (XML
229         * encoding). This method calls the abstract method <code>encodeDocument(...)</code> in order to
230         * obtain XML Document object representation of the Message, then serializes it to a String.
231         * 
232         * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required
233         *             fields are null)
234         */
235        protected String doEncode(Message source) throws HL7Exception {
236                if (source instanceof GenericMessage) {
237                        throw new HL7Exception(
238                                        "Can't XML-encode a GenericMessage.  Message must have a recognized structure.");
239                }
240
241                Document doc = encodeDocument(source);
242                // Element documentElement = doc.getDocumentElement();
243                // if (!documentElement.hasAttribute("xmlns"))
244                // documentElement.setAttribute("xmlns", "urn:hl7-org:v2xml");
245                try {
246                        return XMLUtils.serialize(doc, getParserConfiguration().isPrettyPrintWhenEncodingXml());
247                } catch (Exception e) {
248                        throw new HL7Exception("Exception serializing XML document to string", e);
249                }
250        }
251
252        /**
253         * <p>
254         * Creates an XML Document that corresponds to the given Message object.
255         * </p>
256         * <p>
257         * If you are implementing this method, you should create an XML Document, and insert XML
258         * Elements into it that correspond to the groups and segments that belong to the message type
259         * that your subclass of XMLParser supports. Then, for each segment in the message, call the
260         * method <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element
261         * for that segment and the corresponding Segment object from the given Message.
262         * </p>
263     *
264     * @param source message
265     * @return the DOM document object of the encoded message
266         */
267        public abstract Document encodeDocument(Message source) throws HL7Exception;
268
269
270    protected void assertNamespaceURI(String ns) throws HL7Exception {
271        if (!NS.equals(ns)) {
272            throw new HL7Exception("Namespace URI must be " + NS);
273        }
274    }
275
276        /**
277         * Populates the given Segment object with data from the given XML Element.
278         *
279     * @param segmentObject the segment to parse into
280     * @param segmentElement the DOM element to be parsed
281         * @throws HL7Exception if the XML Element does not have the correct name and structure for the
282         *             given Segment, or if there is an error while setting individual field values.
283         */
284        public void parse(Segment segmentObject, Element segmentElement) throws HL7Exception {
285                Set<String> done = new HashSet<String>();
286
287                NodeList all = segmentElement.getChildNodes();
288                for (int i = 0; i < all.getLength(); i++) {
289                        String elementName = all.item(i).getNodeName();
290
291                        if (all.item(i).getNodeType() == Node.ELEMENT_NODE && !done.contains(elementName)) {
292                assertNamespaceURI(all.item(i).getNamespaceURI());
293                                done.add(elementName);
294
295                                int index = elementName.indexOf('.');
296                                if (index >= 0 && elementName.length() > index) { // properly formatted element
297                                        String fieldNumString = elementName.substring(index + 1);
298                                        int fieldNum = Integer.parseInt(fieldNumString);
299                                        parseReps(segmentObject, segmentElement, elementName, fieldNum);
300                                } else {
301                                        log.debug("Child of segment {} doesn't look like a field {}",
302                                                        segmentObject.getName(), elementName);
303                                }
304                        }
305                }
306
307                // set data type of OBX-5
308                if (segmentObject.getClass().getName().contains("OBX")) {
309                        FixFieldDataType.fixOBX5(segmentObject, getFactory(), getHapiContext().getParserConfiguration());
310                }
311        // set data type of MFE-4
312        if (segmentObject.getClass().getName().contains("MFE") &&
313                Version.versionOf(segmentObject.getMessage().getVersion()).isGreaterThan(Version.V23)) {
314            FixFieldDataType.fixMFE4(segmentObject, getFactory(), getHapiContext().getParserConfiguration());
315        }
316        }
317
318        private void parseReps(Segment segmentObject, Element segmentElement, String fieldName,
319                        int fieldNum) throws HL7Exception {
320
321                NodeList reps = segmentElement.getElementsByTagName(fieldName);
322                for (int i = 0; i < reps.getLength(); i++) {
323                        parse(segmentObject.getField(fieldNum, i), (Element) reps.item(i));
324                }
325        }
326
327        /**
328         * Populates the given Element with data from the given Segment, by inserting Elements
329         * corresponding to the Segment's fields, their components, etc. Returns true if there is at
330         * least one data value in the segment.
331     *
332     * @param segmentObject the segment to be encoded
333     * @param segmentElement the DOM element to encode into
334     * @return true if there is at least one data value in the segment
335     * @throws HL7Exception if an erro occurred while encoding
336         */
337        public boolean encode(Segment segmentObject, Element segmentElement) throws HL7Exception {
338                boolean hasValue = false;
339                int n = segmentObject.numFields();
340                for (int i = 1; i <= n; i++) {
341                        String name = makeElementName(segmentObject, i);
342                        Type[] reps = segmentObject.getField(i);
343                        for (Type rep : reps) {
344                                Element newNode = segmentElement.getOwnerDocument().createElement(name);
345                                boolean componentHasValue = encode(rep, newNode);
346                                if (componentHasValue) {
347                                        try {
348                                                segmentElement.appendChild(newNode);
349                                        } catch (DOMException e) {
350                                                throw new HL7Exception("DOMException encoding Segment: ", e);
351                                        }
352                                        hasValue = true;
353                                }
354                        }
355                }
356                return hasValue;
357        }
358
359        /**
360         * Populates the given Type object with data from the given XML Element.
361     *
362     * @param datatypeObject the type to parse into
363     * @param datatypeElement the DOM element to be parsed
364     * @throws DataTypeException if the data did not match the expected type rules
365         */
366        public void parse(Type datatypeObject, Element datatypeElement) throws HL7Exception {
367                if (datatypeObject instanceof Varies) {
368                        parseVaries((Varies) datatypeObject, datatypeElement);
369                } else if (datatypeObject instanceof Primitive) {
370                        parsePrimitive((Primitive) datatypeObject, datatypeElement);
371                } else if (datatypeObject instanceof Composite) {
372                        parseComposite((Composite) datatypeObject, datatypeElement);
373                }
374        }
375
376        /**
377         * Parses an XML element into a Varies by determining whether the element is primitive or
378         * composite, calling setData() on the Varies with a new generic primitive or composite as
379         * appropriate, and then calling parse again with the new Type object.
380         */
381        private void parseVaries(Varies datatypeObject, Element datatypeElement)
382                        throws HL7Exception {
383                // figure out what data type it holds
384                // short nodeType = datatypeElement.getFirstChild().getNodeType();
385                if (!hasChildElement(datatypeElement)) {
386                        // it's a primitive
387                        datatypeObject.setData(new GenericPrimitive(datatypeObject.getMessage()));
388                } else {
389                        // it's a composite ... almost know what type, except that we don't have the version
390                        // here
391                        datatypeObject.setData(new GenericComposite(datatypeObject.getMessage()));
392                }
393                parse(datatypeObject.getData(), datatypeElement);
394        }
395
396        /** Returns true if any of the given element's children are (non-escape) elements */
397        private boolean hasChildElement(Element e) {
398                NodeList children = e.getChildNodes();
399                boolean hasElement = false;
400                int c = 0;
401                while (c < children.getLength() && !hasElement) {
402                        if (children.item(c).getNodeType() == Node.ELEMENT_NODE
403                                        && !ESCAPE_NODENAME.equals(children.item(c).getNodeName())) {
404                                hasElement = true;
405                        }
406                        c++;
407                }
408                return hasElement;
409        }
410
411        /**
412         * Parses a primitive type by filling it with text child, if any. If the datatype element
413         * contains escape elements, resolve them properly.
414         */
415        private void parsePrimitive(Primitive datatypeObject, Element datatypeElement)
416                        throws HL7Exception {
417                NodeList children = datatypeElement.getChildNodes();
418                StringBuilder builder = new StringBuilder();
419                for (int c = 0; c < children.getLength(); c++) {
420                        Node child = children.item(c);
421                        try {
422                                if (child.getNodeType() == Node.TEXT_NODE) {
423                                        String value = child.getNodeValue();
424                                        if (value != null && value.length() > 0) {
425                                                if (keepAsOriginal(child.getParentNode())) {
426                                                        builder.append(value);
427                                                } else {
428                                                        builder.append(removeWhitespace(value));
429                                                }
430                                        }
431                                        // Check for formatting elements
432                                } else if (child.getNodeType() == Node.ELEMENT_NODE
433                                                && ESCAPE_NODENAME.equals(child.getLocalName())) {
434                    assertNamespaceURI(child.getNamespaceURI());
435                                        EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject
436                                                        .getMessage());
437                                        Element elem = (Element) child;
438                                        String attr = elem.getAttribute(ESCAPE_ATTRNAME).trim();
439                                        if (attr.length() > 0) {
440                                                builder.append(ec.getEscapeCharacter()).append(attr)
441                                                                .append(ec.getEscapeCharacter());
442                                        }
443                                }
444                        } catch (Exception e) {
445                                log.error("Error parsing primitive value from TEXT_NODE", e);
446                        }
447
448                }
449                datatypeObject.setValue(builder.toString());
450        }
451
452        /**
453         * Checks if <code>Node</code> content should be kept as original (ie.: whitespaces won't be
454         * removed)
455         * 
456         * @param node The target <code>Node</code>
457         * @return boolean <code>true</code> if whitespaces should not be removed from node content,
458         *         <code>false</code> otherwise
459         */
460        protected boolean keepAsOriginal(Node node) {
461                if (getParserConfiguration().isXmlDisableWhitespaceTrimmingOnAllNodes()) {
462                        return true;
463                }
464                return (node.getNodeName() != null) && getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().contains(node.getNodeName());
465        }
466
467        /**
468         * Removes all unnecessary whitespace from the given String (intended to be used with Primitive
469         * values). This includes leading and trailing whitespace, and repeated space characters.
470         * Carriage returns, line feeds, and tabs are replaced with spaces.
471         */
472        protected String removeWhitespace(String s) {
473                
474                s = s.replace('\r', ' ');
475                s = s.replace('\n', ' ');
476                s = s.replace('\t', ' ');
477
478                boolean repeatedSpacesExist = true;
479                while (repeatedSpacesExist) {
480                        int loc = s.indexOf("  ");
481                        if (loc < 0) {
482                                repeatedSpacesExist = false;
483                        } else {
484                                StringBuilder buf = new StringBuilder();
485                                buf.append(s.substring(0, loc));
486                                buf.append(" ");
487                                buf.append(s.substring(loc + 2));
488                                s = buf.toString();
489                        }
490                }
491                return s.trim();
492        }
493
494        /**
495         * Populates a Composite type by looping through it's children, finding corresponding Elements
496         * among the children of the given Element, and calling parse(Type, Element) for each.
497         */
498        private void parseComposite(Composite datatypeObject, Element datatypeElement)
499                        throws HL7Exception {
500                if (datatypeObject instanceof GenericComposite) { // elements won't be named
501                                                                                                                        // GenericComposite.x
502                        NodeList children = datatypeElement.getChildNodes();
503                        int compNum = 0;
504                        for (int i = 0; i < children.getLength(); i++) {
505                                if (children.item(i).getNodeType() == Node.ELEMENT_NODE) {
506                                        Element nextElement = (Element) children.item(i);
507                    assertNamespaceURI(nextElement.getNamespaceURI());
508                                        String localName = nextElement.getLocalName();
509                                        int dotIndex = localName.indexOf(".");
510                                        if (dotIndex > -1) {
511                                                compNum = Integer.parseInt(localName.substring(dotIndex + 1)) - 1;
512                                        } else {
513                                                log.debug(
514                                                                "Datatype element {} doesn't have a valid numbered name, usgin default index of {}",
515                                                                datatypeElement.getLocalName(), compNum);
516                                        }
517                                        Type nextComponent = datatypeObject.getComponent(compNum);
518                                        parse(nextComponent, nextElement);
519                                        compNum++;
520                                }
521                        }
522                } else {
523                        Type[] children = datatypeObject.getComponents();
524                        for (int i = 0; i < children.length; i++) {
525                                NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName(
526                                                datatypeObject, i + 1));
527                                if (matchingElements.getLength() > 0) {
528                                        parse(children[i], (Element) matchingElements.item(0));
529                                }
530                        }
531                        
532                        int nextExtraCmpIndex = 0;
533                        boolean foundExtraComponent;
534                        do {
535                                foundExtraComponent = false;
536                                NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName(datatypeObject, children.length + nextExtraCmpIndex + 1));
537                                if (matchingElements.getLength() > 0) {
538                                        parse(datatypeObject.getExtraComponents().getComponent(nextExtraCmpIndex), (Element) matchingElements.item(0));
539                                        foundExtraComponent = true;
540                                }
541                                nextExtraCmpIndex++;
542                        } while (foundExtraComponent);
543                        
544                        
545                }
546        }
547
548        /** Returns the expected XML element name for the given child of the given Segment */
549        private String makeElementName(Segment s, int child) {
550                return s.getName() + "." + child;
551        }
552
553        /** Returns the expected XML element name for the given child of the given Composite */
554        private String makeElementName(Composite composite, int child) {
555                return composite.getName() + "." + child;
556        }
557
558        /**
559         * Populates the given Element with data from the given Type, by inserting Elements
560         * corresponding to the Type's components and values. Returns true if the given type contains a
561         * value (i.e. for Primitives, if getValue() doesn't return null, and for Composites, if at
562         * least one underlying Primitive doesn't return null).
563         */
564        private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException {
565                boolean hasData = false;
566                if (datatypeObject instanceof Varies) {
567                        hasData = encodeVaries((Varies) datatypeObject, datatypeElement);
568                } else if (datatypeObject instanceof Primitive) {
569                        hasData = encodePrimitive((Primitive) datatypeObject, datatypeElement);
570                } else if (datatypeObject instanceof Composite) {
571                        hasData = encodeComposite((Composite) datatypeObject, datatypeElement);
572                }
573                return hasData;
574        }
575
576        /**
577         * Encodes a Varies type by extracting it's data field and encoding that. Returns true if the
578         * data field (or one of its components) contains a value.
579         */
580        private boolean encodeVaries(Varies datatypeObject, Element datatypeElement)
581                        throws DataTypeException {
582                boolean hasData = false;
583                if (datatypeObject.getData() != null) {
584                        hasData = encode(datatypeObject.getData(), datatypeElement);
585                }
586                return hasData;
587        }
588
589        /**
590         * Encodes a Primitive in XML by adding it's value as a child of the given Element. Detects
591         * escape character and creates proper <escape> elements in the DOM tree. Returns true if the
592         * given Primitive contains a value.
593         */
594        private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement)
595                        throws DataTypeException {
596                String value = datatypeObject.getValue();
597                boolean hasValue = (value != null && value.length() > 0);
598                if (hasValue) {
599                        try {
600                                EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject.getMessage());
601                                char esc = ec.getEscapeCharacter();
602                                int pos;
603                                int oldpos = 0;
604                                boolean escaping = false;
605
606                                // Find next escape character
607                                while ((pos = value.indexOf(esc, oldpos)) >= 0) {
608
609                                        // string until next escape character
610                                        String v = value.substring(oldpos, pos);
611                                        if (!escaping) {
612                                                // currently in "text mode", so create textnode from it
613                                                if (v.length() > 0)
614                                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument()
615                                                                        .createTextNode(v));
616                                                escaping = true;
617                                        } else {
618                                                if (v.startsWith(".") || "H".equals(v) || "N".equals(v)) {
619                                                        // currently in "escape mode", so create escape element from it
620                                                        Element escape = datatypeElement.getOwnerDocument().createElement(
621                                                                        ESCAPE_NODENAME);
622                                                        escape.setAttribute(ESCAPE_ATTRNAME, v);
623                                                        datatypeElement.appendChild(escape);
624                                                        escaping = false;
625                                                } else {
626                                                        // no proper escape sequence, assume text
627                                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument()
628                                                                        .createTextNode(esc + v));
629                                                }
630                                        }
631                                        oldpos = pos + 1;
632                                }
633                                // create text from the remainder
634                                if (oldpos <= value.length()) {
635
636                                        StringBuilder sb = new StringBuilder();
637                                        // If we are in escaping mode, there appears no closing escape character,
638                                        // so we treat the string as text
639                                        if (escaping)
640                                                sb.append(esc);
641
642                                        sb.append(value.substring(oldpos));
643                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument().createTextNode(
644                                                        sb.toString()));
645                                }
646
647                        } catch (Exception e) {
648                                throw new DataTypeException("Exception encoding Primitive: ", e);
649                        }
650
651                }
652                return hasValue;
653        }
654
655        /**
656         * Encodes a Composite in XML by looping through it's components, creating new children for each
657         * of them (with the appropriate names) and populating them by calling encode(Type, Element)
658         * using these children. Returns true if at least one component contains a value.
659         */
660        private boolean encodeComposite(Composite datatypeObject, Element datatypeElement)
661                        throws DataTypeException {
662                Type[] components = datatypeObject.getComponents();
663                boolean hasValue = false;
664                for (int i = 0; i < components.length; i++) {
665                        String name = makeElementName(datatypeObject, i + 1);
666                        Element newNode = datatypeElement.getOwnerDocument().createElement(name);
667                        boolean componentHasValue = encode(components[i], newNode);
668                        if (componentHasValue) {
669                                try {
670                                        datatypeElement.appendChild(newNode);
671                                } catch (DOMException e) {
672                                        throw new DataTypeException("DOMException encoding Composite: ", e);
673                                }
674                                hasValue = true;
675                        }
676                }
677                return hasValue;
678        }
679
680        /**
681         * <p>
682         * Returns a minimal amount of data from a message string, including only the data needed to
683         * send a response to the remote system. This includes the following fields:
684         * <ul>
685         * <li>field separator</li>
686         * <li>encoding characters</li>
687         * <li>processing ID</li>
688         * <li>message control ID</li>
689         * </ul>
690         * This method is intended for use when there is an error parsing a message, (so the Message
691         * object is unavailable) but an error message must be sent back to the remote system including
692         * some of the information in the inbound message. This method parses only that required
693         * information, hopefully avoiding the condition that caused the original error.
694         * </p>
695         */
696        public Segment getCriticalResponseData(String message) throws HL7Exception {
697                String version = getVersion(message);
698                Segment criticalData = Parser.makeControlMSH(version, getFactory());
699
700                Terser.set(criticalData, 1, 0, 1, 1, parseLeaf(message, "MSH.1", 0));
701                Terser.set(criticalData, 2, 0, 1, 1, parseLeaf(message, "MSH.2", 0));
702                Terser.set(criticalData, 10, 0, 1, 1, parseLeaf(message, "MSH.10", 0));
703                String procID = parseLeaf(message, "MSH.11", 0);
704                if (procID == null || procID.length() == 0) {
705                        procID = parseLeaf(message, "PT.1", message.indexOf("MSH.11"));
706                        // this field is a composite in later versions
707                }
708                Terser.set(criticalData, 11, 0, 1, 1, procID);
709
710                return criticalData;
711        }
712
713        /**
714         * For response messages, returns the value of MSA-2 (the message ID of the message sent by the
715         * sending system). This value may be needed prior to main message parsing, so that
716         * (particularly in a multi-threaded scenario) the message can be routed to the thread that sent
717         * the request. We need this information first so that any parse exceptions are thrown to the
718         * correct thread. Implementers of Parsers should take care to make the implementation of this
719         * method very fast and robust. Returns null if MSA-2 can not be found (e.g. if the message is
720         * not a response message). Trims whitespace from around the MSA-2 field.
721         */
722        public String getAckID(String message) {
723                String ackID = null;
724                try {
725                        ackID = parseLeaf(message, "msa.2", 0).trim();
726                } catch (HL7Exception e) { /* OK ... assume it isn't a response message */
727                }
728                return ackID;
729        }
730
731        public String getVersion(String message) throws HL7Exception {
732        String version = parseLeaf(message, "MSH.12", 0);
733        if (version == null || version.trim().length() == 0) {
734            version = parseLeaf(message, "VID.1", message.indexOf("MSH.12"));
735        }
736        return version;     
737        }
738
739        /**
740         * Attempts to retrieve the value of a leaf tag without using DOM or SAX. This method searches
741         * the given message string for the given tag name, and returns everything after the given tag
742         * and before the start of the next tag. Whitespace is stripped. This is intended only for lead
743         * nodes, as the value is considered to end at the start of the next tag, regardless of whether
744         * it is the matching end tag or some other nested tag.
745         * 
746         * @param message a string message in XML form
747         * @param tagName the name of the XML tag, e.g. "MSA.2"
748         * @param startAt the character location at which to start searching
749         * @throws HL7Exception if the tag can not be found
750         */
751        protected static String parseLeaf(String message, String tagName, int startAt) throws HL7Exception {
752
753        // Workaround #176: XML may include explicit namespaces. It would be more stable to use some
754        // kind of pull parser for this method instead of manually digging for tags in the XML structure.
755        String prefix = "";
756        Matcher m = NS_PATTERN.matcher(message);
757        if (m.find()) {
758            String ns = m.group(1);
759            if (ns != null && ns.length() > 0) {
760                prefix = ns.substring(1) + ":";
761            }
762        }
763
764                int tagStart = message.indexOf("<" + prefix + tagName, startAt);
765                if (tagStart < 0)
766                        tagStart = message.indexOf("<" + prefix + tagName.toUpperCase(), startAt);
767                int valStart = message.indexOf(">", tagStart) + 1;
768                int valEnd = message.indexOf("<", valStart);
769
770        String value;
771                if (tagStart >= 0 && valEnd >= valStart) {
772                        value = message.substring(valStart, valEnd);
773                } else {
774                        throw new HL7Exception("Couldn't find " + tagName + " in message beginning: "
775                                        + message.substring(0, Math.min(150, message.length())),
776                                        ErrorCode.REQUIRED_FIELD_MISSING);
777                }
778
779                // Escape codes, as defined at http://hdf.ncsa.uiuc.edu/HDF5/XML/xml_escape_chars.htm
780                value = value.replaceAll("&quot;", "\"");
781                value = value.replaceAll("&apos;", "'");
782                value = value.replaceAll("&amp;", "&");
783                value = value.replaceAll("&lt;", "<");
784                value = value.replaceAll("&gt;", ">");
785
786                return value;
787        }
788
789        /**
790         * Throws unsupported operation exception
791         * 
792         * @throws UnsupportedOperationException
793         */
794        @Override
795        public String doEncode(Segment structure, EncodingCharacters encodingCharacters)
796                        throws HL7Exception {
797                throw new UnsupportedOperationException("Not supported yet.");
798        }
799
800        /**
801         * Throws unsupported operation exception
802         * 
803         * @throws UnsupportedOperationException
804         */
805        @Override
806        protected Message doParseForSpecificPackage(String theMessage, String theVersion,
807                        String thePackageName) throws HL7Exception {
808                throw new UnsupportedOperationException("Not supported yet.");
809        }
810
811        /**
812         * Throws unsupported operation exception
813         * 
814         * @throws UnsupportedOperationException
815         */
816        @Override
817        public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
818                throw new UnsupportedOperationException("Not supported yet.");
819        }
820
821        /**
822         * Throws unsupported operation exception
823         * 
824         * @throws UnsupportedOperationException
825         */
826        @Override
827        public void parse(Type type, String string, EncodingCharacters encodingCharacters)
828                        throws HL7Exception {
829                throw new UnsupportedOperationException("Not supported yet.");
830        }
831
832        /**
833         * Throws unsupported operation exception
834         * 
835         * @throws UnsupportedOperationException
836         */
837        @Override
838        public void parse(Segment segment, String string, EncodingCharacters encodingCharacters)
839                        throws HL7Exception {
840                throw new UnsupportedOperationException("Not supported yet.");
841        }
842
843        /**
844         * Returns the text encoding to be used in generating new messages. Note that this affects
845         * encoding to string only, not parsing.
846         * 
847         * @return text encoding
848         */
849        public String getTextEncoding() {
850                return textEncoding;
851        }
852
853        /**
854         * Sets the text encoding to be used in generating new messages. Note that this affects encoding
855         * to string only, not parsing.
856         * 
857         * @param textEncoding The encoding. Default is the platform default.
858         */
859        public void setTextEncoding(String textEncoding) {
860                this.textEncoding = textEncoding;
861        }
862
863}