| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| XMLParser |
|
| 3.292682926829268;3.293 |
| 1 | /** | |
| 2 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
| 3 | * (the "License"); you may not use this file except in compliance with the License. | |
| 4 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
| 5 | * Software distributed under the License is distributed on an "AS IS" basis, | |
| 6 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the | |
| 7 | * specific language governing rights and limitations under the License. | |
| 8 | * | |
| 9 | * The Original Code is "XMLParser.java". Description: | |
| 10 | * "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding | |
| 11 | * specification." | |
| 12 | * | |
| 13 | * The Initial Developer of the Original Code is University Health Network. Copyright (C) | |
| 14 | * 2002. All Rights Reserved. | |
| 15 | * | |
| 16 | * Contributor(s): ______________________________________. | |
| 17 | * | |
| 18 | * Alternatively, the contents of this file may be used under the terms of the | |
| 19 | * GNU General Public License (the "GPL"), in which case the provisions of the GPL are | |
| 20 | * applicable instead of those above. If you wish to allow use of your version of this | |
| 21 | * file only under the terms of the GPL and not to allow others to use your version | |
| 22 | * of this file under the MPL, indicate your decision by deleting the provisions above | |
| 23 | * and replace them with the notice and other provisions required by the GPL License. | |
| 24 | * If you do not delete the provisions above, a recipient may use your version of | |
| 25 | * this file under either the MPL or the GPL. | |
| 26 | */ | |
| 27 | ||
| 28 | package ca.uhn.hl7v2.parser; | |
| 29 | ||
| 30 | import java.util.HashSet; | |
| 31 | import java.util.Set; | |
| 32 | import java.util.regex.Matcher; | |
| 33 | import java.util.regex.Pattern; | |
| 34 | ||
| 35 | import ca.uhn.hl7v2.Version; | |
| 36 | import org.slf4j.Logger; | |
| 37 | import org.slf4j.LoggerFactory; | |
| 38 | import org.w3c.dom.DOMException; | |
| 39 | import org.w3c.dom.Document; | |
| 40 | import org.w3c.dom.Element; | |
| 41 | import org.w3c.dom.Node; | |
| 42 | import org.w3c.dom.NodeList; | |
| 43 | ||
| 44 | import ca.uhn.hl7v2.ErrorCode; | |
| 45 | import ca.uhn.hl7v2.HL7Exception; | |
| 46 | import ca.uhn.hl7v2.HapiContext; | |
| 47 | import ca.uhn.hl7v2.model.Composite; | |
| 48 | import ca.uhn.hl7v2.model.DataTypeException; | |
| 49 | import ca.uhn.hl7v2.model.GenericComposite; | |
| 50 | import ca.uhn.hl7v2.model.GenericMessage; | |
| 51 | import ca.uhn.hl7v2.model.GenericPrimitive; | |
| 52 | import ca.uhn.hl7v2.model.Message; | |
| 53 | import ca.uhn.hl7v2.model.Primitive; | |
| 54 | import ca.uhn.hl7v2.model.Segment; | |
| 55 | import ca.uhn.hl7v2.model.Type; | |
| 56 | import ca.uhn.hl7v2.model.Varies; | |
| 57 | import ca.uhn.hl7v2.util.Terser; | |
| 58 | import ca.uhn.hl7v2.util.XMLUtils; | |
| 59 | ||
| 60 | /** | |
| 61 | * Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding | |
| 62 | * specification. This is an abstract class that handles datatype and segment parsing/encoding, but | |
| 63 | * not the parsing/encoding of entire messages. To use the XML parser, you should create a subclass | |
| 64 | * for a certain message structure. This subclass must be able to identify the Segment objects that | |
| 65 | * correspond to various Segment nodes in an XML document, and call the methods <code> | |
| 66 | * parse(Segment segmentObject, Element segmentElement)</code> and | |
| 67 | * <code>encode(Segment segmentObject, Element segmentElement) | |
| 68 | * </code> as appropriate. XMLParser uses the Xerces parser, which must be installed in your | |
| 69 | * classpath. | |
| 70 | * | |
| 71 | * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour | |
| 72 | * @author Bryan Tripp, Shawn Bellina | |
| 73 | */ | |
| 74 | public abstract class XMLParser extends Parser { | |
| 75 | ||
/** Name of the attribute on an escape element that holds the escape sequence text. */
private static final String ESCAPE_ATTRNAME = "V";
/** Name of the XML element used to represent an HL7 escape sequence inside a primitive. */
private static final String ESCAPE_NODENAME = "escape";
/** Class-wide logger. */
private static final Logger log = LoggerFactory.getLogger(XMLParser.class);
/** Namespace URI that {@link #assertNamespaceURI(String)} requires parsed elements to carry. */
protected static final String NS = "urn:hl7-org:v2xml";
/**
 * Matches an <code>xmlns...="urn:hl7-org:v2xml"</code> declaration; group 1 captures whatever
 * follows "xmlns" (e.g. a ":prefix").
 * NOTE(review): not referenced in the part of the class reviewed here - confirm where it is used.
 */
private static final Pattern NS_PATTERN = Pattern.compile("xmlns(.*)=\"" + NS + "\"");

// NOTE(review): neither read nor written in the part of the class reviewed here - confirm usage.
private String textEncoding;
| 83 | ||
| 84 | ||
| 85 | ||
| 86 | ||
| 87 | /** Constructor */ | |
| 88 | public XMLParser() { | |
| 89 | 130 | super(); |
| 90 | 130 | } |
| 91 | ||
/**
 * Creates a parser bound to the given HAPI context.
 *
 * @param context the HAPI context, passed straight to the {@link Parser} constructor
 */
public XMLParser(HapiContext context) {
    super(context);
}
| 99 | ||
/**
 * Creates a parser that uses a custom model class factory.
 *
 * @param theFactory custom factory to use for model class lookup; passed straight to the
 *            {@link Parser} constructor
 */
public XMLParser(ModelClassFactory theFactory) {
    super(theFactory);
}
| 109 | ||
| 110 | /** | |
| 111 | * Returns a String representing the encoding of the given message, if the encoding is | |
| 112 | * recognized. For example if the given message appears to be encoded using HL7 2.x XML rules | |
| 113 | * then "XML" would be returned. If the encoding is not recognized then null is returned. That | |
| 114 | * this method returns a specific encoding does not guarantee that the message is correctly | |
| 115 | * encoded (e.g. well formed XML) - just that it is not encoded using any other encoding than | |
| 116 | * the one returned. Returns null if the encoding is not recognized. | |
| 117 | */ | |
| 118 | public String getEncoding(String message) { | |
| 119 | 190 | return EncodingDetector.isXmlEncoded(message) ? getDefaultEncoding() : null; |
| 120 | } | |
| 121 | ||
| 122 | /** | |
| 123 | * @return the preferred encoding of this Parser | |
| 124 | */ | |
| 125 | public String getDefaultEncoding() { | |
| 126 | 340 | return "XML"; |
| 127 | } | |
| 128 | ||
| 129 | /** | |
| 130 | * Sets the <i>keepAsOriginalNodes<i> | |
| 131 | * | |
| 132 | * The nodes whose names match the <i>keepAsOriginalNodes<i> will be kept as original, meaning | |
| 133 | * that no white space treaming will occur on them | |
| 134 | * | |
| 135 | * @param keepAsOriginalNodes of the nodes to be kept as original | |
| 136 | * @deprecated Use {@link ParserConfiguration#setXmlDisableWhitespaceTrimmingOnNodeNames(Set)} instead. That method works exactly the same as this one but has been renamed for a more clear meaning. | |
| 137 | */ | |
| 138 | @Deprecated() | |
| 139 | public void setKeepAsOriginalNodes(String[] keepAsOriginalNodes) { | |
| 140 | 0 | getParserConfiguration().setXmlDisableWhitespaceTrimmingOnNodeNames(keepAsOriginalNodes); |
| 141 | 0 | } |
| 142 | ||
| 143 | /** | |
| 144 | * Sets the <i>keepAsOriginalNodes<i> | |
| 145 | * | |
| 146 | * @deprecated Use {@link ParserConfiguration#getXmlDisableWhitespaceTrimmingOnNodeNames()} instead | |
| 147 | */ | |
| 148 | @Deprecated | |
| 149 | public String[] getKeepAsOriginalNodes() { | |
| 150 | 0 | return getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().toArray(new String[getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().size()]); |
| 151 | } | |
| 152 | ||
| 153 | /** | |
| 154 | * <p> | |
| 155 | * Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 | |
| 156 | * message. | |
| 157 | * </p> | |
| 158 | * <p> | |
| 159 | * The easiest way to implement this method for a particular message structure is as follows: | |
| 160 | * <ol> | |
| 161 | * <li>Create an instance of the Message type you are going to handle with your subclass of | |
| 162 | * XMLParser</li> | |
| 163 | * <li>Go through the given Document and find the Elements that represent the top level of each | |
| 164 | * message segment.</li> | |
| 165 | * <li>For each of these segments, call | |
| 166 | * <code>parse(Segment segmentObject, Element segmentElement)</code>, providing the appropriate | |
| 167 | * Segment from your Message object, and the corresponding Element.</li> | |
| 168 | * </ol> | |
| 169 | * At the end of this process, your Message object should be populated with data from the XML | |
| 170 | * Document. | |
| 171 | * </p> | |
| 172 | * | |
| 173 | * @param xmlMessage DOM message object to be parsed | |
| 174 | * @param version HL7 version | |
| 175 | * @throws HL7Exception if the message is not correctly formatted. | |
| 176 | * @throws EncodingNotSupportedException if the message encoded is not supported by this parser. | |
| 177 | */ | |
| 178 | public abstract Message parseDocument(Document xmlMessage, String version) throws HL7Exception; | |
| 179 | ||
| 180 | /** | |
| 181 | * <p> | |
| 182 | * Parses a message string and returns the corresponding Message object. This method checks that | |
| 183 | * the given message string is XML encoded, creates an XML Document object (using Xerces) from | |
| 184 | * the given String, and calls the abstract method <code>parse(Document XMLMessage)</code> | |
| 185 | * </p> | |
| 186 | */ | |
| 187 | protected Message doParse(String message, String version) throws HL7Exception { | |
| 188 | Message m; | |
| 189 | ||
| 190 | // parse message string into a DOM document | |
| 191 | Document doc; | |
| 192 | 55 | doc = parseStringIntoDocument(message); |
| 193 | 55 | m = parseDocument(doc, version); |
| 194 | ||
| 195 | 55 | return m; |
| 196 | } | |
| 197 | ||
| 198 | /** | |
| 199 | * Parses a string containing an XML document into a Document object. | |
| 200 | * | |
| 201 | * Note that this method is synchronized currently, as the XML parser is not thread safe | |
| 202 | * | |
| 203 | * @throws HL7Exception | |
| 204 | */ | |
| 205 | protected synchronized Document parseStringIntoDocument(String message) throws HL7Exception { | |
| 206 | try { | |
| 207 | 70 | return XMLUtils.parse(message); |
| 208 | 0 | } catch (Exception e) { |
| 209 | 0 | throw new HL7Exception("Exception parsing XML", e); |
| 210 | } | |
| 211 | } | |
| 212 | ||
| 213 | /** | |
| 214 | * Formats a Message object into an HL7 message string using the given encoding. | |
| 215 | * | |
| 216 | * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required | |
| 217 | * fields are null) | |
| 218 | * @throws EncodingNotSupportedException if the requested encoding is not supported by this | |
| 219 | * parser. | |
| 220 | */ | |
| 221 | protected String doEncode(Message source, String encoding) throws HL7Exception { | |
| 222 | 0 | if (!encoding.equals("XML")) |
| 223 | 0 | throw new EncodingNotSupportedException("XMLParser supports only XML encoding"); |
| 224 | 0 | return encode(source); |
| 225 | } | |
| 226 | ||
| 227 | /** | |
| 228 | * Formats a Message object into an HL7 message string using this parser's default encoding (XML | |
| 229 | * encoding). This method calls the abstract method <code>encodeDocument(...)</code> in order to | |
| 230 | * obtain XML Document object representation of the Message, then serializes it to a String. | |
| 231 | * | |
| 232 | * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required | |
| 233 | * fields are null) | |
| 234 | */ | |
| 235 | protected String doEncode(Message source) throws HL7Exception { | |
| 236 | 125 | if (source instanceof GenericMessage) { |
| 237 | 0 | throw new HL7Exception( |
| 238 | "Can't XML-encode a GenericMessage. Message must have a recognized structure."); | |
| 239 | } | |
| 240 | ||
| 241 | 125 | Document doc = encodeDocument(source); |
| 242 | // Element documentElement = doc.getDocumentElement(); | |
| 243 | // if (!documentElement.hasAttribute("xmlns")) | |
| 244 | // documentElement.setAttribute("xmlns", "urn:hl7-org:v2xml"); | |
| 245 | try { | |
| 246 | 125 | return XMLUtils.serialize(doc, getParserConfiguration().isPrettyPrintWhenEncodingXml()); |
| 247 | 0 | } catch (Exception e) { |
| 248 | 0 | throw new HL7Exception("Exception serializing XML document to string", e); |
| 249 | } | |
| 250 | } | |
| 251 | ||
| 252 | /** | |
| 253 | * <p> | |
| 254 | * Creates an XML Document that corresponds to the given Message object. | |
| 255 | * </p> | |
| 256 | * <p> | |
| 257 | * If you are implementing this method, you should create an XML Document, and insert XML | |
| 258 | * Elements into it that correspond to the groups and segments that belong to the message type | |
| 259 | * that your subclass of XMLParser supports. Then, for each segment in the message, call the | |
| 260 | * method <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element | |
| 261 | * for that segment and the corresponding Segment object from the given Message. | |
| 262 | * </p> | |
| 263 | * | |
| 264 | * @param source message | |
| 265 | * @return the DOM document object of the encoded message | |
| 266 | */ | |
| 267 | public abstract Document encodeDocument(Message source) throws HL7Exception; | |
| 268 | ||
| 269 | ||
| 270 | protected void assertNamespaceURI(String ns) throws HL7Exception { | |
| 271 | 2205 | if (!NS.equals(ns)) { |
| 272 | 0 | throw new HL7Exception("Namespace URI must be " + NS); |
| 273 | } | |
| 274 | 2205 | } |
| 275 | ||
| 276 | /** | |
| 277 | * Populates the given Segment object with data from the given XML Element. | |
| 278 | * | |
| 279 | * @param segmentObject the segment to parse into | |
| 280 | * @param segmentElement the DOM element to be parsed | |
| 281 | * @throws HL7Exception if the XML Element does not have the correct name and structure for the | |
| 282 | * given Segment, or if there is an error while setting individual field values. | |
| 283 | */ | |
| 284 | public void parse(Segment segmentObject, Element segmentElement) throws HL7Exception { | |
| 285 | 230 | Set<String> done = new HashSet<String>(); |
| 286 | ||
| 287 | 230 | NodeList all = segmentElement.getChildNodes(); |
| 288 | 3550 | for (int i = 0; i < all.getLength(); i++) { |
| 289 | 3320 | String elementName = all.item(i).getNodeName(); |
| 290 | ||
| 291 | 3320 | if (all.item(i).getNodeType() == Node.ELEMENT_NODE && !done.contains(elementName)) { |
| 292 | 1510 | assertNamespaceURI(all.item(i).getNamespaceURI()); |
| 293 | 1510 | done.add(elementName); |
| 294 | ||
| 295 | 1510 | int index = elementName.indexOf('.'); |
| 296 | 1510 | if (index >= 0 && elementName.length() > index) { // properly formatted element |
| 297 | 1510 | String fieldNumString = elementName.substring(index + 1); |
| 298 | 1510 | int fieldNum = Integer.parseInt(fieldNumString); |
| 299 | 1510 | parseReps(segmentObject, segmentElement, elementName, fieldNum); |
| 300 | 1510 | } else { |
| 301 | 0 | log.debug("Child of segment {} doesn't look like a field {}", |
| 302 | 0 | segmentObject.getName(), elementName); |
| 303 | } | |
| 304 | } | |
| 305 | } | |
| 306 | ||
| 307 | // set data type of OBX-5 | |
| 308 | 230 | if (segmentObject.getClass().getName().contains("OBX")) { |
| 309 | 25 | FixFieldDataType.fixOBX5(segmentObject, getFactory(), getHapiContext().getParserConfiguration()); |
| 310 | } | |
| 311 | // set data type of MFE-4 | |
| 312 | 230 | if (segmentObject.getClass().getName().contains("MFE") && |
| 313 | 0 | Version.versionOf(segmentObject.getMessage().getVersion()).isGreaterThan(Version.V23)) { |
| 314 | 0 | FixFieldDataType.fixMFE4(segmentObject, getFactory(), getHapiContext().getParserConfiguration()); |
| 315 | } | |
| 316 | 230 | } |
| 317 | ||
| 318 | private void parseReps(Segment segmentObject, Element segmentElement, String fieldName, | |
| 319 | int fieldNum) throws HL7Exception { | |
| 320 | ||
| 321 | 1510 | NodeList reps = segmentElement.getElementsByTagName(fieldName); |
| 322 | 3040 | for (int i = 0; i < reps.getLength(); i++) { |
| 323 | 1530 | parse(segmentObject.getField(fieldNum, i), (Element) reps.item(i)); |
| 324 | } | |
| 325 | 1510 | } |
| 326 | ||
| 327 | /** | |
| 328 | * Populates the given Element with data from the given Segment, by inserting Elements | |
| 329 | * corresponding to the Segment's fields, their components, etc. Returns true if there is at | |
| 330 | * least one data value in the segment. | |
| 331 | * | |
| 332 | * @param segmentObject the segment to be encoded | |
| 333 | * @param segmentElement the DOM element to encode into | |
| 334 | * @return true if there is at least one data value in the segment | |
| 335 | * @throws HL7Exception if an erro occurred while encoding | |
| 336 | */ | |
| 337 | public boolean encode(Segment segmentObject, Element segmentElement) throws HL7Exception { | |
| 338 | 395 | boolean hasValue = false; |
| 339 | 395 | int n = segmentObject.numFields(); |
| 340 | 9560 | for (int i = 1; i <= n; i++) { |
| 341 | 9165 | String name = makeElementName(segmentObject, i); |
| 342 | 9165 | Type[] reps = segmentObject.getField(i); |
| 343 | 13690 | for (Type rep : reps) { |
| 344 | 4525 | Element newNode = segmentElement.getOwnerDocument().createElement(name); |
| 345 | 4525 | boolean componentHasValue = encode(rep, newNode); |
| 346 | 4525 | if (componentHasValue) { |
| 347 | try { | |
| 348 | 1885 | segmentElement.appendChild(newNode); |
| 349 | 0 | } catch (DOMException e) { |
| 350 | 0 | throw new HL7Exception("DOMException encoding Segment: ", e); |
| 351 | 1885 | } |
| 352 | 1885 | hasValue = true; |
| 353 | } | |
| 354 | } | |
| 355 | } | |
| 356 | 395 | return hasValue; |
| 357 | } | |
| 358 | ||
| 359 | /** | |
| 360 | * Populates the given Type object with data from the given XML Element. | |
| 361 | * | |
| 362 | * @param datatypeObject the type to parse into | |
| 363 | * @param datatypeElement the DOM element to be parsed | |
| 364 | * @throws DataTypeException if the data did not match the expected type rules | |
| 365 | */ | |
| 366 | public void parse(Type datatypeObject, Element datatypeElement) throws HL7Exception { | |
| 367 | 3715 | if (datatypeObject instanceof Varies) { |
| 368 | 75 | parseVaries((Varies) datatypeObject, datatypeElement); |
| 369 | 3640 | } else if (datatypeObject instanceof Primitive) { |
| 370 | 2430 | parsePrimitive((Primitive) datatypeObject, datatypeElement); |
| 371 | 1210 | } else if (datatypeObject instanceof Composite) { |
| 372 | 1210 | parseComposite((Composite) datatypeObject, datatypeElement); |
| 373 | } | |
| 374 | 3715 | } |
| 375 | ||
| 376 | /** | |
| 377 | * Parses an XML element into a Varies by determining whether the element is primitive or | |
| 378 | * composite, calling setData() on the Varies with a new generic primitive or composite as | |
| 379 | * appropriate, and then calling parse again with the new Type object. | |
| 380 | */ | |
| 381 | private void parseVaries(Varies datatypeObject, Element datatypeElement) | |
| 382 | throws HL7Exception { | |
| 383 | // figure out what data type it holds | |
| 384 | // short nodeType = datatypeElement.getFirstChild().getNodeType(); | |
| 385 | 75 | if (!hasChildElement(datatypeElement)) { |
| 386 | // it's a primitive | |
| 387 | 65 | datatypeObject.setData(new GenericPrimitive(datatypeObject.getMessage())); |
| 388 | } else { | |
| 389 | // it's a composite ... almost know what type, except that we don't have the version | |
| 390 | // here | |
| 391 | 10 | datatypeObject.setData(new GenericComposite(datatypeObject.getMessage())); |
| 392 | } | |
| 393 | 75 | parse(datatypeObject.getData(), datatypeElement); |
| 394 | 75 | } |
| 395 | ||
| 396 | /** Returns true if any of the given element's children are (non-escape) elements */ | |
| 397 | private boolean hasChildElement(Element e) { | |
| 398 | 75 | NodeList children = e.getChildNodes(); |
| 399 | 75 | boolean hasElement = false; |
| 400 | 75 | int c = 0; |
| 401 | 180 | while (c < children.getLength() && !hasElement) { |
| 402 | 105 | if (children.item(c).getNodeType() == Node.ELEMENT_NODE |
| 403 | 25 | && !ESCAPE_NODENAME.equals(children.item(c).getNodeName())) { |
| 404 | 10 | hasElement = true; |
| 405 | } | |
| 406 | 105 | c++; |
| 407 | } | |
| 408 | 75 | return hasElement; |
| 409 | } | |
| 410 | ||
| 411 | /** | |
| 412 | * Parses a primitive type by filling it with text child, if any. If the datatype element | |
| 413 | * contains escape elements, resolve them properly. | |
| 414 | */ | |
| 415 | private void parsePrimitive(Primitive datatypeObject, Element datatypeElement) | |
| 416 | throws HL7Exception { | |
| 417 | 2430 | NodeList children = datatypeElement.getChildNodes(); |
| 418 | 2430 | StringBuilder builder = new StringBuilder(); |
| 419 | 4890 | for (int c = 0; c < children.getLength(); c++) { |
| 420 | 2460 | Node child = children.item(c); |
| 421 | try { | |
| 422 | 2460 | if (child.getNodeType() == Node.TEXT_NODE) { |
| 423 | 2440 | String value = child.getNodeValue(); |
| 424 | 2440 | if (value != null && value.length() > 0) { |
| 425 | 2440 | if (keepAsOriginal(child.getParentNode())) { |
| 426 | 0 | builder.append(value); |
| 427 | } else { | |
| 428 | 2440 | builder.append(removeWhitespace(value)); |
| 429 | } | |
| 430 | } | |
| 431 | // Check for formatting elements | |
| 432 | 2440 | } else if (child.getNodeType() == Node.ELEMENT_NODE |
| 433 | 20 | && ESCAPE_NODENAME.equals(child.getLocalName())) { |
| 434 | 15 | assertNamespaceURI(child.getNamespaceURI()); |
| 435 | 30 | EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject |
| 436 | 15 | .getMessage()); |
| 437 | 15 | Element elem = (Element) child; |
| 438 | 15 | String attr = elem.getAttribute(ESCAPE_ATTRNAME).trim(); |
| 439 | 15 | if (attr.length() > 0) { |
| 440 | 15 | builder.append(ec.getEscapeCharacter()).append(attr) |
| 441 | 15 | .append(ec.getEscapeCharacter()); |
| 442 | } | |
| 443 | } | |
| 444 | 0 | } catch (Exception e) { |
| 445 | 0 | log.error("Error parsing primitive value from TEXT_NODE", e); |
| 446 | 2460 | } |
| 447 | ||
| 448 | } | |
| 449 | 2430 | datatypeObject.setValue(builder.toString()); |
| 450 | 2430 | } |
| 451 | ||
| 452 | /** | |
| 453 | * Checks if <code>Node</code> content should be kept as original (ie.: whitespaces won't be | |
| 454 | * removed) | |
| 455 | * | |
| 456 | * @param node The target <code>Node</code> | |
| 457 | * @return boolean <code>true</code> if whitespaces should not be removed from node content, | |
| 458 | * <code>false</code> otherwise | |
| 459 | */ | |
| 460 | protected boolean keepAsOriginal(Node node) { | |
| 461 | 2440 | if (getParserConfiguration().isXmlDisableWhitespaceTrimmingOnAllNodes()) { |
| 462 | 0 | return true; |
| 463 | } | |
| 464 | 2440 | return (node.getNodeName() != null) && getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().contains(node.getNodeName()); |
| 465 | } | |
| 466 | ||
| 467 | /** | |
| 468 | * Removes all unnecessary whitespace from the given String (intended to be used with Primitive | |
| 469 | * values). This includes leading and trailing whitespace, and repeated space characters. | |
| 470 | * Carriage returns, line feeds, and tabs are replaced with spaces. | |
| 471 | */ | |
| 472 | protected String removeWhitespace(String s) { | |
| 473 | ||
| 474 | 2450 | s = s.replace('\r', ' '); |
| 475 | 2450 | s = s.replace('\n', ' '); |
| 476 | 2450 | s = s.replace('\t', ' '); |
| 477 | ||
| 478 | 2450 | boolean repeatedSpacesExist = true; |
| 479 | 8695 | while (repeatedSpacesExist) { |
| 480 | 6245 | int loc = s.indexOf(" "); |
| 481 | 6245 | if (loc < 0) { |
| 482 | 2450 | repeatedSpacesExist = false; |
| 483 | } else { | |
| 484 | 3795 | StringBuilder buf = new StringBuilder(); |
| 485 | 3795 | buf.append(s.substring(0, loc)); |
| 486 | 3795 | buf.append(" "); |
| 487 | 3795 | buf.append(s.substring(loc + 2)); |
| 488 | 3795 | s = buf.toString(); |
| 489 | } | |
| 490 | 6245 | } |
| 491 | 2450 | return s.trim(); |
| 492 | } | |
| 493 | ||
| 494 | /** | |
| 495 | * Populates a Composite type by looping through it's children, finding corresponding Elements | |
| 496 | * among the children of the given Element, and calling parse(Type, Element) for each. | |
| 497 | */ | |
| 498 | private void parseComposite(Composite datatypeObject, Element datatypeElement) | |
| 499 | throws HL7Exception { | |
| 500 | 1210 | if (datatypeObject instanceof GenericComposite) { // elements won't be named |
| 501 | // GenericComposite.x | |
| 502 | 10 | NodeList children = datatypeElement.getChildNodes(); |
| 503 | 10 | int compNum = 0; |
| 504 | 90 | for (int i = 0; i < children.getLength(); i++) { |
| 505 | 80 | if (children.item(i).getNodeType() == Node.ELEMENT_NODE) { |
| 506 | 35 | Element nextElement = (Element) children.item(i); |
| 507 | 35 | assertNamespaceURI(nextElement.getNamespaceURI()); |
| 508 | 35 | String localName = nextElement.getLocalName(); |
| 509 | 35 | int dotIndex = localName.indexOf("."); |
| 510 | 35 | if (dotIndex > -1) { |
| 511 | 35 | compNum = Integer.parseInt(localName.substring(dotIndex + 1)) - 1; |
| 512 | } else { | |
| 513 | 0 | log.debug( |
| 514 | "Datatype element {} doesn't have a valid numbered name, usgin default index of {}", | |
| 515 | 0 | datatypeElement.getLocalName(), compNum); |
| 516 | } | |
| 517 | 35 | Type nextComponent = datatypeObject.getComponent(compNum); |
| 518 | 35 | parse(nextComponent, nextElement); |
| 519 | 35 | compNum++; |
| 520 | } | |
| 521 | } | |
| 522 | 10 | } else { |
| 523 | 1200 | Type[] children = datatypeObject.getComponents(); |
| 524 | 8835 | for (int i = 0; i < children.length; i++) { |
| 525 | 7635 | NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName( |
| 526 | datatypeObject, i + 1)); | |
| 527 | 7635 | if (matchingElements.getLength() > 0) { |
| 528 | 2065 | parse(children[i], (Element) matchingElements.item(0)); |
| 529 | } | |
| 530 | } | |
| 531 | ||
| 532 | 1200 | int nextExtraCmpIndex = 0; |
| 533 | boolean foundExtraComponent; | |
| 534 | do { | |
| 535 | 1210 | foundExtraComponent = false; |
| 536 | 1210 | NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName(datatypeObject, children.length + nextExtraCmpIndex + 1)); |
| 537 | 1210 | if (matchingElements.getLength() > 0) { |
| 538 | 10 | parse(datatypeObject.getExtraComponents().getComponent(nextExtraCmpIndex), (Element) matchingElements.item(0)); |
| 539 | 10 | foundExtraComponent = true; |
| 540 | } | |
| 541 | 1210 | nextExtraCmpIndex++; |
| 542 | 1210 | } while (foundExtraComponent); |
| 543 | ||
| 544 | ||
| 545 | } | |
| 546 | 1210 | } |
| 547 | ||
| 548 | /** Returns the expected XML element name for the given child of the given Segment */ | |
| 549 | private String makeElementName(Segment s, int child) { | |
| 550 | 9165 | return s.getName() + "." + child; |
| 551 | } | |
| 552 | ||
| 553 | /** Returns the expected XML element name for the given child of the given Composite */ | |
| 554 | private String makeElementName(Composite composite, int child) { | |
| 555 | 42960 | return composite.getName() + "." + child; |
| 556 | } | |
| 557 | ||
| 558 | /** | |
| 559 | * Populates the given Element with data from the given Type, by inserting Elements | |
| 560 | * corresponding to the Type's components and values. Returns true if the given type contains a | |
| 561 | * value (i.e. for Primitives, if getValue() doesn't return null, and for Composites, if at | |
| 562 | * least one underlying Primitive doesn't return null). | |
| 563 | */ | |
| 564 | private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException { | |
| 565 | 38680 | boolean hasData = false; |
| 566 | 38680 | if (datatypeObject instanceof Varies) { |
| 567 | 40 | hasData = encodeVaries((Varies) datatypeObject, datatypeElement); |
| 568 | 38640 | } else if (datatypeObject instanceof Primitive) { |
| 569 | 32035 | hasData = encodePrimitive((Primitive) datatypeObject, datatypeElement); |
| 570 | 6605 | } else if (datatypeObject instanceof Composite) { |
| 571 | 6605 | hasData = encodeComposite((Composite) datatypeObject, datatypeElement); |
| 572 | } | |
| 573 | 38680 | return hasData; |
| 574 | } | |
| 575 | ||
| 576 | /** | |
| 577 | * Encodes a Varies type by extracting it's data field and encoding that. Returns true if the | |
| 578 | * data field (or one of its components) contains a value. | |
| 579 | */ | |
| 580 | private boolean encodeVaries(Varies datatypeObject, Element datatypeElement) | |
| 581 | throws DataTypeException { | |
| 582 | 40 | boolean hasData = false; |
| 583 | 40 | if (datatypeObject.getData() != null) { |
| 584 | 40 | hasData = encode(datatypeObject.getData(), datatypeElement); |
| 585 | } | |
| 586 | 40 | return hasData; |
| 587 | } | |
| 588 | ||
| 589 | /** | |
| 590 | * Encodes a Primitive in XML by adding it's value as a child of the given Element. Detects | |
| 591 | * escape character and creates proper <escape> elements in the DOM tree. Returns true if the | |
| 592 | * given Primitive contains a value. | |
| 593 | */ | |
| 594 | private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement) | |
| 595 | throws DataTypeException { | |
| 596 | 32035 | String value = datatypeObject.getValue(); |
| 597 | 32035 | boolean hasValue = (value != null && value.length() > 0); |
| 598 | 32035 | if (hasValue) { |
| 599 | try { | |
| 600 | 2410 | EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject.getMessage()); |
| 601 | 2410 | char esc = ec.getEscapeCharacter(); |
| 602 | int pos; | |
| 603 | 2410 | int oldpos = 0; |
| 604 | 2410 | boolean escaping = false; |
| 605 | ||
| 606 | // Find next escape character | |
| 607 | 2685 | while ((pos = value.indexOf(esc, oldpos)) >= 0) { |
| 608 | ||
| 609 | // string until next escape character | |
| 610 | 275 | String v = value.substring(oldpos, pos); |
| 611 | 275 | if (!escaping) { |
| 612 | // currently in "text mode", so create textnode from it | |
| 613 | 190 | if (v.length() > 0) |
| 614 | 360 | datatypeElement.appendChild(datatypeElement.getOwnerDocument() |
| 615 | 180 | .createTextNode(v)); |
| 616 | 190 | escaping = true; |
| 617 | } else { | |
| 618 | 85 | if (v.startsWith(".") || "H".equals(v) || "N".equals(v)) { |
| 619 | // currently in "escape mode", so create escape element from it | |
| 620 | 45 | Element escape = datatypeElement.getOwnerDocument().createElement( |
| 621 | ESCAPE_NODENAME); | |
| 622 | 45 | escape.setAttribute(ESCAPE_ATTRNAME, v); |
| 623 | 45 | datatypeElement.appendChild(escape); |
| 624 | 45 | escaping = false; |
| 625 | 45 | } else { |
| 626 | // no proper escape sequence, assume text | |
| 627 | 80 | datatypeElement.appendChild(datatypeElement.getOwnerDocument() |
| 628 | 40 | .createTextNode(esc + v)); |
| 629 | } | |
| 630 | } | |
| 631 | 275 | oldpos = pos + 1; |
| 632 | 275 | } |
| 633 | // create text from the remainder | |
| 634 | 2410 | if (oldpos <= value.length()) { |
| 635 | ||
| 636 | 2410 | StringBuilder sb = new StringBuilder(); |
| 637 | // If we are in escaping mode, there appears no closing escape character, | |
| 638 | // so we treat the string as text | |
| 639 | 2410 | if (escaping) |
| 640 | 145 | sb.append(esc); |
| 641 | ||
| 642 | 2410 | sb.append(value.substring(oldpos)); |
| 643 | 4820 | datatypeElement.appendChild(datatypeElement.getOwnerDocument().createTextNode( |
| 644 | 2410 | sb.toString())); |
| 645 | } | |
| 646 | ||
| 647 | 0 | } catch (Exception e) { |
| 648 | 0 | throw new DataTypeException("Exception encoding Primitive: ", e); |
| 649 | 2410 | } |
| 650 | ||
| 651 | } | |
| 652 | 32035 | return hasValue; |
| 653 | } | |
| 654 | ||
| 655 | /** | |
| 656 | * Encodes a Composite in XML by looping through it's components, creating new children for each | |
| 657 | * of them (with the appropriate names) and populating them by calling encode(Type, Element) | |
| 658 | * using these children. Returns true if at least one component contains a value. | |
| 659 | */ | |
| 660 | private boolean encodeComposite(Composite datatypeObject, Element datatypeElement) | |
| 661 | throws DataTypeException { | |
| 662 | 6605 | Type[] components = datatypeObject.getComponents(); |
| 663 | 6605 | boolean hasValue = false; |
| 664 | 40720 | for (int i = 0; i < components.length; i++) { |
| 665 | 34115 | String name = makeElementName(datatypeObject, i + 1); |
| 666 | 34115 | Element newNode = datatypeElement.getOwnerDocument().createElement(name); |
| 667 | 34115 | boolean componentHasValue = encode(components[i], newNode); |
| 668 | 34115 | if (componentHasValue) { |
| 669 | try { | |
| 670 | 1560 | datatypeElement.appendChild(newNode); |
| 671 | 0 | } catch (DOMException e) { |
| 672 | 0 | throw new DataTypeException("DOMException encoding Composite: ", e); |
| 673 | 1560 | } |
| 674 | 1560 | hasValue = true; |
| 675 | } | |
| 676 | } | |
| 677 | 6605 | return hasValue; |
| 678 | } | |
| 679 | ||
| 680 | /** | |
| 681 | * <p> | |
| 682 | * Returns a minimal amount of data from a message string, including only the data needed to | |
| 683 | * send a response to the remote system. This includes the following fields: | |
| 684 | * <ul> | |
| 685 | * <li>field separator</li> | |
| 686 | * <li>encoding characters</li> | |
| 687 | * <li>processing ID</li> | |
| 688 | * <li>message control ID</li> | |
| 689 | * </ul> | |
| 690 | * This method is intended for use when there is an error parsing a message, (so the Message | |
| 691 | * object is unavailable) but an error message must be sent back to the remote system including | |
| 692 | * some of the information in the inbound message. This method parses only that required | |
| 693 | * information, hopefully avoiding the condition that caused the original error. | |
| 694 | * </p> | |
| 695 | */ | |
| 696 | public Segment getCriticalResponseData(String message) throws HL7Exception { | |
| 697 | 5 | String version = getVersion(message); |
| 698 | 5 | Segment criticalData = Parser.makeControlMSH(version, getFactory()); |
| 699 | ||
| 700 | 5 | Terser.set(criticalData, 1, 0, 1, 1, parseLeaf(message, "MSH.1", 0)); |
| 701 | 5 | Terser.set(criticalData, 2, 0, 1, 1, parseLeaf(message, "MSH.2", 0)); |
| 702 | 5 | Terser.set(criticalData, 10, 0, 1, 1, parseLeaf(message, "MSH.10", 0)); |
| 703 | 5 | String procID = parseLeaf(message, "MSH.11", 0); |
| 704 | 5 | if (procID == null || procID.length() == 0) { |
| 705 | 0 | procID = parseLeaf(message, "PT.1", message.indexOf("MSH.11")); |
| 706 | // this field is a composite in later versions | |
| 707 | } | |
| 708 | 5 | Terser.set(criticalData, 11, 0, 1, 1, procID); |
| 709 | ||
| 710 | 5 | return criticalData; |
| 711 | } | |
| 712 | ||
| 713 | /** | |
| 714 | * For response messages, returns the value of MSA-2 (the message ID of the message sent by the | |
| 715 | * sending system). This value may be needed prior to main message parsing, so that | |
| 716 | * (particularly in a multi-threaded scenario) the message can be routed to the thread that sent | |
| 717 | * the request. We need this information first so that any parse exceptions are thrown to the | |
| 718 | * correct thread. Implementers of Parsers should take care to make the implementation of this | |
| 719 | * method very fast and robust. Returns null if MSA-2 can not be found (e.g. if the message is | |
| 720 | * not a response message). Trims whitespace from around the MSA-2 field. | |
| 721 | */ | |
| 722 | public String getAckID(String message) { | |
| 723 | 35 | String ackID = null; |
| 724 | try { | |
| 725 | 35 | ackID = parseLeaf(message, "msa.2", 0).trim(); |
| 726 | 15 | } catch (HL7Exception e) { /* OK ... assume it isn't a response message */ |
| 727 | 20 | } |
| 728 | 35 | return ackID; |
| 729 | } | |
| 730 | ||
| 731 | public String getVersion(String message) throws HL7Exception { | |
| 732 | 65 | String version = parseLeaf(message, "MSH.12", 0); |
| 733 | 65 | if (version == null || version.trim().length() == 0) { |
| 734 | 50 | version = parseLeaf(message, "VID.1", message.indexOf("MSH.12")); |
| 735 | } | |
| 736 | 65 | return version; |
| 737 | } | |
| 738 | ||
| 739 | /** | |
| 740 | * Attempts to retrieve the value of a leaf tag without using DOM or SAX. This method searches | |
| 741 | * the given message string for the given tag name, and returns everything after the given tag | |
| 742 | * and before the start of the next tag. Whitespace is not stripped. This is intended only for leaf | |
| 743 | * nodes, as the value is considered to end at the start of the next tag, regardless of whether | |
| 744 | * it is the matching end tag or some other nested tag. | |
| 745 | * | |
| 746 | * @param message a string message in XML form | |
| 747 | * @param tagName the name of the XML tag, e.g. "MSA.2" | |
| 748 | * @param startAt the character location at which to start searching | |
| 749 | * @throws HL7Exception if the tag can not be found | |
| 750 | */ | |
| 751 | protected static String parseLeaf(String message, String tagName, int startAt) throws HL7Exception { | |
| 752 | ||
| 753 | // Workaround #176: XML may include explicit namespaces. It would be more stable to use some | |
| 754 | // kind of pull parser for this method instead of manually digging for tags in the XML structure. | |
| 755 | 170 | String prefix = ""; |
| 756 | 170 | Matcher m = NS_PATTERN.matcher(message); |
| 757 | 170 | if (m.find()) { |
| 758 | 130 | String ns = m.group(1); |
| 759 | 130 | if (ns != null && ns.length() > 0) { |
| 760 | 5 | prefix = ns.substring(1) + ":"; |
| 761 | } | |
| 762 | } | |
| 763 | ||
| 764 | 170 | int tagStart = message.indexOf("<" + prefix + tagName, startAt); |
| 765 | 170 | if (tagStart < 0) |
| 766 | 25 | tagStart = message.indexOf("<" + prefix + tagName.toUpperCase(), startAt); |
| 767 | 170 | int valStart = message.indexOf(">", tagStart) + 1; |
| 768 | 170 | int valEnd = message.indexOf("<", valStart); |
| 769 | ||
| 770 | String value; | |
| 771 | 170 | if (tagStart >= 0 && valEnd >= valStart) { |
| 772 | 155 | value = message.substring(valStart, valEnd); |
| 773 | } else { | |
| 774 | 15 | throw new HL7Exception("Couldn't find " + tagName + " in message beginning: " |
| 775 | 15 | + message.substring(0, Math.min(150, message.length())), |
| 776 | ErrorCode.REQUIRED_FIELD_MISSING); | |
| 777 | } | |
| 778 | ||
| 779 | // Escape codes, as defined at http://hdf.ncsa.uiuc.edu/HDF5/XML/xml_escape_chars.htm | |
| 780 | 155 | value = value.replaceAll(""", "\""); |
| 781 | 155 | value = value.replaceAll("'", "'"); |
| 782 | 155 | value = value.replaceAll("&", "&"); |
| 783 | 155 | value = value.replaceAll("<", "<"); |
| 784 | 155 | value = value.replaceAll(">", ">"); |
| 785 | ||
| 786 | 155 | return value; |
| 787 | } | |
| 788 | ||
| 789 | /** | |
| 790 | * Throws unsupported operation exception | |
| 791 | * | |
| 792 | * @throws UnsupportedOperationException | |
| 793 | */ | |
| 794 | @Override | |
| 795 | public String doEncode(Segment structure, EncodingCharacters encodingCharacters) | |
| 796 | throws HL7Exception { | |
| 797 | 0 | throw new UnsupportedOperationException("Not supported yet."); |
| 798 | } | |
| 799 | ||
| 800 | /** | |
| 801 | * Throws unsupported operation exception | |
| 802 | * | |
| 803 | * @throws UnsupportedOperationException | |
| 804 | */ | |
| 805 | @Override | |
| 806 | protected Message doParseForSpecificPackage(String theMessage, String theVersion, | |
| 807 | String thePackageName) throws HL7Exception { | |
| 808 | 0 | throw new UnsupportedOperationException("Not supported yet."); |
| 809 | } | |
| 810 | ||
| 811 | /** | |
| 812 | * Throws unsupported operation exception | |
| 813 | * | |
| 814 | * @throws UnsupportedOperationException | |
| 815 | */ | |
| 816 | @Override | |
| 817 | public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception { | |
| 818 | 0 | throw new UnsupportedOperationException("Not supported yet."); |
| 819 | } | |
| 820 | ||
| 821 | /** | |
| 822 | * Throws unsupported operation exception | |
| 823 | * | |
| 824 | * @throws UnsupportedOperationException | |
| 825 | */ | |
| 826 | @Override | |
| 827 | public void parse(Type type, String string, EncodingCharacters encodingCharacters) | |
| 828 | throws HL7Exception { | |
| 829 | 0 | throw new UnsupportedOperationException("Not supported yet."); |
| 830 | } | |
| 831 | ||
| 832 | /** | |
| 833 | * Throws unsupported operation exception | |
| 834 | * | |
| 835 | * @throws UnsupportedOperationException | |
| 836 | */ | |
| 837 | @Override | |
| 838 | public void parse(Segment segment, String string, EncodingCharacters encodingCharacters) | |
| 839 | throws HL7Exception { | |
| 840 | 0 | throw new UnsupportedOperationException("Not supported yet."); |
| 841 | } | |
| 842 | ||
| 843 | /** | |
| 844 | * Returns the text encoding to be used in generating new messages. Note that this affects | |
| 845 | * encoding to string only, not parsing. | |
| 846 | * | |
| 847 | * @return text encoding | |
| 848 | */ | |
| 849 | public String getTextEncoding() { | |
| 850 | 0 | return textEncoding; |
| 851 | } | |
| 852 | ||
| 853 | /** | |
| 854 | * Sets the text encoding to be used in generating new messages. Note that this affects encoding | |
| 855 | * to string only, not parsing. | |
| 856 | * | |
| 857 | * @param textEncoding The encoding. Default is the platform default. | |
| 858 | */ | |
| 859 | public void setTextEncoding(String textEncoding) { | |
| 860 | 0 | this.textEncoding = textEncoding; |
| 861 | 0 | } |
| 862 | ||
| 863 | } |