| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| XML |
|
| 5.666666666666667;5.667 | ||||
| XML$HL7MessageHandler |
|
| 5.666666666666667;5.667 | ||||
| XML$StopParsingException |
|
| 5.666666666666667;5.667 |
| 1 | package ca.uhn.hl7v2.preparser; | |
| 2 | ||
| 3 | import java.io.IOException; | |
| 4 | import java.util.ArrayList; | |
| 5 | import java.util.Collection; | |
| 6 | import java.util.Iterator; | |
| 7 | import java.util.List; | |
| 8 | import java.util.Map; | |
| 9 | import java.util.Properties; | |
| 10 | import java.util.SortedMap; | |
| 11 | import java.util.TreeMap; | |
| 12 | ||
| 13 | import javax.xml.parsers.ParserConfigurationException; | |
| 14 | import javax.xml.parsers.SAXParser; | |
| 15 | import javax.xml.parsers.SAXParserFactory; | |
| 16 | ||
| 17 | import org.xml.sax.Attributes; | |
| 18 | import org.xml.sax.InputSource; | |
| 19 | import org.xml.sax.SAXException; | |
| 20 | import org.xml.sax.SAXParseException; | |
| 21 | import org.xml.sax.helpers.DefaultHandler; | |
| 22 | ||
| 23 | import ca.uhn.hl7v2.HL7Exception; | |
| 24 | ||
| 25 | 0 | public class XML |
| 26 | { | |
| 27 | @SuppressWarnings("serial") | |
| 28 | protected static class StopParsingException extends SAXException | |
| 29 | { | |
| 30 | public StopParsingException() | |
| 31 | { | |
| 32 | 0 | super("ca.uhn.hl7.....StopParsingException"); |
| 33 | 0 | } |
| 34 | } | |
| 35 | ||
| 36 | /** the SAXParser reports parsing events to an object of this class. | |
| 37 | We keep track of some parsing state, and the Properties object that | |
| 38 | we're supposed to write our data to. | |
| 39 | */ | |
| 40 | static protected class HL7MessageHandler extends DefaultHandler | |
| 41 | { | |
| 42 | /* m_props & m_msgMask should be set by the user of this handler before | |
| 43 | they pass this handler to SAXParser.parse() or whatever */ | |
| 44 | ||
| 45 | /** The data that is found while parsing, and which passes m_msgMask, | |
| 46 | will be dumped to m_props, as (DatumPath.toString() / text) key/value | |
| 47 | pairs */ | |
| 48 | 35 | public Properties m_props = null; |
| 49 | ||
| 50 | /** Specifies what parts of a message should be dumped to m_props. | |
| 51 | */ | |
| 52 | 35 | public Collection<DatumPath> m_msgMask = null; |
| 53 | ||
| 54 | /* All other fields are parser state. */ | |
| 55 | ||
| 56 | 35 | protected boolean m_startedDocument = false; |
| 57 | ||
| 58 | /* m_msgID / m_curPath together keep track of where we are in the document. | |
| 59 | ||
| 60 | If m_msgID.length() != 0, then we're within the message element. (We're only | |
| 61 | expecting one message per document.) Then m_msgID will be the name of the | |
| 62 | message. ("ACK" or whatever). | |
| 63 | ||
| 64 | m_curPath keeps track of where within the message we are. See notes at | |
| 65 | DatumPath class definition. If m_curPath.size() != 0, then we must be | |
| 66 | within a message. | |
| 67 | ||
| 68 | At any point in the code below: | |
| 69 | ||
| 70 | if m_msgID.length() == 0, | |
| 71 | then m_curPath.size() == 0 | |
| 72 | ||
| 73 | if m_curPath.size() != 0 | |
| 74 | then m_msgID.length() != 0 | |
| 75 | | |
| 76 | Note that our DatumPaths count indices starting from 0 (not 1) -- they're | |
| 77 | only converted to 1-based in the string representations that wind up | |
| 78 | as m_props keys. | |
| 79 | */ | |
| 80 | 35 | StringBuffer m_msgID = new StringBuffer(); |
| 81 | 35 | DatumPath m_curPath = new DatumPath(); |
| 82 | ||
| 83 | /* the location in the document of the last datum we dumped to m_props. */ | |
| 84 | 35 | DatumPath m_lastDumpedPath = new DatumPath(); |
| 85 | ||
| 86 | /** For handling repeat segments. segmentID (String) -> next repeat idx | |
| 87 | (Integer). So when we hit a segment ZYX, we'll know how many times we've | |
| 88 | hit a ZYX before, and set the segmentRepIdx part of m_curPath | |
| 89 | appropriately. */ | |
| 90 | 35 | SortedMap<String, Integer> m_segmentId2nextRepIdx = new TreeMap<String, Integer>(); |
| 91 | ||
| 92 | /* m_depthWithinUselessElement and m_depthWithinUsefulElement | |
| 93 | reflect what m_msgMask thinks about our location in the document at any | |
| 94 | given time. | |
| 95 | ||
| 96 | Both should always be >= -1. Note that both can be >= 0 at the same time | |
| 97 | -- explained in a minute.... | |
| 98 | ||
| 99 | If m_depthWithinUsefulElement >= 0, this means that we are however deep | |
| 100 | (in terms of nested elements: 0 => just within) within an area of the | |
| 101 | message that passes m_msgMask. We should dump whatever we find | |
| 102 | there to m_props. As we move around within such an element, we will still | |
| 103 | update m_curPath appropriately. | |
| 104 | ||
| 105 | If m_depthWithinUselessElement >= 0, we are however deep within an element | |
| 106 | which either made no sense (eg. <ZZZ.1> where we were expecting a <ZYX.1> | |
| 107 | -- a few other things maybe), or more importantly that we're within an | |
| 108 | element that otherwise has no hope of having any useful elements within it | |
| 109 | according to m_msgMask. (eg. m_msgMask says it wants only ZYX segment | |
| 110 | contents, we're in an <MSH>). So we can safely ignore all content within, | |
| 111 | and just keep track of how deep we are within this useless element (with | |
| 112 | m_depthWithinUselessElement, of course.) We don't update m_curPath when | |
| 113 | m_depthWithinUselessElement >= 0, there's no point and how would we | |
| 114 | extract information for the DatumPath out of nonsensical element names | |
| 115 | anyway. | |
| 116 | ||
| 117 | If they are both >= 0, this means that we've found some useless | |
| 118 | stuff (nonsensical element names?) within a known-useful element. | |
| 119 | */ | |
| 120 | 35 | int m_depthWithinUsefulElement = -1, m_depthWithinUselessElement = -1; |
| 121 | ||
| 122 | /* With this we keep the text that we've found within a certain element. | |
| 123 | It's cleared whenever we enter a (sub) element or leave an element. */ | |
| 124 | 35 | StringBuffer m_chars = new StringBuffer(10); |
| 125 | ||
/**
 * Creates a handler whose parser state has been reset via {@link #clear()}.
 * m_props and m_msgMask are not touched here and must still be assigned
 * by the caller before the handler is handed to a parser.
 */
public HL7MessageHandler() {
    clear();
}
| 130 | ||
| 131 | void clear() | |
| 132 | { | |
| 133 | // reset the state (m_props & m_msgMask are not state) | |
| 134 | 70 | m_startedDocument = false; |
| 135 | 70 | m_msgID.delete(0, m_msgID.length()); |
| 136 | 70 | m_curPath.clear(); |
| 137 | // will always be "less than" (according to DatumPath.numbersLessThan) | |
| 138 | // any sensible DatumPath: | |
| 139 | 70 | m_lastDumpedPath.clear().add(new String()).add(-42).add(-42).add(-42).add(-42).add(-42); |
| 140 | 70 | m_segmentId2nextRepIdx.clear(); |
| 141 | 70 | m_depthWithinUsefulElement = -1; |
| 142 | 70 | m_depthWithinUselessElement = -1; |
| 143 | 70 | m_chars.delete(0, m_chars.length()); |
| 144 | 70 | } |
| 145 | ||
| 146 | public void startDocument() throws SAXException | |
| 147 | { | |
| 148 | 35 | boolean ok = false; |
| 149 | 35 | if(!m_startedDocument && (m_props != null)) { |
| 150 | 35 | m_startedDocument = true; |
| 151 | 35 | ok = true; |
| 152 | } | |
| 153 | ||
| 154 | 35 | if(!ok) { |
| 155 | 0 | clear(); |
| 156 | 0 | throw new StopParsingException(); |
| 157 | } | |
| 158 | 35 | } |
| 159 | ||
| 160 | public void endDocument() throws SAXException | |
| 161 | { | |
| 162 | 35 | boolean ok = false; |
| 163 | 35 | if(m_startedDocument) { |
| 164 | 35 | this.clear(); |
| 165 | 35 | ok = true; |
| 166 | } | |
| 167 | ||
| 168 | 35 | if(!ok) { |
| 169 | 0 | clear(); |
| 170 | 0 | throw new StopParsingException(); |
| 171 | } | |
| 172 | 35 | } |
| 173 | ||
| 174 | public void startElement(String uri, String localName, String qName, | |
| 175 | Attributes attributes) throws SAXException | |
| 176 | { | |
| 177 | //System.err.println("startelem: " + qName + " curpathsize; " + | |
| 178 | //m_curPath.size()); | |
| 179 | 1375 | boolean ok = false; |
| 180 | 1375 | if(m_startedDocument) { |
| 181 | // A single unit of text data will be within a single element, | |
| 182 | // -- none of it will be in sub-elements and there will be no | |
| 183 | // sub-elements fragmenting the data text. | |
| 184 | // Right now we're entering a new element: this means that anything | |
| 185 | // in m_chars will be whitespace (likely), or text left over from, | |
| 186 | // say, the last field, or text that was somewhere it shouldn't have been. | |
| 187 | // (ex. "<ZYX.9> shouldn't be here <PT.1> P </PT.1> </ZYX.9>" | |
| 188 | 1375 | m_chars.delete(0, m_chars.length()); |
| 189 | ||
| 190 | 1375 | if(m_depthWithinUselessElement >= 0) { |
| 191 | 210 | ++m_depthWithinUselessElement; |
| 192 | } | |
| 193 | else { | |
| 194 | 1165 | int oldCurPathSize = m_curPath.size(); |
| 195 | 1165 | if(tryToGrowDocLocationFromElementName(m_msgID, m_curPath, |
| 196 | m_segmentId2nextRepIdx, m_lastDumpedPath, qName)) | |
| 197 | { | |
| 198 | 1060 | if(m_curPath.size() > oldCurPathSize) { |
| 199 | // assert (m_depthWithinUselessElement == -1) // m_curPath | |
| 200 | // should not have grown if we're within a useless element. | |
| 201 | 1025 | if(m_depthWithinUsefulElement == -1) { |
| 202 | // this new element could match one of the DatumPaths in | |
| 203 | // m_msgMask -- if that's the case, we've just entered a | |
| 204 | // useful element. | |
| 205 | // TODO: functional stylee (a la C++'s std::accumulate) ? | |
| 206 | 700 | boolean curPathStartsWithAMaskElem = false; |
| 207 | 700 | for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); |
| 208 | 1400 | !curPathStartsWithAMaskElem && maskIt.hasNext(); ) |
| 209 | { | |
| 210 | 700 | curPathStartsWithAMaskElem |
| 211 | 700 | = m_curPath.startsWith(maskIt.next()); |
| 212 | } | |
| 213 | ||
| 214 | 700 | if(curPathStartsWithAMaskElem) |
| 215 | 700 | m_depthWithinUsefulElement = 0; |
| 216 | else { | |
| 217 | // so this element we're entering is not specified by m_msgMask | |
| 218 | // to be useful -- but might it contains elements that | |
| 219 | // are? | |
| 220 | 0 | boolean aMaskElemStartsWithCurPath = false; |
| 221 | 0 | for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); |
| 222 | 0 | !aMaskElemStartsWithCurPath && maskIt.hasNext(); ) |
| 223 | { | |
| 224 | 0 | aMaskElemStartsWithCurPath |
| 225 | 0 | = maskIt.next().startsWith(m_curPath); |
| 226 | } | |
| 227 | ||
| 228 | 0 | if(!aMaskElemStartsWithCurPath) { |
| 229 | // ... nope! useless. | |
| 230 | 0 | m_depthWithinUselessElement = 0; |
| 231 | 0 | m_curPath.setSize(oldCurPathSize); |
| 232 | } // else => ok, carry on, m_depthWithinUse{less,ful}Element | |
| 233 | // still both -1. | |
| 234 | } | |
| 235 | 700 | } |
| 236 | // else => already within a useful element, don't need to compare | |
| 237 | // against m_msgMask. | |
| 238 | } | |
| 239 | } | |
| 240 | else | |
| 241 | 105 | m_depthWithinUselessElement = 0; |
| 242 | } | |
| 243 | 1375 | ok = true; |
| 244 | } | |
| 245 | ||
| 246 | 1375 | if(!ok) { |
| 247 | 0 | clear(); |
| 248 | 0 | throw new StopParsingException(); |
| 249 | } | |
| 250 | 1375 | } |
| 251 | ||
| 252 | /* doc location == msgID & curPath together. | |
| 253 | If we've encountered an element called "elementNam", then this tries | |
| 254 | to determine what it is, based on what we already know about the document. | |
| 255 | returns true if we can make sense of this new element name given the | |
| 256 | position we're at (represented by msgID / curPath), | |
| 257 | false if we can't (which probably means this should be a useless element). | |
| 258 | returning true doesn't mean that we actually changed msgID or curPath, it | |
| 259 | might mean that we just passed through a segment group element OK. | |
| 260 | */ | |
| 261 | protected static boolean tryToGrowDocLocationFromElementName( | |
| 262 | StringBuffer msgID /*in/out*/, DatumPath curPath /*in/out*/, | |
| 263 | Map<String, Integer> segmentId2nextRepIdx /*in/out*/, DatumPath lastDumpedPath /*in*/, | |
| 264 | String elementName /*in*/) | |
| 265 | { | |
| 266 | 1165 | boolean ok = false; // ok == can we make sense of this new element? |
| 267 | // hmm ... where are we in the document: | |
| 268 | 1165 | if((msgID.length() == 0) && (curPath.size() == 0)) { |
| 269 | // we're entering a message | |
| 270 | 35 | msgID.replace(0, msgID.length(), elementName); |
| 271 | 35 | segmentId2nextRepIdx.clear(); |
| 272 | 35 | ok = true; |
| 273 | } | |
| 274 | 1130 | else if((msgID.length() > 0) && (curPath.size() == 0)) { |
| 275 | // we're entering either a segment-group element (eg. <ADT_A01.PROCEDURE>) | |
| 276 | // or an actual segment element. | |
| 277 | 85 | if(!(elementName.startsWith("" + msgID + '.'))) { |
| 278 | // must be an actual segment. | |
| 279 | 85 | curPath.add(elementName); |
| 280 | ||
| 281 | 85 | if(segmentId2nextRepIdx.containsKey(elementName)) |
| 282 | 0 | curPath.add(segmentId2nextRepIdx.get(elementName)); |
| 283 | else | |
| 284 | 85 | curPath.add(new Integer(0)); |
| 285 | ||
| 286 | 85 | segmentId2nextRepIdx.put(elementName, ((Integer)curPath.get(curPath.size()-1)).intValue() + 1); |
| 287 | } | |
| 288 | 85 | ok = true; |
| 289 | } | |
| 290 | 1045 | else if((msgID.length() > 0) && (curPath.size() > 0)) { |
| 291 | // we're entering a field or a component or a subcomponent. | |
| 292 | 1045 | if(curPath.size() == 2) { // we're entering a field element |
| 293 | // all fields should start with segment-ID + '.' | |
| 294 | 525 | if(elementName.startsWith("" + curPath.get(0) + '.')) { |
| 295 | try { | |
| 296 | 525 | int fieldIdxFromElementName |
| 297 | 525 | = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1)); |
| 298 | ||
| 299 | 525 | curPath.add(new Integer(fieldIdxFromElementName)); |
| 300 | ||
| 301 | // now add the repetition idx to curPath: | |
| 302 | 525 | if((lastDumpedPath.size() >= 4) |
| 303 | 525 | && (((Integer)lastDumpedPath.get(2)).intValue() |
| 304 | == fieldIdxFromElementName)) | |
| 305 | { | |
| 306 | // lastDumpedPath has a fieldIdx and a fieldRepIdx. | |
| 307 | 0 | curPath.add(new Integer(((Integer)lastDumpedPath.get(3)).intValue() + 1)); |
| 308 | } | |
| 309 | else | |
| 310 | 525 | curPath.add(new Integer(0)); |
| 311 | ||
| 312 | 525 | ok = true; |
| 313 | 525 | } catch(NumberFormatException e) {} |
| 314 | } // else => this isn't a field -- must be useless. | |
| 315 | } | |
| 316 | 520 | else if((curPath.size() == 4) || (curPath.size() == 5)) { |
| 317 | // we're entering a component or subcomponent element | |
| 318 | try { | |
| 319 | 520 | int idxFromElementName |
| 320 | 520 | = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1)); |
| 321 | 415 | curPath.add(new Integer(idxFromElementName)); |
| 322 | 415 | ok = true; |
| 323 | 415 | } catch(NumberFormatException e) {} |
| 324 | } | |
| 325 | } | |
| 326 | 1165 | return ok; |
| 327 | } | |
| 328 | ||
| 329 | public void endElement(String uri, String localName, String qName) | |
| 330 | throws SAXException | |
| 331 | { | |
| 332 | //System.err.println("endElement: " + qName); | |
| 333 | 1375 | boolean ok = false; |
| 334 | 1375 | if(m_startedDocument) { |
| 335 | 1375 | if(m_depthWithinUselessElement >= 0) { |
| 336 | 315 | --m_depthWithinUselessElement; |
| 337 | 315 | ok = true; |
| 338 | } | |
| 339 | else { | |
| 340 | 1060 | if((m_msgID.length() > 0) && (m_curPath.size() == 0)) { |
| 341 | // we're exiting either a message element or a | |
| 342 | // segment group element. | |
| 343 | 35 | if((""+qName).compareTo(""+m_msgID) == 0) |
| 344 | 35 | m_msgID.delete(0, m_msgID.length()); // => exiting message element |
| 345 | // else => segment group element -- do nothing. | |
| 346 | ||
| 347 | 35 | ok = true; |
| 348 | } | |
| 349 | 1025 | else if((m_msgID.length() > 0) && (m_curPath.size() > 0)) { |
| 350 | 1025 | tryToDumpDataToProps(); |
| 351 | ||
| 352 | 1025 | if(m_curPath.size() == 2) { |
| 353 | // exiting a segment element | |
| 354 | 85 | m_curPath.setSize(0); |
| 355 | 85 | ok = true; |
| 356 | } | |
| 357 | 940 | else if(m_curPath.size() == 4) { |
| 358 | // exiting a field element | |
| 359 | 525 | m_curPath.setSize(2); |
| 360 | 525 | ok = true; |
| 361 | } | |
| 362 | 415 | else if((m_curPath.size() == 5) || (m_curPath.size() == 6)) { |
| 363 | // exiting a component or a subcomponent | |
| 364 | 415 | m_curPath.setSize(m_curPath.size() - 1); |
| 365 | 415 | ok = true; |
| 366 | } | |
| 367 | } | |
| 368 | ||
| 369 | 1060 | if(m_depthWithinUsefulElement >= 0) |
| 370 | 700 | --m_depthWithinUsefulElement; |
| 371 | } | |
| 372 | } | |
| 373 | ||
| 374 | 1375 | if(!ok) { |
| 375 | 0 | clear(); |
| 376 | 0 | throw new StopParsingException(); |
| 377 | } | |
| 378 | 1375 | } |
| 379 | ||
| 380 | /** try to dump whatever we've got in m_chars to m_props, | |
| 381 | with a key of m_curPath.toString(). | |
| 382 | */ | |
| 383 | protected void tryToDumpDataToProps() | |
| 384 | { | |
| 385 | 1025 | if((m_curPath.size() >= 2) && (m_depthWithinUselessElement == -1)) { |
| 386 | /* m_curPath.toString() will be the property key whose value will be | |
| 387 | m_chars. | |
| 388 | ||
| 389 | This is (part of) what m_lastDumpedPath is for: With, for example "<ZYX.9> | |
| 390 | <PT.1>P</PT.1> </ZYX.9>" we might have had a m_curPath containing something | |
| 391 | like [ZYX, 0, 9, 0, 0] when we exited the PT.1 element. (note: internal | |
| 392 | DatumPath elements are 0-indexed, string representations of DatumPaths and | |
| 393 | the XML text is 1-indexed.) So in m_props the key for "P" would have been | |
| 394 | "ZYX[0]-9[0]-1-1". (the last "-1" is a default that got added by | |
| 395 | toString()). | |
| 396 | | |
| 397 | Then we would have exited the PT.3 element, changed m_curPath to [ZYX, 0, | |
| 398 | 9, 0], picked up the whitespace between </PT.3> and </ZYX.9>, and when | |
| 399 | exiting the ZYX.9 element, we might have written that whitespace to m_props | |
| 400 | with a key of the toString() of [ZYX, 0, 9, 0]; that is, "ZYX[0]-9[0]-1-1": | |
| 401 | the same as the key for the "P" ... clobbering "P" in m_props with | |
| 402 | whitespace. | |
| 403 | ||
| 404 | But since we know that HL7 fields / components / etc are always in order | |
| 405 | (numerically), we can count on m_lastDumpedPath and use | |
| 406 | DatumPath.numbersLessThan to avoid the clobbering. | |
| 407 | */ | |
| 408 | 1025 | if((m_lastDumpedPath.get(0).equals(m_curPath.get(0))) |
| 409 | 940 | ? (m_lastDumpedPath.numbersLessThan(m_curPath)) |
| 410 | : true) | |
| 411 | { | |
| 412 | 700 | if(m_depthWithinUsefulElement >= 0) { |
| 413 | 700 | m_props.setProperty(m_curPath.toString(), m_chars.toString()); |
| 414 | 700 | m_lastDumpedPath.copy(m_curPath); |
| 415 | 700 | m_chars.delete(0, m_chars.length()); |
| 416 | } | |
| 417 | } | |
| 418 | } | |
| 419 | 1025 | } |
| 420 | ||
| 421 | public void characters(char[] chars, int start, int length) | |
| 422 | { | |
| 423 | // note that a contiguous run of characters in the document | |
| 424 | // might get reported to us in several chunks. | |
| 425 | // (In the order that the text appears in the document, | |
| 426 | // non-overlapping and with no gaps between chunks.) | |
| 427 | // An entity like & will reach us as an actual & character. | |
| 428 | ||
| 429 | 2180 | if((m_msgID.length() > 0) && (m_curPath.size() >= 4)) { |
| 430 | 1510 | m_chars.append(chars, start, length); |
| 431 | } | |
| 432 | 2180 | } |
| 433 | ||
| 434 | public void ignoreableWhitespace(char []chars, int start, int length) | |
| 435 | { | |
| 436 | // it's unclear which whitespace is considered ignorable for us. | |
| 437 | // what the heck, add it to m_chars. | |
| 438 | 0 | characters(chars, start, length); |
| 439 | 0 | } |
| 440 | ||
| 441 | public void error(SAXParseException e) | |
| 442 | { | |
| 443 | // TODO: remove. | |
| 444 | 0 | System.err.println("Error in " + getClass() + ": " + e); |
| 445 | 0 | } |
| 446 | ||
| 447 | public void fatalError(SAXParseException e) throws SAXException | |
| 448 | { | |
| 449 | 0 | throw e; |
| 450 | } | |
| 451 | } | |
| 452 | ||
| 453 | /** parse message according to our HL7 XML handler, and dump the data found | |
| 454 | to props. | |
| 455 | | |
| 456 | returns true if we parsed ok, which means well-formed XML, and | |
| 457 | that's about it. We just barely check against HL7 structure, and ignore any | |
| 458 | elements / text that is unexpected (that is, impossible in any HL7 message: | |
| 459 | independant of any message / segment definitions). | |
| 460 | ||
| 461 | "message" should be an XML document with one top-level element -- that being | |
| 462 | the message. (<ACK> or whatever). We're only expecting one message to be in | |
| 463 | "message". | |
| 464 | ||
| 465 | props can be null if you don't want the data (we still parse). The message | |
| 466 | data found in message (that passes msgMask) will be added to props as key / | |
| 467 | value pairs with the key a toString() of the appropriate DatumPath for the | |
| 468 | location where the data is found (i.e. in the ZYX[a]-b[c]-d-e style), and | |
| 469 | the value the corresponding text. So, after calling parseMessage | |
| 470 | successfully, if you wanted to retrieve the message data from props you | |
| 471 | might call something like | |
| 472 | props.getProperty((new DatumPath()).add("MSH").add(1).toString()) | |
| 473 | and that would return a String with "|", probably. | |
| 474 | ||
| 475 | Note that this package facilitates the extraction of message data in a way | |
| 476 | independent of message version (i.e. components and whatever getting added): | |
| 477 | ||
| 478 | With a message of "<FOO><ZYX><ZYX.42>fieldy-field-field</ZYX.42></ZYX></FOO>", | |
| 479 | "ZYX[0]-1[0]-1-1" will be the key that ends up in props (see notes at | |
| 480 | DatumPath.toString()) | |
| 481 | ||
| 482 | So if you, coding for a future version of the FOO message but | |
| 483 | recieving old-version message data, tried | |
| 484 | props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).add(0).add(1).toString()) | |
| 485 | with the message above (that is, trying to extract a repetition and | |
| 486 | component that aren't there), you would get "ZYX[0]-42[0]-1-1" mapping to | |
| 487 | "fieldy-field-field" in the resulting props. | |
| 488 | ||
| 489 | If the message was | |
| 490 | "<FOO><ZYX><ZYX.42><ARG.1>component data</ARG.1></ZYX.42></ZYX></FOO>" | |
| 491 | and you, coding for an old version of this FOO message but recieving | |
| 492 | new-version FOO message data, tried | |
| 493 | props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).toString()) | |
| 494 | you would get "ZYX[0]-42[0]-1-1" mapping to "component data" in the resulting | |
| 495 | props. | |
| 496 | ||
| 497 | msgMask lets you specify which parts of the message you want dumped to props. | |
| 498 | Passing in null gets you everything. Otherwise, msgMask's elements should | |
| 499 | all be DatumPaths (! => ClassCastException), and a particular part of the | |
| 500 | message will be dumped to props only if it's location, as represented by a | |
| 501 | DatumPath, startsWith (as in DatumPath.startsWith()) at least one element of | |
| 502 | msgMask. So if one element of msgMask was a (new DatumPath()).add(new | |
| 503 | String("ZYX")), then everything in all ZYX segment would get dumped to props. | |
| 504 | A (new DatumPath()).add(new String("ZYX")).add(1) would get only the first | |
| 505 | repetitions of same (if there is one) dumped to props. etc. etc. Note that | |
| 506 | a DatumPath of size() == 0 in msgMask will get you everything, no matter what | |
| 507 | the other elements of msgMask are, because all DatumPaths startsWith the | |
| 508 | zero-length DatumPath. | |
| 509 | ||
| 510 | Segment group elements (eg. ADT_A01.PROCEDURE) are handled fine, but they | |
| 511 | aren't addressed in msgMask or in the output in props -- basically any | |
| 512 | element tags at the level immediately inside the message element, and having | |
| 513 | a name that starts with the message element name + '.', is ignored (meaning | |
| 514 | it's contents are dealt with the same as if the start and end tags' just | |
| 515 | wasn't there.) | |
| 516 | */ | |
| 517 | public static boolean parseMessage(Properties props, String message, | |
| 518 | Collection<DatumPath> msgMask) throws HL7Exception | |
| 519 | { | |
| 520 | 35 | boolean ret = false; |
| 521 | try { | |
| 522 | 35 | SAXParserFactory factory = SAXParserFactory.newInstance(); |
| 523 | 35 | SAXParser parser = factory.newSAXParser(); |
| 524 | ||
| 525 | 35 | InputSource inSrc = new InputSource(new java.io.StringReader(message)); |
| 526 | ||
| 527 | 35 | HL7MessageHandler handler = new HL7MessageHandler(); |
| 528 | 35 | handler.m_props = (props != null |
| 529 | ? props : new Properties()); // it's expecting a props. | |
| 530 | ||
| 531 | 35 | if(msgMask != null) |
| 532 | 0 | handler.m_msgMask = msgMask; |
| 533 | else { | |
| 534 | 35 | handler.m_msgMask = new ArrayList<DatumPath>(); |
| 535 | 35 | handler.m_msgMask.add(new DatumPath()); |
| 536 | } | |
| 537 | ||
| 538 | 35 | parser.parse(inSrc, handler); |
| 539 | 35 | ret = true; |
| 540 | 0 | } catch (ParserConfigurationException e) { |
| 541 | 0 | throw new HL7Exception(e); |
| 542 | 0 | } catch (IOException e) { |
| 543 | 0 | throw new HL7Exception(e); |
| 544 | 0 | } catch (StopParsingException e) { |
| 545 | 0 | throw new HL7Exception(e); |
| 546 | 0 | } catch (SAXException e) { |
| 547 | 0 | throw new HL7Exception(e); |
| 548 | 35 | } |
| 549 | ||
| 550 | 35 | return ret; |
| 551 | } | |
| 552 | ||
| 553 | public static void main(String args[]) | |
| 554 | { | |
| 555 | 0 | if(args.length >= 1) { |
| 556 | 0 | Properties props = new Properties(); |
| 557 | 0 | List<DatumPath> msgMask = new ArrayList<DatumPath>(); |
| 558 | 0 | msgMask.add(new DatumPath().add("MSH").add(0).add(9)); |
| 559 | //msgMask.add(new DatumPath()); | |
| 560 | boolean parseret; | |
| 561 | try { | |
| 562 | 0 | parseret = XML.parseMessage(props, args[0], msgMask); |
| 563 | 0 | System.err.println("parseMessage returned " + parseret); |
| 564 | 0 | } catch (HL7Exception e) { |
| 565 | 0 | e.printStackTrace(); |
| 566 | 0 | } |
| 567 | 0 | props.list(System.err); |
| 568 | } | |
| 569 | 0 | } |
| 570 | } | |
| 571 |