package ca.uhn.hl7v2.parser;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import ca.uhn.hl7v2.HapiContext;
import ca.uhn.hl7v2.model.GenericMessage;
import ca.uhn.hl7v2.model.Varies;
import ca.uhn.hl7v2.util.Terser;
import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator;
import ca.uhn.hl7v2.util.idgenerator.IDGenerator;
import ca.uhn.hl7v2.validation.ValidationContext;

/**
 * Contains configuration which will be applied to any parsers which are a part of the given
 * HAPI Context.
 *
 * @see HapiContext#getParserConfiguration()
 */
public class ParserConfiguration {

    /**
     * Default behaviour when an unexpected segment is found:
     * {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE}
     */
    // NB if you change the default, edit the javadoc for the enum itself
    public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE;

    /**
     * Shape of the last element of a forced encode path: a three character
     * segment name, optionally followed by up to two "-number" parts.
     */
    private static final String FORCED_ENCODE_DEFINITION_REGEX = "[A-Z0-9]{3}(-[0-9]+){0,2}$";

    /** Message used when a forced encode path does not end with a valid definition. */
    private static final String FORCED_ENCODE_DEFINITION_ERROR = "Definition must end with a segment name or field lookup, e.g. MSH or MSH-2";

    private boolean allowUnknownVersions;
    private boolean escapeSubcomponentDelimiterInPrimitive = false;
    private IDGenerator idGenerator = new FileBasedHiLoGenerator();
    private String myDefaultObx2Type;
    private boolean myEncodeEmptyMandatorySegments = true;
    private final Set<String> myForcedEncode = new HashSet<String>();
    private String myInvalidObx2Type;
    // Never null: initialised to the default here and the setter rejects null
    private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR;
    private boolean nonGreedyMode = false;
    private boolean prettyPrintWhenEncodingXml = true;
    private boolean validating = true;
    private Escaping escaping = new DefaultEscaping();
    private boolean xmlDisableWhitespaceTrimmingOnAllNodes = false;
    // Always an unmodifiable set (either the empty set or a read-only view)
    private Set<String> xmlDisableWhitespaceTrimmingOnNodeNames = Collections.emptySet();

    /**
     * <p>
     * Forces the parser to encode certain segments/fields, even if they contain
     * no content. This method may be called multiple times with multiple path
     * definitions, and each path definition contains the path to the segment or
     * field which needs to be forced.
     * </p>
     * <p>
     * Path definitions are similar in format to {@link Terser Terser} paths.
     * They contain a slash-separated lookup path to reach a given segment, and
     * optionally a field number. The following are examples of paths which
     * could be added here, as well as the sample output for an otherwise empty
     * ORU^R01 message:
     * </p>
     * <table cellpadding="2" cellspacing="2" border="0">
     * <thead>
     * <tr>
     * <th style="background: #FFA0FF;">Forced Encode Path</th>
     * <th style="background: #FFA0FF;">Encode Output</th>
     * </tr>
     * </thead>
     * <tr>
     * <td>None (for illustration purposes)</td>
     * <td style=" font-family: monospace;">
     * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
     * </tr>
     * <tr>
     * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
     * <td style="background: #E0E0E0; font-family: monospace;">
     * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC|</td>
     * </tr>
     * <tr>
     * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
     * <td style=" font-family: monospace;">
     * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC||||</td>
     * </tr>
     * <tr>
     * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2
     * </td>
     * <td style="background: #E0E0E0; font-family: monospace;">
     * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC||||^</td>
     * </tr>
     * </table>
     * <p>
     * While empty segments do not generally have any meaning according to HL7,
     * this may be useful when transmitting to systems which rely on segments
     * being received even if they have no content.
     * </p>
     * <p>
     * Note that this configuration item currently only applies to
     * {@link PipeParser}
     * </p>
     *
     * @param theForcedEncode path definition
     * @throws NullPointerException if <code>theForcedEncode</code> is <code>null</code>
     * @throws IllegalArgumentException if the definition does not end with a
     *             segment name or field lookup, e.g. MSH or MSH-2
     * @since 2.0
     */
    public void addForcedEncode(String theForcedEncode) {
        if (theForcedEncode == null) {
            throw new NullPointerException("forced encode may not be null");
        }

        // Only the element after the last slash is validated. When there is no
        // slash, or the only slash is the very first character, the complete
        // string is validated instead (so a leading slash is rejected).
        int lastSlashIndex = theForcedEncode.lastIndexOf('/');
        String definition = theForcedEncode;
        if (lastSlashIndex > 0) {
            definition = theForcedEncode.substring(lastSlashIndex + 1);
        }

        if (!definition.matches(FORCED_ENCODE_DEFINITION_REGEX)) {
            throw new IllegalArgumentException(FORCED_ENCODE_DEFINITION_ERROR);
        }
        myForcedEncode.add(theForcedEncode);
    }

    /**
     * Checks whether any forced encode path begins with the given path. This
     * is a plain textual prefix comparison.
     *
     * @param theTerserPath the path to look for
     * @return <code>true</code> if at least one forced encode path starts with
     *         <code>theTerserPath</code>
     */
    boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
        for (String next : getForcedEncode()) {
            if (next.startsWith(theTerserPath)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines the highest field number which has been forced for the
     * segment at the given path, looking at every forced encode path of the
     * form <code>theCurrentTerserPath + "-" + fieldNum</code> (optionally
     * followed by a further "-number" part).
     *
     * @param theCurrentTerserPath the path of the segment being examined
     * @return the highest forced field number, or <code>0</code> if no field
     *         has been forced for this path
     */
    int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
        int prefixLength = theCurrentTerserPath.length();
        int forceUpToFieldNum = 0;
        for (String nextPath : getForcedEncode()) {
            if (nextPath.length() <= prefixLength || !nextPath.startsWith(theCurrentTerserPath)) {
                continue;
            }
            // A field definition for this path must continue with '-'. Anything
            // else is a different path which merely shares a textual prefix;
            // trying to read a field number from it would fail with a
            // NumberFormatException or produce a meaningless value.
            if (nextPath.charAt(prefixLength) != '-') {
                continue;
            }
            int endOfFieldDef = nextPath.indexOf('-', prefixLength + 1);
            if (endOfFieldDef == -1) {
                endOfFieldDef = nextPath.length();
            }
            String fieldNumString = nextPath.substring(prefixLength + 1, endOfFieldDef);
            forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString));
        }
        return forceUpToFieldNum;
    }

    /**
     * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
     * missing OBX-2 value
     *
     * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
     *         with a missing OBX-2 value
     * @see #setDefaultObx2Type(String)
     */
    public String getDefaultObx2Type() {
        return myDefaultObx2Type;
    }

    /**
     * @return Returns an unmodifiable view of the forced encode strings added
     *         by {@link #addForcedEncode(String)}
     *
     * @see #addForcedEncode(String)
     * @since 1.3
     */
    public Set<String> getForcedEncode() {
        return Collections.unmodifiableSet(myForcedEncode);
    }

    /**
     * @return the ID Generator to be used for generating IDs for new messages
     */
    public IDGenerator getIdGenerator() {
        return idGenerator;
    }

    /**
     * Returns the value which provides a default datatype ("ST", "NM", etc)
     * for an OBX segment with an invalid OBX-2 value.
     *
     * @return Returns the value which provides a default datatype ("ST", "NM",
     *         etc) for an OBX segment with an invalid OBX-2 value.
     * @see #setInvalidObx2Type(String)
     */
    public String getInvalidObx2Type() {
        return myInvalidObx2Type;
    }

    /**
     * Returns the behaviour to use when parsing a message and a nonstandard
     * segment is found. Default is
     * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR}
     *
     * @return the behaviour to use when a nonstandard segment is found
     */
    public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() {
        return myUnexpectedSegmentBehaviour;
    }

    /**
     * @return an unmodifiable set of the XML node names within which
     *         whitespace is treated as literal; never <code>null</code>
     * @see #setXmlDisableWhitespaceTrimmingOnNodeNames(Set)
     */
    public Set<String> getXmlDisableWhitespaceTrimmingOnNodeNames() {
        return xmlDisableWhitespaceTrimmingOnNodeNames;
    }

    /**
     * If set to <code>true</code> (default is <code>false</code>) the parser
     * will allow messages to parse, even if they contain a version which is not
     * known to the parser. When operating in this mode, if a message arrives
     * with an unknown version string, the parser will attempt to parse it using
     * a {@link GenericMessage Generic Message} class instead of a specific HAPI
     * structure class.
     *
     * @return true if parsing messages with unknown versions is allowed
     */
    public boolean isAllowUnknownVersions() {
        return this.allowUnknownVersions;
    }

    /**
     * Returns <code>true</code> if empty segments should still be encoded
     * if they are mandatory within their message structure. Default is <code>true</code>.
     *
     * @return <code>true</code> if empty segments should still be encoded
     *
     * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
     */
    public boolean isEncodeEmptyMandatorySegments() {
        return myEncodeEmptyMandatorySegments;
    }

    /**
     * Returns <code>true</code> if subcomponent delimiters in OBX-5 shall be
     * ignored. Default is <code>false</code>.
     *
     * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be
     *         ignored
     */
    public boolean isEscapeSubcomponentDelimiterInPrimitive() {
        return escapeSubcomponentDelimiterInPrimitive;
    }

    /**
     * Returns <code>true</code> if the parser should parse in non-greedy mode. Default
     * is <code>false</code>
     *
     * @return <code>true</code> if non-greedy mode is enabled
     * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode
     */
    public boolean isNonGreedyMode() {
        return nonGreedyMode;
    }

    /**
     * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
     * will attempt to pretty-print the XML they generate. This means the messages will look
     * nicer to humans, but may take up slightly more space/bandwidth.
     *
     * @return <code>true</code> if XML output is pretty-printed
     */
    public boolean isPrettyPrintWhenEncodingXml() {
        return prettyPrintWhenEncodingXml;
    }

    /**
     * Returns <code>true</code> if the parser validates using a configured
     * {@link ValidationContext}. Default is <code>true</code>.
     *
     * @return <code>true</code> if the parser validates using a configured
     *         {@link ValidationContext}
     */
    public boolean isValidating() {
        return validating;
    }

    /**
     * @return <code>true</code> if whitespace within all XML text nodes is
     *         treated as literal
     * @see #setXmlDisableWhitespaceTrimmingOnAllNodes(boolean)
     */
    public boolean isXmlDisableWhitespaceTrimmingOnAllNodes() {
        return xmlDisableWhitespaceTrimmingOnAllNodes;
    }

    /**
     * Removes a forced encode entry
     *
     * @param theForcedEncode path definition to be removed
     * @throws NullPointerException if <code>theForcedEncode</code> is <code>null</code>
     * @see #addForcedEncode(String)
     * @since 1.3
     */
    public void removeForcedEncode(String theForcedEncode) {
        if (theForcedEncode == null) {
            throw new NullPointerException("forced encode may not be null");
        }

        myForcedEncode.remove(theForcedEncode);
    }

    /**
     * If set to <code>true</code> (default is <code>false</code>) the parser
     * will allow messages to parse, even if they contain a version which is not
     * known to the parser. When operating in this mode, if a message arrives
     * with an unknown version string, the parser will attempt to parse it using
     * a {@link GenericMessage Generic Message} class instead of a specific HAPI
     * structure class.
     *
     * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed
     */
    public void setAllowUnknownVersions(boolean theAllowUnknownVersions) {
        allowUnknownVersions = theAllowUnknownVersions;
    }

    /**
     * <p>
     * If this property is set, the value provides a default datatype ("ST",
     * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
     * when parsing messages from systems which do not correctly populate OBX-2.
     * </p>
     * <p>
     * For example, if this property is set to "ST", and the following OBX
     * segment is encountered:
     * </p>
     *
     * <pre>
     * OBX|||||This is a value
     * </pre>
     *
     * <p>
     * It will be parsed as though it had read:
     * </p>
     *
     * <pre>
     * OBX||ST|||This is a value
     * </pre>
     *
     * <p>
     * Note that this configuration can also be set globally using the system
     * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
     * {@link ParserConfiguration} takes priority over the system property.
     * </p>
     *
     * @param theDefaultObx2Type
     *            If this property is set, the value provides a default datatype
     *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
     *            value
     * @see #setInvalidObx2Type(String)
     * @see Varies#INVALID_OBX2_TYPE_PROP
     */
    public void setDefaultObx2Type(String theDefaultObx2Type) {
        myDefaultObx2Type = theDefaultObx2Type;
    }

    /**
     * <p>
     * If set to <code>true</code> (default is <code>true</code>), when encoding
     * a group using the PipeParser where the first segment is required, but no
     * data has been populated in that segment, the empty segment is now still
     * encoded if needed as a blank segment in order to give parsers a hint
     * about which group subsequent segments are in. This helps to ensure that
     * messages can be "round tripped", meaning that a message which is parsed,
     * encoded, and then re-parsed should contain exactly the same structure
     * from beginning to end.
     * </p>
     * <p>
     * For example, in an ORU^R01 message with a populated OBX segment, but no
     * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
     * group the message would still contain an empty OBR segment when encoded:
     * </p>
     *
     * <pre>
     * MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
     * OBR|
     * OBX||ST|||Value Data
     * </pre>
     *
     * <p>
     * Previously, the following encoding would have occurred, which would have
     * incorrectly been parsed as having a custom OBX segment instead of having
     * a normal ORDER_OBSERVATION group:
     * </p>
     *
     * <pre>
     * MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
     * OBX||ST|||Value Data
     * </pre>
     *
     * @param theEncodeEmptyMandatorySegments
     *            If set to <code>true</code> (default is <code>true</code>),
     *            when encoding a group using the PipeParser where the first
     *            segment is required, but no data has been populated in that
     *            segment, the empty segment is now still encoded if needed as a
     *            blank segment in order to give parsers a hint about which
     *            group subsequent segments are in
     * @see #isEncodeEmptyMandatorySegments()
     */
    public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
        myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
    }

    /**
     * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be
     * ignored
     *
     * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior
     */
    public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) {
        this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive;
    }

    /**
     * @param idGenerator
     *            the {@link IDGenerator} to be used for generating IDs for new
     *            messages, preferably initialized using the methods described
     *            in IDGeneratorFactory.
     *
     * @see IDGenerator
     */
    public void setIdGenerator(IDGenerator idGenerator) {
        this.idGenerator = idGenerator;
    }

    /**
     * <p>
     * If this property is set, the value provides a default datatype ("ST",
     * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
     * when parsing messages from systems which do not correctly populate OBX-2.
     * </p>
     * <p>
     * For example, if this property is set to "ST", and the following OBX
     * segment is encountered:
     * </p>
     *
     * <pre>
     * OBX||INVALID|||This is a value
     * </pre>
     *
     * <p>
     * It will be parsed as though it had read:
     * </p>
     *
     * <pre>
     * OBX||ST|||This is a value
     * </pre>
     *
     * <p>
     * Note that this configuration can also be set globally using the system
     * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
     * {@link ParserConfiguration} takes priority over the system property.
     * </p>
     *
     * @param theInvalidObx2Type
     *            If this property is set, the value provides a default datatype
     *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
     *            value. This is useful when parsing messages from systems which
     *            do not correctly populate OBX-2.
     * @see ParserConfiguration#setDefaultObx2Type(String)
     * @see Varies#DEFAULT_OBX2_TYPE_PROP
     */
    public void setInvalidObx2Type(String theInvalidObx2Type) {
        myInvalidObx2Type = theInvalidObx2Type;
    }

    /**
     * If set to <code>true</code> (default is <code>false</code>), pipe parser will be
     * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and
     * will have no effect on {@link XMLParser XML Parsers}.
     *
     * <p>
     * In non-greedy mode, if the message structure being parsed has an ambiguous
     * choice of where to put a segment because there is a segment matching the
     * current segment name in both a later position in the message, and
     * in an earlier position as a part of a repeating group, the earlier
     * position will be chosen.
     * </p>
     * <p>
     * This is perhaps best explained with an example. Consider the following structure:
     * </p>
     * <pre>
     * MSH
     * GROUP_1 (start)
     * {
     *    AAA
     *    BBB
     *    GROUP_2 (start)
     *    {
     *       AAA
     *    }
     *    GROUP_2 (end)
     * }
     * GROUP_1 (end)
     * </pre>
     * <p>
     * For the above example, consider a message containing the following segments:<br/>
     * <code>MSH<br/>
     * AAA<br/>
     * BBB<br/>
     * AAA</code>
     * </p>
     * <p>
     * In this example, when the second AAA segment is encountered, there are two
     * possible choices. It would be placed in GROUP_2, or it could be placed in
     * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but
     * in non-greedy mode it will be put in a new repetition of GROUP_1.
     * </p>
     * <p>
     * This mode is useful for example when parsing OML^O21 messages containing
     * multiple orders.
     * </p>
     *
     * @param theNonGreedyMode <code>true</code> to enable non-greedy mode
     */
    public void setNonGreedyMode(boolean theNonGreedyMode) {
        nonGreedyMode = theNonGreedyMode;
    }

    /**
     * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
     * will attempt to pretty-print the XML they generate. This means the messages will look
     * nicer to humans, but may take up slightly more space/bandwidth.
     *
     * @param thePrettyPrintWhenEncodingXml <code>true</code> to pretty-print generated XML
     */
    public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) {
        prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml;
    }

    /**
     * Sets the behaviour to use when parsing a message and a nonstandard
     * segment is found
     *
     * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found
     * @throws NullPointerException if <code>theUnexpectedSegmentBehaviour</code> is <code>null</code>
     */
    public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) {
        if (theUnexpectedSegmentBehaviour == null) {
            throw new NullPointerException("UnexpectedSegmentBehaviour can not be null");
        }
        myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour;
    }

    /**
     * Determines whether the parser validates using a configured
     * {@link ValidationContext} or not. This allows to disable message
     * validation although a validation context is defined.
     *
     * @param validating
     *            <code>true</code> if parser shall validate, <code>false</code>
     *            if not
     */
    public void setValidating(boolean validating) {
        this.validating = validating;
    }

    /**
     * @return the escaping strategy; never <code>null</code>
     * @see #setEscaping(Escaping)
     */
    public Escaping getEscaping() {
        return escaping;
    }

    /**
     * Sets an escaping strategy
     *
     * @param escaping escaping strategy instance
     * @throws NullPointerException if <code>escaping</code> is <code>null</code>
     */
    public void setEscaping(Escaping escaping) {
        if (escaping == null) {
            throw new NullPointerException("Escaping can not be null");
        }
        this.escaping = escaping;
    }

    /**
     * Configures the XML Parser to treat all whitespace within text nodes as literal, meaning that
     * line breaks, tabs, multiple spaces, etc. will be preserved. If set to <code>true</code>, any values
     * passed to {@link #setXmlDisableWhitespaceTrimmingOnNodeNames(Set)} will be superseded since all
     * whitespace will be treated as literal.
     * <p>
     * Default is <b>false</b>
     * </p>
     *
     * @param theXmlDisableWhitespaceTrimmingOnAllNodes <code>true</code> to treat all whitespace as literal
     */
    public void setXmlDisableWhitespaceTrimmingOnAllNodes(boolean theXmlDisableWhitespaceTrimmingOnAllNodes) {
        this.xmlDisableWhitespaceTrimmingOnAllNodes = theXmlDisableWhitespaceTrimmingOnAllNodes;
    }

    /**
     * Configures the XML Parser to treat all whitespace within the given nodes as literal, meaning that
     * line breaks, tabs, multiple spaces, etc. will be preserved. This method takes individual XML node names
     * as arguments (e.g. "HD.2", or "TX.1").
     * <p>
     * Default is <b>none</b>
     * </p>
     * <p>
     * The given set is not copied: it is stored behind a read-only view, so
     * changes made to it afterwards by the caller remain visible through
     * {@link #getXmlDisableWhitespaceTrimmingOnNodeNames()}, while the set
     * returned by that getter can not be modified.
     * </p>
     *
     * @param theXmlDisableWhitespaceTrimmingOnNodeNames the node names, or
     *            <code>null</code> to reset to none
     */
    public void setXmlDisableWhitespaceTrimmingOnNodeNames(Set<String> theXmlDisableWhitespaceTrimmingOnNodeNames) {
        if (theXmlDisableWhitespaceTrimmingOnNodeNames == null) {
            this.xmlDisableWhitespaceTrimmingOnNodeNames = Collections.emptySet();
        } else {
            // A view rather than a copy keeps the lookup and ordering
            // semantics of whatever Set implementation the caller supplied
            this.xmlDisableWhitespaceTrimmingOnNodeNames = Collections.unmodifiableSet(theXmlDisableWhitespaceTrimmingOnNodeNames);
        }
    }

    /**
     * Configures the XML Parser to treat all whitespace within the given nodes as literal, meaning that
     * line breaks, tabs, multiple spaces, etc. will be preserved. This method takes individual XML node names
     * as arguments (e.g. "HD.2", or "TX.1").
     * <p>
     * Default is <b>none</b>
     * </p>
     *
     * @param theKeepAsOriginalNodes the node names, or <code>null</code> to
     *            reset to none
     */
    public void setXmlDisableWhitespaceTrimmingOnNodeNames(String... theKeepAsOriginalNodes) {
        if (theKeepAsOriginalNodes == null) {
            setXmlDisableWhitespaceTrimmingOnNodeNames((Set<String>) null);
        } else {
            setXmlDisableWhitespaceTrimmingOnNodeNames(new HashSet<String>(Arrays.asList(theKeepAsOriginalNodes)));
        }
    }

}