001package ca.uhn.hl7v2.parser;
002
003import java.util.Arrays;
004import java.util.Collections;
005import java.util.HashSet;
006import java.util.Set;
007
008import ca.uhn.hl7v2.HapiContext;
009import ca.uhn.hl7v2.model.GenericMessage;
010import ca.uhn.hl7v2.model.Varies;
011import ca.uhn.hl7v2.util.Terser;
012import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator;
013import ca.uhn.hl7v2.util.idgenerator.IDGenerator;
014import ca.uhn.hl7v2.validation.ValidationContext;
015
016/**
017 * Contains configuration which will be applied to any parsers which are a part of the given
018 * HAPI Context.
019 * 
020 * @see HapiContext#getParserConfiguration()
021 */
022public class ParserConfiguration {
023
        /**
         * Default behaviour used when a nonstandard segment is found while parsing:
         * {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE}.
         *
         * @see #getUnexpectedSegmentBehaviour()
         */
        // NB if you change the default, edit the javadoc for the enum itself
        public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE;

        // Backing fields of the configuration properties. Each one is read and written
        // through the accessor/mutator pair of the corresponding name further below.
        private boolean allowUnknownVersions;
        private boolean escapeSubcomponentDelimiterInPrimitive = false;
        private IDGenerator idGenerator = new FileBasedHiLoGenerator();
        private String myDefaultObx2Type;
        private boolean myEncodeEmptyMandatorySegments = true;
        // Path definitions registered through addForcedEncode(String)
        private Set<String> myForcedEncode = new HashSet<String>();
        private String myInvalidObx2Type;
        // Stays null until it is set explicitly or read for the first time;
        // getUnexpectedSegmentBehaviour() then stores DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR here
        private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour;
        private boolean nonGreedyMode = false;
        private boolean prettyPrintWhenEncodingXml = true;
        private boolean validating = true;
    private Escaping escaping = new DefaultEscaping();
        private boolean xmlDisableWhitespaceTrimmingOnAllNodes = false;
        // Never null: starts out as the empty set and the setter maps null back to the empty set
        private Set<String> xmlDisableWhitespaceTrimmingOnNodeNames = Collections.emptySet();
044
045        /**
046         * <p>
047         * Forces the parser to encode certain segments/fields, even if they contain
048         * no content. This method may be called multiple times with multiple path
049         * definitions, and each path definition contains the path to the segment or
050         * field which needs to be forced.
051         * </p>
052         * <p>
053         * Path definitions are similar in format to {@link Terser Terser} paths.
054         * They contain a slash-separated lookup path to reach a given segment, and
055         * optionally a field number. The following are examples of paths which
056         * could be added here, as well as the sample output for an otherwise empty
057         * ORU^R01 message:
058         * </p>
059         * <table cellpadding="2" cellspacing="2" border="0">
060         * <thead>
061         * <tr>
062         * <th style="background: #FFA0FF;">Forced Encode Path</th>
063         * <th style="background: #FFA0FF;">Encode Output</th>
064         * </tr>
065         * </thead>
066         * <tr>
067         * <td>None (for illustration purposes)</td>
068         * <td style=" font-family: monospace;">
069         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
070         * </tr>
071         * <tr>
072         * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
073         * <td style="background: #E0E0E0; font-family: monospace;">
074         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
075         * ORC|</td>
076         * </tr>
077         * <tr>
078         * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
079         * <td style=" font-family: monospace;">
080         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
081         * ORC||||</td>
082         * </tr>
083         * <tr>
084         * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2
085         * </td>
086         * <td style="background: #E0E0E0; font-family: monospace;">
087         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
088         * ORC||||^</td>
089         * </tr>
090         * </table>
091         * <p>
092         * While empty segments do not generally have any meaning according to HL7,
093         * this may be useful when transmitting to systems which rely on segments
094         * being received even if they have no content.
095         * </p>
096         * <p>
097         * Note that this configuration item currently only applies to
098         * {@link PipeParser}
099         * </p>
100         *
101     * @param theForcedEncode path definition
102         * @since 2.0
103         */
104        public void addForcedEncode(String theForcedEncode) {
105                if (theForcedEncode == null) {
106                        throw new NullPointerException("forced encode may not be null");
107                }
108
109                int lastSlashIndex = theForcedEncode.lastIndexOf('/');
110                lastSlashIndex = Math.max(lastSlashIndex, 0);
111
112                if (lastSlashIndex == 0) {
113                        if (!theForcedEncode.matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
114                                throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
115                        }
116                } else {
117                        if (lastSlashIndex == theForcedEncode.length() || !theForcedEncode.substring(lastSlashIndex + 1).matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
118                                throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
119                        }
120                }
121                myForcedEncode.add(theForcedEncode);
122        }
123
124        boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
125                for (String next : getForcedEncode()) {
126                        if (next.startsWith(theTerserPath)) {
127                                return true;
128                        }
129                }
130                return false;
131        }
132
133        int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
134                int forceUpToFieldNum = 0;
135                for (String nextPath : getForcedEncode()) {
136                        if (nextPath.startsWith(theCurrentTerserPath) && nextPath.length() > theCurrentTerserPath.length()) {
137                                int endOfFieldDef = nextPath.indexOf('-', theCurrentTerserPath.length() + 1);
138                                if (endOfFieldDef == -1) {
139                                        endOfFieldDef = nextPath.length();
140                                }
141                                String fieldNumString = nextPath.substring(theCurrentTerserPath.length() + 1, endOfFieldDef);
142                                forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString));
143                        }
144                }
145                return forceUpToFieldNum;
146        }
147
148        /**
149         * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
150         * missing OBX-2 value
151         * 
152         * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
153         *         with a missing OBX-2 value
154         * @see #setDefaultObx2Type(String)
155         */
156        public String getDefaultObx2Type() {
157                return myDefaultObx2Type;
158        }
159
160        /**
161         * @return Returns the forced encode strings added by
162         *         {@link #addForcedEncode(String)}
163         * 
164         * @see #addForcedEncode(String)
165         * @since 1.3
166         */
167        public Set<String> getForcedEncode() {
168                return Collections.unmodifiableSet(myForcedEncode);
169        }
170
171        /**
172         * @return the ID Generator to be used for generating IDs for new messages
173         */
174        public IDGenerator getIdGenerator() {
175                return idGenerator;
176        }
177
178        /**
179         * Returns the value provides a default datatype ("ST", "NM", etc) for an
180         * OBX segment with an invalid OBX-2 value.
181         * 
182         * @return Returns the value provides a default datatype ("ST", "NM", etc)
183         *         for an OBX segment with an invalid OBX-2 value.
184         * @see #setInvalidObx2Type(String)
185         */
186        public String getInvalidObx2Type() {
187                return myInvalidObx2Type;
188        }
189
190        /**
191         * Returns the behaviour to use when parsing a message and a nonstandard
192         * segment is found. Default is
193         * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR}
194     *
195     * @return the behaviour to use when a nonstandard egment is found
196         */
197        public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() {
198                if (myUnexpectedSegmentBehaviour == null) {
199                        myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR;
200                }
201                return myUnexpectedSegmentBehaviour;
202        }
203
204        /**
205         * @see #setXmlDisableWhitespaceTrimmingOnNodeNames(Set)
206         */
207        public Set<String> getXmlDisableWhitespaceTrimmingOnNodeNames() {
208                return xmlDisableWhitespaceTrimmingOnNodeNames;
209        }
210
211        /**
212         * If set to <code>true</code> (default is <code>false</code>) the parser
213         * will allow messages to parse, even if they contain a version which is not
214         * known to the parser. When operating in this mode, if a message arrives
215         * with an unknown version string, the parser will attempt to parse it using
216         * a {@link GenericMessage Generic Message} class instead of a specific HAPI
217         * structure class. Default is <code>false</code>.
218     *
219     * @return true if parsing messages with unknown versions is allowed
220         */
221        public boolean isAllowUnknownVersions() {
222                return this.allowUnknownVersions;
223        }
224
225        /**
226     * Returns <code>true</code> if empty segments should still be encoded
227     * if they are mandatory within their message structure.  Default is <code>false</code>.
228         * @return <code>true</code> if empty segments should still be encoded
229     *
230         * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
231         */
232        public boolean isEncodeEmptyMandatorySegments() {
233                return myEncodeEmptyMandatorySegments;
234        }
235
236        /**
237     * Returns code>true</code> if subcomponent delimiters in OBX-5 shall be
238     *         ignored. Default is <code>false</code>.
239         * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be
240         *         ignored
241         */
242        public boolean isEscapeSubcomponentDelimiterInPrimitive() {
243                return escapeSubcomponentDelimiterInPrimitive;
244        }
245
246        /**
247         * Returns <code>true</code> if the parser should parse in non-greedy mode. Default
248         * is <code>false</code>
249         * 
250         * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode
251         */
252        public boolean isNonGreedyMode() {
253                return nonGreedyMode;
254        }
255
256        /**
257         * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
258         * will attempt to pretty-print the XML they generate. This means the messages will look
259         * nicer to humans, but may take up slightly more space/bandwidth.
260         */
261        public boolean isPrettyPrintWhenEncodingXml() {
262                return prettyPrintWhenEncodingXml;
263        }
264
265        /**
266     * Returns <code>true</code> if the parser validates using a configured
267     *         {@link ValidationContext}. Default is <code>true</code>.
268         * @return <code>true</code> if the parser validates using a configured
269         *         {@link ValidationContext}
270         */
271        public boolean isValidating() {
272                return validating;
273        }
274
275        /**
276         * @see #setXmlDisableWhitespaceTrimmingOnAllNodes(boolean)
277         */
278        public boolean isXmlDisableWhitespaceTrimmingOnAllNodes() {
279                return xmlDisableWhitespaceTrimmingOnAllNodes;
280        }
281
282        /**
283         * Removes a forced encode entry
284         *
285     * @param theForcedEncode path definition to be removed
286         * @see #addForcedEncode(String)
287         * @since 1.3
288         */
289        public void removeForcedEncode(String theForcedEncode) {
290                if (theForcedEncode == null) {
291                        throw new NullPointerException("forced encode may not be null");
292                }
293
294                myForcedEncode.remove(theForcedEncode);
295        }
296
297        /**
298         * If set to <code>true</code> (default is <code>false</code>) the parser
299         * will allow messages to parse, even if they contain a version which is not
300         * known to the parser. When operating in this mode, if a message arrives
301         * with an unknown version string, the parser will attempt to parse it using
302         * a {@link GenericMessage Generic Message} class instead of a specific HAPI
303         * structure class.
304     *
305     * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed
306         */
307        public void setAllowUnknownVersions(boolean theAllowUnknownVersions) {
308                allowUnknownVersions = theAllowUnknownVersions;
309        }
310
311        /**
312         * <p>
313         * If this property is set, the value provides a default datatype ("ST",
314         * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
315         * when parsing messages from systems which do not correctly populate OBX-2.
316         * </p>
317         * <p>
318         * For example, if this property is set to "ST", and the following OBX
319         * segment is encountered:
320         * 
321         * <pre>
322         * OBX|||||This is a value
323         * </pre>
324         * 
325         * It will be parsed as though it had read:
326         * 
327         * <pre>
328         * OBX||ST|||This is a value
329         * </pre>
330         * 
331         * </p>
332         * <p>
333         * Note that this configuration can also be set globally using the system
334         * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
335         * {@link ParserConfiguration} takes priority over the system property.
336         * </p>
337         * 
338         * @param theDefaultObx2Type
339         *            If this property is set, the value provides a default datatype
340         *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
341         *            value
342         * @see #setInvalidObx2Type(String)
343         * @see Varies#INVALID_OBX2_TYPE_PROP
344         */
345        public void setDefaultObx2Type(String theDefaultObx2Type) {
346                myDefaultObx2Type = theDefaultObx2Type;
347        }
348
349        /**
350         * <p>
351         * If set to <code>true</code> (default is <code>true</code>), when encoding
352         * a group using the PipeParser where the first segment is required, but no
353         * data has been populated in that segment, the empty segment is now still
354         * encoded if needed as a blank segment in order to give parsers a hint
355         * about which group subsequent segments are in. This helps to ensure that
356         * messages can be "round tripped", meaning that a message which is parsed,
357         * encoded, and then re-parsed should contain exactly the same structure
358         * from beginning to end.
359         * </p>
360         * <p>
361         * </p>
362         * For example, in an ORU^R01 message with a populated OBX segment, but no
363         * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
364         * group the message would still contain an empty OBR segment when encoded:
365         * 
366         * <pre>
367         *      MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
368         *      OBR|
369         *      OBX||ST|||Value Data
370         * </pre>
371         * 
372         * Previously, the following encoding would have occurred, which would have
373         * incorrectly been parsed as having a custom OBX segment instead of having
374         * a normal ORDER_OBSERVATION group:
375         * 
376         * <pre>
377         *      MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
378         *      OBX||ST|||Value Data
379         * </pre>
380         * 
381         * @param theEncodeEmptyMandatorySegments
382         *            If set to <code>true</code> (default is <code>true</code>),
383         *            when encoding a group using the PipeParser where the first
384         *            segment is required, but no data has been populated in that
385         *            segment, the empty segment is now still encoded if needed as a
386         *            blank segment in order to give parsers a hint about which
387         *            group subsequent segments are in
388         */
389        public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
390                myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
391        }
392
393        /**
394         * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be
395         * ignored
396     * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior
397         */
398        public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) {
399                this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive;
400        }
401
402        /**
403         * @param idGenerator
404         *            the {@link IDGenerator} to be used for generating IDs for new
405         *            messages, preferable initialized using the methods described
406         *            in IDGeneratorFactory.
407         * 
408         * @see IDGenerator
409         */
410        public void setIdGenerator(IDGenerator idGenerator) {
411                this.idGenerator = idGenerator;
412        }
413
414        /**
415         * <p>
416         * If this property is set, the value provides a default datatype ("ST",
417         * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
418         * when parsing messages from systems which do not correctly populate OBX-2.
419         * </p>
420         * <p>
421         * For example, if this property is set to "ST", and the following OBX
422         * segment is encountered:
423         * 
424         * <pre>
425         * OBX||INVALID|||This is a value
426         * </pre>
427         * 
428         * It will be parsed as though it had read:
429         * 
430         * <pre>
431         * OBX||ST|||This is a value
432         * </pre>
433         * 
434         * </p>
435         * <p>
436         * Note that this configuration can also be set globally using the system
437         * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
438         * {@link ParserConfiguration} takes priority over the system property.
439         * </p>
440         * 
441         * @param theInvalidObx2Type
442         *            If this property is set, the value provides a default datatype
443         *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
444         *            value. This is useful when parsing messages from systems which
445         *            do not correctly populate OBX-2.
446         * @see ParserConfiguration#setDefaultObx2Type(String)
447         * @see Varies#DEFAULT_OBX2_TYPE_PROP
448         */
449        public void setInvalidObx2Type(String theInvalidObx2Type) {
450                myInvalidObx2Type = theInvalidObx2Type;
451        }
452
453        /**
454         * If set to <code>true</code> (default is <code>false</code>), pipe parser will be
455         * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and
456         * will have no effect on {@link XMLParser XML Parsers}.
457         * 
458         * <p>
459         * In non-greedy mode, if the message structure being parsed has an ambiguous
460         * choice of where to put a segment because there is a segment matching the
461         * current segment name in both a later position in the message, and
462         * in an earlier position as a part of a repeating group, the earlier
463         * position will be chosen.
464         * </p>
465         * <p>
466         * This is perhaps best explained with an example. Consider the following structure:
467         * </p>
468         * <pre>
469         * MSH
470         * GROUP_1 (start)
471         * {
472         *    AAA
473         *    BBB
474         *    GROUP_2 (start)
475         *    {
476         *       AAA
477         *    }
478         *    GROUP_2 (end)
479         * }
480         * GROUP_1 (end)
481         * </pre>
482         * <p>
483         * </p>
484         * For the above example, consider a message containing the following segments:<br/>
485         * <code>MSH<br/>
486         * AAA<br/>
487         * BBB<br/>
488         * AAA</code>
489         * </p>
490         * <p>
491         * In this example, when the second AAA segment is encountered, there are two
492         * possible choices. It would be placed in GROUP_2, or it could be placed in 
493         * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but
494         * in non-greedy mode it will be put in a new repetition of GROUP_1.
495         * </p>
496         * <p>
497         * This mode is useful for example when parsing OML^O21 messages containing
498         * multiple orders.
499         * </p>
500         */
501        public void setNonGreedyMode(boolean theNonGreedyMode) {
502                nonGreedyMode = theNonGreedyMode;
503        }
504
505        /**
506         * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
507         * will attempt to pretty-print the XML they generate. This means the messages will look
508         * nicer to humans, but may take up slightly more space/bandwidth.
509         */
510        public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) {
511                prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml;
512        }
513
514        /**
515         * Sets the behaviour to use when parsing a message and a nonstandard
516         * segment is found
517     *
518     * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found
519     */
520        public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) {
521                if (theUnexpectedSegmentBehaviour == null) {
522                        throw new NullPointerException("UnexpectedSegmentBehaviour can not be null");
523                }
524                myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour;
525        }
526
527        /**
528         * Determines whether the parser validates using a configured
529         * {@link ValidationContext} or not. This allows to disable message
530         * validation although a validation context is defined.
531         * 
532         * @param validating
533         *            <code>true</code> if parser shall validate, <code>false</code>
534         *            if not
535         */
536        public void setValidating(boolean validating) {
537                this.validating = validating;
538        }
539
540    public Escaping getEscaping() {
541        return escaping;
542    }
543
544    /**
545     * Sets an escaping strategy
546     * @param escaping escaping strategy instance
547     */
548    public void setEscaping(Escaping escaping) {
549        if (escaping == null) {
550            throw new NullPointerException("Escaping can not be null");
551        }
552        this.escaping = escaping;
553    }
554        /**
555         * Configures the XML Parser to treat all whitespace within text nodes as literal, meaning that
556         * line breaks, tabs, multiple spaces, etc. will be preserved. If set to <code>true</code>, any values
557         * passed to {@link #setXmlDisableWhitespaceTrimmingOnNodeNames(Set)} will be superceded since all
558         * whitespace will be treated as literal.
559         * <p>
560         * Default is <b>false</b>
561         * </p> 
562         */
563        public void setXmlDisableWhitespaceTrimmingOnAllNodes(boolean theXmlDisableWhitespaceTrimmingOnAllNodes) {
564                this.xmlDisableWhitespaceTrimmingOnAllNodes = theXmlDisableWhitespaceTrimmingOnAllNodes;
565        }
566
567        /**
568         * Configures the XML Parser to treat all whitespace within the given nodes as literal, meaning that
569         * line breaks, tabs, multiple spaces, etc. will be preserved. This method takes individual XML node names
570         * as arguments (e.g. "HD.2", or "TX.1").
571         * <p>
572         * Default is <b>none</b>
573         * </p> 
574         */
575        public void setXmlDisableWhitespaceTrimmingOnNodeNames(Set<String> theXmlDisableWhitespaceTrimmingOnNodeNames) {
576                if (theXmlDisableWhitespaceTrimmingOnNodeNames==null) {
577                        this.xmlDisableWhitespaceTrimmingOnNodeNames = Collections.emptySet();
578                } else {
579                        this.xmlDisableWhitespaceTrimmingOnNodeNames = theXmlDisableWhitespaceTrimmingOnNodeNames;
580                }
581        }
582
583        /**
584         * Configures the XML Parser to treat all whitespace within the given nodes as literal, meaning that
585         * line breaks, tabs, multiple spaces, etc. will be preserved. This method takes individual XML node names
586         * as arguments (e.g. "HD.2", or "TX.1").
587         * <p>
588         * Default is <b>none</b>
589         * </p> 
590         */
591        public void setXmlDisableWhitespaceTrimmingOnNodeNames(String... theKeepAsOriginalNodes) {
592                if (theKeepAsOriginalNodes==null) {
593                        setXmlDisableWhitespaceTrimmingOnNodeNames((Set<String>)null);
594                } else {
595                        setXmlDisableWhitespaceTrimmingOnNodeNames(new HashSet<String>(Arrays.asList(theKeepAsOriginalNodes)));
596                }
597        }
598
599}