001package ca.uhn.hl7v2.preparser;
003import java.util.ArrayList;
004import java.util.Iterator;
005import java.util.List;
006import java.util.Map;
007import java.util.Properties;
008import java.util.SortedMap;
009import java.util.StringTokenizer;
010import java.util.TreeMap;
012import ca.uhn.hl7v2.parser.EncodingCharacters;
/**
The point of this class (all static members, not instantiable) is to take a
traditionally-encoded HL7 message and add all its contents to a Properties
object, via the parseMessage() method.

The key-value pairs added to the Properties argument have keys that represent a
datum's location in the message (in the ZYX-1-2[0] style; TODO: define
exactly).  See Datum, particularly the toString() of that class.
In short, the Properties keys are those locations and the values are the tokens
found there.

Note: we accept useless field repetition separators at the end of a
field repetition sequence, e.g. |855-4545~555-3792~~~| , and interpret this
as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
section 2.10 explicitly allows this behaviour for fields / components /
subcomponents, but the allowance is notably absent for repetitions.  TODO:
nail down.)  We allow it anyway.

Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
harm.  :D
*/
035public class ER7 {
037        private ER7() {}
039        /** characters that delimit segments.  for use with StringTokenizer.
040        We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
041        the only segment delimiter.  TODO: check other versions. */ 
042        static final String segmentSeparators = "\r\n\f";
044        /** Parses message and dumps contents to props, with keys in the 
045        ZYX[a]-b[c]-d-e style.
046        */
047        public static boolean parseMessage(/*out*/ Properties props, 
048                /*in*/ List<DatumPath> msgMask, /*in*/ String message)
049        {
050                boolean ok = false;
051                if(message != null) {
052                        if(props == null)
053                                props = new Properties();
055                        StringTokenizer messageTokenizer 
056                                = new StringTokenizer(message, segmentSeparators);
057                        if(messageTokenizer.hasMoreTokens()) {
058                                String firstSegment = messageTokenizer.nextToken();
059                                EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
060                                if(parseMSHSegmentWhole(props, msgMask, encodingChars, firstSegment)) {
061                                        ok = true;
062                                        SortedMap<String, Integer> segmentId2nextRepIdx = new TreeMap<String, Integer>();
063                                        segmentId2nextRepIdx.put(new String("MSH"), 1); 
064                                                // in case we find another MSH segment, heh.
065                                        while(messageTokenizer.hasMoreTokens()) {
066                                                parseSegmentWhole(props, segmentId2nextRepIdx, 
067                                                        msgMask, encodingChars, messageTokenizer.nextToken());
068                                        }
069                                }
070                        }
071                }
072                return ok;
073        }
075        /** given segment, starting with "MSH", then encoding characters, etc...
076        put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
077        plus everything else found in 'segment' */
078        protected static boolean parseMSHSegmentWhole(/*out*/ Properties props, 
079                /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
080                /*in*/ String segment) 
081        {
082                boolean ret = false;
083                try {
084                        ER7SegmentHandler handler = new ER7SegmentHandler();
085                        handler.m_props = props;
086                        handler.m_encodingChars = encodingChars;
087                        handler.m_segmentId = "MSH";
088                        handler.m_segmentRepIdx = 0;
089                        if(msgMask != null)
090                                handler.m_msgMask = msgMask;
091                        else {
092                                handler.m_msgMask = new ArrayList<DatumPath>();
093                                handler.m_msgMask.add(new DatumPath()); // everything will pass this
094                                        // (every DatumPath startsWith the zero-length DatumPath)
095                        }
097                        encodingChars.setFieldSeparator(segment.charAt(3));
098                        List<Integer> nodeKey = new ArrayList<Integer>();
099                        nodeKey.add(new Integer(0));
100                        handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));
101                        encodingChars.setComponentSeparator(segment.charAt(4));
102                        encodingChars.setRepetitionSeparator(segment.charAt(5));
103                        encodingChars.setEscapeCharacter(segment.charAt(6));
104                        encodingChars.setSubcomponentSeparator(segment.charAt(7));
105                        nodeKey.set(0, new Integer(1));
106                        handler.putDatum(nodeKey, encodingChars.toString());
108                        if(segment.charAt(8) == encodingChars.getFieldSeparator()) {    
109                                ret = true; 
110                                // now -- we recurse 
111                                // through fields / field-repetitions / components / subcomponents.
112                                nodeKey.clear();
113                                nodeKey.add(new Integer(2));
114                                parseSegmentGuts(handler, segment.substring(9), nodeKey);
115                        }
116                }
117                catch(IndexOutOfBoundsException e) {}
118                catch(NullPointerException e) {}
120                return ret;
121        }
123        /** pass in a whole segment (of type other than MSH), including message type
124        at the start, according to encodingChars, and we'll parse the contents and
125        put them in props. */
126        protected static void parseSegmentWhole(/*out*/ Properties props, 
127                /*in/out*/ Map<String, Integer> segmentId2nextRepIdx, 
128                /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
129                /*in*/ String segment)
130        {
131                try {
132                        String segmentId = segment.substring(0, 3);
134                        int currentSegmentRepIdx = 0;
135                        if(segmentId2nextRepIdx.containsKey(segmentId))
136                                currentSegmentRepIdx = ((Integer)segmentId2nextRepIdx.get(segmentId)).intValue();
137                        else
138                                currentSegmentRepIdx = 0;
139                        segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));
141                        // will only bother to parse this segment if any of it's contents will 
142                        // be dumped to props.
143                        boolean parseThisSegment = false;
144                        DatumPath segmentIdAsDatumPath = new DatumPath().add(segmentId);
145                        for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
146                                parseThisSegment = segmentIdAsDatumPath.startsWith(maskIt.next());
147                        for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
148                                parseThisSegment = maskIt.next().startsWith(segmentIdAsDatumPath);
150                        if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
151                                ER7SegmentHandler handler = new ER7SegmentHandler();
152                                handler.m_props = props;
153                                handler.m_encodingChars = encodingChars;
154                                handler.m_segmentId = segmentId;
155                                handler.m_msgMask = msgMask;
156                                handler.m_segmentRepIdx = currentSegmentRepIdx;
158                                List<Integer> nodeKey = new ArrayList<Integer>();
159                                nodeKey.add(new Integer(0));
160                                parseSegmentGuts(handler, segment.substring(4), nodeKey);
161                        }
162                }
163                catch(NullPointerException e) {}
164                catch(IndexOutOfBoundsException e) {}
165        }
167        static protected interface Handler
168        {
169                public int specDepth();
170                public char delim(int level);
172                public void putDatum(List<Integer> nodeKey, String value);
173        }
175        static protected class ER7SegmentHandler implements Handler
176        {
177                Properties m_props;
179                EncodingCharacters m_encodingChars;
181                String m_segmentId;
182                int m_segmentRepIdx;
184                List<DatumPath> m_msgMask;
186                public int specDepth() {return 4;}
188                public char delim(int level)
189                {
190                        if(level == 0)
191                                return m_encodingChars.getFieldSeparator();
192                        else if(level == 1)
193                                return m_encodingChars.getRepetitionSeparator();
194                        else if(level == 2)
195                                return m_encodingChars.getComponentSeparator();
196                        else if(level == 3)
197                                return m_encodingChars.getSubcomponentSeparator();
198            else if(level == 4)
199                return m_encodingChars.getTruncationCharacter();
200                        else
201                                throw new java.lang.Error();
202                }
204                public void putDatum(List<Integer> valNodeKey, String value)
205                {
206                        // make a DatumPath from valNodeKey and info in this: 
207                        DatumPath valDatumPath = new DatumPath();
208                        valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
209                        for(int i=0; i<valNodeKey.size(); ++i) {
210                                // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh. 
211                                int itval = ((Integer)valNodeKey.get(i)).intValue();
212                                valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
213                        }
215                        // see if valDatumPath passes m_msgMask: 
216                        boolean valDatumPathPassesMask = false;
217                        for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
218                                !valDatumPathPassesMask && maskIt.hasNext(); )
219                        {
220                                valDatumPathPassesMask = valDatumPath.startsWith(maskIt.next());
221                        }
223                        if(valDatumPathPassesMask)
224                                m_props.setProperty(valDatumPath.toString(), value);
225                }
226        }
228        /** recursively tokenize "guts" (a segment, or part of one) into tokens, 
229        according to separators (aka delimiters) which are different at each level
230        of recursion, and to a recursive depth which is discovered through "handler"
231        via handler.delim(int) and handler.specDepth()  As tokens are found, they
232        are reported to handler via handler.putDatum(), which presumably stashes them
233        away somewhere.  We tell the handler about the location in the message via
234        putDatum()'s key argument, which is a List of Integers representing the 
235        position in the parse tree (size() == depth of recursion).
237        TODO: say more.
238        */
239        protected static void parseSegmentGuts(/*in/out*/ Handler handler,  
240                /*in*/ String guts, /*in*/List<Integer> nodeKey)
241        {
242                char thisDepthsDelim = handler.delim(nodeKey.size()-1);
243                //nodeKey.add(new Integer(0)); // will change nodeKey back before function exits
245                StringTokenizer gutsTokenizer 
246                        = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
247                while(gutsTokenizer.hasMoreTokens()) {
248                        String gutsToken = gutsTokenizer.nextToken();
250                        if(gutsToken.charAt(0) == thisDepthsDelim) {
251                                // gutsToken is all delims -- skipping over as many fields or
252                                // components or whatevers as there are characters in the token: 
253                                int oldvalue = ((Integer)nodeKey.get(nodeKey.size()-1)).intValue();
254                                nodeKey.set(nodeKey.size()-1, new Integer(oldvalue + gutsToken.length()));
255                        }
256                        else {
257                                if(nodeKey.size() < handler.specDepth()) {
258                                        nodeKey.add(new Integer(0));
259                                        parseSegmentGuts(handler, gutsToken, nodeKey);
260                                        nodeKey.remove(nodeKey.size()-1);
261                                }
262                                else 
263                                        handler.putDatum(nodeKey, gutsToken);
264                        }
265                }
266                //nodeKey.setSize(nodeKey.size()-1); // undoing add done at top of this func
267        }
269        public static void main(String args[])
270        {
271                if(args.length >= 1) {
272                        //String message = "MSH|^~\\&||||foo|foo|foo";
273                        System.out.println(args[0]);
275                        Properties props = new Properties();
277                        List<DatumPath> msgMask = new ArrayList<DatumPath>();
278                        msgMask.add(new DatumPath());
280                        System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
281                        props.list(System.out);
282                }
283        }