001package ca.uhn.hl7v2.preparser;
002
003import java.util.ArrayList;
004import java.util.Iterator;
005import java.util.List;
006import java.util.Map;
007import java.util.Properties;
008import java.util.SortedMap;
009import java.util.StringTokenizer;
010import java.util.TreeMap;
011
012import ca.uhn.hl7v2.parser.EncodingCharacters;
013
014/*
The point of this class (all static members, not instantiable) is to take a
traditionally-encoded HL7 message and add all its contents to a Properties
object, via the parseMessage() method.
018
019The key-value pairs added to the Properties argument have keys that represent a
020datum's location in the message.  (in the ZYX-1-2[0] style.  TODO: define
021exactly.)  See Datum, particularly the toString() of that class.
022Anyway, the Properties keys are those and the values are the tokens found.
023
Note: we accept useless field repetition separators at the end of a 
field repetition sequence, e.g. |855-4545~555-3792~~~| , and interpret this
as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
section 2.10 explicitly allows this behaviour for fields / components /
subcomponents, but the allowance is notably absent for repetitions.  TODO:
nail down.)  We allow it anyway.
030
031Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
032repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
033harm.  :D  
034*/
035public class ER7 {
036        
        /** Private, empty constructor: every member of this class is static, so
        there is nothing to instantiate. */
        private ER7() {}

        /** Characters that delimit segments, for use with StringTokenizer.
        We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
        the only segment delimiter, but line feed ('\n') and form feed ('\f') are
        listed here as well.  TODO: check other versions. */ 
        static final String segmentSeparators = "\r\n\f";
043
044        /** Parses message and dumps contents to props, with keys in the 
045        ZYX[a]-b[c]-d-e style.
046        */
047        public static boolean parseMessage(/*out*/ Properties props, 
048                /*in*/ List<DatumPath> msgMask, /*in*/ String message)
049        {
050                boolean ok = false;
051                if(message != null) {
052                        if(props == null)
053                                props = new Properties();
054
055                        StringTokenizer messageTokenizer 
056                                = new StringTokenizer(message, segmentSeparators);
057                        if(messageTokenizer.hasMoreTokens()) {
058                                String firstSegment = messageTokenizer.nextToken();
059                                EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
060                                if(parseMSHSegmentWhole(props, msgMask, encodingChars, firstSegment)) {
061                                        ok = true;
062                                        SortedMap<String, Integer> segmentId2nextRepIdx = new TreeMap<String, Integer>();
063                                        segmentId2nextRepIdx.put(new String("MSH"), 1); 
064                                                // in case we find another MSH segment, heh.
065                                        while(messageTokenizer.hasMoreTokens()) {
066                                                parseSegmentWhole(props, segmentId2nextRepIdx, 
067                                                        msgMask, encodingChars, messageTokenizer.nextToken());
068                                        }
069                                }
070                        }
071                }
072                return ok;
073        }
074        
075        /** given segment, starting with "MSH", then encoding characters, etc...
076        put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
077        plus everything else found in 'segment' */
078        protected static boolean parseMSHSegmentWhole(/*out*/ Properties props, 
079                /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
080                /*in*/ String segment) 
081        {
082                boolean ret = false;
083                try {
084                        ER7SegmentHandler handler = new ER7SegmentHandler();
085                        handler.m_props = props;
086                        handler.m_encodingChars = encodingChars;
087                        handler.m_segmentId = "MSH";
088                        handler.m_segmentRepIdx = 0;
089                        if(msgMask != null)
090                                handler.m_msgMask = msgMask;
091                        else {
092                                handler.m_msgMask = new ArrayList<DatumPath>();
093                                handler.m_msgMask.add(new DatumPath()); // everything will pass this
094                                        // (every DatumPath startsWith the zero-length DatumPath)
095                        }
096
097                        encodingChars.setFieldSeparator(segment.charAt(3));
098                        List<Integer> nodeKey = new ArrayList<Integer>();
099                        nodeKey.add(new Integer(0));
100                        handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));
101                        encodingChars.setComponentSeparator(segment.charAt(4));
102                        encodingChars.setRepetitionSeparator(segment.charAt(5));
103                        encodingChars.setEscapeCharacter(segment.charAt(6));
104                        encodingChars.setSubcomponentSeparator(segment.charAt(7));
105                        nodeKey.set(0, new Integer(1));
106                        handler.putDatum(nodeKey, encodingChars.toString());
107
108                        if(segment.charAt(8) == encodingChars.getFieldSeparator()) {    
109                                ret = true; 
110                                // now -- we recurse 
111                                // through fields / field-repetitions / components / subcomponents.
112                                nodeKey.clear();
113                                nodeKey.add(new Integer(2));
114                                parseSegmentGuts(handler, segment.substring(9), nodeKey);
115                        }
116                }
117                catch(IndexOutOfBoundsException e) {}
118                catch(NullPointerException e) {}
119
120                return ret;
121        }
122
123        /** pass in a whole segment (of type other than MSH), including message type
124        at the start, according to encodingChars, and we'll parse the contents and
125        put them in props. */
126        protected static void parseSegmentWhole(/*out*/ Properties props, 
127                /*in/out*/ Map<String, Integer> segmentId2nextRepIdx, 
128                /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
129                /*in*/ String segment)
130        {
131                try {
132                        String segmentId = segment.substring(0, 3);
133
134                        int currentSegmentRepIdx = 0;
135                        if(segmentId2nextRepIdx.containsKey(segmentId))
136                                currentSegmentRepIdx = ((Integer)segmentId2nextRepIdx.get(segmentId)).intValue();
137                        else
138                                currentSegmentRepIdx = 0;
139                        segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));
140
141                        // will only bother to parse this segment if any of it's contents will 
142                        // be dumped to props.
143                        boolean parseThisSegment = false;
144                        DatumPath segmentIdAsDatumPath = new DatumPath().add(segmentId);
145                        for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
146                                parseThisSegment = segmentIdAsDatumPath.startsWith(maskIt.next());
147                        for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
148                                parseThisSegment = maskIt.next().startsWith(segmentIdAsDatumPath);
149
150                        if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
151                                ER7SegmentHandler handler = new ER7SegmentHandler();
152                                handler.m_props = props;
153                                handler.m_encodingChars = encodingChars;
154                                handler.m_segmentId = segmentId;
155                                handler.m_msgMask = msgMask;
156                                handler.m_segmentRepIdx = currentSegmentRepIdx;
157
158                                List<Integer> nodeKey = new ArrayList<Integer>();
159                                nodeKey.add(new Integer(0));
160                                parseSegmentGuts(handler, segment.substring(4), nodeKey);
161                        }
162                }
163                catch(NullPointerException e) {}
164                catch(IndexOutOfBoundsException e) {}
165        }
166
167        static protected interface Handler
168        {
169                public int specDepth();
170                public char delim(int level);
171
172                public void putDatum(List<Integer> nodeKey, String value);
173        }
174
175        static protected class ER7SegmentHandler implements Handler
176        {
177                Properties m_props;
178
179                EncodingCharacters m_encodingChars;
180
181                String m_segmentId;
182                int m_segmentRepIdx;
183
184                List<DatumPath> m_msgMask;
185
186                public int specDepth() {return 4;}
187
188                public char delim(int level)
189                {
190                        if(level == 0)
191                                return m_encodingChars.getFieldSeparator();
192                        else if(level == 1)
193                                return m_encodingChars.getRepetitionSeparator();
194                        else if(level == 2)
195                                return m_encodingChars.getComponentSeparator();
196                        else if(level == 3)
197                                return m_encodingChars.getSubcomponentSeparator();
198            else if(level == 4)
199                return m_encodingChars.getTruncationCharacter();
200                        else
201                                throw new java.lang.Error();
202                }
203
204                public void putDatum(List<Integer> valNodeKey, String value)
205                {
206                        // make a DatumPath from valNodeKey and info in this: 
207                        DatumPath valDatumPath = new DatumPath();
208                        valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
209                        for(int i=0; i<valNodeKey.size(); ++i) {
210                                // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh. 
211                                int itval = ((Integer)valNodeKey.get(i)).intValue();
212                                valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
213                        }
214
215                        // see if valDatumPath passes m_msgMask: 
216                        boolean valDatumPathPassesMask = false;
217                        for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
218                                !valDatumPathPassesMask && maskIt.hasNext(); )
219                        {
220                                valDatumPathPassesMask = valDatumPath.startsWith(maskIt.next());
221                        }
222
223                        if(valDatumPathPassesMask)
224                                m_props.setProperty(valDatumPath.toString(), value);
225                }
226        }
227
228        /** recursively tokenize "guts" (a segment, or part of one) into tokens, 
229        according to separators (aka delimiters) which are different at each level
230        of recursion, and to a recursive depth which is discovered through "handler"
231        via handler.delim(int) and handler.specDepth()  As tokens are found, they
232        are reported to handler via handler.putDatum(), which presumably stashes them
233        away somewhere.  We tell the handler about the location in the message via
234        putDatum()'s key argument, which is a List of Integers representing the 
235        position in the parse tree (size() == depth of recursion).
236
237        TODO: say more.
238        */
239        protected static void parseSegmentGuts(/*in/out*/ Handler handler,  
240                /*in*/ String guts, /*in*/List<Integer> nodeKey)
241        {
242                char thisDepthsDelim = handler.delim(nodeKey.size()-1);
243                //nodeKey.add(new Integer(0)); // will change nodeKey back before function exits
244
245                StringTokenizer gutsTokenizer 
246                        = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
247                while(gutsTokenizer.hasMoreTokens()) {
248                        String gutsToken = gutsTokenizer.nextToken();
249
250                        if(gutsToken.charAt(0) == thisDepthsDelim) {
251                                // gutsToken is all delims -- skipping over as many fields or
252                                // components or whatevers as there are characters in the token: 
253                                int oldvalue = ((Integer)nodeKey.get(nodeKey.size()-1)).intValue();
254                                nodeKey.set(nodeKey.size()-1, new Integer(oldvalue + gutsToken.length()));
255                        }
256                        else {
257                                if(nodeKey.size() < handler.specDepth()) {
258                                        nodeKey.add(new Integer(0));
259                                        parseSegmentGuts(handler, gutsToken, nodeKey);
260                                        nodeKey.remove(nodeKey.size()-1);
261                                }
262                                else 
263                                        handler.putDatum(nodeKey, gutsToken);
264                        }
265                }
266                //nodeKey.setSize(nodeKey.size()-1); // undoing add done at top of this func
267        }
268
269        public static void main(String args[])
270        {
271                if(args.length >= 1) {
272                        //String message = "MSH|^~\\&||||foo|foo|foo";
273                        System.out.println(args[0]);
274
275                        Properties props = new Properties();
276
277                        List<DatumPath> msgMask = new ArrayList<DatumPath>();
278                        msgMask.add(new DatumPath());
279
280                        System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
281                        props.list(System.out);
282                }
283        }
284        
285}
286