001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1
003(the "License"); you may not use this file except in compliance with the License.
004You may obtain a copy of the License at http://www.mozilla.org/MPL/
005Software distributed under the License is distributed on an "AS IS" basis,
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007specific language governing rights and limitations under the License.
008
009The Initial Developer of the Original Code is University Health Network. Copyright (C)
0102001.  All Rights Reserved.
011
012Contributor(s): ______________________________________.
013
014Alternatively, the contents of this file may be used under the terms of the
GNU General Public License (the "GPL"), in which case the provisions of the GPL are
016applicable instead of those above.  If you wish to allow use of your version of this
017file only under the terms of the GPL and not to allow others to use your version
018of this file under the MPL, indicate your decision by deleting  the provisions above
019and replace  them with the notice and other provisions required by the GPL License.
020If you do not delete the provisions above, a recipient may use your version of
021this file under either the MPL or the GPL.
022
023*/
024package ca.uhn.hl7v2.parser;
025
026import java.io.File;
027import java.io.FileReader;
028import java.util.ArrayList;
029import java.util.HashSet;
030import java.util.List;
031import java.util.Set;
032
033import ca.uhn.hl7v2.DefaultHapiContext;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036import org.w3c.dom.DOMException;
037import org.w3c.dom.Document;
038import org.w3c.dom.Element;
039import org.w3c.dom.Node;
040import org.w3c.dom.NodeList;
041
042import ca.uhn.hl7v2.HL7Exception;
043import ca.uhn.hl7v2.HapiContext;
044import ca.uhn.hl7v2.model.Group;
045import ca.uhn.hl7v2.model.Message;
046import ca.uhn.hl7v2.model.Segment;
047import ca.uhn.hl7v2.model.Structure;
048import ca.uhn.hl7v2.util.XMLUtils;
049import ca.uhn.hl7v2.validation.impl.NoValidation;
050import ca.uhn.hl7v2.validation.impl.ValidationContextFactory;
051
052/**
053 * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
054 * to Segment objects (in a Message object) using the name of a segment and the names 
055 * of any groups in which the segment is nested.  The names of group classes must correspond
056 * to the names of group elements (they must be identical except that a dot in the element 
 * name, following the message name, is replaced with an underscore, in order to constitute a 
058 * valid class name). </p>
059 * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
060 * names have been automatically generated based on the group contents.  However, these automatic 
061 * names are gradually being replaced with manually assigned names.  This process is expected to 
062 * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
063 * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
064 * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
065 * changed to correspond with the official group names, once these are all assigned.  </p>
066 * 
067 * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
068 * @author Bryan Tripp
069 */
070public class DefaultXMLParser extends XMLParser {
071
072    private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
073
074    private static final Set<String> ourForceGroupNames;
075    
076    static {
077        ourForceGroupNames = new HashSet<String>();
078        ourForceGroupNames.add("DIET");
079    }
080    
081    public DefaultXMLParser() {
082        super();
083    }
084    
085    public DefaultXMLParser(HapiContext context) {
086                super(context);
087        }
088
089        /** 
090     * Creates a new instance of DefaultXMLParser 
091     *  
092     * @param theFactory custom factory to use for model class lookup 
093     */
094    public DefaultXMLParser(ModelClassFactory theFactory) {
095        super(theFactory);
096    }
097    
098    /**
099     * <p>Creates an XML Document that corresponds to the given Message object. </p>
100     * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
101     * into it that correspond to the groups and segments that belong to the message type that your subclass
102     * of XMLParser supports.  Then, for each segment in the message, call the method
103     * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
104     * that segment and the corresponding Segment object from the given Message.</p>
105     */
106    public Document encodeDocument(Message source) throws HL7Exception {
107        String messageClassName = source.getClass().getName();
108        String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
109        try {
110            Document doc = XMLUtils.emptyDocument(messageName);
111            //Element root = doc.createElement(messageName);
112            //doc.appendChild(root);
113            encode(source, doc.getDocumentElement());
114            return doc;
115        } catch (Exception e) {
116            throw new HL7Exception(
117                "Can't create XML document - " + e.getClass().getName(), e);
118        }
119    }
120
121    /**
122     * Copies data from a group object into the corresponding group element, creating any 
123     * necessary child nodes.  
124     */
125    private void encode(Group groupObject, Element groupElement) throws HL7Exception {
126        String[] childNames = groupObject.getNames();
127        String messageName = groupObject.getMessage().getName();
128        
129        try {
130                for (String name : childNames) {
131                Structure[] reps = groupObject.getAll(name);
132                for (Structure rep : reps) {
133                    String elementName = makeGroupElementName(messageName, name);
134                                        Element childElement;
135                                        try {
136                                                childElement = groupElement.getOwnerDocument().createElement(elementName);
137                                } catch (DOMException e) {
138                                    throw new HL7Exception(
139                                        "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(), e);
140                                }
141                    groupElement.appendChild(childElement);
142                    if (rep instanceof Group) {
143                        encode((Group) rep, childElement);
144                    }
145                    else if (rep instanceof Segment) {
146                        encode((Segment) rep, childElement);
147                    }
148                                }
149            }
150        } catch (DOMException e) {
151            throw new HL7Exception(
152                "Can't encode group " + groupObject.getClass().getName(), e);
153        }
154    }
155
156
157    /**
158     * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
159     * <p>The easiest way to implement this method for a particular message structure is as follows:
160     * <ol><li>Create an instance of the Message type you are going to handle with your subclass
161     * of XMLParser</li>
162     * <li>Go through the given Document and find the Elements that represent the top level of
163     * each message segment. </li>
164     * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
165     * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
166     * At the end of this process, your Message object should be populated with data from the XML
167     * Document.</p>
168     * @throws HL7Exception if the message is not correctly formatted.
169     * @throws EncodingNotSupportedException if the message encoded
170     *     is not supported by this parser.
171     */
172    public Message parseDocument(Document xmlMessage, String version) throws HL7Exception {
173
174        assertNamespaceURI(xmlMessage.getDocumentElement().getNamespaceURI());
175
176        Message message = instantiateMessage(xmlMessage.getDocumentElement().getLocalName(), version, true);
177        // Note: this will change in future to reuse the Parser's/HapiContext's
178        // ValidationContext.
179        // message.setValidationContext(getValidationContext());
180        parse(message, xmlMessage.getDocumentElement());
181        return message;
182    }
183
184    /**
185     * Populates the given group object with data from the given group element, ignoring 
186     * any unrecognized nodes.  
187     */
188    private void parse(Group groupObject, Element groupElement) throws HL7Exception {
189        String[] childNames = groupObject.getNames();
190        String messageName = groupObject.getMessage().getName();
191        
192        NodeList allChildNodes = groupElement.getChildNodes();
193        List<String> unparsedElementList = new ArrayList<String>();
194        for (int i = 0; i < allChildNodes.getLength(); i++) {
195            Node node = allChildNodes.item(i);
196            String name = node.getLocalName();
197            if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
198                assertNamespaceURI(node.getNamespaceURI());
199                unparsedElementList.add(name);                
200            }
201        }
202        
203        //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
204        for (String nextChildName : childNames) {
205            String childName = nextChildName;
206            if(groupObject.isGroup(nextChildName)) {
207                childName = makeGroupElementName(groupObject.getMessage().getName(), nextChildName);
208            }
209                        unparsedElementList.remove(childName);
210            
211            // 4 char segment names are second occurrences of a segment within a single message
212            // structure. e.g. the second PID segment in an A17 patient swap message is known
213            // to hapi's code represenation as PID2
214            if (nextChildName.length() == 4 && Character.isDigit(nextChildName.charAt(3))) {
215                log.trace("Skipping rep segment: {}", nextChildName);
216            } else {   
217                parseReps(groupElement, groupObject, messageName, nextChildName, nextChildName);
218            }
219        }
220        
221        for (String segName : unparsedElementList) {
222            String segIndexName = groupObject.addNonstandardSegment(segName);
223            parseReps(groupElement, groupObject, messageName, segName, segIndexName);
224        }
225    }
226    
227    //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
228    private void parseReps(Element groupElement, Group groupObject, 
229            String messageName, String childName, String childIndexName) throws HL7Exception {
230        
231        String groupName = makeGroupElementName(messageName, childName);
232        List<Element> reps = getChildElementsByTagName(groupElement, groupName);
233        log.trace("# of elements matching {}: {}", groupName, reps.size());
234
235                if (groupObject.isRepeating(childIndexName)) {
236                        for (int i = 0; i < reps.size(); i++) {
237                                parseRep(reps.get(i), groupObject.get(childIndexName, i));
238                        }                                       
239                } else {
240                        if (reps.size() > 0) {
241                                parseRep(reps.get(0), groupObject.get(childIndexName, 0));                              
242                        }
243
244//                      if (reps.size() > 1) {                  
245//                              String newIndexName = groupObject.addNonstandardSegment(childName);                     
246//                              for (int i = 1; i < reps.size(); i++) {
247//                                      parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
248//                              }                                                               
249//                      }
250                        if (reps.size() > 1) {
251                                String newIndexName;
252                                int i=1;
253                                try     {
254                                        for (i = 1; i < reps.size(); i++) {
255                                                newIndexName = childName+(i+1);
256                                                Structure st = groupObject.get(newIndexName);
257                                                parseRep(reps.get(i), st);
258                                        }
259                                } catch(Throwable t) {
260                                        log.info("Issue Parsing: " + t);
261                                        newIndexName = groupObject.addNonstandardSegment(childName);
262                                        for (int j = i; j < reps.size(); j++) {
263                                                parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
264                                        }
265                                }
266                        }
267                        
268                }
269    }
270    
271    private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
272                if (theObj instanceof Group) {
273                        parse((Group) theObj, theElem);
274                }
275                else if (theObj instanceof Segment) {
276                        parse((Segment) theObj, theElem);
277                }                
278                log.trace("Parsed element: {}", theElem.getNodeName());         
279    }
280    
281    //includes direct children only
282    private List<Element> getChildElementsByTagName(Element theElement, String theName) throws HL7Exception {
283        List<Element> result = new ArrayList<Element>(10);
284        NodeList children = theElement.getChildNodes();
285        
286        for (int i = 0; i < children.getLength(); i++) {
287                Node child = children.item(i);
288                if (child.getNodeType() == Node.ELEMENT_NODE && child.getLocalName().equals(theName)) {
289                assertNamespaceURI(child.getNamespaceURI());
290                        result.add((Element)child);
291                }
292        }
293        
294        return result; 
295    }
296    
297    /** 
298     * Given the name of a group element in an XML message, returns the corresponding 
299     * group class name.  This name is identical except in order to be a valid class 
300     * name, the dot character immediately following the message name is replaced with 
301     * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
302     * corresponding group Class is called ADT_A01_INSURANCE. 
303     */
304//    protected static String makeGroupClassName(String elementName) {
305//        return elementName.replace('.', '_');
306//    }
307
308    /** 
309     * Given the name of a message and a Group class, returns the corresponding group element name in an 
310     * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
311     * ADT_A01.INSURANCE.
312     * 
313     * If it looks like a segment name (i.e. has 3 characters), no change is made. 
314     */
315    protected static String makeGroupElementName(String messageName, String className) {
316        String ret;
317        
318        if (className.length() > 4 || ourForceGroupNames.contains(className)) {
319            StringBuilder elementName = new StringBuilder();
320            elementName.append(messageName);
321            elementName.append('.');
322            elementName.append(className);
323            ret = elementName.toString();
324        } else if (className.length() == 4) {
325            // It is not clear why this case is needed.. We should figure out
326                // why it was added, since removing it or optimizing its use would
327                // prevent the need for "ourForGroupNames" above
328                ret = className.substring(0,3);
329        } else {
330            ret = className;
331        }
332        
333        return ret;
334    }
335
336    /** Test harness */
337    public static void main(String args[]) {
338        if (args.length != 1) {
339            System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
340            System.exit(1);
341        }
342
343        //read and parse message from file 
344        try {
345            File messageFile = new File(args[0]);
346            long fileLength = messageFile.length();
347            FileReader r = new FileReader(messageFile);
348            char[] cbuf = new char[(int) fileLength];
349            System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
350            r.close();
351            String messString = String.valueOf(cbuf);
352
353            Parser inParser = null;
354            Parser outParser = null;
355            PipeParser pp = new PipeParser();
356            ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
357            System.out.println("Encoding: " + pp.getEncoding(messString));
358            if (pp.getEncoding(messString) != null) {
359                inParser = pp;
360                outParser = xp;
361            }
362            else if (xp.getEncoding(messString) != null) {
363                inParser = xp;
364                outParser = pp;
365            }
366
367            Message mess = inParser.parse(messString);
368            System.out.println("Got message of type " + mess.getClass().getName());
369
370            String otherEncoding = outParser.encode(mess);
371            System.out.println(otherEncoding);
372        }
373        catch (Exception e) {
374            e.printStackTrace();
375        }
376    }
377
378    /**
379     * {@inheritDoc}
380     */
381        @Override
382        public void parse(Message theMessage, String theString) throws HL7Exception {
383                Document doc = parseStringIntoDocument(theString);
384        parse(theMessage, doc.getDocumentElement());
385
386        applySuperStructureName(theMessage);
387        }
388
389    /**
390     * Convenience factory method which returns an instance that has a 
391     * {@link NoValidation NoValidation validation context}. 
392     */
393    public static XMLParser getInstanceWithNoValidation() {
394        HapiContext context = new DefaultHapiContext(ValidationContextFactory.noValidation());
395        XMLParser retVal = context.getXMLParser();
396        return retVal;
397    }
398
399
400}