View Javadoc
1   /**
2   The contents of this file are subject to the Mozilla Public License Version 1.1
3   (the "License"); you may not use this file except in compliance with the License.
4   You may obtain a copy of the License at http://www.mozilla.org/MPL/
5   Software distributed under the License is distributed on an "AS IS" basis,
6   WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7   specific language governing rights and limitations under the License.
8   
9   The Initial Developer of the Original Code is University Health Network. Copyright (C)
10  2001.  All Rights Reserved.
11  
12  Contributor(s): ______________________________________.
13  
14  Alternatively, the contents of this file may be used under the terms of the
15  GNU General Public License (the "GPL"), in which case the provisions of the GPL are
16  applicable instead of those above.  If you wish to allow use of your version of this
17  file only under the terms of the GPL and not to allow others to use your version
18  of this file under the MPL, indicate your decision by deleting  the provisions above
19  and replace  them with the notice and other provisions required by the GPL License.
20  If you do not delete the provisions above, a recipient may use your version of
21  this file under either the MPL or the GPL.
22  
23  */
24  package ca.uhn.hl7v2.parser;
25  
26  import java.io.File;
27  import java.io.FileReader;
28  import java.util.ArrayList;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  
33  import ca.uhn.hl7v2.DefaultHapiContext;
34  import ca.uhn.hl7v2.model.GenericMessage;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  import org.w3c.dom.DOMException;
38  import org.w3c.dom.Document;
39  import org.w3c.dom.Element;
40  import org.w3c.dom.Node;
41  import org.w3c.dom.NodeList;
42  
43  import ca.uhn.hl7v2.HL7Exception;
44  import ca.uhn.hl7v2.HapiContext;
45  import ca.uhn.hl7v2.model.Group;
46  import ca.uhn.hl7v2.model.Message;
47  import ca.uhn.hl7v2.model.Segment;
48  import ca.uhn.hl7v2.model.Structure;
49  import ca.uhn.hl7v2.util.XMLUtils;
50  import ca.uhn.hl7v2.validation.impl.NoValidation;
51  import ca.uhn.hl7v2.validation.impl.ValidationContextFactory;
52  
53  /**
54   * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
55   * to Segment objects (in a Message object) using the name of a segment and the names 
56   * of any groups in which the segment is nested.  The names of group classes must correspond
57   * to the names of group elements (they must be identical except that a dot in the element 
58   * name, following the message name, is replaced with an underscore, in order to constitute a 
59   * valid class name). </p>
60   * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
61   * names have been automatically generated based on the group contents.  However, these automatic 
62   * names are gradually being replaced with manually assigned names.  This process is expected to 
63   * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
64   * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
65   * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
66   * changed to correspond with the official group names, once these are all assigned.  </p>
67   * 
68   * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
69   * @author Bryan Tripp
70   */
71  public class DefaultXMLParser extends XMLParser {
72  
73      private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
74  
75      private static final Set<String> ourForceGroupNames;
76      
77      static {
78      	ourForceGroupNames = new HashSet<>();
79      	ourForceGroupNames.add("DIET");
80      }
81      
82      public DefaultXMLParser() {
83      	super();
84      }
85      
86      public DefaultXMLParser(HapiContext context) {
87  		super(context);
88  	}
89  
90  	/** 
91       * Creates a new instance of DefaultXMLParser 
92       *  
93       * @param theFactory custom factory to use for model class lookup 
94       */
95      public DefaultXMLParser(ModelClassFactory theFactory) {
96      	super(theFactory);
97      }
98      
99      /**
100      * <p>Creates an XML Document that corresponds to the given Message object. </p>
101      * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
102      * into it that correspond to the groups and segments that belong to the message type that your subclass
103      * of XMLParser supports.  Then, for each segment in the message, call the method
104      * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
105      * that segment and the corresponding Segment object from the given Message.</p>
106      */
107     public Document encodeDocument(Message source) throws HL7Exception {
108         String messageClassName = source.getClass().getName();
109         String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
110 
111         // Handle GenericMessages which will have an errant $ in their class name.
112         if (source instanceof GenericMessage) {
113             messageName = messageName.replaceAll("\\$", "");
114         }
115 
116         try {
117             Document doc = XMLUtils.emptyDocument(messageName);
118             encode(source, doc.getDocumentElement());
119             return doc;
120         } catch (Exception e) {
121             throw new HL7Exception(
122                 "Can't create XML document - " + e.getClass().getName(), e);
123         }
124     }
125 
126     /**
127      * Copies data from a group object into the corresponding group element, creating any 
128      * necessary child nodes.  
129      */
130     private void encode(Group groupObject, Element groupElement) throws HL7Exception {
131         String[] childNames = groupObject.getNames();
132         String messageName = groupObject.getMessage().getName();
133         
134         try {
135         	for (String name : childNames) {
136                 Structure[] reps = groupObject.getAll(name);
137                 for (Structure rep : reps) {
138                     String elementName = makeGroupElementName(messageName, name);
139 					Element childElement;
140 					try {
141 						childElement = groupElement.getOwnerDocument().createElementNS(NS, elementName);
142 			        } catch (DOMException e) {
143 			            throw new HL7Exception(
144 			                "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(), e);
145 			        }
146                     groupElement.appendChild(childElement);
147                     if (rep instanceof Group) {
148                         encode((Group) rep, childElement);
149                     }
150                     else if (rep instanceof Segment) {
151                         encode((Segment) rep, childElement);
152                     }
153 				}
154             }
155         } catch (DOMException e) {
156             throw new HL7Exception(
157                 "Can't encode group " + groupObject.getClass().getName(), e);
158         }
159     }
160 
161 
162     /**
163      * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
164      * <p>The easiest way to implement this method for a particular message structure is as follows:
165      * <ol><li>Create an instance of the Message type you are going to handle with your subclass
166      * of XMLParser</li>
167      * <li>Go through the given Document and find the Elements that represent the top level of
168      * each message segment. </li>
169      * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
170      * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
171      * At the end of this process, your Message object should be populated with data from the XML
172      * Document.</p>
173      * @throws HL7Exception if the message is not correctly formatted.
174      * @throws EncodingNotSupportedException if the message encoded
175      *     is not supported by this parser.
176      */
177     public Message parseDocument(Document xmlMessage, String version) throws HL7Exception {
178 
179         assertNamespaceURI(xmlMessage.getDocumentElement().getNamespaceURI());
180 
181         Message message = instantiateMessage(xmlMessage.getDocumentElement().getLocalName(), version, true);
182     	// Set parser before parsing the contents actually starts in order to respect
183         // the settings of the HapiContext
184         message.setParser(this);
185         parse(message, xmlMessage.getDocumentElement());
186         return message;
187     }
188 
189     /**
190      * Populates the given group object with data from the given group element, ignoring 
191      * any unrecognized nodes.  
192      */
193     private void parse(Group groupObject, Element groupElement) throws HL7Exception {
194         String[] childNames = groupObject.getNames();
195         String messageName = groupObject.getMessage().getName();
196         
197         NodeList allChildNodes = groupElement.getChildNodes();
198         List<String> unparsedElementList = new ArrayList<>();
199         for (int i = 0; i < allChildNodes.getLength(); i++) {
200             Node node = allChildNodes.item(i);
201             String name = node.getLocalName();
202             if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
203                 assertNamespaceURI(node.getNamespaceURI());
204                 unparsedElementList.add(name);                
205             }
206         }
207         
208         //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
209         for (String nextChildName : childNames) {
210             String childName = nextChildName;
211             if(groupObject.isGroup(nextChildName)) {
212             	childName = makeGroupElementName(groupObject.getMessage().getName(), nextChildName);
213             }
214 			unparsedElementList.remove(childName);
215             
216             // 4 char segment names are second occurrences of a segment within a single message
217             // structure. e.g. the second PID segment in an A17 patient swap message is known
218             // to hapi's code represenation as PID2
219             if (nextChildName.length() == 4 && Character.isDigit(nextChildName.charAt(3))) {
220             	log.trace("Skipping rep segment: {}", nextChildName);
221             } else {   
222             	parseReps(groupElement, groupObject, messageName, nextChildName, nextChildName);
223             }
224         }
225         
226         for (String segName : unparsedElementList) {
227             String segIndexName = groupObject.addNonstandardSegment(segName);
228             parseReps(groupElement, groupObject, messageName, segName, segIndexName);
229         }
230     }
231     
232     //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
233     private void parseReps(Element groupElement, Group groupObject, 
234             String messageName, String childName, String childIndexName) throws HL7Exception {
235         
236     	String groupName = makeGroupElementName(messageName, childName);
237         List<Element> reps = getChildElementsByTagName(groupElement, groupName);
238         log.trace("# of elements matching {}: {}", groupName, reps.size());
239 
240 		if (groupObject.isRepeating(childIndexName)) {
241 			for (int i = 0; i < reps.size(); i++) {
242 				parseRep(reps.get(i), groupObject.get(childIndexName, i));
243 			}        			        
244 		} else {
245 			if (reps.size() > 0) {
246 				parseRep(reps.get(0), groupObject.get(childIndexName, 0));				
247 			}
248 
249 //			if (reps.size() > 1) {			
250 //				String newIndexName = groupObject.addNonstandardSegment(childName);			
251 //				for (int i = 1; i < reps.size(); i++) {
252 //					parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
253 //				}        			        			
254 //			}
255 			if (reps.size() > 1) {
256 				String newIndexName;
257 				int i=1;
258 				try	{
259 					for (i = 1; i < reps.size(); i++) {
260 						newIndexName = childName+(i+1);
261 						Structure st = groupObject.get(newIndexName);
262 						parseRep(reps.get(i), st);
263 					}
264 				} catch(Throwable t) {
265 					log.info("Issue Parsing: " + t);
266 					newIndexName = groupObject.addNonstandardSegment(childName);
267 					for (int j = i; j < reps.size(); j++) {
268 						parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
269 					}
270 				}
271 			}
272 			
273 		}
274     }
275     
276     private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
277 		if (theObj instanceof Group) {
278 			parse((Group) theObj, theElem);
279 		}
280 		else if (theObj instanceof Segment) {
281 			parse((Segment) theObj, theElem);
282 		}                
283 		log.trace("Parsed element: {}", theElem.getNodeName());    	
284     }
285     
286     //includes direct children only
287     private List<Element> getChildElementsByTagName(Element theElement, String theName) throws HL7Exception {
288     	List<Element> result = new ArrayList<>(10);
289     	NodeList children = theElement.getChildNodes();
290     	
291     	for (int i = 0; i < children.getLength(); i++) {
292     		Node child = children.item(i);
293     		if (child.getNodeType() == Node.ELEMENT_NODE && child.getLocalName().equals(theName)) {
294                 assertNamespaceURI(child.getNamespaceURI());
295     			result.add((Element)child);
296     		}
297     	}
298     	
299     	return result; 
300     }
301     
302     /** 
303      * Given the name of a group element in an XML message, returns the corresponding 
304      * group class name.  This name is identical except in order to be a valid class 
305      * name, the dot character immediately following the message name is replaced with 
306      * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
307      * corresponding group Class is called ADT_A01_INSURANCE. 
308      */
309 //    protected static String makeGroupClassName(String elementName) {
310 //        return elementName.replace('.', '_');
311 //    }
312 
313     /** 
314      * Given the name of a message and a Group class, returns the corresponding group element name in an 
315      * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
316      * ADT_A01.INSURANCE.
317      * 
318      * If it looks like a segment name (i.e. has 3 characters), no change is made. 
319      */
320     protected static String makeGroupElementName(String messageName, String className) {
321         String ret;
322         
323         if (className.length() > 4 || ourForceGroupNames.contains(className)) {
324             ret = messageName +
325                     '.' +
326                     className;
327         } else if (className.length() == 4) {
328             // It is not clear why this case is needed.. We should figure out
329         	// why it was added, since removing it or optimizing its use would
330         	// prevent the need for "ourForGroupNames" above
331         	ret = className.substring(0,3);
332         } else {
333             ret = className;
334         }
335         
336         return ret;
337     }
338 
339     /** Test harness */
340     public static void main(String[] args) {
341         if (args.length != 1) {
342             System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
343             System.exit(1);
344         }
345 
346         //read and parse message from file 
347         try {
348             File messageFile = new File(args[0]);
349             long fileLength = messageFile.length();
350             FileReader r = new FileReader(messageFile);
351             char[] cbuf = new char[(int) fileLength];
352             System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
353             r.close();
354             String messString = String.valueOf(cbuf);
355 
356             Parser inParser = null;
357             Parser outParser = null;
358             PipeParserer.html#PipeParser">PipeParser pp = new PipeParser();
359             ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
360             System.out.println("Encoding: " + pp.getEncoding(messString));
361             if (pp.getEncoding(messString) != null) {
362                 inParser = pp;
363                 outParser = xp;
364             }
365             else if (xp.getEncoding(messString) != null) {
366                 inParser = xp;
367                 outParser = pp;
368             }
369 
370             Message mess = inParser.parse(messString);
371             System.out.println("Got message of type " + mess.getClass().getName());
372 
373             String otherEncoding = outParser.encode(mess);
374             System.out.println(otherEncoding);
375         }
376         catch (Exception e) {
377             e.printStackTrace();
378         }
379     }
380 
381     /**
382      * {@inheritDoc}
383      */
384 	@Override
385 	public void parse(Message theMessage, String theString) throws HL7Exception {
386 	   theMessage.setParser(this);
387 		Document doc = parseStringIntoDocument(theString);
388         parse(theMessage, doc.getDocumentElement());
389 
390         applySuperStructureName(theMessage);
391 	}
392 
393     /**
394      * Convenience factory method which returns an instance that has a 
395      * {@link NoValidation NoValidation validation context}. 
396      */
397     public static XMLParser getInstanceWithNoValidation() {
398         HapiContext context = new DefaultHapiContext(ValidationContextFactory.noValidation());
399         return context.getXMLParser();
400     }
401 
402 
403 }