View Javadoc
1   package ca.uhn.hl7v2.util;
2   
3   import java.util.regex.Pattern;
4   
5   import org.w3c.dom.Document;
6   import org.w3c.dom.Element;
7   import org.w3c.dom.NamedNodeMap;
8   import org.w3c.dom.Node;
9   import org.w3c.dom.NodeList;
10  import org.xml.sax.SAXException;
11  
12  import ca.uhn.hl7v2.HL7Exception;
13  import ca.uhn.hl7v2.model.Message;
14  import ca.uhn.hl7v2.parser.GenericParser;
15  
16  /**
17   * Tools for testing message strings for semantic equivalence without assuming the correctness
18   * of parsers.  
19   * @author Bryan Tripp
20   */
21  public class EncodedMessageComparator {
22      
23      static final GenericParsertml#GenericParser">GenericParser parser = new GenericParser();  
24      
25      /**
26       * Returns a "standardized" equivalent of the given message string.  For delimited
27       * messages, the returned value is the shortest string that has an equivalent
28       * meaning in HL7.  For XML-encoded messages, the returned value is equivalent XML output
29       * using a standard pretty-print format.  An automatic determination is made about whether 
30       * the given string is XML or ER7 (i.e. traditionally) encoded.
31       * @param message an XML-encoded or ER7-encoded message string
32       */
33      public static String standardize(String message) throws SAXException {
34          String result;
35          String encoding = parser.getEncoding(message);
36          if (encoding.equals("XML")) {
37              result = standardizeXML(message);
38          } else {
39              result = standardizeER7(message);
40          }
41          return result;
42      }
43      
44      /**
45       * Returns the shortest string that is semantically equivalent to a given ER7-encoded 
46       * message string.
47       */
48      public static String standardizeER7(String message) {
49          
50          //make delimiter sequences (must quote with \ if not alphanumeric; can't otherwise because of regexp rules)
51          char fieldDelimChar = message.charAt(3);
52          String fieldDelim = String.valueOf(fieldDelimChar);
53          if (!Character.isLetterOrDigit(fieldDelimChar)) fieldDelim = "\\" + fieldDelimChar;
54          
55          char compSepChar = message.charAt(4);
56          String compSep = String.valueOf(compSepChar);
57          if (!Character.isLetterOrDigit(compSepChar)) compSep = "\\" + compSepChar;
58          
59          char repSepChar = message.charAt(5);
60          String repSep = String.valueOf(repSepChar);
61          if (!Character.isLetterOrDigit(repSepChar)) repSep = "\\" + repSepChar;
62          
63          char subSepChar = message.charAt(7);
64          String subSep = String.valueOf(subSepChar);
65          if (!Character.isLetterOrDigit(subSepChar)) subSep = "\\" + subSepChar;
66          
67          //char space = ' ';
68          
69          /* Things to strip (cumulative):
70           *  - all delimiters and repetition separators before end line (i.e. end segment)
71           *  - repetition separators, comp and subcomp delims before new field
72           *  - subcomponent delimiters before new component
73           */
74          Pattern endSegment = Pattern.compile("[" + fieldDelim + compSep + repSep + subSep + "]*[\n\r]+");
75          message = endSegment.matcher(message).replaceAll("\r");
76          
77          Pattern endField = Pattern.compile("[" + repSep + compSep + subSep + "]*" + fieldDelim);
78          message = endField.matcher(message).replaceAll(fieldDelim);
79          
80          Pattern endComp = Pattern.compile("[" + subSep + "]*" + compSep);
81          message = endComp.matcher(message).replaceAll(compSep);
82          
83          //Pattern endSub = Pattern.compile("[ ]*" + subSep);
84          //message = endSub.matcher(message).replaceAll(String.valueOf(subSep));
85          
86          //handle special case of subcomp delim in encoding characters
87          message = message.substring(0, 7) + subSepChar + message.substring(7);
88          
89          return message;
90      }
91      
92      /**
93       * Returns a semantic equivalent of a given XML-encoded message in a default format.
94       * Attributes, comments, and processing instructions are not considered to change the 
95       * HL7 meaning of the message, and are removed in the standardized representation.    
96       */
97      public static String standardizeXML(String message) {
98          try {
99          	Document doc = XMLUtils.parse(message);
100             clean(doc.getDocumentElement());
101             return XMLUtils.serialize(doc, true);
102         } catch (Exception e) {
103             throw new RuntimeException("Exception while standardizing XML ", e);
104         }
105 
106     }
107     
108     /** Removes attributes, comments, and processing instructions. */
109     private static void clean(Element elem) {
110         NodeList children = elem.getChildNodes();        
111         for (int i = 0; i < children.getLength(); i++) {
112             Node child = children.item(i);
113             if (child.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE 
114                 || child.getNodeType() == Node.COMMENT_NODE)
115             {
116 				elem.removeChild(child);
117             } else if (child.getNodeType() == Node.ELEMENT_NODE) {
118                 clean((Element) child);
119             }
120         }
121         
122         NamedNodeMap attributes = elem.getAttributes();
123         //get names
124         String[] names = new String[attributes.getLength()];
125         for (int i = 0; i < names.length; i++) {
126             names[i] = attributes.item(i).getNodeName();
127         }
128         //remove by name
129         for (String name : names) {
130             attributes.removeNamedItem(name);
131         }
132 
133     }
134     
135     /**
136      * <p>Compares two HL7 messages to see if they are equivalent (in terms of their  
137      * HL7 meaning).  Semantically irrelevant differences (e.g. spaces in an XML tag; 
138      * extra field delimiters at the end of a segment; XML vs. ER7 encoding; XML attributes)
139      * are ignored. This check is performed without assuming the correctness of the HAPI parsers, 
140      * and can therefore be used to test them.  This is done by parsing a message, encoding it
141      * again, and comparing the result with this original.  </p>
142      * <p>If one message is in XML and the other in ER7, the former is converted to ER7 to 
143      * perform the comparison.  This process relies on the HAPI parsers.  However, the 
144      * parsed message is first encoded as XML and compared to the original, so that the 
145      * integrity of the parser can be verified.  An exception is thrown if this comparison 
146      * is unsuccessful.  </p>
147      * @return true if given messages are semantically equivalent 
148      */
149     public static boolean equivalent(String message1, String message2) throws HL7Exception {
150     	Pair<String> messages = standardize(message1, message2);
151         return messages.getValue1().equals(messages.getValue2());
152     }
153     
154     static Pair<String> standardize(String message1, String message2) throws HL7Exception {
155         String encoding1 = parser.getEncoding(message1);
156         String encoding2 = parser.getEncoding(message2);
157         
158         if (!encoding1.equals(encoding2)) {
159             if (encoding1.equals("XML")) {
160                 message1 = safeER7Conversion(message1);
161             } else {
162                 message2 = safeER7Conversion(message2);
163             }
164         }
165         
166         String std1, std2;
167         try {
168             std1 = standardize(message1);
169             std2 = standardize(message2);
170         } catch (SAXException e) {
171             throw new HL7Exception("Equivalence check failed due to SAXException: " + e.getMessage());
172         }
173         
174         return new Pair<>(std1, std2);
175 	}
176 
177 	/** 
178      * Converts XML message to ER7, first checking integrity of parse and throwing 
179      * an exception if parse not correct
180      */
181     static String safeER7Conversion(String xmlMessage) throws HL7Exception {
182         Message m = parser.parse(xmlMessage);
183 
184         String check = parser.encode(m, "XML");
185         if (!equivalent(xmlMessage, check)) {
186             throw new HL7Exception("Parsed and encoded message not equivalent to original (possibilities: invalid message, bug in parser)");
187         }
188         
189         return parser.encode(m, "VB");        
190     }
191    
192 
193     
194 }