Coverage Report - ca.uhn.hl7v2.util.EncodedMessageComparator
 
Classes in this File Line Coverage Branch Coverage Complexity
EncodedMessageComparator
89%
60/67
78%
22/28
3.857
 
 1  
 package ca.uhn.hl7v2.util;
 2  
 
 3  
 import java.util.regex.Pattern;
 4  
 
 5  
 import org.w3c.dom.Document;
 6  
 import org.w3c.dom.Element;
 7  
 import org.w3c.dom.NamedNodeMap;
 8  
 import org.w3c.dom.Node;
 9  
 import org.w3c.dom.NodeList;
 10  
 import org.xml.sax.SAXException;
 11  
 
 12  
 import ca.uhn.hl7v2.HL7Exception;
 13  
 import ca.uhn.hl7v2.model.Message;
 14  
 import ca.uhn.hl7v2.parser.GenericParser;
 15  
 
 16  
 /**
 17  
  * Tools for testing message strings for semantic equivalence without assuming the correctness
 18  
  * of parsers.  
 19  
  * @author Bryan Tripp
 20  
  */
 21  0
 public class EncodedMessageComparator {
 22  
     
 23  5
     static final GenericParser parser = new GenericParser();  
 24  
     
 25  
     /**
 26  
      * Returns a "standardized" equivalent of the given message string.  For delimited
 27  
      * messages, the returned value is the shortest string that has an equivalent
 28  
      * meaning in HL7.  For XML-encoded messages, the returned value is equivalent XML output
 29  
      * using a standard pretty-print format.  An automatic determination is made about whether 
 30  
      * the given string is XML or ER7 (i.e. traditionally) encoded.
 31  
      * @param message an XML-encoded or ER7-encoded message string
 32  
      */
 33  
     public static String standardize(String message) throws SAXException {
 34  40
         String result = null;
 35  40
         String encoding = parser.getEncoding(message);
 36  40
         if (encoding.equals("XML")) {
 37  20
             result = standardizeXML(message);
 38  
         } else {
 39  20
             result = standardizeER7(message);
 40  
         }
 41  40
         return result;
 42  
     }
 43  
     
 44  
     /**
 45  
      * Returns the shortest string that is semantically equivalent to a given ER7-encoded 
 46  
      * message string.
 47  
      */
 48  
     public static String standardizeER7(String message) {
 49  
         
 50  
         //make delimiter sequences (must quote with \ if not alphanumeric; can't otherwise because of regexp rules)
 51  25
         char fieldDelimChar = message.charAt(3);
 52  25
         String fieldDelim = String.valueOf(fieldDelimChar);
 53  25
         if (!Character.isLetterOrDigit(fieldDelimChar)) fieldDelim = "\\" + fieldDelimChar;
 54  
         
 55  25
         char compSepChar = message.charAt(4);
 56  25
         String compSep = String.valueOf(compSepChar);
 57  25
         if (!Character.isLetterOrDigit(compSepChar)) compSep = "\\" + compSepChar;
 58  
         
 59  25
         char repSepChar = message.charAt(5);
 60  25
         String repSep = String.valueOf(repSepChar);
 61  25
         if (!Character.isLetterOrDigit(repSepChar)) repSep = "\\" + repSepChar;
 62  
         
 63  25
         char subSepChar = message.charAt(7);
 64  25
         String subSep = String.valueOf(subSepChar);
 65  25
         if (!Character.isLetterOrDigit(subSepChar)) subSep = "\\" + subSepChar;
 66  
         
 67  
         //char space = ' ';
 68  
         
 69  
         /* Things to strip (cumulative):
 70  
          *  - all delimiters and repetition separators before end line (i.e. end segment)
 71  
          *  - repetition separators, comp and subcomp delims before new field
 72  
          *  - subcomponent delimiters before new component
 73  
          */
 74  25
         Pattern endSegment = Pattern.compile("[" + fieldDelim + compSep + repSep + subSep + "]*[\n\r]+");
 75  25
         message = endSegment.matcher(message).replaceAll("\r");
 76  
         
 77  25
         Pattern endField = Pattern.compile("[" + repSep + compSep + subSep + "]*" + fieldDelim);
 78  25
         message = endField.matcher(message).replaceAll(String.valueOf(fieldDelim));
 79  
         
 80  25
         Pattern endComp = Pattern.compile("[" + subSep + "]*" + compSep);
 81  25
         message = endComp.matcher(message).replaceAll(String.valueOf(compSep));
 82  
         
 83  
         //Pattern endSub = Pattern.compile("[ ]*" + subSep);
 84  
         //message = endSub.matcher(message).replaceAll(String.valueOf(subSep));
 85  
         
 86  
         //handle special case of subcomp delim in encoding characters
 87  25
         message = message.substring(0, 7) + subSepChar + message.substring(7);
 88  
         
 89  25
         return message;
 90  
     }
 91  
     
 92  
     /**
 93  
      * Returns a semantic equivalent of a given XML-encoded message in a default format.
 94  
      * Attributes, comments, and processing instructions are not considered to change the 
 95  
      * HL7 meaning of the message, and are removed in the standardized representation.    
 96  
      */
 97  
     public static String standardizeXML(String message) throws SAXException {
 98  
         try {
 99  25
                 Document doc = XMLUtils.parse(message);
 100  25
             clean(doc.getDocumentElement());
 101  25
             return XMLUtils.serialize(doc, true);
 102  0
         } catch (Exception e) {
 103  0
             throw new RuntimeException("Exception while standardizing XML ", e);
 104  
         }
 105  
 
 106  
     }
 107  
     
 108  
     /** Removes attributes, comments, and processing instructions. */
 109  
     private static void clean(Element elem) {
 110  575
         NodeList children = elem.getChildNodes();        
 111  2260
         for (int i = 0; i < children.getLength(); i++) {
 112  1685
             Node child = children.item(i);
 113  1685
             if (child.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE 
 114  1680
                 || child.getNodeType() == Node.COMMENT_NODE)
 115  
             {
 116  10
                                 elem.removeChild(child);
 117  1675
             } else if (child.getNodeType() == Node.ELEMENT_NODE) {
 118  550
                 clean((Element) child);
 119  
             }
 120  
         }
 121  
         
 122  575
         NamedNodeMap attributes = elem.getAttributes();
 123  
         //get names
 124  575
         String[] names = new String[attributes.getLength()];
 125  605
         for (int i = 0; i < names.length; i++) {
 126  30
             names[i] = attributes.item(i).getNodeName();
 127  
         }
 128  
         //remove by name
 129  605
         for (int i = 0; i < names.length; i++) {
 130  30
             attributes.removeNamedItem(names[i]);
 131  
         }
 132  
 
 133  575
     }
 134  
     
 135  
     /**
 136  
      * <p>Compares two HL7 messages to see if they are equivalent (in terms of their  
 137  
      * HL7 meaning).  Semantically irrelevant differences (e.g. spaces in an XML tag; 
 138  
      * extra field delimiters at the end of a segment; XML vs. ER7 encoding; XML attributes)
 139  
      * are ignored. This check is performed without assuming the correctness of the HAPI parsers, 
 140  
      * and can therefore be used to test them.  This is done by parsing a message, encoding it
 141  
      * again, and comparing the result with this original.  </p>
 142  
      * <p>If one message is in XML and the other in ER7, the former is converted to ER7 to 
 143  
      * perform the comparison.  This process relies on the HAPI parsers.  However, the 
 144  
      * parsed message is first encoded as XML and compared to the original, so that the 
 145  
      * integrity of the parser can be verified.  An exception is thrown if this comparison 
 146  
      * is unsuccessful.  </p>
 147  
      * @return true if given messages are semantically equivalent 
 148  
      */
 149  
     public static boolean equivalent(String message1, String message2) throws HL7Exception {
 150  20
             Pair<String> messages = standardize(message1, message2);
 151  20
         return messages.getValue1().equals(messages.getValue2());
 152  
     }
 153  
     
 154  
     static Pair<String> standardize(String message1, String message2) throws HL7Exception {
 155  20
         String encoding1 = parser.getEncoding(message1);
 156  20
         String encoding2 = parser.getEncoding(message2);
 157  
         
 158  20
         if (!encoding1.equals(encoding2)) {
 159  5
             if (encoding1.equals("XML")) {
 160  0
                 message1 = safeER7Conversion(message1);
 161  
             } else {
 162  5
                 message2 = safeER7Conversion(message2);
 163  
             }
 164  
         }
 165  
         
 166  
         String std1, std2;
 167  
         try {
 168  20
             std1 = standardize(message1);
 169  20
             std2 = standardize(message2);
 170  0
         } catch (SAXException e) {
 171  0
             throw new HL7Exception("Equivalence check failed due to SAXException: " + e.getMessage());
 172  20
         }
 173  
         
 174  20
         return new Pair<String>(std1, std2);
 175  
         }
 176  
 
 177  
         /** 
 178  
      * Converts XML message to ER7, first checking integrity of parse and throwing 
 179  
      * an exception if parse not correct
 180  
      */
 181  
     static String safeER7Conversion(String xmlMessage) throws HL7Exception {
 182  5
         Message m = parser.parse(xmlMessage);
 183  
 
 184  5
         String check = parser.encode(m, "XML");
 185  5
         if (!equivalent(xmlMessage, check)) {
 186  0
             throw new HL7Exception("Parsed and encoded message not equivalent to original (possibilities: invalid message, bug in parser)");
 187  
         }
 188  
         
 189  5
         return parser.encode(m, "VB");        
 190  
     }
 191  
    
 192  
 
 193  
     
 194  
 }