001package ca.uhn.hl7v2.util;
002
003import java.util.regex.Pattern;
004
005import org.w3c.dom.Document;
006import org.w3c.dom.Element;
007import org.w3c.dom.NamedNodeMap;
008import org.w3c.dom.Node;
009import org.w3c.dom.NodeList;
010import org.xml.sax.SAXException;
011
012import ca.uhn.hl7v2.HL7Exception;
013import ca.uhn.hl7v2.model.Message;
014import ca.uhn.hl7v2.parser.GenericParser;
015
016/**
017 * Tools for testing message strings for semantic equivalence without assuming the correctness
018 * of parsers.  
019 * @author Bryan Tripp
020 */
021public class EncodedMessageComparator {
022    
023    static final GenericParser parser = new GenericParser();  
024    
025    /**
026     * Returns a "standardized" equivalent of the given message string.  For delimited
027     * messages, the returned value is the shortest string that has an equivalent
028     * meaning in HL7.  For XML-encoded messages, the returned value is equivalent XML output
029     * using a standard pretty-print format.  An automatic determination is made about whether 
030     * the given string is XML or ER7 (i.e. traditionally) encoded.
031     * @param message an XML-encoded or ER7-encoded message string
032     */
033    public static String standardize(String message) throws SAXException {
034        String result = null;
035        String encoding = parser.getEncoding(message);
036        if (encoding.equals("XML")) {
037            result = standardizeXML(message);
038        } else {
039            result = standardizeER7(message);
040        }
041        return result;
042    }
043    
044    /**
045     * Returns the shortest string that is semantically equivalent to a given ER7-encoded 
046     * message string.
047     */
048    public static String standardizeER7(String message) {
049        
050        //make delimiter sequences (must quote with \ if not alphanumeric; can't otherwise because of regexp rules)
051        char fieldDelimChar = message.charAt(3);
052        String fieldDelim = String.valueOf(fieldDelimChar);
053        if (!Character.isLetterOrDigit(fieldDelimChar)) fieldDelim = "\\" + fieldDelimChar;
054        
055        char compSepChar = message.charAt(4);
056        String compSep = String.valueOf(compSepChar);
057        if (!Character.isLetterOrDigit(compSepChar)) compSep = "\\" + compSepChar;
058        
059        char repSepChar = message.charAt(5);
060        String repSep = String.valueOf(repSepChar);
061        if (!Character.isLetterOrDigit(repSepChar)) repSep = "\\" + repSepChar;
062        
063        char subSepChar = message.charAt(7);
064        String subSep = String.valueOf(subSepChar);
065        if (!Character.isLetterOrDigit(subSepChar)) subSep = "\\" + subSepChar;
066        
067        //char space = ' ';
068        
069        /* Things to strip (cumulative):
070         *  - all delimiters and repetition separators before end line (i.e. end segment)
071         *  - repetition separators, comp and subcomp delims before new field
072         *  - subcomponent delimiters before new component
073         */
074        Pattern endSegment = Pattern.compile("[" + fieldDelim + compSep + repSep + subSep + "]*[\n\r]+");
075        message = endSegment.matcher(message).replaceAll("\r");
076        
077        Pattern endField = Pattern.compile("[" + repSep + compSep + subSep + "]*" + fieldDelim);
078        message = endField.matcher(message).replaceAll(String.valueOf(fieldDelim));
079        
080        Pattern endComp = Pattern.compile("[" + subSep + "]*" + compSep);
081        message = endComp.matcher(message).replaceAll(String.valueOf(compSep));
082        
083        //Pattern endSub = Pattern.compile("[ ]*" + subSep);
084        //message = endSub.matcher(message).replaceAll(String.valueOf(subSep));
085        
086        //handle special case of subcomp delim in encoding characters
087        message = message.substring(0, 7) + subSepChar + message.substring(7);
088        
089        return message;
090    }
091    
092    /**
093     * Returns a semantic equivalent of a given XML-encoded message in a default format.
094     * Attributes, comments, and processing instructions are not considered to change the 
095     * HL7 meaning of the message, and are removed in the standardized representation.    
096     */
097    public static String standardizeXML(String message) throws SAXException {
098        try {
099                Document doc = XMLUtils.parse(message);
100            clean(doc.getDocumentElement());
101            return XMLUtils.serialize(doc, true);
102        } catch (Exception e) {
103            throw new RuntimeException("Exception while standardizing XML ", e);
104        }
105
106    }
107    
108    /** Removes attributes, comments, and processing instructions. */
109    private static void clean(Element elem) {
110        NodeList children = elem.getChildNodes();        
111        for (int i = 0; i < children.getLength(); i++) {
112            Node child = children.item(i);
113            if (child.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE 
114                || child.getNodeType() == Node.COMMENT_NODE)
115            {
116                                elem.removeChild(child);
117            } else if (child.getNodeType() == Node.ELEMENT_NODE) {
118                clean((Element) child);
119            }
120        }
121        
122        NamedNodeMap attributes = elem.getAttributes();
123        //get names
124        String[] names = new String[attributes.getLength()];
125        for (int i = 0; i < names.length; i++) {
126            names[i] = attributes.item(i).getNodeName();
127        }
128        //remove by name
129        for (int i = 0; i < names.length; i++) {
130            attributes.removeNamedItem(names[i]);
131        }
132
133    }
134    
135    /**
136     * <p>Compares two HL7 messages to see if they are equivalent (in terms of their  
137     * HL7 meaning).  Semantically irrelevant differences (e.g. spaces in an XML tag; 
138     * extra field delimiters at the end of a segment; XML vs. ER7 encoding; XML attributes)
139     * are ignored. This check is performed without assuming the correctness of the HAPI parsers, 
140     * and can therefore be used to test them.  This is done by parsing a message, encoding it
141     * again, and comparing the result with this original.  </p>
142     * <p>If one message is in XML and the other in ER7, the former is converted to ER7 to 
143     * perform the comparison.  This process relies on the HAPI parsers.  However, the 
144     * parsed message is first encoded as XML and compared to the original, so that the 
145     * integrity of the parser can be verified.  An exception is thrown if this comparison 
146     * is unsuccessful.  </p>
147     * @return true if given messages are semantically equivalent 
148     */
149    public static boolean equivalent(String message1, String message2) throws HL7Exception {
150        Pair<String> messages = standardize(message1, message2);
151        return messages.getValue1().equals(messages.getValue2());
152    }
153    
154    static Pair<String> standardize(String message1, String message2) throws HL7Exception {
155        String encoding1 = parser.getEncoding(message1);
156        String encoding2 = parser.getEncoding(message2);
157        
158        if (!encoding1.equals(encoding2)) {
159            if (encoding1.equals("XML")) {
160                message1 = safeER7Conversion(message1);
161            } else {
162                message2 = safeER7Conversion(message2);
163            }
164        }
165        
166        String std1, std2;
167        try {
168            std1 = standardize(message1);
169            std2 = standardize(message2);
170        } catch (SAXException e) {
171            throw new HL7Exception("Equivalence check failed due to SAXException: " + e.getMessage());
172        }
173        
174        return new Pair<String>(std1, std2);
175        }
176
177        /** 
178     * Converts XML message to ER7, first checking integrity of parse and throwing 
179     * an exception if parse not correct
180     */
181    static String safeER7Conversion(String xmlMessage) throws HL7Exception {
182        Message m = parser.parse(xmlMessage);
183
184        String check = parser.encode(m, "XML");
185        if (!equivalent(xmlMessage, check)) {
186            throw new HL7Exception("Parsed and encoded message not equivalent to original (possibilities: invalid message, bug in parser)");
187        }
188        
189        return parser.encode(m, "VB");        
190    }
191   
192
193    
194}