001package ca.uhn.hl7v2.util; 002 003import java.util.regex.Pattern; 004 005import org.w3c.dom.Document; 006import org.w3c.dom.Element; 007import org.w3c.dom.NamedNodeMap; 008import org.w3c.dom.Node; 009import org.w3c.dom.NodeList; 010import org.xml.sax.SAXException; 011 012import ca.uhn.hl7v2.HL7Exception; 013import ca.uhn.hl7v2.model.Message; 014import ca.uhn.hl7v2.parser.GenericParser; 015 016/** 017 * Tools for testing message strings for semantic equivalence without assuming the correctness 018 * of parsers. 019 * @author Bryan Tripp 020 */ 021public class EncodedMessageComparator { 022 023 static final GenericParser parser = new GenericParser(); 024 025 /** 026 * Returns a "standardized" equivalent of the given message string. For delimited 027 * messages, the returned value is the shortest string that has an equivalent 028 * meaning in HL7. For XML-encoded messages, the returned value is equivalent XML output 029 * using a standard pretty-print format. An automatic determination is made about whether 030 * the given string is XML or ER7 (i.e. traditionally) encoded. 031 * @param message an XML-encoded or ER7-encoded message string 032 */ 033 public static String standardize(String message) throws SAXException { 034 String result = null; 035 String encoding = parser.getEncoding(message); 036 if (encoding.equals("XML")) { 037 result = standardizeXML(message); 038 } else { 039 result = standardizeER7(message); 040 } 041 return result; 042 } 043 044 /** 045 * Returns the shortest string that is semantically equivalent to a given ER7-encoded 046 * message string. 047 */ 048 public static String standardizeER7(String message) { 049 050 //make delimiter sequences (must quote with \ if not alphanumeric; can't otherwise because of regexp rules) 051 char fieldDelimChar = message.charAt(3); 052 String fieldDelim = String.valueOf(fieldDelimChar); 053 if (!Character.isLetterOrDigit(fieldDelimChar)) fieldDelim = "\\" + fieldDelimChar; 054 055 char compSepChar = message.charAt(4); 056 String compSep = String.valueOf(compSepChar); 057 if (!Character.isLetterOrDigit(compSepChar)) compSep = "\\" + compSepChar; 058 059 char repSepChar = message.charAt(5); 060 String repSep = String.valueOf(repSepChar); 061 if (!Character.isLetterOrDigit(repSepChar)) repSep = "\\" + repSepChar; 062 063 char subSepChar = message.charAt(7); 064 String subSep = String.valueOf(subSepChar); 065 if (!Character.isLetterOrDigit(subSepChar)) subSep = "\\" + subSepChar; 066 067 //char space = ' '; 068 069 /* Things to strip (cumulative): 070 * - all delimiters and repetition separators before end line (i.e. end segment) 071 * - repetition separators, comp and subcomp delims before new field 072 * - subcomponent delimiters before new component 073 */ 074 Pattern endSegment = Pattern.compile("[" + fieldDelim + compSep + repSep + subSep + "]*[\n\r]+"); 075 message = endSegment.matcher(message).replaceAll("\r"); 076 077 Pattern endField = Pattern.compile("[" + repSep + compSep + subSep + "]*" + fieldDelim); 078 message = endField.matcher(message).replaceAll(String.valueOf(fieldDelim)); 079 080 Pattern endComp = Pattern.compile("[" + subSep + "]*" + compSep); 081 message = endComp.matcher(message).replaceAll(String.valueOf(compSep)); 082 083 //Pattern endSub = Pattern.compile("[ ]*" + subSep); 084 //message = endSub.matcher(message).replaceAll(String.valueOf(subSep)); 085 086 //handle special case of subcomp delim in encoding characters 087 message = message.substring(0, 7) + subSepChar + message.substring(7); 088 089 return message; 090 } 091 092 /** 093 * Returns a semantic equivalent of a given XML-encoded message in a default format. 094 * Attributes, comments, and processing instructions are not considered to change the 095 * HL7 meaning of the message, and are removed in the standardized representation. 096 */ 097 public static String standardizeXML(String message) throws SAXException { 098 try { 099 Document doc = XMLUtils.parse(message); 100 clean(doc.getDocumentElement()); 101 return XMLUtils.serialize(doc, true); 102 } catch (Exception e) { 103 throw new RuntimeException("Exception while standardizing XML ", e); 104 } 105 106 } 107 108 /** Removes attributes, comments, and processing instructions. */ 109 private static void clean(Element elem) { 110 NodeList children = elem.getChildNodes(); 111 for (int i = 0; i < children.getLength(); i++) { 112 Node child = children.item(i); 113 if (child.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE 114 || child.getNodeType() == Node.COMMENT_NODE) 115 { 116 elem.removeChild(child); 117 } else if (child.getNodeType() == Node.ELEMENT_NODE) { 118 clean((Element) child); 119 } 120 } 121 122 NamedNodeMap attributes = elem.getAttributes(); 123 //get names 124 String[] names = new String[attributes.getLength()]; 125 for (int i = 0; i < names.length; i++) { 126 names[i] = attributes.item(i).getNodeName(); 127 } 128 //remove by name 129 for (int i = 0; i < names.length; i++) { 130 attributes.removeNamedItem(names[i]); 131 } 132 133 } 134 135 /** 136 * <p>Compares two HL7 messages to see if they are equivalent (in terms of their 137 * HL7 meaning). Semantically irrelevant differences (e.g. spaces in an XML tag; 138 * extra field delimiters at the end of a segment; XML vs. ER7 encoding; XML attributes) 139 * are ignored. This check is performed without assuming the correctness of the HAPI parsers, 140 * and can therefore be used to test them. This is done by parsing a message, encoding it 141 * again, and comparing the result with this original. </p> 142 * <p>If one message is in XML and the other in ER7, the former is converted to ER7 to 143 * perform the comparison. This process relies on the HAPI parsers. However, the 144 * parsed message is first encoded as XML and compared to the original, so that the 145 * integrity of the parser can be verified. An exception is thrown if this comparison 146 * is unsuccessful. </p> 147 * @return true if given messages are semantically equivalent 148 */ 149 public static boolean equivalent(String message1, String message2) throws HL7Exception { 150 Pair<String> messages = standardize(message1, message2); 151 return messages.getValue1().equals(messages.getValue2()); 152 } 153 154 static Pair<String> standardize(String message1, String message2) throws HL7Exception { 155 String encoding1 = parser.getEncoding(message1); 156 String encoding2 = parser.getEncoding(message2); 157 158 if (!encoding1.equals(encoding2)) { 159 if (encoding1.equals("XML")) { 160 message1 = safeER7Conversion(message1); 161 } else { 162 message2 = safeER7Conversion(message2); 163 } 164 } 165 166 String std1, std2; 167 try { 168 std1 = standardize(message1); 169 std2 = standardize(message2); 170 } catch (SAXException e) { 171 throw new HL7Exception("Equivalence check failed due to SAXException: " + e.getMessage()); 172 } 173 174 return new Pair<String>(std1, std2); 175 } 176 177 /** 178 * Converts XML message to ER7, first checking integrity of parse and throwing 179 * an exception if parse not correct 180 */ 181 static String safeER7Conversion(String xmlMessage) throws HL7Exception { 182 Message m = parser.parse(xmlMessage); 183 184 String check = parser.encode(m, "XML"); 185 if (!equivalent(xmlMessage, check)) { 186 throw new HL7Exception("Parsed and encoded message not equivalent to original (possibilities: invalid message, bug in parser)"); 187 } 188 189 return parser.encode(m, "VB"); 190 } 191 192 193 194}