package ca.uhn.hl7v2.preparser;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import ca.uhn.hl7v2.parser.EncodingCharacters;

/**
 * The point of this class (all static members, not instantiable) is to take a
 * traditionally-encoded HL7 message and add all its contents to a Properties
 * object, via the parseMessage() method.
 *
 * The key-value pairs added to the Properties argument have keys that represent a
 * datum's location in the message. (in the ZYX-1-2[0] style. TODO: define
 * exactly.) See Datum, particularly the toString() of that class.
 * Anyway, the Properties keys are those and the values are the tokens found.
 *
 * Note: we accept useless field repetition separators at the end of a
 * field repetition sequence. i.e. |855-4545~555-3792~~~| , and interpret this
 * as defining repetitions 0 and 1. This might not be allowed. (HL7 2.3.1
 * section 2.10 explicitly allows this behaviour for fields / components /
 * subcomponents, but the allowance is notably absent for repetitions. TODO:
 * nail down.) We allow it anyway.
 *
 * Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
 * repetitions 0 and 2. The spec would seem to disallow this too, but there's no
 * harm. :D
 */
public class ER7 {

    /** Not instantiable: every member is static. */
    private ER7() {}

    /**
     * Characters that delimit segments, for use with StringTokenizer.
     * We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
     * the only segment delimiter. TODO: check other versions.
     */
    static final String segmentSeparators = "\r\n\f";

    /**
     * Parses message and dumps contents to props, with keys in the
     * ZYX[a]-b[c]-d-e style.
     *
     * @param props   (out) receives one entry per datum that passes msgMask. If null,
     *                a throw-away Properties is used, so only the return value is
     *                meaningful to the caller.
     * @param msgMask (in) a datum is stored only if its path starts with one of
     *                these paths. null is treated as "store everything".
     * @param message (in) the encoded message; may be null.
     * @return true if the first segment was accepted by
     *         {@link #parseMSHSegmentWhole}; false if message is null, contains no
     *         segments, or its first segment was rejected.
     */
    public static boolean parseMessage(/*out*/ Properties props,
            /*in*/ List<DatumPath> msgMask, /*in*/ String message)
    {
        if (message == null) {
            return false;
        }
        if (props == null) {
            props = new Properties();
        }

        StringTokenizer messageTokenizer = new StringTokenizer(message, segmentSeparators);
        if (!messageTokenizer.hasMoreTokens()) {
            return false;
        }

        // Resolve a null mask once, so the MSH segment and every later segment
        // are filtered by exactly the same rules.
        List<DatumPath> mask = effectiveMask(msgMask);

        String firstSegment = messageTokenizer.nextToken();
        // Placeholder delimiters; the real ones are read out of the MSH segment.
        EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
        if (!parseMSHSegmentWhole(props, mask, encodingChars, firstSegment)) {
            return false;
        }

        SortedMap<String, Integer> segmentId2nextRepIdx = new TreeMap<String, Integer>();
        // MSH repetition 0 has just been consumed -- in case we find another
        // MSH segment, heh.
        segmentId2nextRepIdx.put("MSH", Integer.valueOf(1));
        while (messageTokenizer.hasMoreTokens()) {
            parseSegmentWhole(props, segmentId2nextRepIdx, mask, encodingChars,
                    messageTokenizer.nextToken());
        }
        return true;
    }

    /**
     * Returns msgMask itself when it is non-null; otherwise a fresh mask holding
     * only the zero-length DatumPath, which everything will pass (every DatumPath
     * startsWith the zero-length DatumPath).
     */
    private static List<DatumPath> effectiveMask(List<DatumPath> msgMask)
    {
        if (msgMask != null) {
            return msgMask;
        }
        List<DatumPath> passEverything = new ArrayList<DatumPath>();
        passEverything.add(new DatumPath());
        return passEverything;
    }

    /**
     * Given segment, starting with "MSH", then encoding characters, etc...
     * put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if
     * found, plus everything else found in 'segment'.
     *
     * The characters at indices 3..7 of segment are written into encodingChars as
     * the field, component, repetition, escape and subcomponent delimiters, in
     * that order. The first three characters are not inspected.
     *
     * NOTE(review): exactly four encoding characters are expected before the next
     * field separator; a header that declares an extra (truncation) character
     * would be rejected here -- confirm whether that is intended.
     *
     * @param props         (out) receives the parsed data.
     * @param msgMask       (in) filter for what gets stored; null means everything.
     * @param encodingChars (in/out) updated with the delimiters found in segment.
     * @param segment       (in) the whole first segment of the message.
     * @return true if the character at index 8 is the field separator, in which
     *         case the remainder of the segment has been parsed. false otherwise,
     *         including when segment is too short or an argument is null; props
     *         and encodingChars may already have been partially updated.
     */
    protected static boolean parseMSHSegmentWhole(/*out*/ Properties props,
            /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars,
            /*in*/ String segment)
    {
        boolean ret = false;
        try {
            ER7SegmentHandler handler = new ER7SegmentHandler();
            handler.m_props = props;
            handler.m_encodingChars = encodingChars;
            handler.m_segmentId = "MSH";
            handler.m_segmentRepIdx = 0;
            handler.m_msgMask = effectiveMask(msgMask);

            // MSH-1: the field separator itself.
            encodingChars.setFieldSeparator(segment.charAt(3));
            List<Integer> nodeKey = new ArrayList<Integer>();
            nodeKey.add(Integer.valueOf(0));
            handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));

            // MSH-2: the remaining encoding characters.
            encodingChars.setComponentSeparator(segment.charAt(4));
            encodingChars.setRepetitionSeparator(segment.charAt(5));
            encodingChars.setEscapeCharacter(segment.charAt(6));
            encodingChars.setSubcomponentSeparator(segment.charAt(7));
            nodeKey.set(0, Integer.valueOf(1));
            handler.putDatum(nodeKey, encodingChars.toString());

            if (segment.charAt(8) == encodingChars.getFieldSeparator()) {
                ret = true;
                // now -- we recurse
                // through fields / field-repetitions / components / subcomponents.
                // Field index starts at 2 because MSH-1 and MSH-2 were handled above.
                nodeKey.clear();
                nodeKey.add(Integer.valueOf(2));
                parseSegmentGuts(handler, segment.substring(9), nodeKey);
            }
        }
        catch (IndexOutOfBoundsException ignored) {
            // Deliberate best effort: a header that is too short is reported
            // through the false return value rather than an exception.
        }
        catch (NullPointerException ignored) {
            // Deliberate best effort: a null argument likewise yields false.
        }

        return ret;
    }

    /**
     * Pass in a whole segment (of type other than MSH), including message type
     * at the start, according to encodingChars, and we'll parse the contents and
     * put them in props.
     *
     * The first three characters of segment are taken as the segment id. The
     * repetition counter for that id in segmentId2nextRepIdx is advanced whether
     * or not the segment ends up being parsed. Malformed input (segment shorter
     * than expected, null arguments) is silently skipped.
     *
     * @param props                (out) receives the parsed data.
     * @param segmentId2nextRepIdx (in/out) maps a segment id to the repetition
     *                             index the next segment with that id will get.
     * @param msgMask              (in) filter for what gets stored; null means
     *                             everything.
     * @param encodingChars        (in) delimiters to parse with.
     * @param segment              (in) the whole segment text.
     */
    protected static void parseSegmentWhole(/*out*/ Properties props,
            /*in/out*/ Map<String, Integer> segmentId2nextRepIdx,
            /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars,
            /*in*/ String segment)
    {
        try {
            String segmentId = segment.substring(0, 3);

            Integer nextRepIdx = segmentId2nextRepIdx.get(segmentId);
            int currentSegmentRepIdx = (nextRepIdx != null) ? nextRepIdx.intValue() : 0;
            segmentId2nextRepIdx.put(segmentId, Integer.valueOf(currentSegmentRepIdx + 1));

            // A null mask means "everything", exactly as for the MSH segment.
            List<DatumPath> mask = effectiveMask(msgMask);

            // will only bother to parse this segment if any of its contents will
            // be dumped to props: either the segment lies under a mask entry, or
            // a mask entry points somewhere inside this segment.
            DatumPath segmentIdAsDatumPath = new DatumPath().add(segmentId);
            boolean parseThisSegment = false;
            for (Iterator<DatumPath> maskIt = mask.iterator();
                    !parseThisSegment && maskIt.hasNext(); )
            {
                parseThisSegment = segmentIdAsDatumPath.startsWith(maskIt.next());
            }
            for (Iterator<DatumPath> maskIt = mask.iterator();
                    !parseThisSegment && maskIt.hasNext(); )
            {
                parseThisSegment = maskIt.next().startsWith(segmentIdAsDatumPath);
            }

            if (parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
                ER7SegmentHandler handler = new ER7SegmentHandler();
                handler.m_props = props;
                handler.m_encodingChars = encodingChars;
                handler.m_segmentId = segmentId;
                handler.m_msgMask = mask;
                handler.m_segmentRepIdx = currentSegmentRepIdx;

                List<Integer> nodeKey = new ArrayList<Integer>();
                nodeKey.add(Integer.valueOf(0));
                parseSegmentGuts(handler, segment.substring(4), nodeKey);
            }
        }
        catch (NullPointerException ignored) {
            // Deliberate best effort: a null argument means this segment is skipped.
        }
        catch (IndexOutOfBoundsException ignored) {
            // Deliberate best effort: a segment too short to carry an id and a
            // field separator is skipped.
        }
    }

    /** Callback through which parseSegmentGuts() learns the delimiters and reports tokens. */
    static protected interface Handler
    {
        /** @return the depth at which tokens are reported instead of split further. */
        public int specDepth();

        /** @return the delimiter used to split at the given (0-based) level. */
        public char delim(int level);

        /**
         * Reports one token.
         *
         * @param nodeKey position of the token in the parse tree, one 0-based
         *                index per level.
         * @param value   the token text.
         */
        public void putDatum(List<Integer> nodeKey, String value);
    }

    /**
     * Handler that stores tokens in a Properties object, keyed by the string form
     * of the token's DatumPath, provided the path passes the message mask.
     */
    static protected class ER7SegmentHandler implements Handler
    {
        /** Destination for the tokens that pass m_msgMask. */
        Properties m_props;

        /** Source of the delimiter for each level. */
        EncodingCharacters m_encodingChars;

        /** Id of the segment being parsed. */
        String m_segmentId;
        /** 0-based repetition index of the segment being parsed. */
        int m_segmentRepIdx;

        /** A token is stored only if its path starts with one of these paths. */
        List<DatumPath> m_msgMask;

        public int specDepth() {return 4;}

        /**
         * Levels 0..4 map to the field, repetition, component, subcomponent and
         * truncation characters of m_encodingChars respectively.
         *
         * @throws Error for any other level.
         */
        public char delim(int level)
        {
            switch (level) {
                case 0:
                    return m_encodingChars.getFieldSeparator();
                case 1:
                    return m_encodingChars.getRepetitionSeparator();
                case 2:
                    return m_encodingChars.getComponentSeparator();
                case 3:
                    return m_encodingChars.getSubcomponentSeparator();
                case 4:
                    return m_encodingChars.getTruncationCharacter();
                default:
                    throw new java.lang.Error("no delimiter defined for level " + level);
            }
        }

        /**
         * Builds the DatumPath for valNodeKey within the current segment and, if it
         * passes m_msgMask, stores value in m_props under that path's toString().
         */
        public void putDatum(List<Integer> valNodeKey, String value)
        {
            // make a DatumPath from valNodeKey and info in this:
            DatumPath valDatumPath = new DatumPath();
            valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
            for (int i = 0; i < valNodeKey.size(); ++i) {
                // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh.
                // Index 1 (the field repetition) is passed through unchanged; every
                // other index is shifted up by one.
                int itval = valNodeKey.get(i).intValue();
                valDatumPath.add(Integer.valueOf(i == 1 ? itval : itval + 1));
            }

            // see if valDatumPath passes m_msgMask:
            boolean valDatumPathPassesMask = false;
            for (DatumPath maskPath : m_msgMask) {
                if (valDatumPath.startsWith(maskPath)) {
                    valDatumPathPassesMask = true;
                    break;
                }
            }

            if (valDatumPathPassesMask) {
                m_props.setProperty(valDatumPath.toString(), value);
            }
        }
    }

    /**
     * Recursively tokenize "guts" (a segment, or part of one) into tokens,
     * according to separators (aka delimiters) which are different at each level
     * of recursion, and to a recursive depth which is discovered through "handler"
     * via handler.delim(int) and handler.specDepth(). As tokens are found, they
     * are reported to handler via handler.putDatum(), which presumably stashes them
     * away somewhere. We tell the handler about the location in the message via
     * putDatum()'s key argument, which is a List of Integers representing the
     * position in the parse tree (size() == depth of recursion).
     *
     * nodeKey is modified in place: its last element is advanced once for every
     * delimiter seen at this level, and it is not reset before returning.
     *
     * TODO: say more.
     *
     * @param handler (in/out) supplies delimiters and depth, receives the tokens.
     * @param guts    (in) the text to split at this level.
     * @param nodeKey (in/out) current position; must be non-empty.
     */
    protected static void parseSegmentGuts(/*in/out*/ Handler handler,
            /*in*/ String guts, /*in*/ List<Integer> nodeKey)
    {
        char thisDepthsDelim = handler.delim(nodeKey.size() - 1);

        // returnDelims == true: delimiters come back as tokens too, which is how
        // empty fields / components / whatevers get counted.
        StringTokenizer gutsTokenizer
                = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
        while (gutsTokenizer.hasMoreTokens()) {
            String gutsToken = gutsTokenizer.nextToken();
            int lastIdx = nodeKey.size() - 1;

            if (gutsToken.charAt(0) == thisDepthsDelim) {
                // gutsToken is all delims -- skipping over as many fields or
                // components or whatevers as there are characters in the token:
                int oldvalue = nodeKey.get(lastIdx).intValue();
                nodeKey.set(lastIdx, Integer.valueOf(oldvalue + gutsToken.length()));
            }
            else if (nodeKey.size() < handler.specDepth()) {
                // Not at full depth yet: descend one level, then restore nodeKey.
                nodeKey.add(Integer.valueOf(0));
                parseSegmentGuts(handler, gutsToken, nodeKey);
                nodeKey.remove(nodeKey.size() - 1);
            }
            else {
                handler.putDatum(nodeKey, gutsToken);
            }
        }
    }

    /**
     * Command-line driver: echoes args[0] to stdout, parses it as a message with
     * an all-pass mask, prints the result of parseMessage() to stderr and lists
     * the collected properties on stdout. Does nothing when no argument is given.
     * Example argument: "MSH|^~\\&||||foo|foo|foo"
     */
    public static void main(String[] args)
    {
        if (args.length >= 1) {
            System.out.println(args[0]);

            Properties props = new Properties();

            List<DatumPath> msgMask = new ArrayList<DatumPath>();
            msgMask.add(new DatumPath());

            System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
            props.list(System.out);
        }
    }

}