/**
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
 * specific language governing rights and limitations under the License.
 *
 * The Original Code is "MessageQuery.java". Description:
 * "Queries messages in an SQL-like style. "
 *
 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
 * 2005. All Rights Reserved.
 *
 * Contributor(s): ______________________________________.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
 * applicable instead of those above. If you wish to allow use of your version of this
 * file only under the terms of the GPL and not to allow others to use your version
 * of this file under the MPL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by the GPL License.
 * If you do not delete the provisions above, a recipient may use your version of
 * this file under either the MPL or the GPL.
 *
 */
package ca.uhn.hl7v2.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import ca.uhn.hl7v2.HL7Exception;
import ca.uhn.hl7v2.model.Message;

/**
 * Queries messages in an SQL-like style. We get repeated row-like structures by
 * looping over repetitions of groups, segments, or fields.
 *
 * <p>
 * This is a very advanced class ... maybe too advanced even for you. If you
 * find it confusing, please note that there are simpler ways to get data from a
 * message (like calling its getters or using Terser).
 *
 * <p>
 * LOOPING: You specify the loop points as part of the query. For example you
 * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is
 * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this
 * example would loop through repetitions of MSH-18. If there are multiple loop
 * points, the loops are nested so that each possible combination is returned.
 * Looping stops when none of the fields under a loop point are valued. The name
 * of the loop point ('x' in the example above) is arbitrary. An optional
 * integer directly after the * (e.g. <code>(*3)</code>) gives the number of
 * consecutive empty iterations that are tolerated before the loop ends; the
 * default is 0.
 *
 * <p>
 * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are
 * used in place of table.field. You can use the "as" keyword to give a field a
 * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is
 * under a loop point, replace the path up to the loop point with a loop point
 * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
 *
 * <p>
 * SELECTING ROWS: A "row" is a combination of all selected fields at one
 * iteration. You can filter which rows are returned using a where clause
 * similar to that in SQL. Use exact values or regular expressions, for example:
 * <code>where {1} like '.*blood.*'</code> or
 * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with
 * commas (which mean 'and'). Future versions may support 'or', negation,
 * brackets, etc., but this version doesn't. Because filters are split on
 * commas, a filter value cannot itself contain a comma.
 *
 * <p>
 * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = ./PID-3(*) where
 * {pat-id}-2 = 'mrn'
 *
 * <p>
 * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For
 * example consider the message:
 *
 * <pre>
 * MSH|etc.|etc.
 * Z01|one~two|a
 * Z01|three~four|b
 * </pre>
 *
 * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return:
 *
 * <pre>
 * a one
 * a two
 * b three
 * b four
 * </pre>
 *
 * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would
 * return:
 *
 * <pre>
 * a one
 * a two
 * b one
 * b two
 * </pre>
 *
 * In the first case, one loop point refers to another. In the second case the
 * loops are treated as independent, just as if they referred to different
 * branches of the message.
 *
 * <p>
 * TODO: could support distinct easily by keeping record of rows and comparing
 * each one to previous rows
 *
 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author:
 *          jamesagnew $
 * @deprecated no replacement is named in this file; the class documentation
 *             above points to message getters or {@link Terser} as simpler
 *             alternatives
 */
@Deprecated
public class MessageQuery {

    /** Matches the loop marker with its optional "max empty" count, e.g. * or *3. */
    private static final Pattern LOOP_MARKER = Pattern.compile("\\*\\d*");

    /** Captures the "max empty" count that may follow the loop marker. */
    private static final Pattern MAX_EMPTY = Pattern.compile("\\*(\\d+)");

    /** Matches a loop point reference such as {foo} inside a path. */
    private static final Pattern LOOP_REFERENCE = Pattern.compile("\\{.*\\}");

    /**
     * Parses the query and returns a result set positioned before the first
     * row.
     *
     * @param theMessage
     *            an HL7 message from which data are to be queried
     * @param theQuery
     *            the query (see class docs for syntax)
     * @return data from the message that are selected by the query
     * @throws IllegalArgumentException
     *             if the query is malformed: it does not begin with a select
     *             clause, the loop clause follows the where clause, 'as' is
     *             misplaced, a loop point is not declared as 'name = path', a
     *             filter has no '=' or 'like' operator or no delimited value,
     *             or a 'like' pattern is not a valid regular expression
     */
    public static Result query(Message theMessage, String theQuery) {
        Properties clauses = getClauses(theQuery);

        List<String> fieldPaths = new ArrayList<String>(10);
        Map<String, Integer> names = new HashMap<String, Integer>(10);
        parseSelectClause(clauses.getProperty("select"), fieldPaths, names);

        List<String> loopPoints = new ArrayList<String>(10);
        Map<String, Integer> loopPointNames = new HashMap<String, Integer>(10);
        parseLoopClause(clauses.getProperty("loop", ""), loopPoints, loopPointNames);

        // parse where clause
        // TODO: this will do for now but it should really be evaluated like an
        // expression rather than a list
        StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false);
        List<String> filters = new ArrayList<String>();
        while (where.hasMoreTokens()) {
            filters.add(where.nextToken());
        }
        String[] filterPaths = new String[filters.size()];
        String[] filterPatterns = new String[filters.size()];
        boolean[] exactFlags = new boolean[filters.size()];
        for (int i = 0; i < filters.size(); i++) {
            parseFilter(filters.get(i), i, filterPaths, filterPatterns, exactFlags);
        }

        return new ResultImpl(theMessage,
                loopPoints.toArray(new String[0]), loopPointNames,
                fieldPaths.toArray(new String[0]), names,
                filterPaths, filterPatterns, exactFlags);
    }

    /**
     * Splits the select clause on commas and spaces. Every token is a field
     * path, except that "as" attaches the token following it as a label for
     * the field path preceding it.
     */
    private static void parseSelectClause(String theClause, List<String> theFieldPaths,
            Map<String, Integer> theNames) {
        StringTokenizer select = new StringTokenizer(theClause, ", ", false);
        while (select.hasMoreTokens()) {
            String token = select.nextToken();
            if (token.equals("as")) {
                if (theFieldPaths.isEmpty()) {
                    // otherwise the label would be stored against index -1
                    throw new IllegalArgumentException(
                            "Keyword 'as' must be preceded by a field path");
                }
                if (!select.hasMoreTokens()) {
                    throw new IllegalArgumentException(
                            "Keyword 'as' must be followed by a field label");
                }
                theNames.put(select.nextToken(), theFieldPaths.size() - 1);
            } else {
                theFieldPaths.add(token);
            }
        }
    }

    /**
     * Splits the loop clause on commas; each declaration has the form
     * "name = path". Loop points are numbered in declaration order.
     */
    private static void parseLoopClause(String theClause, List<String> theLoopPoints,
            Map<String, Integer> theLoopPointNames) {
        StringTokenizer loop = new StringTokenizer(theClause, ",", false);
        while (loop.hasMoreTokens()) {
            String pointDecl = loop.nextToken();
            StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
            if (tok.countTokens() < 2) {
                throw new IllegalArgumentException(
                        "Loop point must be declared as 'name = path': " + pointDecl.trim());
            }
            String name = tok.nextToken().trim();
            String path = tok.nextToken().trim();
            theLoopPoints.add(path);
            theLoopPointNames.put(name, theLoopPoints.size() - 1);
        }
    }

    /**
     * Parses one filter of the form <code>path = 'value'</code> or
     * <code>path like 'regex'</code> into slot theIndex of the three arrays.
     * The first and last characters of the trimmed value are taken to be its
     * delimiters and are removed.
     */
    private static void parseFilter(String theFilter, int theIndex, String[] thePaths,
            String[] thePatterns, boolean[] theExactFlags) {
        // Look for the operator ahead of the value first, so that an '=' or
        // "like" inside the quoted value is not mistaken for the operator.
        int quote = firstQuote(theFilter);
        String head = quote < 0 ? theFilter : theFilter.substring(0, quote);
        int equalsAt = head.indexOf("=");
        int likeAt = head.indexOf("like");
        if (equalsAt < 0 && likeAt < 0) {
            // no operator ahead of a quote: scan the whole filter instead
            equalsAt = theFilter.indexOf("=");
            likeAt = theFilter.indexOf("like");
        }

        // '=' takes precedence over "like" when both are present
        boolean exact = equalsAt >= 0;
        int operatorAt = exact ? equalsAt : likeAt;
        if (operatorAt < 0) {
            throw new IllegalArgumentException(
                    "Filter must contain '=' or 'like': " + theFilter.trim());
        }
        int operatorLength = exact ? 1 : 4;

        String value = theFilter.substring(operatorAt + operatorLength).trim();
        if (value.length() < 2) {
            throw new IllegalArgumentException(
                    "Filter value must be enclosed in quotes: " + theFilter.trim());
        }

        theExactFlags[theIndex] = exact;
        thePaths[theIndex] = theFilter.substring(0, operatorAt).trim();
        thePatterns[theIndex] = value.substring(1, value.length() - 1);
    }

    /**
     * @return index of the first single or double quote in the given text, or
     *         -1 if it contains neither
     */
    private static int firstQuote(String theText) {
        int single = theText.indexOf('\'');
        int dbl = theText.indexOf('"');
        if (single < 0) {
            return dbl;
        }
        if (dbl < 0) {
            return single;
        }
        return Math.min(single, dbl);
    }

    /**
     * Breaks the query into its "select", "loop" and "where" clauses, stored
     * under those keys with the keyword itself removed. The keywords are
     * located as plain substrings ("where " and "loop ").
     *
     * @throws IllegalArgumentException
     *             if the loop clause follows the where clause, or the query
     *             does not begin with "select"
     */
    private static Properties getClauses(String theQuery) {
        Properties clauses = new Properties();

        String[] split = splitFromEnd(theQuery, "where ");
        setClause(clauses, "where", split[1]);

        split = splitFromEnd(split[0], "loop ");
        setClause(clauses, "loop", split[1]);

        if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
            throw new IllegalArgumentException(
                    "The loop clause must precede the where clause");
        }

        // Trim before checking so that leading whitespace is tolerated, and
        // verify the keyword rather than blindly dropping six characters.
        String selectClause = split[0].trim();
        if (!selectClause.startsWith("select")) {
            throw new IllegalArgumentException(
                    "The query must begin with a select clause");
        }
        setClause(clauses, "select", selectClause);
        return clauses;
    }

    /**
     * Stores the given clause under the given name after removing the leading
     * keyword (the first theName.length() characters) and trimming. Does
     * nothing if the clause is null. The caller guarantees that the clause
     * starts with the keyword.
     */
    private static void setClause(Properties theClauses, String theName,
            String theClause) {
        if (theClause != null) {
            theClauses.setProperty(theName,
                    theClause.substring(theName.length()).trim());
        }
    }

    /**
     * Splits the given string at the FIRST occurrence of the marker (despite
     * the method name).
     *
     * @return a two-element array: [0] is the text before the marker (or the
     *         whole string if the marker is absent); [1] is the text from the
     *         marker onward, marker included (or null if the marker is absent)
     */
    private static String[] splitFromEnd(String theString, String theMarker) {
        String[] result = new String[2];
        int begin = theString.indexOf(theMarker);
        if (begin >= 0) {
            result[0] = theString.substring(0, begin);
            result[1] = theString.substring(begin);
        } else {
            result[0] = theString;
        }
        return result;
    }

    /**
     * A result set for a message query.
     *
     * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
     * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by
     *          $Author: jamesagnew $
     */
    public static interface Result {

        /**
         * @param theFieldNumber
         *            numbered from zero in the order they are specified in the
         *            query
         * @return the corresponding value in the current row
         */
        public String get(int theFieldNumber);

        /**
         * @param theFieldName
         *            a field name as specified in the query with the keyword
         *            "as"
         * @return the corresponding value in the current row
         */
        public String get(String theFieldName);

        /**
         * @return a list of named fields as defined with 'as' in the query
         */
        public String[] getNamedFields();

        /**
         * Advances to the next "row" of data if one is available.
         *
         * @return true if another row is available
         * @throws HL7Exception
         */
        public boolean next() throws HL7Exception;

    }

    /**
     * Result implementation that re-reads the selected and filtered paths
     * through a {@link Terser} each time a loop index is advanced.
     */
    private static class ResultImpl implements Result {

        private final Terser myTerser;
        private String[] myValues;
        private final String[] myLoopPoints;
        private final Map<String, Integer> myLoopPointNames;
        private final String[] myFieldPaths;
        private final Map<String, Integer> myFieldNames;
        private final int[] myIndices;
        // number of empty sub-loops since last non-empty one
        private final int[] myNumEmpty;
        private final int[] myMaxNumEmpty;
        private boolean myNonLoopingQuery = false;
        private final String[] myWherePaths;
        private String[] myWhereValues;
        private final String[] myWherePatterns;
        // compiled form of myWherePatterns; null at exact-match positions
        private final Pattern[] myWhereRegexes;
        private final boolean[] myExactMatchFlags;

        public ResultImpl(Message theMessage, String[] theLoopPoints,
                Map<String, Integer> theLoopPointNames, String[] theFieldPaths,
                Map<String, Integer> theFieldNames, String[] theWherePaths,
                String[] theWherePatterns, boolean[] theExactMatchFlags) {

            myTerser = new Terser(theMessage);
            myLoopPoints = theLoopPoints;
            myIndices = new int[theLoopPoints.length];
            myNumEmpty = new int[theLoopPoints.length];
            myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
            myLoopPointNames = theLoopPointNames;
            myFieldPaths = theFieldPaths;
            myValues = new String[theFieldPaths.length];
            myFieldNames = theFieldNames;
            myWherePaths = theWherePaths;
            myWherePatterns = theWherePatterns;
            myExactMatchFlags = theExactMatchFlags;

            // compile 'like' patterns once instead of once per row
            myWhereRegexes = new Pattern[theWherePatterns.length];
            for (int i = 0; i < theWherePatterns.length; i++) {
                if (!theExactMatchFlags[i]) {
                    myWhereRegexes[i] = Pattern.compile(theWherePatterns[i]);
                }
            }

            if (theLoopPoints.length == 0) {
                // if no loops, give ourselves 1 iteration
                myNonLoopingQuery = true;
            } else {
                // start before 1st iteration
                myIndices[myIndices.length - 1] = -1;
            }
        }

        // extracts max number of empty iterations for each loop point (this is
        // communicated as an optional integer after the *, e.g. blah(*3) ...
        // default is 0).
        private int[] getMaxNumEmpty(String[] theLoopPoints) {
            int[] retVal = new int[theLoopPoints.length];
            for (int i = 0; i < theLoopPoints.length; i++) {
                retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
            }
            return retVal;
        }

        private int getMaxNumEmpty(String theLoopPoint) {
            Matcher m = MAX_EMPTY.matcher(theLoopPoint);
            if (m.find()) {
                return Integer.parseInt(m.group(1));
            }
            return 0; // default
        }

        // returns true if some field under the given loop point has a value at
        // the present iteration
        private boolean currentRowValued(int theLoopPoint) {
            for (int i = 0; i < myFieldPaths.length; i++) {
                if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
                    String value = myValues[i];
                    if (value != null && value.length() > 0) {
                        return true;
                    }
                }
            }
            return false;
        }

        // returns true if the current row matches the where clause filters
        // (all of them; an empty filter list matches every row)
        private boolean currentRowMatchesFilter() {
            for (int i = 0; i < myWhereValues.length; i++) {
                if (myExactMatchFlags[i]) {
                    if (!myWherePatterns[i].equals(myWhereValues[i])) {
                        return false;
                    }
                } else {
                    if (!myWhereRegexes[i].matcher(myWhereValues[i]).matches()) {
                        return false;
                    }
                }
            }
            return true;
        }

        // true if the given path references the given loop point (directly
        // or indirectly). The walk ends when it reaches a path without a
        // reference; composeLoopPoints() rejects a loop point that refers to
        // itself or to a later one.
        private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
            String path = theFieldPath;
            int lp;
            while ((lp = getLoopPointReference(path)) >= 0) {
                if (lp == theLoopPoint) {
                    return true;
                }
                path = myLoopPoints[lp];
            }
            return false;
        }

        // expands a set of paths to their current loop point iterations, and
        // gets current values from our message (null values become "")
        private String[] getCurrentValues(String[] thePaths)
                throws HL7Exception {
            String[] paths = composePaths(thePaths);
            String[] values = new String[paths.length];
            for (int i = 0; i < paths.length; i++) {
                values[i] = myTerser.get(paths[i]);
                if (values[i] == null) {
                    values[i] = "";
                }
            }
            return values;
        }

        // creates full Terser paths from current location, loop points, and
        // given paths with loop point references
        private String[] composePaths(String[] thePaths) {
            String[] currentLoopPoints = composeLoopPoints();
            String[] result = new String[thePaths.length];
            for (int i = 0; i < thePaths.length; i++) {
                result[i] = thePaths[i];
                int ref = getLoopPointReference(thePaths[i]);
                if (ref >= 0) {
                    result[i] = expandLoopPointReference(result[i],
                            currentLoopPoints[ref]);
                }
            }
            return result;
        }

        // parameterizes loop points with present location (i.e. replaces * with
        // current indices)
        private String[] composeLoopPoints() {
            String[] result = new String[myLoopPoints.length];
            for (int i = 0; i < myLoopPoints.length; i++) {
                result[i] = LOOP_MARKER.matcher(myLoopPoints[i]).replaceAll(
                        String.valueOf(myIndices[i]));

                int ref = getLoopPointReference(myLoopPoints[i]);
                if (ref >= i) {
                    throw new IllegalStateException(
                            "Loop point must be defined after the "
                                    + "one it references: " + myLoopPoints[i]);
                } else if (ref >= 0) {
                    result[i] = expandLoopPointReference(result[i], result[ref]);
                }
            }
            return result;
        }

        // extracts LP# of label between first '{' and first '}' after it, or
        // -1 if there isn't one; a missing '}' means the label runs to the end
        // of the path
        private int getLoopPointReference(String thePath) {
            int open = thePath.indexOf('{');
            if (open < 0) {
                return -1;
            }
            int close = thePath.indexOf('}', open + 1);
            String label = close < 0
                    ? thePath.substring(open + 1)
                    : thePath.substring(open + 1, close);
            if (label.length() == 0) {
                return -1;
            }
            Integer index = myLoopPointNames.get(label);
            if (index == null) {
                // explicit check: unboxing a null Integer would throw a bare
                // NullPointerException
                throw new IllegalArgumentException(
                        "Reference to undeclared loop point '" + label
                                + "' in path: " + thePath);
            }
            return index.intValue();
        }

        // substitutes the {...} reference in the path with the given loop
        // point text, which is inserted literally
        private String expandLoopPointReference(String thePath,
                String theLoopPoint) {
            return LOOP_REFERENCE.matcher(thePath).replaceAll(
                    Matcher.quoteReplacement(theLoopPoint));
        }

        /**
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
         */
        public String get(int theFieldNumber) {
            if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
                throw new IllegalArgumentException(
                        "Field number must be between 0 and "
                                + (myValues.length - 1));
            }
            return myValues[theFieldNumber];
        }

        /**
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
         */
        public String get(String theFieldName) {
            Integer fieldNum = myFieldNames.get(theFieldName);
            if (fieldNum == null) {
                throw new IllegalArgumentException(
                        "Field name not recognized: " + theFieldName);
            }
            return get(fieldNum);
        }

        /**
         * @throws HL7Exception
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
         */
        public boolean next() throws HL7Exception {
            if (myNonLoopingQuery) {
                // a query without loop points yields at most one row
                myNonLoopingQuery = false;
                myValues = getCurrentValues(myFieldPaths);
                myWhereValues = getCurrentValues(myWherePaths);
                return currentRowMatchesFilter();
            }

            boolean hasNext = false;
            // start at the last-declared loop point and move to earlier ones
            // as each is exhausted
            for (int i = myIndices.length - 1; i >= 0; i--) {
                boolean gotMatch = false;
                while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
                    myIndices[i]++;
                    myValues = getCurrentValues(myFieldPaths);
                    myWhereValues = getCurrentValues(myWherePaths);

                    if (!currentRowValued(i)) {
                        myNumEmpty[i]++;
                    } else {
                        myNumEmpty[i] = 0;
                    }
                    if (currentRowMatchesFilter()) {
                        gotMatch = true;
                    }
                }

                hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];
                if (hasNext) {
                    break;
                }

                // this loop is exhausted: reset it and advance the one before
                myIndices[i] = 0;
                myNumEmpty[i] = 0;

                // TODO: if we aren't allowing empties in this loop, and have no
                // value, we want to return the null in the super-loop. However,
                // we don't know which loop point, if any, is the super-loop. If
                // it was the next one we could do this ...
                // if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0
                // && myIndices[i-1] == 0) {
                // myIndices[i-1] = -1;
                // } ... but it may not be, so we'll ignore this problem for
                // now.
            }
            return hasNext;
        }

        /**
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
         */
        public String[] getNamedFields() {
            return myFieldNames.keySet().toArray(new String[0]);
        }

    }

}