View Javadoc
1   /**
2    * The contents of this file are subject to the Mozilla Public License Version 1.1
3    * (the "License"); you may not use this file except in compliance with the License.
4    * You may obtain a copy of the License at http://www.mozilla.org/MPL/
5    * Software distributed under the License is distributed on an "AS IS" basis,
6    * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7    * specific language governing rights and limitations under the License.
8    *
9    * The Original Code is "MessageQuery.java".  Description:
10   * "Queries messages in an SQL-like style.  "
11   *
12   * The Initial Developer of the Original Code is University Health Network. Copyright (C)
13   * 2005.  All Rights Reserved.
14   *
15   * Contributor(s): ______________________________________.
16   *
17   * Alternatively, the contents of this file may be used under the terms of the
18   * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
19   * applicable instead of those above.  If you wish to allow use of your version of this
20   * file only under the terms of the GPL and not to allow others to use your version
21   * of this file under the MPL, indicate your decision by deleting  the provisions above
22   * and replace  them with the notice and other provisions required by the GPL License.
23   * If you do not delete the provisions above, a recipient may use your version of
24   * this file under either the MPL or the GPL.
25   *
26   */
27  package ca.uhn.hl7v2.util;
28  
29  import java.util.ArrayList;
30  import java.util.HashMap;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Properties;
34  import java.util.StringTokenizer;
35  import java.util.regex.Matcher;
36  import java.util.regex.Pattern;
37  
38  import ca.uhn.hl7v2.HL7Exception;
39  import ca.uhn.hl7v2.model.Message;
40  
41  /**
42   * Queries messages in an SQL-like style. We get repeated row-like structures by
43   * looping over repetitions of groups, segments, or fields.
44   * 
45   * This is a very advanced class ... maybe too advanced even for you. If you
46   * find it confusing, please note that there are simpler ways to get data from a
47   * message (like calling its getters or using Terser).
48   * 
49   * LOOPING: You specify the loop points as part of the query. For example you
50   * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is
51   * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this
52   * example would loop through repetitions of MSH-18. If there are multiple loop
53   * points, the loops are nested so that each possible combination is returned.
54   * Looping stops when none of the fields under a loop point are valued. The name
55   * of the loop point ('x' in the example above) is arbitrary.
56   * 
57   * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are
58   * used in place of table.field. You can use the "as" keyword to give a field a
59   * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is
60   * under a loop point, replace the path up to the loop point with a loop point
61   * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
62   * 
63   * SELECTING ROWS: A "row" is a combination of all selected fields at one
64   * iteration. You can filter which rows are returned using a where clause
65   * similar to that in SQL. Use exact values or regular expressions, for example:
66   * <code>where {1} like '.*blood.*'</code> or
67   * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with
68   * commas (which mean 'and'). Future versions may support 'or', negation,
69   * brackets, etc., but this version doesn't.
70   * 
71   * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = ./PID-3(*) where
72   * {pat-id}-2 = 'mrn'
73   * 
74   * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For
75   * example consider the message:
76   * 
77   * MSH|etc.|etc. Z01|one~two|a Z01|three~four|b
78   * 
79   * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: a
80   * one a two b three b four
81   * 
82   * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would
83   * return: a one a two b one b two
84   * 
85   * In the first case, one loop point refers to another. In the second case the
86   * loops are treated as independent, just as if they referred to different
87   * branches of the message.
88   * 
89   * TODO: could support distinct easily by keeping record of rows and comparing
90   * each one to previous rows
91   * 
92   * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
93   * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author:
94   *          jamesagnew $
95   * @deprecated
96   */
97  public class MessageQuery {
98  
99  	/**
100 	 * @param theMessage
101 	 *            an HL7 message from which data are to be queried
102 	 * @param theQuery
103 	 *            the query (see class docs for syntax)
104 	 * @return data from the message that are selected by the query
105 	 */
106 	public static Result query(Message theMessage, String theQuery) {
107 		Properties clauses = getClauses(theQuery);
108 
109 		// parse select clause
110 		StringTokenizer select = new StringTokenizer(
111 				clauses.getProperty("select"), ", ", false);
112 		List<String> fieldPaths = new ArrayList<>(10);
113 		Map<String, Integer> names = new HashMap<>(10);
114 		while (select.hasMoreTokens()) {
115 			String token = select.nextToken();
116 			if (token.equals("as")) {
117 				if (!select.hasMoreTokens()) {
118 					throw new IllegalArgumentException(
119 							"Keyword 'as' must be followed by a field label");
120 				}
121 				names.put(select.nextToken(), fieldPaths.size() - 1);
122 			} else {
123 				fieldPaths.add(token);
124 			}
125 		}
126 
127 		// parse loop clause
128 		StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop",
129 				""), ",", false);
130 		List<String> loopPoints = new ArrayList<>(10);
131 		Map<String, Integer> loopPointNames = new HashMap<>(10);
132 		while (loop.hasMoreTokens()) {
133 			String pointDecl = loop.nextToken();
134 			StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
135 			String name = tok.nextToken().trim();
136 			String path = tok.nextToken().trim();
137 			loopPoints.add(path);
138 			loopPointNames.put(name, loopPoints.size() - 1);
139 		}
140 
141 		// parse where clause
142 		// TODO: this will do for now but it should really be evaluated like an
143 		// expression
144 		// rather than a list
145 		StringTokenizer where = new StringTokenizer(clauses.getProperty(
146 				"where", ""), ",", false);
147 		List<String> filters = new ArrayList<>();
148 		while (where.hasMoreTokens()) {
149 			filters.add(where.nextToken());
150 		}
151 		String[] filterPaths = new String[filters.size()];
152 		String[] filterPatterns = new String[filters.size()];
153 		boolean[] exactFlags = new boolean[filters.size()];
154 
155 		for (int i = 0; i < filters.size(); i++) {
156 			exactFlags[i] = true;
157 			String filter = filters.get(i);
158 			String[] parts = splitFromEnd(filter, "=");
159 			if (parts[1] != null) {
160 				parts[1] = parts[1].substring(1);
161 			} else {
162 				exactFlags[i] = false;
163 				parts = splitFromEnd(filter, "like");
164 				parts[1] = parts[1].substring(4);
165 			}
166 			filterPaths[i] = parts[0].trim();
167 			parts[1] = parts[1].trim();
168 			filterPatterns[i] = parts[1].substring(1, parts[1].length() - 1);
169 		}
170 
171 		return new ResultImpl(theMessage,
172 				loopPoints.toArray(new String[0]), loopPointNames,
173 				fieldPaths.toArray(new String[0]), names,
174 				filterPaths, filterPatterns, exactFlags);
175 	}
176 
177 	private static Properties getClauses(String theQuery) {
178 		Properties clauses = new Properties();
179 
180 		String[] split = splitFromEnd(theQuery, "where ");
181 		setClause(clauses, "where", split[1]);
182 
183 		split = splitFromEnd(split[0], "loop ");
184 		setClause(clauses, "loop", split[1]);
185 		setClause(clauses, "select", split[0]);
186 
187 		if (clauses.getProperty("where", "").contains("loop ")) {
188 			throw new IllegalArgumentException(
189 					"The loop clause must precede the where clause");
190 		}
191 		if (clauses.getProperty("select") == null) {
192 			throw new IllegalArgumentException(
193 					"The query must begin with a select clause");
194 		}
195 		return clauses;
196 	}
197 
198 	private static void setClause(Properties theClauses, String theName,
199 			String theClause) {
200 		if (theClause != null) {
201 			theClauses.setProperty(theName,
202 					theClause.substring(theName.length()).trim());
203 		}
204 	}
205 
206 	private static String[] splitFromEnd(String theString, String theMarker) {
207 		String[] result = new String[2];
208 		int begin = theString.indexOf(theMarker);
209 		if (begin >= 0) {
210 			result[0] = theString.substring(0, begin);
211 			result[1] = theString.substring(begin);
212 		} else {
213 			result[0] = theString;
214 		}
215 		return result;
216 	}
217 
218 	/**
219 	 * A result set for a message query.
220 	 * 
221 	 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
222 	 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by
223 	 *          $Author: jamesagnew $
224 	 */
225 	public interface Result {
226 
227 		/**
228 		 * @param theFieldNumber
229 		 *            numbered from zero in the order they are specified in the
230 		 *            query
231 		 * @return the corresponding value in the current row
232 		 */
233         String get(int theFieldNumber);
234 
235 		/**
236 		 * @param theFieldName
237 		 *            a field name as specified in the query with the keyword
238 		 *            "as"
239 		 * @return the corresponding value in the current row
240 		 */
241         String get(String theFieldName);
242 
243 		/**
244 		 * @return a list of named fields as defined with 'as' in the query
245 		 */
246         String[] getNamedFields();
247 
248 		/**
249 		 * Advances to the next "row" of data if one is available.
250 		 * 
251 		 * @return true if another row is available
252 		 * @throws HL7Exception
253 		 */
254         boolean next() throws HL7Exception;
255 
256 	}
257 
258 	private static class ResultImpl implements Result {
259 
// reads values out of the queried message
private final Terser myTerser;
// selected values of the current row (all null until next() is called)
private String[] myValues;
// loop point paths, in declaration order
private final String[] myLoopPoints;
// loop point name -> position in myLoopPoints
private final Map<String, Integer> myLoopPointNames;
// selected field paths, in select-clause order
private final String[] myFieldPaths;
// field label -> position in myFieldPaths
private final Map<String, Integer> myFieldNames;
// current iteration of each loop point
private final int[] myIndices;
// per loop point: number of empty iterations since the last non-empty one
private final int[] myNumEmpty;
// per loop point: how many empty iterations are tolerated
private final int[] myMaxNumEmpty;
// true while a query without loop points still has its single row pending
private boolean myNonLoopingQuery = false;
// where-clause paths, their current values, the values/patterns they are
// compared with, and whether each comparison is exact (true) or a regex
private final String[] myWherePaths;
private String[] myWhereValues;
private final String[] myWherePatterns;
private final boolean[] myExactMatchFlags;

/**
 * Creates a result set positioned before the first row.
 *
 * @param theMessage the message to read values from
 * @param theLoopPoints loop point paths in declaration order
 * @param theLoopPointNames loop point name to index in theLoopPoints
 * @param theFieldPaths selected field paths
 * @param theFieldNames field label to index in theFieldPaths
 * @param theWherePaths paths of the fields tested by the where clause
 * @param theWherePatterns value or regular expression for each filter
 * @param theExactMatchFlags true where a filter is an exact comparison
 */
public ResultImpl(Message theMessage, String[] theLoopPoints,
        Map<String, Integer> theLoopPointNames, String[] theFieldPaths,
        Map<String, Integer> theFieldNames, String[] theWherePaths,
        String[] theWherePatterns, boolean[] theExactMatchFlags) {

    myTerser = new Terser(theMessage);

    // looping state
    myLoopPoints = theLoopPoints;
    int loopCount = theLoopPoints.length;
    myIndices = new int[loopCount];
    myNumEmpty = new int[loopCount];
    myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
    myLoopPointNames = theLoopPointNames;

    // selected fields
    myFieldPaths = theFieldPaths;
    myValues = new String[theFieldPaths.length];
    myFieldNames = theFieldNames;

    // filters
    myWherePaths = theWherePaths;
    myWherePatterns = theWherePatterns;
    myExactMatchFlags = theExactMatchFlags;

    if (loopCount > 0) {
        // the innermost (last) loop starts just before its 1st iteration
        myIndices[loopCount - 1] = -1;
    } else {
        // nothing to loop over, so there is exactly one row to offer
        myNonLoopingQuery = true;
    }
}
303 
304 		// extracts max number of empty iterations for each loop point (this is
305 		// communicated
306 		// as an optional integer after the *, e.g. blah(*3) ... default is 0).
307 		private int[] getMaxNumEmpty(String[] theLoopPoints) {
308 			int[] retVal = new int[theLoopPoints.length];
309 			for (int i = 0; i < theLoopPoints.length; i++) {
310 				retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
311 			}
312 			return retVal;
313 		}
314 
315 		private int getMaxNumEmpty(String theLoopPoint) {
316 			int retVal = 0; // default
317 
318 			Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
319 			if (m.find()) {
320 				String num = m.group(1);
321 				retVal = Integer.parseInt(num);
322 			}
323 
324 			return retVal;
325 		}
326 
327 		// returns true if some field under the given loop point has a value at
328 		// the present
329 		// iteration
330 		private boolean currentRowValued(int theLoopPoint) {
331 			for (int i = 0; i < myFieldPaths.length; i++) {
332 				if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
333 					String value = myValues[i];
334 					if (value != null && value.length() > 0) {
335 						return true;
336 					}
337 				}
338 			}
339 			return false;
340 		}
341 
342 		// returns true if the current row matches the where clause filters
343 		private boolean currentRowMatchesFilter() {
344 			for (int i = 0; i < myWhereValues.length; i++) {
345 				if (myExactMatchFlags[i]) {
346 					if (!myWherePatterns[i].equals(myWhereValues[i])) {
347 						return false;
348 					}
349 				} else {
350 					if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
351 						return false;
352 					}
353 				}
354 			}
355 			return true;
356 		}
357 
358 		// true if the given path references the given loop point (directly
359 		// or indirectly)
360 		private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
361 			String path = theFieldPath;
362 			int lp;
363 			while ((lp = getLoopPointReference(path)) >= 0) {
364 				if (lp == theLoopPoint) {
365 					return true;
366 				} else {
367 					path = myLoopPoints[lp];
368 				}
369 			}
370 			return false;
371 		}
372 
373 		// expands a set of paths to their current loop point iterations, and
374 		// gets
375 		// current values from our message
376 		private String[] getCurrentValues(String[] thePaths)
377 				throws HL7Exception {
378 			String[] paths = composePaths(thePaths);
379 			String[] values = new String[paths.length];
380 			for (int i = 0; i < paths.length; i++) {
381 				values[i] = myTerser.get(paths[i]);
382 				if (values[i] == null) {
383 					values[i] = "";
384 				}
385 			}
386 			return values;
387 		}
388 
389 		// creates full Terser paths from current location, loop points, and
390 		// given paths
391 		// with loop point references
392 		private String[] composePaths(String[] thePaths) {
393 			String[] currentLoopPoints = composeLoopPoints();
394 			String[] result = new String[thePaths.length];
395 			for (int i = 0; i < thePaths.length; i++) {
396 				result[i] = thePaths[i];
397 				int ref = getLoopPointReference(thePaths[i]);
398 				if (ref >= 0) {
399 					result[i] = expandLoopPointReference(result[i],
400 							currentLoopPoints[ref]);
401 				}
402 			}
403 			return result;
404 		}
405 
406 		// parameterizes loop points with present location (i.e. replaces * with
407 		// current
408 		// indices)
409 		private String[] composeLoopPoints() {
410 			String[] result = new String[myLoopPoints.length];
411 			for (int i = 0; i < myLoopPoints.length; i++) {
412 				result[i] = myLoopPoints[i].replaceAll("\\*\\d*",
413 						String.valueOf(myIndices[i]));
414 
415 				int ref = getLoopPointReference(myLoopPoints[i]);
416 				if (ref >= i) {
417 					throw new IllegalStateException(
418 							"Loop point must be defined after the "
419 									+ "one it references: " + myLoopPoints[i]);
420 				} else if (ref >= 0) {
421 					result[i] = expandLoopPointReference(result[i], result[ref]);
422 				}
423 			}
424 			return result;
425 		}
426 
427 		// extracts LP# of label between first '{' and first '}', or -1 if there
428 		// isn't one
429 		private int getLoopPointReference(String thePath) {
430 			StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
431 			if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
432 				String ref = tok.nextToken();
433 				return myLoopPointNames.get(ref);
434 			} else {
435 				return -1;
436 			}
437 		}
438 
439 		private String expandLoopPointReference(String thePath,
440 				String theLoopPoint) {
441 			return thePath.replaceAll("\\{.*}", theLoopPoint);
442 		}
443 
444 		/**
445 		 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
446 		 */
447 		public String get(int theFieldNumber) {
448 			if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
449 				throw new IllegalArgumentException(
450 						"Field number must be between 0 and "
451 								+ (myValues.length - 1));
452 			}
453 			return myValues[theFieldNumber];
454 		}
455 
456 		/**
457 		 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
458 		 */
459 		public String get(String theFieldName) {
460 			Integer fieldNum = myFieldNames.get(theFieldName);
461 			if (fieldNum == null) {
462 				throw new IllegalArgumentException(
463 						"Field name not recognized: " + theFieldName);
464 			}
465 			return get(fieldNum);
466 		}
467 
468 		/**
469 		 * @throws HL7Exception
470 		 * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
471 		 */
472 		public boolean next() throws HL7Exception {
473 			if (myNonLoopingQuery) {
474 				myNonLoopingQuery = false;
475 				myValues = getCurrentValues(myFieldPaths);
476 				myWhereValues = getCurrentValues(myWherePaths);
477 				return currentRowMatchesFilter();
478 			}
479 
480 			boolean hasNext = false;
481 			for (int i = myIndices.length - 1; i >= 0; i--) {
482 				boolean gotMatch = false;
483 				while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
484 					myIndices[i]++;
485 					myValues = getCurrentValues(myFieldPaths);
486 					myWhereValues = getCurrentValues(myWherePaths);
487 
488 					if (!currentRowValued(i)) {
489 						myNumEmpty[i]++;
490 					} else {
491 						myNumEmpty[i] = 0;
492 					}
493 					if (currentRowMatchesFilter()) {
494 						gotMatch = true;
495 					}
496 				}
497 
498 				hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// &&
499 				// currentRowMatchesFilter();
500 				if (hasNext) {
501 					break;
502 				}
503 
504 				myIndices[i] = 0;
505 				myNumEmpty[i] = 0;
506 
507 				// TODO: if we aren't allowing empties in this loop, and have no
508 				// value, we want to
509 				// return the null in the super-loop. However, we don't know
510 				// which loop point, if
511 				// any, is the super-loop. If it was the next one we could do
512 				// this ...
513 				// if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0
514 				// && myIndices[i-1] == 0) {
515 				// myIndices[i-1] = -1;
516 				// } ... but it may not be, so we'll ignore this problem for
517 				// now.
518 			}
519 			return hasNext;
520 		}
521 
522 		/**
523 		 * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
524 		 */
525 		public String[] getNamedFields() {
526 			return myFieldNames.keySet().toArray(new String[0]);
527 		}
528 
529 	}
530 
531 }