Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
MessageQuery |
|
| 3.391304347826087;3.391 | ||||
MessageQuery$Result |
|
| 3.391304347826087;3.391 | ||||
MessageQuery$ResultImpl |
|
| 3.391304347826087;3.391 |
1 | /** | |
2 | * The contents of this file are subject to the Mozilla Public License Version 1.1 | |
3 | * (the "License"); you may not use this file except in compliance with the License. | |
4 | * You may obtain a copy of the License at http://www.mozilla.org/MPL/ | |
5 | * Software distributed under the License is distributed on an "AS IS" basis, | |
6 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the | |
7 | * specific language governing rights and limitations under the License. | |
8 | * | |
9 | * The Original Code is "MessageQuery.java". Description: | |
10 | * "Queries messages in an SQL-like style. " | |
11 | * | |
12 | * The Initial Developer of the Original Code is University Health Network. Copyright (C) | |
13 | * 2005. All Rights Reserved. | |
14 | * | |
15 | * Contributor(s): ______________________________________. | |
16 | * | |
17 | * Alternatively, the contents of this file may be used under the terms of the | |
18 | * GNU General Public License (the "GPL"), in which case the provisions of the GPL are | |
19 | * applicable instead of those above. If you wish to allow use of your version of this | |
20 | * file only under the terms of the GPL and not to allow others to use your version | |
21 | * of this file under the MPL, indicate your decision by deleting the provisions above | |
22 | * and replace them with the notice and other provisions required by the GPL License. | |
23 | * If you do not delete the provisions above, a recipient may use your version of | |
24 | * this file under either the MPL or the GPL. | |
25 | * | |
26 | */ | |
27 | package ca.uhn.hl7v2.util; | |
28 | ||
29 | import java.util.ArrayList; | |
30 | import java.util.HashMap; | |
31 | import java.util.List; | |
32 | import java.util.Map; | |
33 | import java.util.Properties; | |
34 | import java.util.StringTokenizer; | |
35 | import java.util.regex.Matcher; | |
36 | import java.util.regex.Pattern; | |
37 | ||
38 | import ca.uhn.hl7v2.HL7Exception; | |
39 | import ca.uhn.hl7v2.model.Message; | |
40 | ||
41 | /** | |
42 | * Queries messages in an SQL-like style. We get repeated row-like structures by | |
43 | * looping over repetitions of groups, segments, or fields. | |
44 | * | |
45 | * This is a very advanced class ... maybe too advanced even for you. If you | |
46 | * find it confusing, please note that there are simpler ways to get data from a | |
47 | * message (like calling its getters or using Terser). | |
48 | * | |
49 | * LOOPING: You specify the loop points as part of the query. For example you | |
50 | * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is | |
51 | * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this | |
52 | * example would loop through repetitions of MSH-18. If there are multiple loop | |
53 | * points, the loops are nested so that each possible combination is returned. | |
54 | * Looping stops when none of the fields under a loop point are valued. The name | |
55 | * of the loop point ('x' in the example above) is arbitrary. | |
56 | * | |
57 | * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are | |
58 | * used in place of table.field. You can use the "as" keyword to give a field a | |
59 | * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is | |
60 | * under a loop point, replace the path up to the loop point with a loop point | |
61 | * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code> | |
62 | * | |
63 | * SELECTING ROWS: A "row" is a combination of all selected fields at one | |
64 | * iteration. You can filter which rows are returned using a where clause | |
65 | * similar to that in SQL. Use exact values or regular expressions, for example: | |
66 | * <code>where {1} like '.*blood.*'</code> or | |
67 | * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with | |
68 | * commas (which mean 'and'). Future versions may support 'or', negation, | |
69 | * brackets, etc., but this version doesn't. | |
70 | * | |
71 | * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = /.PID-3(*) where | |
72 | * {pat-id}-2 = 'mrn' | |
73 | * | |
74 | * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For | |
75 | * example consider the message: | |
76 | * | |
77 | * MSH|etc.|etc. Z01|one~two|a Z01|three~four|b | |
78 | * | |
79 | * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: a | |
80 | * one a two b three b four | |
81 | * | |
82 | * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would | |
83 | * return: a one a two b one b two | |
84 | * | |
85 | * In the first case, one loop point refers to another. In the second case the | |
86 | * loops are treated as independent, just as if they referred to different | |
87 | * branches of the message. | |
88 | * | |
89 | * TODO: could support distinct easily by keeping record of rows and comparing | |
90 | * each one to previous rows | |
91 | * | |
92 | * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a> | |
93 | * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: | |
94 | * jamesagnew $ | |
95 | * @deprecated | |
96 | */ | |
97 | 0 | public class MessageQuery { |
98 | ||
99 | /** | |
100 | * @param theMessage | |
101 | * an HL7 message from which data are to be queried | |
102 | * @param theQuery | |
103 | * the query (see class docs for syntax) | |
104 | * @return data from the message that are selected by the query | |
105 | */ | |
106 | public static Result query(Message theMessage, String theQuery) { | |
107 | 55 | Properties clauses = getClauses(theQuery); |
108 | ||
109 | // parse select clause | |
110 | 55 | StringTokenizer select = new StringTokenizer( |
111 | 55 | clauses.getProperty("select"), ", ", false); |
112 | 55 | List<String> fieldPaths = new ArrayList<String>(10); |
113 | 55 | Map<String, Integer> names = new HashMap<String, Integer>(10); |
114 | 235 | while (select.hasMoreTokens()) { |
115 | 180 | String token = select.nextToken(); |
116 | 180 | if (token.equals("as")) { |
117 | 90 | if (!select.hasMoreTokens()) { |
118 | 0 | throw new IllegalArgumentException( |
119 | "Keyword 'as' must be followed by a field label"); | |
120 | } | |
121 | 90 | names.put(select.nextToken(), fieldPaths.size() - 1); |
122 | } else { | |
123 | 90 | fieldPaths.add(token); |
124 | } | |
125 | 180 | } |
126 | ||
127 | // parse loop clause | |
128 | 55 | StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", |
129 | ""), ",", false); | |
130 | 55 | List<String> loopPoints = new ArrayList<String>(10); |
131 | 55 | Map<String, Integer> loopPointNames = new HashMap<String, Integer>(10); |
132 | 135 | while (loop.hasMoreTokens()) { |
133 | 80 | String pointDecl = loop.nextToken(); |
134 | 80 | StringTokenizer tok = new StringTokenizer(pointDecl, "=", false); |
135 | 80 | String name = tok.nextToken().trim(); |
136 | 80 | String path = tok.nextToken().trim(); |
137 | 80 | loopPoints.add(path); |
138 | 80 | loopPointNames.put(name, loopPoints.size() - 1); |
139 | 80 | } |
140 | ||
141 | // parse where clause | |
142 | // TODO: this will do for now but it should really be evaluated like an | |
143 | // expression | |
144 | // rather than a list | |
145 | 55 | StringTokenizer where = new StringTokenizer(clauses.getProperty( |
146 | "where", ""), ",", false); | |
147 | 55 | List<String> filters = new ArrayList<String>(); |
148 | 80 | while (where.hasMoreTokens()) { |
149 | 25 | filters.add(where.nextToken()); |
150 | } | |
151 | 55 | String[] filterPaths = new String[filters.size()]; |
152 | 55 | String[] filterPatterns = new String[filters.size()]; |
153 | 55 | boolean[] exactFlags = new boolean[filters.size()]; |
154 | ||
155 | 80 | for (int i = 0; i < filters.size(); i++) { |
156 | 25 | exactFlags[i] = true; |
157 | 25 | String filter = filters.get(i); |
158 | 25 | String[] parts = splitFromEnd(filter, "="); |
159 | 25 | if (parts[1] != null) { |
160 | 15 | parts[1] = parts[1].substring(1); |
161 | } else { | |
162 | 10 | exactFlags[i] = false; |
163 | 10 | parts = splitFromEnd(filter, "like"); |
164 | 10 | parts[1] = parts[1].substring(4); |
165 | } | |
166 | 25 | filterPaths[i] = parts[0].trim(); |
167 | 25 | parts[1] = parts[1].trim(); |
168 | 25 | filterPatterns[i] = parts[1].substring(1, parts[1].length() - 1); |
169 | } | |
170 | ||
171 | 110 | return new ResultImpl(theMessage, |
172 | 55 | loopPoints.toArray(new String[0]), loopPointNames, |
173 | 55 | fieldPaths.toArray(new String[0]), names, |
174 | filterPaths, filterPatterns, exactFlags); | |
175 | } | |
176 | ||
177 | private static Properties getClauses(String theQuery) { | |
178 | 55 | Properties clauses = new Properties(); |
179 | ||
180 | 55 | String[] split = splitFromEnd(theQuery, "where "); |
181 | 55 | setClause(clauses, "where", split[1]); |
182 | ||
183 | 55 | split = splitFromEnd(split[0], "loop "); |
184 | 55 | setClause(clauses, "loop", split[1]); |
185 | 55 | setClause(clauses, "select", split[0]); |
186 | ||
187 | 55 | if (clauses.getProperty("where", "").indexOf("loop ") >= 0) { |
188 | 0 | throw new IllegalArgumentException( |
189 | "The loop clause must precede the where clause"); | |
190 | } | |
191 | 55 | if (clauses.getProperty("select") == null) { |
192 | 0 | throw new IllegalArgumentException( |
193 | "The query must begin with a select clause"); | |
194 | } | |
195 | 55 | return clauses; |
196 | } | |
197 | ||
198 | private static void setClause(Properties theClauses, String theName, | |
199 | String theClause) { | |
200 | 165 | if (theClause != null) { |
201 | 240 | theClauses.setProperty(theName, |
202 | 120 | theClause.substring(theName.length()).trim()); |
203 | } | |
204 | 165 | } |
205 | ||
206 | private static String[] splitFromEnd(String theString, String theMarker) { | |
207 | 145 | String[] result = new String[2]; |
208 | 145 | int begin = theString.indexOf(theMarker); |
209 | 145 | if (begin >= 0) { |
210 | 90 | result[0] = theString.substring(0, begin); |
211 | 90 | result[1] = theString.substring(begin); |
212 | } else { | |
213 | 55 | result[0] = theString; |
214 | } | |
215 | 145 | return result; |
216 | } | |
217 | ||
218 | /** | |
219 | * A result set for a message query. | |
220 | * | |
221 | * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a> | |
222 | * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by | |
223 | * $Author: jamesagnew $ | |
224 | */ | |
225 | public static interface Result { | |
226 | ||
227 | /** | |
228 | * @param theFieldNumber | |
229 | * numbered from zero in the order they are specified in the | |
230 | * query | |
231 | * @return the corresponding value in the current row | |
232 | */ | |
233 | public String get(int theFieldNumber); | |
234 | ||
235 | /** | |
236 | * @param theFieldName | |
237 | * a field name as specified in the query with the keyword | |
238 | * "as" | |
239 | * @return the corresponding value in the current row | |
240 | */ | |
241 | public String get(String theFieldName); | |
242 | ||
243 | /** | |
244 | * @return a list of named fields as defined with 'as' in the query | |
245 | */ | |
246 | public String[] getNamedFields(); | |
247 | ||
248 | /** | |
249 | * Advances to the next "row" of data if one is available. | |
250 | * | |
251 | * @return true if another row is available | |
252 | * @throws HL7Exception | |
253 | */ | |
254 | public boolean next() throws HL7Exception; | |
255 | ||
256 | } | |
257 | ||
258 | private static class ResultImpl implements Result { | |
259 | ||
260 | private Terser myTerser; | |
261 | private String[] myValues; | |
262 | private String[] myLoopPoints; | |
263 | private Map<String, Integer> myLoopPointNames; | |
264 | private String[] myFieldPaths; | |
265 | private Map<String, Integer> myFieldNames; | |
266 | private int[] myIndices; | |
267 | private int[] myNumEmpty; // number of empty sub-loops since last | |
268 | // non-empty one | |
269 | private int[] myMaxNumEmpty; | |
270 | 55 | private boolean myNonLoopingQuery = false; |
271 | private String[] myWherePaths; | |
272 | private String[] myWhereValues; | |
273 | private String[] myWherePatterns; | |
274 | private boolean[] myExactMatchFlags; | |
275 | ||
276 | public ResultImpl(Message theMessage, String[] theLoopPoints, | |
277 | Map<String, Integer> theLoopPointNames, String[] theFieldPaths, | |
278 | Map<String, Integer> theFieldNames, String[] theWherePaths, | |
279 | 55 | String[] theWherePatterns, boolean[] theExactMatchFlags) { |
280 | ||
281 | 55 | myTerser = new Terser(theMessage); |
282 | 55 | myLoopPoints = theLoopPoints; |
283 | 55 | myIndices = new int[theLoopPoints.length]; |
284 | 55 | myNumEmpty = new int[theLoopPoints.length]; |
285 | 55 | myMaxNumEmpty = getMaxNumEmpty(theLoopPoints); |
286 | 55 | myLoopPointNames = theLoopPointNames; |
287 | 55 | myFieldPaths = theFieldPaths; |
288 | 55 | myValues = new String[theFieldPaths.length]; |
289 | 55 | myFieldNames = theFieldNames; |
290 | 55 | myWherePaths = theWherePaths; |
291 | 55 | myWherePatterns = theWherePatterns; |
292 | 55 | myExactMatchFlags = theExactMatchFlags; |
293 | ||
294 | 55 | if (theLoopPoints.length == 0) { |
295 | 10 | myNonLoopingQuery = true; // if no loops, give ourselves 1 |
296 | // iteration | |
297 | } else { | |
298 | 45 | myIndices[myIndices.length - 1] = -1; // start before 1st |
299 | // iteration | |
300 | } | |
301 | ||
302 | 55 | } |
303 | ||
304 | // extracts max number of empty iterations for each loop point (this is | |
305 | // communicated | |
306 | // as an optional integer after the *, e.g. blah(*3) ... default is 0). | |
307 | private int[] getMaxNumEmpty(String[] theLoopPoints) { | |
308 | 55 | int[] retVal = new int[theLoopPoints.length]; |
309 | 135 | for (int i = 0; i < theLoopPoints.length; i++) { |
310 | 80 | retVal[i] = getMaxNumEmpty(theLoopPoints[i]); |
311 | } | |
312 | 55 | return retVal; |
313 | } | |
314 | ||
315 | private int getMaxNumEmpty(String theLoopPoint) { | |
316 | 80 | int retVal = 0; // default |
317 | ||
318 | 80 | Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint); |
319 | 80 | if (m.find()) { |
320 | 10 | String num = m.group(1); |
321 | 10 | retVal = Integer.parseInt(num); |
322 | } | |
323 | ||
324 | 80 | return retVal; |
325 | } | |
326 | ||
327 | // returns true if some field under the given loop point has a value at | |
328 | // the present | |
329 | // iteration | |
330 | private boolean currentRowValued(int theLoopPoint) { | |
331 | 540 | for (int i = 0; i < myFieldPaths.length; i++) { |
332 | 430 | if (referencesLoop(myFieldPaths[i], theLoopPoint)) { |
333 | 275 | String value = myValues[i]; |
334 | 275 | if (value != null && value.length() > 0) { |
335 | 160 | return true; |
336 | } | |
337 | } | |
338 | } | |
339 | 110 | return false; |
340 | } | |
341 | ||
342 | // returns true if the current row matches the where clause filters | |
343 | private boolean currentRowMatchesFilter() { | |
344 | 325 | for (int i = 0; i < myWhereValues.length; i++) { |
345 | 140 | if (myExactMatchFlags[i]) { |
346 | 70 | if (!myWherePatterns[i].equals(myWhereValues[i])) { |
347 | 55 | return false; |
348 | } | |
349 | } else { | |
350 | 70 | if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) { |
351 | 40 | return false; |
352 | } | |
353 | } | |
354 | } | |
355 | 185 | return true; |
356 | } | |
357 | ||
358 | // true if the given path references the given loop point (directly | |
359 | // or indirectly) | |
360 | private boolean referencesLoop(String theFieldPath, int theLoopPoint) { | |
361 | 430 | String path = theFieldPath; |
362 | int lp; | |
363 | 535 | while ((lp = getLoopPointReference(path)) >= 0) { |
364 | 380 | if (lp == theLoopPoint) { |
365 | 275 | return true; |
366 | } else { | |
367 | 105 | path = myLoopPoints[lp]; |
368 | } | |
369 | } | |
370 | 155 | return false; |
371 | } | |
372 | ||
373 | // expands a set of paths to their current loop point iterations, and | |
374 | // gets | |
375 | // current values from our message | |
376 | private String[] getCurrentValues(String[] thePaths) | |
377 | throws HL7Exception { | |
378 | 560 | String[] paths = composePaths(thePaths); |
379 | 560 | String[] values = new String[paths.length]; |
380 | 1200 | for (int i = 0; i < paths.length; i++) { |
381 | 640 | values[i] = myTerser.get(paths[i]); |
382 | 640 | if (values[i] == null) { |
383 | 145 | values[i] = ""; |
384 | } | |
385 | } | |
386 | 560 | return values; |
387 | } | |
388 | ||
389 | // creates full Terser paths from current location, loop points, and | |
390 | // given paths | |
391 | // with loop point references | |
392 | private String[] composePaths(String[] thePaths) { | |
393 | 560 | String[] currentLoopPoints = composeLoopPoints(); |
394 | 560 | String[] result = new String[thePaths.length]; |
395 | 1200 | for (int i = 0; i < thePaths.length; i++) { |
396 | 640 | result[i] = thePaths[i]; |
397 | 640 | int ref = getLoopPointReference(thePaths[i]); |
398 | 640 | if (ref >= 0) { |
399 | 485 | result[i] = expandLoopPointReference(result[i], |
400 | currentLoopPoints[ref]); | |
401 | } | |
402 | } | |
403 | 560 | return result; |
404 | } | |
405 | ||
406 | // parameterizes loop points with present location (i.e. replaces * with | |
407 | // current | |
408 | // indices) | |
409 | private String[] composeLoopPoints() { | |
410 | 560 | String[] result = new String[myLoopPoints.length]; |
411 | 1570 | for (int i = 0; i < myLoopPoints.length; i++) { |
412 | 2020 | result[i] = myLoopPoints[i].replaceAll("\\*\\d*", |
413 | 1010 | String.valueOf(myIndices[i])); |
414 | ||
415 | 1010 | int ref = getLoopPointReference(myLoopPoints[i]); |
416 | 1010 | if (ref >= i) { |
417 | 0 | throw new IllegalStateException( |
418 | "Loop point must be defined after the " | |
419 | + "one it references: " + myLoopPoints[i]); | |
420 | 1010 | } else if (ref >= 0) { |
421 | 380 | result[i] = expandLoopPointReference(result[i], result[ref]); |
422 | } | |
423 | } | |
424 | 560 | return result; |
425 | } | |
426 | ||
427 | // extracts LP# of label between first '{' and first '}', or -1 if there | |
428 | // isn't one | |
429 | private int getLoopPointReference(String thePath) { | |
430 | 2185 | StringTokenizer tok = new StringTokenizer(thePath, "{}", false); |
431 | 2185 | if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) { |
432 | 1245 | String ref = tok.nextToken(); |
433 | 1245 | return myLoopPointNames.get(ref); |
434 | } else { | |
435 | 940 | return -1; |
436 | } | |
437 | } | |
438 | ||
439 | private String expandLoopPointReference(String thePath, | |
440 | String theLoopPoint) { | |
441 | 865 | return thePath.replaceAll("\\{.*\\}", theLoopPoint); |
442 | } | |
443 | ||
444 | /** | |
445 | * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int) | |
446 | */ | |
447 | public String get(int theFieldNumber) { | |
448 | 185 | if (theFieldNumber < 0 || theFieldNumber >= myValues.length) { |
449 | 0 | throw new IllegalArgumentException( |
450 | "Field number must be between 0 and " | |
451 | + (myValues.length - 1)); | |
452 | } | |
453 | 185 | return myValues[theFieldNumber]; |
454 | } | |
455 | ||
456 | /** | |
457 | * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String) | |
458 | */ | |
459 | public String get(String theFieldName) { | |
460 | 180 | Integer fieldNum = myFieldNames.get(theFieldName); |
461 | 180 | if (fieldNum == null) { |
462 | 0 | throw new IllegalArgumentException( |
463 | "Field name not recognized: " + theFieldName); | |
464 | } | |
465 | 180 | return get(fieldNum); |
466 | } | |
467 | ||
468 | /** | |
469 | * @throws HL7Exception | |
470 | * @see ca.uhn.hl7v2.util.MessageQuery.Result#next() | |
471 | */ | |
472 | public boolean next() throws HL7Exception { | |
473 | 170 | if (myNonLoopingQuery) { |
474 | 10 | myNonLoopingQuery = false; |
475 | 10 | myValues = getCurrentValues(myFieldPaths); |
476 | 10 | myWhereValues = getCurrentValues(myWherePaths); |
477 | 10 | return currentRowMatchesFilter(); |
478 | } | |
479 | ||
480 | 160 | boolean hasNext = false; |
481 | 260 | findNext: for (int i = myIndices.length - 1; i >= 0; i--) { |
482 | 215 | boolean gotMatch = false; |
483 | 485 | while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) { |
484 | 270 | myIndices[i]++; |
485 | 270 | myValues = getCurrentValues(myFieldPaths); |
486 | 270 | myWhereValues = getCurrentValues(myWherePaths); |
487 | ||
488 | 270 | if (!currentRowValued(i)) { |
489 | 110 | myNumEmpty[i]++; |
490 | } else { | |
491 | 160 | myNumEmpty[i] = 0; |
492 | } | |
493 | 270 | if (currentRowMatchesFilter()) { |
494 | 175 | gotMatch = true; |
495 | } | |
496 | } | |
497 | ||
498 | 215 | hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// && |
499 | // currentRowMatchesFilter(); | |
500 | 215 | if (hasNext) { |
501 | 115 | break findNext; |
502 | } | |
503 | ||
504 | 100 | myIndices[i] = 0; |
505 | 100 | myNumEmpty[i] = 0; |
506 | ||
507 | // TODO: if we aren't allowing empties in this loop, and have no | |
508 | // value, we want to | |
509 | // return the null in the super-loop. However, we don't know | |
510 | // which loop point, if | |
511 | // any, is the super-loop. If it was the next one we could do | |
512 | // this ... | |
513 | // if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0 | |
514 | // && myIndices[i-1] == 0) { | |
515 | // myIndices[i-1] = -1; | |
516 | // } ... but it may not be, so we'll ignore this problem for | |
517 | // now. | |
518 | } | |
519 | 160 | return hasNext; |
520 | } | |
521 | ||
522 | /** | |
523 | * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields() | |
524 | */ | |
525 | public String[] getNamedFields() { | |
526 | 0 | return myFieldNames.keySet().toArray(new String[0]); |
527 | } | |
528 | ||
529 | } | |
530 | ||
531 | } |