001/**
002 * The contents of this file are subject to the Mozilla Public License Version 1.1
003 * (the "License"); you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 * Software distributed under the License is distributed on an "AS IS" basis,
006 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 * specific language governing rights and limitations under the License.
008 *
009 * The Original Code is "MessageQuery.java".  Description:
010 * "Queries messages in an SQL-like style.  "
011 *
012 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
013 * 2005.  All Rights Reserved.
014 *
015 * Contributor(s): ______________________________________.
016 *
017 * Alternatively, the contents of this file may be used under the terms of the
018 * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
019 * applicable instead of those above.  If you wish to allow use of your version of this
020 * file only under the terms of the GPL and not to allow others to use your version
021 * of this file under the MPL, indicate your decision by deleting  the provisions above
022 * and replace  them with the notice and other provisions required by the GPL License.
023 * If you do not delete the provisions above, a recipient may use your version of
024 * this file under either the MPL or the GPL.
025 *
026 */
027package ca.uhn.hl7v2.util;
028
029import java.util.ArrayList;
030import java.util.HashMap;
031import java.util.List;
032import java.util.Map;
033import java.util.Properties;
034import java.util.StringTokenizer;
035import java.util.regex.Matcher;
036import java.util.regex.Pattern;
037
038import ca.uhn.hl7v2.HL7Exception;
039import ca.uhn.hl7v2.model.Message;
040
041/**
042 * Queries messages in an SQL-like style. We get repeated row-like structures by
043 * looping over repetitions of groups, segments, or fields.
044 * 
045 * This is a very advanced class ... maybe too advanced even for you. If you
046 * find it confusing, please note that there are simpler ways to get data from a
047 * message (like calling its getters or using Terser).
048 * 
049 * LOOPING: You specify the loop points as part of the query. For example you
050 * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is
051 * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this
052 * example would loop through repetitions of MSH-18. If there are multiple loop
053 * points, the loops are nested so that each possible combination is returned.
054 * Looping stops when none of the fields under a loop point are valued. The name
055 * of the loop point ('x' in the example above) is arbitrary.
056 * 
057 * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are
058 * used in place of table.field. You can use the "as" keyword to give a field a
059 * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is
060 * under a loop point, replace the path up to the loop point with a loop point
061 * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
062 * 
063 * SELECTING ROWS: A "row" is a combination of all selected fields at one
064 * iteration. You can filter which rows are returned using a where clause
065 * similar to that in SQL. Use exact values or regular expressions, for example:
066 * <code>where {1} like '.*blood.*'</code> or
067 * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with
068 * commas (which mean 'and'). Future versions may support 'or', negation,
069 * brackets, etc., but this version doesn't.
070 * 
071 * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = ./PID-3(*) where
072 * {pat-id}-2 = 'mrn'
073 * 
074 * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For
075 * example consider the message:
076 * 
 * <pre>
 * MSH|etc.|etc.
 * Z01|one~two|a
 * Z01|three~four|b
 * </pre>
 * 
 * The query <code>select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)</code>
 * would return these rows:
 * 
 * <pre>
 * a one
 * a two
 * b three
 * b four
 * </pre>
 * 
 * While the query
 * <code>select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)</code> would
 * return these rows:
 * 
 * <pre>
 * a one
 * a two
 * b one
 * b two
 * </pre>
084 * 
085 * In the first case, one loop point refers to another. In the second case the
086 * loops are treated as independent, just as if they referred to different
087 * branches of the message.
088 * 
089 * TODO: could support distinct easily by keeping record of rows and comparing
090 * each one to previous rows
091 * 
092 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
093 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author:
094 *          jamesagnew $
095 * @deprecated
096 */
097public class MessageQuery {
098
099        /**
100         * @param theMessage
101         *            an HL7 message from which data are to be queried
102         * @param theQuery
103         *            the query (see class docs for syntax)
104         * @return data from the message that are selected by the query
105         */
106        public static Result query(Message theMessage, String theQuery) {
107                Properties clauses = getClauses(theQuery);
108
109                // parse select clause
110                StringTokenizer select = new StringTokenizer(
111                                clauses.getProperty("select"), ", ", false);
112                List<String> fieldPaths = new ArrayList<String>(10);
113                Map<String, Integer> names = new HashMap<String, Integer>(10);
114                while (select.hasMoreTokens()) {
115                        String token = select.nextToken();
116                        if (token.equals("as")) {
117                                if (!select.hasMoreTokens()) {
118                                        throw new IllegalArgumentException(
119                                                        "Keyword 'as' must be followed by a field label");
120                                }
121                                names.put(select.nextToken(), fieldPaths.size() - 1);
122                        } else {
123                                fieldPaths.add(token);
124                        }
125                }
126
127                // parse loop clause
128                StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop",
129                                ""), ",", false);
130                List<String> loopPoints = new ArrayList<String>(10);
131                Map<String, Integer> loopPointNames = new HashMap<String, Integer>(10);
132                while (loop.hasMoreTokens()) {
133                        String pointDecl = loop.nextToken();
134                        StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
135                        String name = tok.nextToken().trim();
136                        String path = tok.nextToken().trim();
137                        loopPoints.add(path);
138                        loopPointNames.put(name, loopPoints.size() - 1);
139                }
140
141                // parse where clause
142                // TODO: this will do for now but it should really be evaluated like an
143                // expression
144                // rather than a list
145                StringTokenizer where = new StringTokenizer(clauses.getProperty(
146                                "where", ""), ",", false);
147                List<String> filters = new ArrayList<String>();
148                while (where.hasMoreTokens()) {
149                        filters.add(where.nextToken());
150                }
151                String[] filterPaths = new String[filters.size()];
152                String[] filterPatterns = new String[filters.size()];
153                boolean[] exactFlags = new boolean[filters.size()];
154
155                for (int i = 0; i < filters.size(); i++) {
156                        exactFlags[i] = true;
157                        String filter = filters.get(i);
158                        String[] parts = splitFromEnd(filter, "=");
159                        if (parts[1] != null) {
160                                parts[1] = parts[1].substring(1);
161                        } else {
162                                exactFlags[i] = false;
163                                parts = splitFromEnd(filter, "like");
164                                parts[1] = parts[1].substring(4);
165                        }
166                        filterPaths[i] = parts[0].trim();
167                        parts[1] = parts[1].trim();
168                        filterPatterns[i] = parts[1].substring(1, parts[1].length() - 1);
169                }
170
171                return new ResultImpl(theMessage,
172                                loopPoints.toArray(new String[0]), loopPointNames,
173                                fieldPaths.toArray(new String[0]), names,
174                                filterPaths, filterPatterns, exactFlags);
175        }
176
177        private static Properties getClauses(String theQuery) {
178                Properties clauses = new Properties();
179
180                String[] split = splitFromEnd(theQuery, "where ");
181                setClause(clauses, "where", split[1]);
182
183                split = splitFromEnd(split[0], "loop ");
184                setClause(clauses, "loop", split[1]);
185                setClause(clauses, "select", split[0]);
186
187                if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
188                        throw new IllegalArgumentException(
189                                        "The loop clause must precede the where clause");
190                }
191                if (clauses.getProperty("select") == null) {
192                        throw new IllegalArgumentException(
193                                        "The query must begin with a select clause");
194                }
195                return clauses;
196        }
197
198        private static void setClause(Properties theClauses, String theName,
199                        String theClause) {
200                if (theClause != null) {
201                        theClauses.setProperty(theName,
202                                        theClause.substring(theName.length()).trim());
203                }
204        }
205
206        private static String[] splitFromEnd(String theString, String theMarker) {
207                String[] result = new String[2];
208                int begin = theString.indexOf(theMarker);
209                if (begin >= 0) {
210                        result[0] = theString.substring(0, begin);
211                        result[1] = theString.substring(begin);
212                } else {
213                        result[0] = theString;
214                }
215                return result;
216        }
217
218        /**
219         * A result set for a message query.
220         * 
221         * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
222         * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by
223         *          $Author: jamesagnew $
224         */
225        public static interface Result {
226
227                /**
228                 * @param theFieldNumber
229                 *            numbered from zero in the order they are specified in the
230                 *            query
231                 * @return the corresponding value in the current row
232                 */
233                public String get(int theFieldNumber);
234
235                /**
236                 * @param theFieldName
237                 *            a field name as specified in the query with the keyword
238                 *            "as"
239                 * @return the corresponding value in the current row
240                 */
241                public String get(String theFieldName);
242
243                /**
244                 * @return a list of named fields as defined with 'as' in the query
245                 */
246                public String[] getNamedFields();
247
248                /**
249                 * Advances to the next "row" of data if one is available.
250                 * 
251                 * @return true if another row is available
252                 * @throws HL7Exception
253                 */
254                public boolean next() throws HL7Exception;
255
256        }
257
258        private static class ResultImpl implements Result {
259
260                private Terser myTerser;
261                private String[] myValues;
262                private String[] myLoopPoints;
263                private Map<String, Integer> myLoopPointNames;
264                private String[] myFieldPaths;
265                private Map<String, Integer> myFieldNames;
266                private int[] myIndices;
267                private int[] myNumEmpty; // number of empty sub-loops since last
268                                                                        // non-empty one
269                private int[] myMaxNumEmpty;
270                private boolean myNonLoopingQuery = false;
271                private String[] myWherePaths;
272                private String[] myWhereValues;
273                private String[] myWherePatterns;
274                private boolean[] myExactMatchFlags;
275
276                public ResultImpl(Message theMessage, String[] theLoopPoints,
277                                Map<String, Integer> theLoopPointNames, String[] theFieldPaths,
278                                Map<String, Integer> theFieldNames, String[] theWherePaths,
279                                String[] theWherePatterns, boolean[] theExactMatchFlags) {
280
281                        myTerser = new Terser(theMessage);
282                        myLoopPoints = theLoopPoints;
283                        myIndices = new int[theLoopPoints.length];
284                        myNumEmpty = new int[theLoopPoints.length];
285                        myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286                        myLoopPointNames = theLoopPointNames;
287                        myFieldPaths = theFieldPaths;
288                        myValues = new String[theFieldPaths.length];
289                        myFieldNames = theFieldNames;
290                        myWherePaths = theWherePaths;
291                        myWherePatterns = theWherePatterns;
292                        myExactMatchFlags = theExactMatchFlags;
293
294                        if (theLoopPoints.length == 0) {
295                                myNonLoopingQuery = true; // if no loops, give ourselves 1
296                                                                                        // iteration
297                        } else {
298                                myIndices[myIndices.length - 1] = -1; // start before 1st
299                                                                                                                // iteration
300                        }
301
302                }
303
304                // extracts max number of empty iterations for each loop point (this is
305                // communicated
306                // as an optional integer after the *, e.g. blah(*3) ... default is 0).
307                private int[] getMaxNumEmpty(String[] theLoopPoints) {
308                        int[] retVal = new int[theLoopPoints.length];
309                        for (int i = 0; i < theLoopPoints.length; i++) {
310                                retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
311                        }
312                        return retVal;
313                }
314
315                private int getMaxNumEmpty(String theLoopPoint) {
316                        int retVal = 0; // default
317
318                        Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
319                        if (m.find()) {
320                                String num = m.group(1);
321                                retVal = Integer.parseInt(num);
322                        }
323
324                        return retVal;
325                }
326
327                // returns true if some field under the given loop point has a value at
328                // the present
329                // iteration
330                private boolean currentRowValued(int theLoopPoint) {
331                        for (int i = 0; i < myFieldPaths.length; i++) {
332                                if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
333                                        String value = myValues[i];
334                                        if (value != null && value.length() > 0) {
335                                                return true;
336                                        }
337                                }
338                        }
339                        return false;
340                }
341
342                // returns true if the current row matches the where clause filters
343                private boolean currentRowMatchesFilter() {
344                        for (int i = 0; i < myWhereValues.length; i++) {
345                                if (myExactMatchFlags[i]) {
346                                        if (!myWherePatterns[i].equals(myWhereValues[i])) {
347                                                return false;
348                                        }
349                                } else {
350                                        if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
351                                                return false;
352                                        }
353                                }
354                        }
355                        return true;
356                }
357
358                // true if the given path references the given loop point (directly
359                // or indirectly)
360                private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
361                        String path = theFieldPath;
362                        int lp;
363                        while ((lp = getLoopPointReference(path)) >= 0) {
364                                if (lp == theLoopPoint) {
365                                        return true;
366                                } else {
367                                        path = myLoopPoints[lp];
368                                }
369                        }
370                        return false;
371                }
372
373                // expands a set of paths to their current loop point iterations, and
374                // gets
375                // current values from our message
376                private String[] getCurrentValues(String[] thePaths)
377                                throws HL7Exception {
378                        String[] paths = composePaths(thePaths);
379                        String[] values = new String[paths.length];
380                        for (int i = 0; i < paths.length; i++) {
381                                values[i] = myTerser.get(paths[i]);
382                                if (values[i] == null) {
383                                        values[i] = "";
384                                }
385                        }
386                        return values;
387                }
388
389                // creates full Terser paths from current location, loop points, and
390                // given paths
391                // with loop point references
392                private String[] composePaths(String[] thePaths) {
393                        String[] currentLoopPoints = composeLoopPoints();
394                        String[] result = new String[thePaths.length];
395                        for (int i = 0; i < thePaths.length; i++) {
396                                result[i] = thePaths[i];
397                                int ref = getLoopPointReference(thePaths[i]);
398                                if (ref >= 0) {
399                                        result[i] = expandLoopPointReference(result[i],
400                                                        currentLoopPoints[ref]);
401                                }
402                        }
403                        return result;
404                }
405
406                // parameterizes loop points with present location (i.e. replaces * with
407                // current
408                // indices)
409                private String[] composeLoopPoints() {
410                        String[] result = new String[myLoopPoints.length];
411                        for (int i = 0; i < myLoopPoints.length; i++) {
412                                result[i] = myLoopPoints[i].replaceAll("\\*\\d*",
413                                                String.valueOf(myIndices[i]));
414
415                                int ref = getLoopPointReference(myLoopPoints[i]);
416                                if (ref >= i) {
417                                        throw new IllegalStateException(
418                                                        "Loop point must be defined after the "
419                                                                        + "one it references: " + myLoopPoints[i]);
420                                } else if (ref >= 0) {
421                                        result[i] = expandLoopPointReference(result[i], result[ref]);
422                                }
423                        }
424                        return result;
425                }
426
427                // extracts LP# of label between first '{' and first '}', or -1 if there
428                // isn't one
429                private int getLoopPointReference(String thePath) {
430                        StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
431                        if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
432                                String ref = tok.nextToken();
433                                return myLoopPointNames.get(ref);
434                        } else {
435                                return -1;
436                        }
437                }
438
439                private String expandLoopPointReference(String thePath,
440                                String theLoopPoint) {
441                        return thePath.replaceAll("\\{.*\\}", theLoopPoint);
442                }
443
444                /**
445                 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
446                 */
447                public String get(int theFieldNumber) {
448                        if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
449                                throw new IllegalArgumentException(
450                                                "Field number must be between 0 and "
451                                                                + (myValues.length - 1));
452                        }
453                        return myValues[theFieldNumber];
454                }
455
456                /**
457                 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
458                 */
459                public String get(String theFieldName) {
460                        Integer fieldNum = myFieldNames.get(theFieldName);
461                        if (fieldNum == null) {
462                                throw new IllegalArgumentException(
463                                                "Field name not recognized: " + theFieldName);
464                        }
465                        return get(fieldNum);
466                }
467
468                /**
469                 * @throws HL7Exception
470                 * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
471                 */
472                public boolean next() throws HL7Exception {
473                        if (myNonLoopingQuery) {
474                                myNonLoopingQuery = false;
475                                myValues = getCurrentValues(myFieldPaths);
476                                myWhereValues = getCurrentValues(myWherePaths);
477                                return currentRowMatchesFilter();
478                        }
479
480                        boolean hasNext = false;
481                        findNext: for (int i = myIndices.length - 1; i >= 0; i--) {
482                                boolean gotMatch = false;
483                                while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
484                                        myIndices[i]++;
485                                        myValues = getCurrentValues(myFieldPaths);
486                                        myWhereValues = getCurrentValues(myWherePaths);
487
488                                        if (!currentRowValued(i)) {
489                                                myNumEmpty[i]++;
490                                        } else {
491                                                myNumEmpty[i] = 0;
492                                        }
493                                        if (currentRowMatchesFilter()) {
494                                                gotMatch = true;
495                                        }
496                                }
497
498                                hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// &&
499                                                                                                                        // currentRowMatchesFilter();
500                                if (hasNext) {
501                                        break findNext;
502                                }
503
504                                myIndices[i] = 0;
505                                myNumEmpty[i] = 0;
506
507                                // TODO: if we aren't allowing empties in this loop, and have no
508                                // value, we want to
509                                // return the null in the super-loop. However, we don't know
510                                // which loop point, if
511                                // any, is the super-loop. If it was the next one we could do
512                                // this ...
513                                // if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0
514                                // && myIndices[i-1] == 0) {
515                                // myIndices[i-1] = -1;
516                                // } ... but it may not be, so we'll ignore this problem for
517                                // now.
518                        }
519                        return hasNext;
520                }
521
522                /**
523                 * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
524                 */
525                public String[] getNamedFields() {
526                        return myFieldNames.keySet().toArray(new String[0]);
527                }
528
529        }
530
531}