001/*
002 * Created on 28-Apr-2004
003 */
004package ca.uhn.hl7v2.preparser;
005
006import java.util.Arrays;
007import java.util.List;
008import java.util.Properties;
009import java.util.StringTokenizer;
010
011import ca.uhn.hl7v2.HL7Exception;
012import ca.uhn.hl7v2.parser.EncodingDetector;
013import ca.uhn.hl7v2.util.Terser;
014
015/**
016 * <p>Extracts specified fields from unparsed messages.  This class is a 
017 * facade for the ER7 and XML classes.  Use it like this: </p>
018 * 
019 * <code>
020 * String message = null; //... your ER7 or XML message string goes here
021 * String[] fieldSpecs = {"MSH-9-1", "MSH-9-2", "MSH-12"};
022 * String[] fields = PreParser.getFields(message, fieldSpecs);
023 * </code>
024 * 
025 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
026 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:37 $ by $Author: jamesagnew $
027 */
028public class PreParser {
029
030    /**
031     * Extracts selected fields from a message.  
032     *   
033     * @param theMessageText an unparsed message from which to get fields 
034     * @param thePathSpecs Terser-like paths to fields in the message.  See documentation
035     *      for Terser.  These paths are identical except that they start with the segment
036     *      name (search flags and group names are to be omitted as they are not relevant 
037     *      with unparsed ER7 messages).  
038     * @return field values corresponding to the given paths
039     * @throws HL7Exception
040     */
041    public static String[] getFields(String theMessageText, String... thePathSpecs) throws HL7Exception {
042        DatumPath[] paths = new DatumPath[thePathSpecs.length];
043        for (int i = 0; i < thePathSpecs.length; i++) {
044            StringTokenizer tok = new StringTokenizer(thePathSpecs[i], "-", false);
045            String segSpec = tok.nextToken();
046            tok = new StringTokenizer(segSpec, "()", false);
047            String segName = tok.nextToken();
048            if (segName.length() != 3) {
049                throw new HL7Exception("In field path, " + segName + " is not a valid segment name");
050            }
051            int segRep = 0;
052            if (tok.hasMoreTokens()) {
053                String rep = tok.nextToken();
054                try {
055                    segRep = Integer.parseInt(rep);
056                } catch (NumberFormatException e) {
057                    throw new HL7Exception("In field path, segment rep" + rep + " is not valid", e);
058                }
059            }
060            
061            int[] indices = Terser.getIndices(thePathSpecs[i]);
062            paths[i] = new DatumPath();
063            paths[i].add(segName).add(segRep);
064            paths[i].add(indices[0]).add(indices[1]).add(indices[2]).add(indices[3]);
065            
066        }
067        return getFields(theMessageText, paths);
068    }
069    
070    /** 
071     * Gets selected fields from a message, as with String[] arg version but 
072     * using DatumPaths. 
073     */     
074    private static String[] getFields(String theMessageText, DatumPath[] thePaths) throws HL7Exception {
075        String[] fields = new String[thePaths.length];
076        Properties props = new Properties();
077        
078        List<DatumPath> mask = Arrays.asList(thePaths);
079
080        boolean OK = false;
081        if (EncodingDetector.isEr7Encoded(theMessageText)) {
082            OK = ER7.parseMessage(props, mask, theMessageText);
083        } else if (EncodingDetector.isXmlEncoded(theMessageText)) {
084            OK = XML.parseMessage(props, theMessageText, null);
085        } else {
086            throw new HL7Exception("Message encoding is not recognized"); 
087        }
088        
089        if (!OK) {
090            throw new HL7Exception("Parse failed");
091        }
092        
093        for (int i = 0; i < fields.length; i++) {
094            fields[i] = props.getProperty(thePaths[i].toString());
095        }
096        return fields;
097    }    
098
099}