Coverage Report - ca.uhn.hl7v2.llp.CharSetUtil
 
Classes in this File Line Coverage Branch Coverage Complexity
CharSetUtil
96%
26/27
75%
9/12
3
CharSetUtil$BOM
85%
17/20
75%
3/4
3
 
 1  
 /**
 2  
  The contents of this file are subject to the Mozilla Public License Version 1.1
 3  
  (the "License"); you may not use this file except in compliance with the License.
 4  
  You may obtain a copy of the License at http://www.mozilla.org/MPL/
 5  
  Software distributed under the License is distributed on an "AS IS" basis,
 6  
  WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
 7  
  specific language governing rights and limitations under the License.
 8  
 
 9  
  The Initial Developer of the Original Code is University Health Network. Copyright (C)
 10  
  2001.  All Rights Reserved.
 11  
 
 12  
  Contributor(s): Jens Kristian Villadsen from Cetrea A/S
 13  
 
 14  
  Alternatively, the contents of this file may be used under the terms of the
 15  
  GNU General Public License (the "GPL"), in which case the provisions of the GPL are
 16  
  applicable instead of those above.  If you wish to allow use of your version of this
 17  
  file only under the terms of the GPL and not to allow others to use your version
 18  
  of this file under the MPL, indicate your decision by deleting  the provisions above
 19  
  and replace  them with the notice and other provisions required by the GPL License.
 20  
  If you do not delete the provisions above, a recipient may use your version of
 21  
  this file under either the MPL or the GPL.
 22  
 
 23  
  */
 24  
 
 25  
 
 26  
 package ca.uhn.hl7v2.llp;
 27  
 
 28  
 import java.io.UnsupportedEncodingException;
 29  
 import java.nio.charset.Charset;
 30  
 import java.util.Arrays;
 31  
 
 32  
 import ca.uhn.hl7v2.HL7Exception;
 33  
 import ca.uhn.hl7v2.parser.EncodingNotSupportedException;
 34  
 import ca.uhn.hl7v2.preparser.PreParser;
 35  
 import org.slf4j.Logger;
 36  
 import org.slf4j.LoggerFactory;
 37  
 
 38  
 /**
 39  
  * Charset utility class
 40  
  *
 41  
  * @author Jens Kristian Villadsen from Cetrea A/S
 42  
  * @author Christian Ohr
 43  
  */
 44  0
 public class CharSetUtil {
 45  
 
 46  5
     private static final Logger LOG = LoggerFactory.getLogger(CharSetUtil.class);
 47  
 
 48  
     static Charset checkCharset(String message, Charset defaultCharset) {
 49  185
         Charset charset = defaultCharset;
 50  
         try {
 51  185
             String[] fields = PreParser.getFields(message, "MSH-18(0)");
 52  180
             String hl7CharsetName = stripNonLowAscii(fields[0]);
 53  180
             if (hl7CharsetName != null && hl7CharsetName.length() > 0)
 54  65
                 charset = HL7Charsets.getCharsetForHL7Encoding(hl7CharsetName);
 55  175
             LOG.trace("Detected MSH-18 value \"{}\" so using charset {}", hl7CharsetName, charset.displayName());
 56  5
         } catch (EncodingNotSupportedException e) {
 57  5
             LOG.warn("Invalid or unsupported charset in MSH-18. Defaulting to {}", charset.displayName());
 58  5
         } catch (HL7Exception e) {
 59  5
             LOG.warn("Failed to parse MSH segment. Defaulting to {}", charset.displayName(), e);
 60  180
         }
 61  185
         return charset;
 62  
     }
 63  
 
 64  
     static Charset checkCharset(byte[] message, Charset defaultCharset) {
 65  100
         String guessMessage = BOM.skipBOM(message);
 66  100
         return checkCharset(guessMessage, defaultCharset);
 67  
     }
 68  
 
 69  
     private static String stripNonLowAscii(String theString) {
 70  180
         if (theString == null) return "";
 71  65
         StringBuilder b = new StringBuilder();
 72  
 
 73  665
         for (int i = 0; i < theString.length(); i++) {
 74  600
             char next = theString.charAt(i);
 75  600
             if (next > 0 && next < 127) {
 76  600
                 b.append(next);
 77  
             }
 78  
         }
 79  
 
 80  65
         return b.toString();
 81  
     }
 82  
 
 83  
     public static byte[] withoutBOM(byte[] bytes) {
 84  5
         BOM bom = BOM.getBOM(bytes);
 85  5
         byte[] withoutBOM = new byte[bytes.length - bom.bytes.length];
 86  5
         System.arraycopy(bytes, bom.bytes.length, withoutBOM, 0, bytes.length - bom.bytes.length);
 87  5
         return withoutBOM;
 88  
     }
 89  
 
 90  125
     private enum BOM {
 91  
 
 92  5
         UTF_8(new byte[]{
 93  
                 (byte) 0xEF,
 94  
                 (byte) 0xBB,
 95  
                 (byte) 0xBF}, "UTF-8"),
 96  5
         UTF_16_LE(new byte[]{
 97  
                 (byte) 0xFF,
 98  
                 (byte) 0xFE}, "UTF-16LE"),
 99  5
         UTF_16_BE(new byte[]{
 100  
                 (byte) 0xFE,
 101  
                 (byte) 0xFF}, "UTF-16BE"),
 102  5
         UTF_32_LE(new byte[]{
 103  
                 (byte) 0xFF,
 104  
                 (byte) 0xFE,
 105  
                 (byte) 0x00,
 106  
                 (byte) 0x00}, "UTF-32LE"),
 107  5
         UTF_32_BE(new byte[]{
 108  
                 (byte) 0x00,
 109  
                 (byte) 0x00,
 110  
                 (byte) 0xFE,
 111  
                 (byte) 0xFF}, "UTF-32BE"),
 112  5
         NONE(new byte[]{},    "US-ASCII");
 113  
 
 114  
         private byte[] bytes;
 115  
         private Charset charset;
 116  
 
 117  30
         BOM(byte[] bytes, String charset) {
 118  30
             this.bytes = bytes;
 119  30
             this.charset = Charset.forName(charset);
 120  30
         }
 121  
 
 122  
         public static BOM getBOM(byte[] bytes) {
 123  585
             for (BOM bom : BOM.values()) {
 124  585
                 byte[] bytesToCompare = new byte[bom.bytes.length];
 125  585
                 System.arraycopy(bytes, 0, bytesToCompare, 0, bom.bytes.length);
 126  585
                 if (Arrays.equals(bom.bytes, bytesToCompare)) return bom;
 127  
             }
 128  0
             return BOM.NONE;
 129  
         }
 130  
 
 131  
         public static String skipBOM(byte[] bytes) {
 132  
             try {
 133  100
                 BOM bom = getBOM(bytes);
 134  100
                 return new String(bytes, bom.bytes.length, bytes.length - bom.bytes.length, bom.charset.toString());
 135  0
             } catch (UnsupportedEncodingException e) {
 136  
                 // does not happen
 137  0
                 return null;
 138  
             }
 139  
         }
 140  
 
 141  
 
 142  
 
 143  
 
 144  
     }
 145  
 }