/**
The contents of this file are subject to the Mozilla Public License Version 1.1
(the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
specific language governing rights and limitations under the License.

The Initial Developer of the Original Code is University Health Network. Copyright (C)
2001. All Rights Reserved.

Contributor(s): Jens Kristian Villadsen from Cetrea A/S

Alternatively, the contents of this file may be used under the terms of the
GNU General Public License (the "GPL"), in which case the provisions of the GPL are
applicable instead of those above. If you wish to allow use of your version of this
file only under the terms of the GPL and not to allow others to use your version
of this file under the MPL, indicate your decision by deleting the provisions above
and replace them with the notice and other provisions required by the GPL License.
If you do not delete the provisions above, a recipient may use your version of
this file under either the MPL or the GPL.

 */


package ca.uhn.hl7v2.llp;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;

import ca.uhn.hl7v2.HL7Exception;
import ca.uhn.hl7v2.parser.EncodingNotSupportedException;
import ca.uhn.hl7v2.preparser.PreParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Charset utility class.
 * <p>
 * Provides helpers to choose the charset of a message from its MSH-18 field and to
 * recognize and remove a leading Unicode byte order mark (BOM) from raw message bytes.
 *
 * @author Jens Kristian Villadsen from Cetrea A/S
 * @author Christian Ohr
 */
public class CharSetUtil {

    private static final Logger LOG = LoggerFactory.getLogger(CharSetUtil.class);

    /**
     * Chooses the charset for a message by looking at its MSH-18 field.
     * <p>
     * If MSH-18 is empty, or if it cannot be read or mapped to a charset, the
     * supplied default is returned (a warning is logged in the failure cases).
     *
     * @param message        the message text to inspect
     * @param defaultCharset the charset to fall back to; must not be {@code null}
     *                       because its display name is logged
     * @return the charset derived from MSH-18, or {@code defaultCharset}
     */
    static Charset checkCharset(String message, Charset defaultCharset) {
        Charset charset = defaultCharset;
        try {
            String[] fields = PreParser.getFields(message, "MSH-18(0)");
            // stripNonLowAscii never returns null, so only emptiness needs checking
            String hl7CharsetName = stripNonLowAscii(fields[0]);
            if (hl7CharsetName.length() > 0) {
                charset = HL7Charsets.getCharsetForHL7Encoding(hl7CharsetName);
            }
            LOG.trace("Detected MSH-18 value \"{}\" so using charset {}", hl7CharsetName, charset.displayName());
        } catch (EncodingNotSupportedException e) {
            LOG.warn("Invalid or unsupported charset in MSH-18. Defaulting to {}", charset.displayName());
        } catch (HL7Exception e) {
            LOG.warn("Failed to parse MSH segment. Defaulting to {}", charset.displayName(), e);
        }
        return charset;
    }

    /**
     * Chooses the charset for a raw message by decoding it provisionally and then
     * inspecting MSH-18.
     * <p>
     * The bytes are decoded with the charset implied by a leading BOM, or as
     * US-ASCII when there is none; this provisional text is only used to read the
     * MSH-18 value.
     *
     * @param message        the raw message bytes; must not be {@code null}
     * @param defaultCharset the charset to fall back to; must not be {@code null}
     * @return the charset derived from MSH-18, or {@code defaultCharset}
     */
    static Charset checkCharset(byte[] message, Charset defaultCharset) {
        String guessMessage = BOM.skipBOM(message);
        return checkCharset(guessMessage, defaultCharset);
    }

    /**
     * Returns a copy of the given string that keeps only characters whose code is
     * in the range 1..126 (NUL, DEL and everything above are dropped).
     *
     * @param theString the string to filter, may be {@code null}
     * @return the filtered string; the empty string if the input is {@code null}
     */
    private static String stripNonLowAscii(String theString) {
        if (theString == null) {
            return "";
        }
        StringBuilder b = new StringBuilder(theString.length());
        for (int i = 0; i < theString.length(); i++) {
            char next = theString.charAt(i);
            if (next > 0 && next < 127) {
                b.append(next);
            }
        }
        return b.toString();
    }

    /**
     * Returns a copy of the given bytes with any leading byte order mark removed.
     * <p>
     * A new array is always returned, even when no BOM is present. Inputs shorter
     * than a BOM signature (including the empty array) are handled and simply
     * copied.
     *
     * @param bytes the raw bytes; must not be {@code null}
     * @return a new array holding the bytes that follow the BOM
     */
    public static byte[] withoutBOM(byte[] bytes) {
        BOM bom = BOM.getBOM(bytes);
        return Arrays.copyOfRange(bytes, bom.bytes.length, bytes.length);
    }

    /**
     * Known byte order marks together with the charset each one indicates.
     * <p>
     * Declaration order is the detection order used by {@link #getBOM(byte[])}.
     * The UTF-32 signatures are listed before the UTF-16 ones because the
     * UTF-16LE signature (FF FE) is a prefix of the UTF-32LE signature
     * (FF FE 00 00); testing the shorter one first would hide the longer one.
     * {@link #NONE} has an empty signature and must stay last, as it matches
     * every input.
     */
    private enum BOM {

        UTF_8(new byte[]{
                (byte) 0xEF,
                (byte) 0xBB,
                (byte) 0xBF}, "UTF-8"),
        UTF_32_LE(new byte[]{
                (byte) 0xFF,
                (byte) 0xFE,
                (byte) 0x00,
                (byte) 0x00}, "UTF-32LE"),
        UTF_32_BE(new byte[]{
                (byte) 0x00,
                (byte) 0x00,
                (byte) 0xFE,
                (byte) 0xFF}, "UTF-32BE"),
        UTF_16_LE(new byte[]{
                (byte) 0xFF,
                (byte) 0xFE}, "UTF-16LE"),
        UTF_16_BE(new byte[]{
                (byte) 0xFE,
                (byte) 0xFF}, "UTF-16BE"),
        NONE(new byte[]{}, "US-ASCII");

        private final byte[] bytes;
        private final Charset charset;

        BOM(byte[] bytes, String charset) {
            this.bytes = bytes;
            this.charset = Charset.forName(charset);
        }

        /**
         * Tells whether the given data begins with this BOM's signature.
         * Data shorter than the signature never matches.
         */
        private boolean isPrefixOf(byte[] data) {
            if (data.length < bytes.length) {
                return false;
            }
            for (int i = 0; i < bytes.length; i++) {
                if (data[i] != bytes[i]) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Finds the BOM that the given bytes start with.
         *
         * @param bytes the raw bytes; must not be {@code null}
         * @return the first matching BOM in declaration order, or {@link #NONE}
         *         when the bytes carry no known signature
         */
        public static BOM getBOM(byte[] bytes) {
            for (BOM bom : BOM.values()) {
                if (bom.isPrefixOf(bytes)) {
                    return bom;
                }
            }
            return BOM.NONE;
        }

        /**
         * Decodes the bytes following the BOM, using the charset that the BOM
         * indicates (US-ASCII when there is no BOM).
         *
         * @param bytes the raw bytes; must not be {@code null}
         * @return the decoded text, never {@code null}
         */
        public static String skipBOM(byte[] bytes) {
            BOM bom = getBOM(bytes);
            int offset = bom.bytes.length;
            // Charset overload: no checked exception and no name lookup needed
            return new String(bytes, offset, bytes.length - offset, bom.charset);
        }

    }
}