MimeUtility

/*
 * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

package javax.mail.internet;

import javax.mail.MessagingException;
import javax.activation.*;
import java.util.*;
import java.io.*;
import com.sun.mail.util.*;

/**
 * This is a utility class that provides various MIME related
 * functionality. <p>
 *
 * There are a set of methods to encode and decode MIME headers as 
 * per RFC 2047. A brief description on handling such headers is
 * given below: <p>
 *
 * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
 * characters. Headers that contain non US-ASCII characters must be
 * encoded so that they contain only US-ASCII characters. Basically,
 * this process involves using either BASE64 or QP to encode certain
 * characters. RFC 2047 describes this in detail. <p>
 *
 * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
 * subset of Unicode (and occupies the range 0 - 127). A String
 * that contains only ASCII characters is already mail-safe. If the
 * String contains non US-ASCII characters, it must be encoded. An
 * additional complexity in this step is that since Unicode is not
 * yet a widely used charset, one might want to first charset-encode
 * the String into another charset and then do the transfer-encoding.
 * <p>
 * Note that to get the actual bytes of a mail-safe String (say,
 * for sending over SMTP), one must do 
 * <p><blockquote><pre>
 *
 *	byte[] bytes = string.getBytes("iso-8859-1");	
 *
 * </pre></blockquote><p>
 * 
 * The <code>setHeader()</code> and <code>addHeader()</code> methods
 * on MimeMessage and MimeBodyPart assume that the given header values
 * are Unicode strings that contain only US-ASCII characters. Hence
 * the callers of those methods must insure that the values they pass
 * do not contain non US-ASCII characters. The methods in this class 
 * help do this. <p>
 *
 * The <code>getHeader()</code> family of methods on MimeMessage and
 * MimeBodyPart return the raw header value. These might be encoded
 * as per RFC 2047, and if so, must be decoded into Unicode Strings.
 * The methods in this class help to do this.
 *
 * @version 1.32, 00/10/17
 * @author  John Mani
 */

public class MimeUtility {

    // This class cannot be instantiated
    private MimeUtility() { }

    public static final int ALL = -1;

    /**
     * Get the content-transfer-encoding that should be applied
     * to the input stream of this datasource, to make it mailsafe. <p>
     *
     * The algorithm used here is: <br>
     * <ul>
     * <li>
     * If the primary type of this datasource is "text" and if all
     * the bytes in its input stream are US-ASCII, then the encoding
     * is "7bit". If more than half of the bytes are non-US-ASCII, then
     * the encoding is "base64". If less than half of the bytes are
     * non-US-ASCII, then the encoding is "quoted-printable".
     * <li>
     * If the primary type of this datasource is not "text", then if
     * all the bytes of its input stream are US-ASCII, the encoding
     * is "7bit". If there is even one non-US-ASCII character, the
     * encoding is "base64".
     * </ul>
     *
     * @param	ds	DataSource
     * @return		the encoding. This is either "7bit",
     *			"quoted-printable" or "base64"
     */ 
    public static String getEncoding(DataSource ds) {
	ContentType cType = null;
	InputStream is = null;
	String encoding = null;

	try {
	    cType = new ContentType(ds.getContentType());
	    is = ds.getInputStream();
	} catch (Exception ex) {
	    return "base64"; // what else ?!
	}

	if (cType.match("text/*")) {
	    // Check all of the available bytes
	    int i = checkAscii(is, ALL, false);
	    switch (i) {
		case ALL_ASCII:
		    encoding = "7bit"; // all ascii
		    break;
		case MOSTLY_ASCII:
		    encoding = "quoted-printable"; // mostly ascii
		    break;
		default:
	   	    encoding = "base64"; // mostly binary
		    break;
	    }
	} else { // not "text"
	    // Check all of available bytes, break out if we find
	    // at least one non-US-ASCII character
	    if (checkAscii(is, ALL, true) == ALL_ASCII) // all ascii
		encoding = "7bit";
	    else // found atleast one non-ascii character, use b64 
		encoding = "base64";
	}
	
	// Close the input stream
	try {
	    is.close();
	} catch (IOException ioex) { }

	return encoding;
    }

    /**
     * Same as <code>getEncoding(DataSource)</code> except that instead
     * of reading the data from an <code>InputStream</code> it uses the
     * <code>writeTo</code> method to examine the data.  This is more
     * efficient in the common case of a <code>DataHandler</code>
     * created with an object and a MIME type (for example, a
     * "text/plain" String) because all the I/O is done in this
     * thread.  In the case requiring an <code>InputStream</code> the
     * <code>DataHandler</code> uses a thread, a pair of pipe streams,
     * and the <code>writeTo</code> method to produce the data. <p>
     *
     * @since	JavaMail 1.2
     */
    public static String getEncoding(DataHandler dh) {
	ContentType cType = null;
	String encoding = null;

	/*
	 * Try to pick the most efficient means of determining the
	 * encoding.  If this DataHandler was created using a DataSource,
	 * the getEncoding(DataSource) method is typically faster.  If
	 * the DataHandler was created with an object, this method is
	 * much faster.  To distinguish the two cases, we use a heuristic.
	 * A DataHandler created with an object will always have a null name.
	 * A DataHandler created with a DataSource will usually have a
	 * non-null name.
	 *
	 * XXX - This is actually quite a disgusting hack, but it makes
	 *	 a common case run over twice as fast.
	 */
	if (dh.getName() != null)
	    return getEncoding(dh.getDataSource());

	try {
	    cType = new ContentType(dh.getContentType());
	} catch (Exception ex) {
	    return "base64"; // what else ?!
	}

	if (cType.match("text/*")) {
	    // Check all of the available bytes
	    AsciiOutputStream aos = new AsciiOutputStream(false);
	    try {
		dh.writeTo(aos);
	    } catch (IOException ex) { }	// ignore it
	    switch (aos.getAscii()) {
	    case ALL_ASCII:
		encoding = "7bit"; // all ascii
		break;
	    case MOSTLY_ASCII:
		encoding = "quoted-printable"; // mostly ascii
		break;
	    default:
		encoding = "base64"; // mostly binary
		break;
	    }
	} else { // not "text"
	    // Check all of available bytes, break out if we find
	    // at least one non-US-ASCII character
	    AsciiOutputStream aos = new AsciiOutputStream(true);
	    try {
		dh.writeTo(aos);
	    } catch (IOException ex) { }	// ignore it
	    if (aos.getAscii() == ALL_ASCII) // all ascii
		encoding = "7bit";
	    else // found atleast one non-ascii character, use b64 
		encoding = "base64";
	}

	return encoding;
    }

    /**
     * Decode the given input stream. The Input stream returned is
     * the decoded input stream. All the encodings defined in RFC 2045
     * are supported here. They include "base64", "quoted-printable",
     * "7bit", "8bit", and "binary". In addition, "uuencode" is also
     * supported.
     *
     * @param	is		input stream
     * @param	encoding	the encoding of the stream.
     * @return			decoded input stream.
     */
    public static InputStream decode(InputStream is, String encoding)
		throws MessagingException {
	if (encoding.equalsIgnoreCase("base64"))
	    return new BASE64DecoderStream(is);
	else if (encoding.equalsIgnoreCase("quoted-printable"))
	    return new QPDecoderStream(is);
	else if (encoding.equalsIgnoreCase("uuencode") ||
		 encoding.equalsIgnoreCase("x-uuencode"))
	    return new UUDecoderStream(is);
	else if (encoding.equalsIgnoreCase("binary") ||
		 encoding.equalsIgnoreCase("7bit") ||
		 encoding.equalsIgnoreCase("8bit"))
	    return is;
	else
	    throw new MessagingException("Unknown encoding: " + encoding);
    }

    /**
     * Wrap an encoder around the given output stream. 
     * All the encodings defined in RFC 2045 are supported here. 
     * They include "base64", "quoted-printable", "7bit", "8bit" and
     * "binary". In addition, "uuencode" is also supported.
     *
     * @param	os		output stream
     * @param	encoding	the encoding of the stream. 
     * @return			output stream that applies the
     *				specified encoding.
     */
    public static OutputStream encode(OutputStream os, String encoding)
		throws MessagingException {
        if (encoding == null)
	    return os;
	else if (encoding.equalsIgnoreCase("base64"))
	    return new BASE64EncoderStream(os);
	else if (encoding.equalsIgnoreCase("quoted-printable"))
	    return new QPEncoderStream(os);
	else if (encoding.equalsIgnoreCase("uuencode") ||
		 encoding.equalsIgnoreCase("x-uuencode"))
	    return new UUEncoderStream(os);
	else if (encoding.equalsIgnoreCase("binary") ||
		 encoding.equalsIgnoreCase("7bit") ||
		 encoding.equalsIgnoreCase("8bit"))
	    return os;
	else
	    throw new MessagingException("Unknown encoding: " +encoding);
    }

    /**
     * Wrap an encoder around the given output stream.
     * All the encodings defined in RFC 2045 are supported here.
     * They include "base64", "quoted-printable", "7bit", "8bit" and
     * "binary". In addition, "uuencode" is also supported.
     * The <code>filename</code> parameter is used with the "uuencode"
     * encoding and is included in the encoded output.
     *
     * @param   os              output stream
     * @param   encoding        the encoding of the stream.
     * @param   filename        name for the file being encoded (only used
     *                          with uuencode)
     * @return                  output stream that applies the
     *                          specified encoding.
     * @since                   JavaMail 1.2
     */
    public static OutputStream encode(OutputStream os, String encoding,
                                      String filename)
                throws MessagingException {
        if (encoding == null)
            return os;
        else if (encoding.equalsIgnoreCase("base64"))
            return new BASE64EncoderStream(os);
        else if (encoding.equalsIgnoreCase("quoted-printable"))
            return new QPEncoderStream(os);
        else if (encoding.equalsIgnoreCase("uuencode") ||
                 encoding.equalsIgnoreCase("x-uuencode"))
            return new UUEncoderStream(os, filename);
        else if (encoding.equalsIgnoreCase("binary") ||
                 encoding.equalsIgnoreCase("7bit") ||
                 encoding.equalsIgnoreCase("8bit"))
            return os;
        else
            throw new MessagingException("Unknown encoding: " +encoding);
    }

    /**
     * Encode a RFC 822 "text" token into mail-safe form as per
     * RFC 2047. <p>
     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the platform's
     * default charset, then transfer-encoded using either the B or 
     * Q encoding. The resulting bytes are then returned as a Unicode 
     * string containing only ASCII  characters. <p>
     *
     * Note that this method should be used to encode only 
     * "unstructured" RFC 822 headers. <p>
     *
     * Example of usage:
     * <p><blockquote><pre>
     *
     *  MimePart part = ...
     *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
     *  try {
     *    // If we know for sure that rawvalue contains only US-ASCII 
     *    // characters, we can skip the encoding part
     *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
     *  } catch (UnsupportedEncodingException e) {
     *    // encoding failure
     *  } catch (MessagingException me) {
     *   // setHeader() failure
     *  }
     *
     * </pre></blockquote><p>
     * 
     * @param	text	unicode string
     * @return	Unicode string containing only US-ASCII characters
     * @exception UnsupportedEncodingException if the encoding fails
     */
    public static String encodeText(String text)
			throws UnsupportedEncodingException {
	return encodeText(text, null, null);
    }

    /**
     * Encode a RFC 822 "text" token into mail-safe form as per
     * RFC 2047. <p>
     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the specified
     * charset, then transfer-encoded using either the B or Q encoding.
     * The resulting bytes are then returned as a Unicode string 
     * containing only ASCII characters. <p>
     *
     * Note that this method should be used to encode only 
     * "unstructured" RFC 822 headers. 
     * 
     * @param	text	the header value
     * @param	charset	the charset. If this parameter is null, the
     *		platform's default chatset is used.
     * @param	encoding the encoding to be used. Currently supported
     *		values are "B" and "Q". If this parameter is null, then
     *		the "Q" encoding is used if most of characters to be
     *		encoded are in the ASCII charset, otherwise "B" encoding
     *		is used.
     * @return	Unicode string containing only US-ASCII characters
     */
    public static String encodeText(String text, String charset,
				    String encoding)
			throws UnsupportedEncodingException {
	return encodeWord(text, charset, encoding, false);
    }

    /**
     * Decode "unstructured" headers, that is, headers that are defined
     * as '*text' as per RFC 822. <p>
     *
     * The string is decoded using the algorithm specified in
     * RFC 2047, Section 6.1.1. If the charset-conversion fails
     * for any sequence, an UnsupportedEncodingException is thrown.
     * If the String is not an RFC 2047 style encoded header, it is
     * returned as-is <p>
     *
     * Example of usage:
     * <p><blockquote><pre>
     *
     *  MimePart part = ...
     *  String rawvalue = null;
     *  String  value = null;
     *  try {
     *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
     *      value = MimeUtility.decodeText(rawvalue);
     *  } catch (UnsupportedEncodingException e) {
     *      // Don't care
     *      value = rawvalue;
     *  } catch (MessagingException me) { }
     *
     *  return value;
     *
     * </pre></blockquote><p>
     *
     * @param	etext	the possibly encoded value
     * @exception       UnsupportedEncodingException if the charset
     *			conversion failed.
     */
    public static String decodeText(String etext)
		throws UnsupportedEncodingException {
	/*
	 * We look for sequences separated by "linear-white-space".
	 * (as per RFC 2047, Section 6.1.1)
	 * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
	 */
	String lwsp = " \t\n\r";
	StringTokenizer st;

	/*
	 * First, lets do a quick run thru the string and check
	 * whether the sequence "=?"  exists at all. If none exists,
	 * we know there are no encoded-words in here and we can just
	 * return the string as-is, without suffering thru the later 
	 * decoding logic. 
	 * This handles the most common case of unencoded headers 
	 * efficiently.
	 */
	if (etext.indexOf("=?") == -1)
	    return etext;

	// Encoded words found. Start decoding ...

	st = new StringTokenizer(etext, lwsp, true);
	StringBuffer sb = new StringBuffer();  // decode buffer
	StringBuffer wsb = new StringBuffer(); // white space buffer
	boolean prevWasEncoded = false;

	while (st.hasMoreTokens()) {
	    char c;
	    String s = st.nextToken();
	    // If whitespace, append it to the whitespace buffer
	    if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
		(c == '\r') || (c == '\n'))
		wsb.append(c);
	    else {
		// Check if token is an 'encoded-word' ..
		String word;
		try {
		    word = decodeWord(s);
		    // Yes, this IS an 'encoded-word'.
		    if (!prevWasEncoded && wsb.length() > 0) {
			// if the previous word was also encoded, we
			// should ignore the collected whitespace. Else
			// we include the whitespace as well.
			sb.append(wsb);
		    }
		    prevWasEncoded = true;
		} catch (ParseException pex) {
		    // This is NOT an 'encoded-word'.
		    word = s;
		    // include colleced whitespace ..
		    if (wsb.length() > 0)
			sb.append(wsb);
		    prevWasEncoded = false;
		}
		sb.append(word); // append the actual word
		wsb.setLength(0); // reset wsb for reuse
	    }
	}
	return sb.toString();
    }

    /**
     * Encode a RFC 822 "word" token into mail-safe form as per
     * RFC 2047. <p>
     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the platform's
     * default charset, then transfer-encoded using either the B or 
     * Q encoding. The resulting bytes are then returned as a Unicode 
     * string containing only ASCII  characters. <p>
     * 
     * This method is meant to be used when creating RFC 822 "phrases".
     * The InternetAddress class, for example, uses this to encode
     * it's 'phrase' component.
     *
     * @param	text	unicode string
     * @return	Array of Unicode strings containing only US-ASCII 
     *		characters.
     * @exception UnsupportedEncodingException if the encoding fails
     */
    public static String encodeWord(String word) 
			throws UnsupportedEncodingException {
	return encodeWord(word, null, null);
    }

    /**
     * Encode a RFC 822 "word" token into mail-safe form as per
     * RFC 2047. <p>
     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the specified
     * charset, then transfer-encoded using either the B or Q encoding.
     * The resulting bytes are then returned as a Unicode string 
     * containing only ASCII characters. <p>
     * 
     * @param	text	unicode string
     * @param	charset	the MIME charset
     * @param	encoding the encoding to be used. Currently supported
     *		values are "B" and "Q". If this parameter is null, then
     *		the "Q" encoding is used if most of characters to be
     *		encoded are in the ASCII charset, otherwise "B" encoding
     *		is used.
     * @return	Unicode string containing only US-ASCII characters
     * @exception UnsupportedEncodingException if the encoding fails
     */
    public static String encodeWord(String word, String charset, 
				    String encoding)
    			throws UnsupportedEncodingException {
	return encodeWord(word, charset, encoding, true);
    }

    /*
     * Encode the given string. The parameter 'encodingWord' should
     * be true if a RFC 822 "word" token is being encoded and false if a
     * RFC 822 "text" token is being encoded. This is because the 
     * "Q" encoding defined in RFC 2047 has more restrictions when
     * encoding "word" tokens. (Sigh)
     */ 
    private static String encodeWord(String string, String charset,
				     String encoding, boolean encodingWord)
			throws UnsupportedEncodingException {

	// If 'string' contains only US-ASCII characters, just
	// return it.
	if (checkAscii(string) == ALL_ASCII)
	    return string;

	// Else, apply the specified charset conversion.
	String jcharset;
	if (charset == null) { // use default charset
	    jcharset = getDefaultJavaCharset(); // the java charset
	    charset = getDefaultMIMECharset(); // the MIME equivalent
	} else // MIME charset -> java charset
	    jcharset = javaCharset(charset);

	// If no transfer-encoding is specified, figure one out.
	if (encoding == null) {
	    byte[] bytes = string.getBytes(jcharset);
	    if (checkAscii(bytes) != MOSTLY_NONASCII)
		encoding = "Q";
	    else
		encoding = "B";
	}

	boolean b64;
	if (encoding.equalsIgnoreCase("B")) 
	    b64 = true;
	else if (encoding.equalsIgnoreCase("Q"))
	    b64 = false;
	else
	    throw new UnsupportedEncodingException(
			"Unknown transfer encoding: " + encoding);

	StringBuffer outb = new StringBuffer(); // the output buffer
	doEncode(string, b64, jcharset, 
		 // As per RFC 2047, size of an encoded string should not
		 // exceed 75 bytes.
		 // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
		 75 - 7 - charset.length(), // the available space
		 "=?" + charset + "?" + encoding + "?", // prefix
		 true, encodingWord, outb);

	return outb.toString();
    }

    private static void doEncode(String string, boolean b64, 
		String jcharset, int avail, String prefix, 
		boolean first, boolean encodingWord, StringBuffer buf) 
			throws UnsupportedEncodingException {

	// First find out what the length of the encoded version of
	// 'string' would be.
	byte[] bytes = string.getBytes(jcharset);
	int len;
	if (b64) // "B" encoding
	    len = BEncoderStream.encodedLength(bytes);
	else // "Q"
	    len = QEncoderStream.encodedLength(bytes, encodingWord);
	
	int size;
	if ((len > avail) && ((size = string.length()) > 1)) { 
	    // If the length is greater than 'avail', split 'string'
	    // into two and recurse.
	    doEncode(string.substring(0, size2), b64, jcharset, 
		     avail, prefix, first, encodingWord, buf);
	    doEncode(string.substring(size2, size), b64, jcharset,
		     avail, prefix, false, encodingWord, buf);
	} else {
	    // length <= than 'avail'. Encode the given string
	    ByteArrayOutputStream os = new ByteArrayOutputStream();
	    OutputStream eos; // the encoder
	    if (b64) // "B" encoding
		eos = new BEncoderStream(os);
	    else // "Q" encoding
		eos = new QEncoderStream(os, encodingWord);
	    
	    try { // do the encoding
		eos.write(bytes);
		eos.close();
	    } catch (IOException ioex) { }

	    byte[] encodedBytes = os.toByteArray(); // the encoded stuff
	    // Now write out the encoded (all ASCII) bytes into our
	    // StringBuffer
	    if (!first) // not the first line of this sequence
		buf.append("\r\n "); // start a continuation line

	    buf.append(prefix);
	    for (int i = 0; i < encodedBytes.length; i++)
		buf.append((char)encodedBytes[i]);
	    buf.append("?="); // terminate the current sequence
	}
    }

    /**
     * The string is parsed using the rules in RFC 2047 for parsing
     * an "encoded-word". If the parse fails, a ParseException is 
     * thrown. Otherwise, it is transfer-decoded, and then 
     * charset-converted into Unicode. If the charset-conversion
     * fails, an UnsupportedEncodingException is thrown.<p>
     *
     * @param	eword	the possibly encoded value
     * @exception       ParseException if the string is not an
     *			encoded-word as per RFC 2047.
     * @exception       UnsupportedEncodingException if the charset
     *			conversion failed.
     */
    public static String decodeWord(String eword)
		throws ParseException, UnsupportedEncodingException {

	if (!eword.startsWith("=?")) // not an encoded word
	    throw new ParseException();
	
	// get charset
	int start = 2; int pos; 
	if ((pos = eword.indexOf('?', start)) == -1)
	    throw new ParseException();
	String charset = javaCharset(eword.substring(start, pos));

	// get encoding
	start = pos+1;
	if ((pos = eword.indexOf('?', start)) == -1)
	    throw new ParseException();
	String encoding = eword.substring(start, pos);

	// get encoded-sequence
	start = pos+1;
	if ((pos = eword.indexOf("?=", start)) == -1)
	    throw new ParseException();
	String word = eword.substring(start, pos);

	try {
	    // Extract the bytes from word
	    ByteArrayInputStream bis = 
		new ByteArrayInputStream(ASCIIUtility.getBytes(word));

	    // Get the appropriate decoder
	    InputStream is;
	    if (encoding.equalsIgnoreCase("B")) 
		is = new BASE64DecoderStream(bis);
	    else if (encoding.equalsIgnoreCase("Q"))
		is = new QDecoderStream(bis);
	    else
		throw new UnsupportedEncodingException(
				"unknown encoding: " + encoding);

	    // For b64 & q, size of decoded word <= size of word. So
	    // the decoded bytes must fit into the 'bytes' array. This
	    // is certainly more efficient than writing bytes into a
	    // ByteArrayOutputStream and then pulling out the byte[]
	    // from it.
	    int count = bis.available();
	    byte[] bytes = new byte[count];
	    // count is set to the actual number of decoded bytes 
	    count = is.read(bytes, 0, count);

	    // Finally, convert the decoded bytes into a String using
	    // the specified charset
	    return new String(bytes, 0, count, charset);
	} catch (UnsupportedEncodingException uex) {
	    // explicitly catch and rethrow this exception, otherwise
	    // the below IOException catch will swallow this up!
	    throw uex;
	} catch (IOException ioex) {
	    // Shouldn't happen.
	    throw new ParseException();
	} catch (IllegalArgumentException iex) {
	    /* An unknown charset of the form ISO-XXX-XXX, will cause
	     * the JDK to throw an IllegalArgumentException ... Since the
	     * JDK will attempt to create a classname using this string,
	     * but valid classnames must not contain the character '-',
	     * and this results in an IllegalArgumentException, rather than
	     * the expected UnsupportedEncodingException. Yikes
	     */
	    throw new UnsupportedEncodingException();
	}
    }

    /**
     * A utility method to quote a word, if the word contains any
     * characters from the specified 'specials' list.<p>
     *
     * The <code>HeaderTokenizer</code> class defines two special
     * sets of delimiters - MIME and RFC 822. <p>
     *
     * This method is typically used during the generation of 
     * RFC 822 and MIME header fields.
     *
     * @param	word	word to be quoted
     * @param	specials the set of special characters
     * @return		the possibly quoted word
     * @see	javax.mail.internet.HeaderTokenizer#MIME
     * @see	javax.mail.internet.HeaderTokenizer#RFC822
     */
    public static String quote(String word, String specials) {
	int len = word.length();

	/*
	 * Look for any "bad" characters, Escape and
	 *  quote the entire string if necessary.
	 */
	boolean needQuoting = false;
	for (int i = 0; i < len; i++) {
	    char c = word.charAt(i);
	    if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
		// need to escape them and then quote the whole string
		StringBuffer sb = new StringBuffer(len + 3);
		sb.append('"');
		for (int j = 0; j < len; j++) {
		    char cc = word.charAt(j);
		    if ((cc == '"') || (cc == '\\') || 
			(cc == '\r') || (cc == '\n'))
			// Escape the character
			sb.append('\\');
		    sb.append(cc);
		}
		sb.append('"');
		return sb.toString();
	    } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
		// These characters cause the string to be quoted
		needQuoting = true;
	}

	if (needQuoting) {
	    StringBuffer sb = new StringBuffer(len + 2);
	    sb.append('"').append(word).append('"');
	    return sb.toString();
	} else 
	    return word;
    }

    /**
     * Convert a MIME charset name into a valid Java charset name. <p>
     *
     * @param charset	the MIME charset name
     * @return  the Java charset equivalent. If a suitable mapping is
     *		not available, the passed in charset is itself returned.
     */
    public static String javaCharset(String charset) {
	if (mime2java == null || charset == null)
	    // no mapping table, or charset parameter is null
	    return charset;

	String alias = (String)mime2java.get(charset.toLowerCase());
	return alias == null ? charset : alias;
    }

    /**
     * Convert a java charset into its MIME charset name. <p>
     *
     * Note that a future version of JDK (post 1.2) might provide
     * this functionality, in which case, we may deprecate this
     * method then.
     *
     * @param   charset    the JDK charset
     * @return      	the MIME/IANA equivalent. If a mapping
     *			is not possible, the passed in charset itself
     *			is returned.
     * @since		JavaMail 1.1
     */
    public static String mimeCharset(String charset) {
	if (java2mime == null || charset == null) 
	    // no mapping table or charset param is null
	    return charset;

	String alias = (String)java2mime.get(charset.toLowerCase());
	return alias == null ? charset : alias;
    }

    private static String defaultJavaCharset;
    private static String defaultMIMECharset;

    /**
     * Get the default charset corresponding to the system's current 
     * default locale. <p>
     * 
     * @return	the default charset of the system's default locale, 
     * 		as a Java charset. (NOT a MIME charset)
     * @since	JavaMail 1.1
     */
    public static String getDefaultJavaCharset() {
	if (defaultJavaCharset == null) {
	    try {
		defaultJavaCharset = System.getProperty("file.encoding", 
							"8859_1");
	    } catch (SecurityException sex) {
		
		class NullInputStream extends InputStream {
		    public int read() {
			return 0;
		    }
		}
		InputStreamReader reader = 
			new InputStreamReader(new NullInputStream());
		defaultJavaCharset = reader.getEncoding();
		if (defaultJavaCharset == null)
		    defaultJavaCharset = "8859_1";
	    }
	}

	return defaultJavaCharset;
    }

    /*
     * Get the default MIME charset for this locale.
     */
    static String getDefaultMIMECharset() {
	if (defaultMIMECharset == null)
	    defaultMIMECharset = System.getProperty("mail.mime.charset");
	if (defaultMIMECharset == null)
	    defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
	return defaultMIMECharset;
    }

    private static Hashtable mime2java;
    private static Hashtable java2mime;

    static {

	// Use this class's classloader to load the mapping file
	InputStream is = 
		javax.mail.internet.MimeUtility.class.getResourceAsStream(
		"/META-INF/javamail.charset.map");

	if (is != null) {
	    is = new LineInputStream(is);

	    // Load the JDK-to-MIME charset mapping table
	    java2mime = new Hashtable(20);
	    loadMappings((LineInputStream)is, java2mime);

	    // Load the MIME-to-JDK charset mapping table
	    mime2java = new Hashtable(10);
	    loadMappings((LineInputStream)is, mime2java);
	}
    }

    private static void loadMappings(LineInputStream is, Hashtable table) {
	String currLine;

	while (true) {
	    try {
		currLine = is.readLine();
	    } catch (IOException ioex) {
		break; // error in reading, stop
	    }

	    if (currLine == null) // end of file, stop
		break;
	    if (currLine.startsWith("--") && currLine.endsWith("--"))
		// end of this table
		break;	

	    // ignore empty lines and comments
	    if (currLine.trim().length() == 0 || currLine.startsWith("#"))
		continue;
	    
	    // A valid entry is of the form <key><separator><value>
	    // where, <separator> := SPACE | HT. Parse this
	    StringTokenizer tk = new StringTokenizer(currLine, " \t");
	    try {
		String key = tk.nextToken();
		String value = tk.nextToken();
		table.put(key.toLowerCase(), value);
	    } catch (NoSuchElementException nex) { }
	}
    }

    static final int ALL_ASCII 		= 1;
    static final int MOSTLY_ASCII 	= 2;
    static final int MOSTLY_NONASCII 	= 3;

    /** 
     * Check if the given string contains non US-ASCII characters.
     * @param	s	string
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_NONASCII
     *			if any one character is non-ascii.
     */
    static int checkAscii(String s) {
	int l = s.length();

	for (int i=0; i < l; i++) {
	    if (nonascii((int)s.charAt(i))) // non-ascii
		return MOSTLY_NONASCII;
	}
	
	return ALL_ASCII; // all ascii
    }

    /** 
     * Check if the given byte array contains non US-ASCII characters.
     * @param	b	byte array
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_ASCII
     *			if more than half of the available characters
     *			are US-ASCII characters. Else MOSTLY_NONASCII.
     */
    static int checkAscii(byte[] b) {
	int ascii = 0, non_ascii = 0;

	for (int i=0; i < b.length; i++) {
	    // The '&' operator automatically causes b[i] to be promoted
	    // to an int, and we mask out the higher bytes in the int 
	    // so that the resulting value is not a negative integer.
	    if (nonascii(b[i] & 0xff)) // non-ascii
		non_ascii++;
	    else
		ascii++;
	}
	
	if (non_ascii == 0)
	    return ALL_ASCII;
	if (ascii > non_ascii)
	    return MOSTLY_ASCII;
	
	return MOSTLY_NONASCII;
    }

    /** 
     * Check if the given input stream contains non US-ASCII characters.
     * Upto <code>max</code> bytes are checked. If <code>max</code> is
     * set to <code>ALL</code>, then all the bytes available in this
     * input stream are checked. If <code>breakOnNonAscii</code> is true
     * the check terminates when the first non-US-ASCII character is
     * found and MOSTLY_NONASCII is returned. Else, the check continues
     * till <code>max</code> bytes or till the end of stream.
     *
     * @param	is	the input stream
     * @param	max	maximum bytes to check for. The special value
     *			ALL indicates that all the bytes in this input
     *			stream must be checked.
     * @param	breakOnNonAscii if <code>true</code>, then terminate the
     *			the check when the first non-US-ASCII character
     *			is found.
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_ASCII
     *			if more than half of the available characters
     *			are US-ASCII characters. Else MOSTLY_NONASCII.
     */
    static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
	int ascii = 0, non_ascii = 0;
	int len;
	int block = 4096;
	int linelen = 0;
	boolean longLine = false;
	byte buf[] = null;
	if (max != 0) {
	    block = (max == ALL) ? 4096 : Math.min(max, 4096);
	    buf = new byte[block]; 
	}
	while (max != 0) {
	    try {
		if ((len = is.read(buf, 0, block)) == -1)
		    break;
		for (int i = 0; i < len; i++) {
	    	    // The '&' operator automatically causes b[i] to 
		    // be promoted to an int, and we mask out the higher
		    // bytes in the int so that the resulting value is 
		    // not a negative integer.
		    int b = buf[i] & 0xff;
		    if (b == '\r' || b == '\n')
			linelen = 0;
		    else {
			linelen++;
			if (linelen > 998)	// 1000 - CRLF
			    longLine = true;
		    }
		    if (nonascii(b)) {	// non-ascii
		        if (breakOnNonAscii) // we are done
			    return MOSTLY_NONASCII;
		        else
			    non_ascii++;
		    } else
		        ascii++;
		}
	    } catch (IOException ioex) {
		break;
	    }
	    if (max != ALL)
		max -= len;
	}

	if (max == 0 && breakOnNonAscii)
	    // We have been told to break on the first non-ascii character.
	    // We haven't got any non-ascii character yet, but then we
	    // have not checked all of the available bytes either. So we
	    // cannot say for sure that this input stream is ALL_ASCII,
	    // and hence we must play safe and return MOSTLY_NONASCII

	    return MOSTLY_NONASCII;

	if (non_ascii == 0) { // no non-us-ascii characters so far
	    // if we've seen a long line, we degrade to mostly ascii
	    if (longLine)
		return MOSTLY_ASCII;
	    else
		return ALL_ASCII;
	}
	if (ascii > non_ascii) // mostly ascii
	    return MOSTLY_ASCII;
	return MOSTLY_NONASCII;
    }

    private static final boolean nonascii(int b) {
	return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
    }
}

/**
 * An OutputStream that determines whether the data written to
 * it is all ASCII, mostly ASCII, or mostly non-ASCII.
 */
class AsciiOutputStream extends OutputStream {
    private boolean breakOnNonAscii;
    private int ascii = 0, non_ascii = 0;
    private int linelen = 0;
    private boolean longLine = false;
    private int ret = 0;

    public AsciiOutputStream(boolean breakOnNonAscii) {
	this.breakOnNonAscii = breakOnNonAscii;
    }

    public void write(int b) throws IOException {
	check(b);
    }

    public void write(byte b[]) throws IOException {
	write(b, 0, b.length);
    }

    public void write(byte b[], int off, int len) throws IOException {
	len += off;
	for (int i = off; i < len ; i++)
	    check(b[i]);
    }

    private final void check(int b) throws IOException {
	b &= 0xff;
	if (b == '\r' || b == '\n')
	    linelen = 0;
	else {
	    linelen++;
	    if (linelen > 998)	// 1000 - CRLF
		longLine = true;
	}
	if (b > 0177) { // non-ascii
	    non_ascii++;
	    if (breakOnNonAscii) {	// we are done
		ret = MimeUtility.MOSTLY_NONASCII;
		throw new EOFException();
	    }
	} else
	    ascii++;
    }

    /**
     * Return ASCII-ness of data stream.
     */
    public int getAscii() {
	if (ret != 0)
	    return ret;
	if (non_ascii == 0) { // no non-us-ascii characters so far
	    // if we've seen a long line, we degrade to mostly ascii
	    if (longLine)
		return MimeUtility.MOSTLY_ASCII;
	    else
		return MimeUtility.ALL_ASCII;
	}
	if (ascii > non_ascii) // mostly ascii
	    return MimeUtility.MOSTLY_ASCII;
	return MimeUtility.MOSTLY_NONASCII;
    }
}