1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xml.internal.serialize;
  58. import java.io.UnsupportedEncodingException;
  59. import java.util.Hashtable;
  60. import java.util.Locale;
  61. import com.sun.org.apache.xerces.internal.util.EncodingMap;
  62. /**
  63. * Provides information about encodings. Depends on the Java runtime
  64. * to provides writers for the different encodings, but can be used
  65. * to override encoding names and provide the last printable character
  66. * for each encoding.
  67. *
  68. * @version $Id: Encodings.java,v 1.8 2003/07/18 16:47:22 mrglavas Exp $
  69. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  70. */
  71. public class Encodings
  72. {
  73. /**
  74. * The last printable character for unknown encodings.
  75. */
  76. static final int DEFAULT_LAST_PRINTABLE = 0x7F;
  77. // last printable character for Unicode-compatible encodings
  78. static final int LAST_PRINTABLE_UNICODE = 0xffff;
  79. // unicode-compliant encodings; can express plane 0
  80. static final String[] UNICODE_ENCODINGS = {
  81. "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312", "UTF8",
  82. };
  83. // default (Java) encoding if none supplied:
  84. static final String DEFAULT_ENCODING = "UTF8";
  85. // note that the size of this Hashtable
  86. // is bounded by the number of encodings recognized by EncodingMap;
  87. // therefore it poses no static mutability risk.
  88. static Hashtable _encodings = new Hashtable();
  89. /**
  90. * @param encoding a MIME charset name, or null.
  91. */
  92. static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames) throws UnsupportedEncodingException {
  93. EncodingInfo eInfo = null;
  94. if (encoding == null) {
  95. if((eInfo = (EncodingInfo)_encodings.get(DEFAULT_ENCODING)) != null)
  96. return eInfo;
  97. eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING), DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE);
  98. _encodings.put(DEFAULT_ENCODING, eInfo);
  99. return eInfo;
  100. }
  101. // need to convert it to upper case:
  102. encoding = encoding.toUpperCase(Locale.ENGLISH);
  103. String jName = EncodingMap.getIANA2JavaMapping(encoding);
  104. if(jName == null) {
  105. // see if the encoding passed in is a Java encoding name.
  106. if(allowJavaNames ) {
  107. EncodingInfo.testJavaEncodingName(encoding);
  108. if((eInfo = (EncodingInfo)_encodings.get(encoding)) != null)
  109. return eInfo;
  110. // is it known to be unicode-compliant?
  111. int i=0;
  112. for(; i<UNICODE_ENCODINGS.length; i++) {
  113. if(UNICODE_ENCODINGS[i].equalsIgnoreCase(encoding)) {
  114. eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, LAST_PRINTABLE_UNICODE);
  115. break;
  116. }
  117. }
  118. if(i == UNICODE_ENCODINGS.length) {
  119. eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, DEFAULT_LAST_PRINTABLE);
  120. }
  121. _encodings.put(encoding, eInfo);
  122. return eInfo;
  123. } else {
  124. throw new UnsupportedEncodingException(encoding);
  125. }
  126. }
  127. if ((eInfo = (EncodingInfo)_encodings.get(jName)) != null)
  128. return eInfo;
  129. // have to create one...
  130. // is it known to be unicode-compliant?
  131. int i=0;
  132. for(; i<UNICODE_ENCODINGS.length; i++) {
  133. if(UNICODE_ENCODINGS[i].equalsIgnoreCase(jName)) {
  134. eInfo = new EncodingInfo(encoding, jName, LAST_PRINTABLE_UNICODE);
  135. break;
  136. }
  137. }
  138. if(i == UNICODE_ENCODINGS.length) {
  139. eInfo = new EncodingInfo(encoding, jName, DEFAULT_LAST_PRINTABLE);
  140. }
  141. _encodings.put(jName, eInfo);
  142. return eInfo;
  143. }
  144. static final String JIS_DANGER_CHARS
  145. = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac"
  146. +"\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c"
  147. +"\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3";
  148. }