1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: Encodings.java,v 1.8 2004/02/23 10:29:37 aruny Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.serializer;
  20. import java.io.InputStream;
  21. import java.io.OutputStream;
  22. import java.io.OutputStreamWriter;
  23. import java.io.UnsupportedEncodingException;
  24. import java.io.Writer;
  25. import java.lang.reflect.Method;
  26. import java.net.URL;
  27. import java.util.Enumeration;
  28. import java.util.Hashtable;
  29. import java.util.Properties;
  30. import java.util.StringTokenizer;
  31. import java.security.PrivilegedAction;
  32. import java.security.AccessController;
  33. /**
  34. * Provides information about encodings. Depends on the Java runtime
  35. * to provide writers for the different encodings, but can be used
  36. * to override encoding names and provide the last printable character
  37. * for each encoding.
  38. *
  39. * @version $Revision: 1.8 $ $Date: 2004/02/23 10:29:37 $
  40. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  41. */
  42. public class Encodings extends Object
  43. {
  44. /**
  45. * The last printable character reported for encodings that are found in neither lookup table.
  46. */
  47. static final int m_defaultLastPrintable = 0x7F;
  48. /**
  49. * Resource name of the bundled properties file with encodings data, resolved through a class loader.
  50. */
  51. static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";
  52. /**
  53. * Name of the system property which, when set to a non-empty value, is parsed as the URL of an external encodings properties file that is loaded instead of ENCODINGS_FILE.
  54. */
  55. static final String ENCODINGS_PROP = "com.sun.org.apache.xalan.internal.serialize.encodings";
  56. /** Result of findCharToByteConverterMethod(), intended to be sun.io.CharToByteConverter.getConverter(String); getCharToByteConverter returns null whenever this is null. */
  57. private static final Method
  58. SUN_CHAR2BYTE_CONVERTER_METHOD = findCharToByteConverterMethod();
  59. private static Method findCharToByteConverterMethod() {
  60. try
  61. {
  62. AccessController.doPrivileged(new PrivilegedAction() {
  63. public Object run() {
  64. try {
  65. Class charToByteConverterClass = (Class)
  66. Class.forName("sun.io.CharToByteConverter");
  67. Class argTypes[] = {String.class};
  68. return charToByteConverterClass.getMethod("getConverter", argTypes);
  69. }
  70. catch (Exception e) {
  71. throw new RuntimeException(e.toString());
  72. }
  73. }});
  74. }
  75. catch (Exception e)
  76. {
  77. System.err.println(
  78. "Warning: Could not get charToByteConverterClass!");
  79. }
  80. return null;
  81. }
  82. /**
  83. * Returns a writer for the specified encoding based on
  84. * an output stream.
  85. *
  86. * @param output The output stream
  87. * @param encoding The encoding
  88. * @return A suitable writer
  89. * @throws UnsupportedEncodingException There is no convertor
  90. * to support this encoding
  91. */
  92. public static Writer getWriter(OutputStream output, String encoding)
  93. throws UnsupportedEncodingException
  94. {
  95. for (int i = 0; i < _encodings.length; ++i)
  96. {
  97. if (_encodings[i].name.equalsIgnoreCase(encoding))
  98. {
  99. try
  100. {
  101. return new OutputStreamWriter(
  102. output,
  103. _encodings[i].javaName);
  104. }
  105. catch (java.lang.IllegalArgumentException iae) // java 1.1.8
  106. {
  107. // keep trying
  108. }
  109. catch (UnsupportedEncodingException usee)
  110. {
  111. // keep trying
  112. }
  113. }
  114. }
  115. try
  116. {
  117. return new OutputStreamWriter(output, encoding);
  118. }
  119. catch (java.lang.IllegalArgumentException iae) // java 1.1.8
  120. {
  121. throw new UnsupportedEncodingException(encoding);
  122. }
  123. }
  124. /**
  125. * Returns an opaque CharToByte converter for the specified encoding.
  126. *
  127. * @param encoding The encoding
  128. * @return An object which should be a sun.io.CharToByteConverter, or null.
  129. */
  130. public static Object getCharToByteConverter(String encoding)
  131. {
  132. if (SUN_CHAR2BYTE_CONVERTER_METHOD == null) {
  133. return null;
  134. }
  135. Object args[] = new Object[1];
  136. for (int i = 0; i < _encodings.length; ++i)
  137. {
  138. if (_encodings[i].name.equalsIgnoreCase(encoding))
  139. {
  140. try
  141. {
  142. args[0] = _encodings[i].javaName;
  143. Object converter =
  144. SUN_CHAR2BYTE_CONVERTER_METHOD.invoke(null, args);
  145. if (null != converter)
  146. return converter;
  147. }
  148. catch (Exception iae)
  149. {
  150. // keep trying
  151. }
  152. }
  153. }
  154. return null;
  155. }
  156. /**
  157. * Returns the last printable character for the specified
  158. * encoding.
  159. *
  160. * @param encoding The encoding
  161. * @return The last printable character
  162. */
  163. public static int getLastPrintable(String encoding)
  164. {
  165. EncodingInfo ei;
  166. String normalizedEncoding = encoding.toUpperCase();
  167. ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
  168. if (ei == null)
  169. ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
  170. if (ei != null)
  171. return ei.lastPrintable;
  172. return m_defaultLastPrintable;
  173. }
  174. /**
  175. * Returns the last printable character for an unspecified
  176. * encoding.
  177. *
  178. * @return the default size
  179. */
  180. public static int getLastPrintable()
  181. {
  182. return m_defaultLastPrintable;
  183. }
  184. /** The default encoding name, in MIME style; getMimeEncoding falls back to it when it is given no encoding. */
  185. public static final String DEFAULT_MIME_ENCODING = "UTF-8";
  186. /**
  187. * Get the proper mime encoding. From the XSLT recommendation: "The encoding
  188. * attribute specifies the preferred encoding to use for outputting the result
  189. * tree. XSLT processors are required to respect values of UTF-8 and UTF-16.
  190. * For other values, if the XSLT processor does not support the specified
  191. * encoding it may signal an error; if it does not signal an error it should
  192. * use UTF-8 or UTF-16 instead. The XSLT processor must not use an encoding
  193. * whose name does not match the EncName production of the XML Recommendation
  194. * [XML]. If no encoding attribute is specified, then the XSLT processor should
  195. * use either UTF-8 or UTF-16."
  196. *
  197. * @param encoding Reference to java-style encoding string, which may be null,
  198. * in which case a default will be found.
  199. *
  200. * @return The ISO-style encoding string, or null if failure.
  201. */
  202. public static String getMimeEncoding(String encoding)
  203. {
  204. if (null == encoding)
  205. {
  206. try
  207. {
  208. // Get the default system character encoding. This may be
  209. // incorrect if they passed in a writer, but right now there
  210. // seems to be no way to get the encoding from a writer.
  211. encoding = System.getProperty("file.encoding", "UTF8");
  212. if (null != encoding)
  213. {
  214. /*
  215. * See if the mime type is equal to UTF8. If you don't
  216. * do that, then convertJava2MimeEncoding will convert
  217. * 8859_1 to "ISO-8859-1", which is not what we want,
  218. * I think, and I don't think I want to alter the tables
  219. * to convert everything to UTF-8.
  220. */
  221. String jencoding =
  222. (encoding.equalsIgnoreCase("Cp1252")
  223. || encoding.equalsIgnoreCase("ISO8859_1")
  224. || encoding.equalsIgnoreCase("8859_1")
  225. || encoding.equalsIgnoreCase("UTF8"))
  226. ? DEFAULT_MIME_ENCODING
  227. : convertJava2MimeEncoding(encoding);
  228. encoding =
  229. (null != jencoding) ? jencoding : DEFAULT_MIME_ENCODING;
  230. }
  231. else
  232. {
  233. encoding = DEFAULT_MIME_ENCODING;
  234. }
  235. }
  236. catch (SecurityException se)
  237. {
  238. encoding = DEFAULT_MIME_ENCODING;
  239. }
  240. }
  241. else
  242. {
  243. encoding = convertJava2MimeEncoding(encoding);
  244. }
  245. return encoding;
  246. }
  247. /**
  248. * Try the best we can to convert a Java encoding to a XML-style encoding.
  249. *
  250. * @param encoding non-null reference to encoding string, java style.
  251. *
  252. * @return ISO-style encoding string.
  253. */
  254. public static String convertJava2MimeEncoding(String encoding)
  255. {
  256. EncodingInfo enc =
  257. (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
  258. if (null != enc)
  259. return enc.name;
  260. return encoding;
  261. }
  262. /**
  263. * Try the best we can to convert a Java encoding to a XML-style encoding.
  264. *
  265. * @param encoding non-null reference to encoding string, java style.
  266. *
  267. * @return ISO-style encoding string.
  268. */
  269. public static String convertMime2JavaEncoding(String encoding)
  270. {
  271. for (int i = 0; i < _encodings.length; ++i)
  272. {
  273. if (_encodings[i].name.equalsIgnoreCase(encoding))
  274. {
  275. return _encodings[i].javaName;
  276. }
  277. }
  278. return encoding;
  279. }
  280. /**
  281. * Load a list of all the supported encodings.
  282. *
  283. * System property "encodings" formatted using URL syntax may define an
  284. * external encodings list. Thanks to Sergey Ushakov for the code
  285. * contribution!
  286. */
  287. private static EncodingInfo[] loadEncodingInfo()
  288. {
  289. URL url = null;
  290. try
  291. {
  292. String urlString = null;
  293. InputStream is = null;
  294. try
  295. {
  296. urlString = System.getProperty(ENCODINGS_PROP, "");
  297. }
  298. catch (SecurityException e)
  299. {
  300. }
  301. if (urlString != null && urlString.length() > 0) {
  302. url = new URL(urlString);
  303. is = url.openStream();
  304. }
  305. if (is == null) {
  306. SecuritySupport ss = SecuritySupport.getInstance();
  307. is = ss.getResourceAsStream(ObjectFactory.findClassLoader(),
  308. ENCODINGS_FILE);
  309. }
  310. Properties props = new Properties();
  311. if (is != null) {
  312. props.load(is);
  313. is.close();
  314. } else {
  315. // Seems to be no real need to force failure here, let the
  316. // system do its best... The issue is not really very critical,
  317. // and the output will be in any case _correct_ though maybe not
  318. // always human-friendly... :)
  319. // But maybe report/log the resource problem?
  320. // Any standard ways to report/log errors (in static context)?
  321. }
  322. int totalEntries = props.size();
  323. int totalMimeNames = 0;
  324. Enumeration keys = props.keys();
  325. for (int i = 0; i < totalEntries; ++i)
  326. {
  327. String javaName = (String) keys.nextElement();
  328. String val = props.getProperty(javaName);
  329. totalMimeNames++;
  330. int pos = val.indexOf(' ');
  331. for (int j = 0; j < pos; ++j)
  332. if (val.charAt(j) == ',')
  333. totalMimeNames++;
  334. }
  335. EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
  336. int j = 0;
  337. keys = props.keys();
  338. for (int i = 0; i < totalEntries; ++i)
  339. {
  340. String javaName = (String) keys.nextElement();
  341. String val = props.getProperty(javaName);
  342. int pos = val.indexOf(' ');
  343. String mimeName;
  344. int lastPrintable;
  345. if (pos < 0)
  346. {
  347. // Maybe report/log this problem?
  348. // "Last printable character not defined for encoding " +
  349. // mimeName + " (" + val + ")" ...
  350. mimeName = val;
  351. lastPrintable = 0x00FF;
  352. }
  353. else
  354. {
  355. lastPrintable =
  356. Integer.decode(val.substring(pos).trim()).intValue();
  357. StringTokenizer st =
  358. new StringTokenizer(val.substring(0, pos), ",");
  359. for (boolean first = true;
  360. st.hasMoreTokens();
  361. first = false)
  362. {
  363. mimeName = st.nextToken();
  364. ret[j] =
  365. new EncodingInfo(mimeName, javaName, lastPrintable);
  366. _encodingTableKeyMime.put(
  367. mimeName.toUpperCase(),
  368. ret[j]);
  369. if (first)
  370. _encodingTableKeyJava.put(
  371. javaName.toUpperCase(),
  372. ret[j]);
  373. j++;
  374. }
  375. }
  376. }
  377. return ret;
  378. }
  379. catch (java.net.MalformedURLException mue)
  380. {
  381. throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(mue);
  382. }
  383. catch (java.io.IOException ioe)
  384. {
  385. throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(ioe);
  386. }
  387. }
  // Lookup tables filled in by loadEncodingInfo(); keys are upper-cased names, values are EncodingInfo.
  // _encodingTableKeyJava is keyed by Java encoding name, _encodingTableKeyMime by MIME encoding name.
  // Both must stay declared before _encodings: static initializers run in textual order, and
  // loadEncodingInfo() puts entries into both tables while _encodings is being initialized.
  388. private static final Hashtable _encodingTableKeyJava = new Hashtable();
  389. private static final Hashtable _encodingTableKeyMime = new Hashtable();
  390. private static final EncodingInfo[] _encodings = loadEncodingInfo();
  391. }