1. /*
  2. * @(#)StringCoding.java 1.13 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.lang;
  8. import java.io.CharConversionException;
  9. import java.io.UnsupportedEncodingException;
  10. import java.lang.ref.SoftReference;
  11. import java.nio.ByteBuffer;
  12. import java.nio.CharBuffer;
  13. import java.nio.BufferOverflowException;
  14. import java.nio.BufferUnderflowException;
  15. import java.nio.charset.Charset;
  16. import java.nio.charset.CharsetDecoder;
  17. import java.nio.charset.CharsetEncoder;
  18. import java.nio.charset.CharacterCodingException;
  19. import java.nio.charset.CoderResult;
  20. import java.nio.charset.CodingErrorAction;
  21. import java.nio.charset.IllegalCharsetNameException;
  22. import java.nio.charset.MalformedInputException;
  23. import java.nio.charset.UnsupportedCharsetException;
  24. import sun.io.ByteToCharConverter;
  25. import sun.io.CharToByteConverter;
  26. import sun.io.Converters;
  27. import sun.misc.MessageUtils;
  28. import sun.nio.cs.HistoricallyNamedCharset;
  29. /**
  30. * Utility class for string encoding and decoding.
  31. */
  32. class StringCoding {
  33. private StringCoding() { }
  34. /* The cached coders for each thread
  35. */
  36. private static ThreadLocal decoder = new ThreadLocal();
  37. private static ThreadLocal encoder = new ThreadLocal();
  38. private static boolean warnUnsupportedCharset = true;
  39. private static Object deref(ThreadLocal tl) {
  40. SoftReference sr = (SoftReference)tl.get();
  41. if (sr == null)
  42. return null;
  43. return sr.get();
  44. }
  45. private static void set(ThreadLocal tl, Object ob) {
  46. tl.set(new SoftReference(ob));
  47. }
  48. // Trim the given byte array to the given length
  49. //
  50. private static byte[] trim(byte[] ba, int len) {
  51. if (len == ba.length)
  52. return ba;
  53. byte[] tba = new byte[len];
  54. System.arraycopy(ba, 0, tba, 0, len);
  55. return tba;
  56. }
  57. // Trim the given char array to the given length
  58. //
  59. private static char[] trim(char[] ca, int len) {
  60. if (len == ca.length)
  61. return ca;
  62. char[] tca = new char[len];
  63. System.arraycopy(ca, 0, tca, 0, len);
  64. return tca;
  65. }
  66. private static Charset lookupCharset(String csn) {
  67. if (Charset.isSupported(csn)) {
  68. try {
  69. return Charset.forName(csn);
  70. } catch (UnsupportedCharsetException x) {
  71. throw new Error(x);
  72. }
  73. }
  74. return null;
  75. }
  76. private static void warnUnsupportedCharset(String csn) {
  77. if (warnUnsupportedCharset) {
  78. // Use sun.misc.MessageUtils rather than the Logging API or
  79. // System.err since this method may be called during VM
  80. // initialization before either is available.
  81. MessageUtils.err("WARNING: Default charset " + csn +
  82. " not supported, using ISO-8859-1 instead");
  83. warnUnsupportedCharset = false;
  84. }
  85. }
  86. // -- Decoding --
  87. // Encapsulates either a ByteToCharConverter or a CharsetDecoder
  88. //
  89. private static abstract class StringDecoder {
  90. private final String requestedCharsetName;
  91. protected StringDecoder(String requestedCharsetName) {
  92. this.requestedCharsetName = requestedCharsetName;
  93. }
  94. final String requestedCharsetName() {
  95. return requestedCharsetName;
  96. }
  97. abstract String charsetName();
  98. abstract char[] decode(byte[] ba, int off, int len);
  99. }
  100. // A string decoder based upon a ByteToCharConverter
  101. //
  102. private static class ConverterSD
  103. extends StringDecoder
  104. {
  105. private ByteToCharConverter btc;
  106. private ConverterSD(ByteToCharConverter btc, String rcn) {
  107. super(rcn);
  108. this.btc = btc;
  109. }
  110. String charsetName() {
  111. return btc.getCharacterEncoding();
  112. }
  113. char[] decode(byte[] ba, int off, int len) {
  114. int en = btc.getMaxCharsPerByte() * len;
  115. char[] ca = new char[en];
  116. if (len == 0)
  117. return ca;
  118. btc.reset();
  119. int n = 0;
  120. try {
  121. n = btc.convert(ba, off, off + len, ca, 0, en);
  122. n += btc.flush(ca, btc.nextCharIndex(), en);
  123. } catch (CharConversionException x) {
  124. // Yes, this is what we've always done
  125. n = btc.nextCharIndex();
  126. }
  127. return trim(ca, n);
  128. }
  129. }
  130. // A string decoder based upon a CharsetDecoder
  131. //
  132. private static class CharsetSD
  133. extends StringDecoder
  134. {
  135. private final Charset cs;
  136. private final CharsetDecoder cd;
  137. private CharsetSD(Charset cs, String rcn) {
  138. super(rcn);
  139. this.cs = cs;
  140. this.cd = cs.newDecoder()
  141. .onMalformedInput(CodingErrorAction.REPLACE)
  142. .onUnmappableCharacter(CodingErrorAction.REPLACE);
  143. }
  144. String charsetName() {
  145. if (cs instanceof HistoricallyNamedCharset)
  146. return ((HistoricallyNamedCharset)cs).historicalName();
  147. return cs.name();
  148. }
  149. char[] decode(byte[] ba, int off, int len) {
  150. int en = (int)(cd.maxCharsPerByte() * len);
  151. char[] ca = new char[en];
  152. if (len == 0)
  153. return ca;
  154. cd.reset();
  155. ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
  156. CharBuffer cb = CharBuffer.wrap(ca);
  157. try {
  158. CoderResult cr = cd.decode(bb, cb, true);
  159. if (!cr.isUnderflow())
  160. cr.throwException();
  161. cr = cd.flush(cb);
  162. if (!cr.isUnderflow())
  163. cr.throwException();
  164. } catch (CharacterCodingException x) {
  165. // Substitution is always enabled,
  166. // so this shouldn't happen
  167. throw new Error(x);
  168. }
  169. return trim(ca, cb.position());
  170. }
  171. }
  172. static char[] decode(String charsetName, byte[] ba, int off, int len)
  173. throws UnsupportedEncodingException
  174. {
  175. StringDecoder sd = (StringDecoder)deref(decoder);
  176. String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
  177. if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
  178. || csn.equals(sd.charsetName()))) {
  179. sd = null;
  180. try {
  181. Charset cs = lookupCharset(csn);
  182. if (cs != null)
  183. sd = new CharsetSD(cs, csn);
  184. else
  185. sd = null;
  186. } catch (IllegalCharsetNameException x) {
  187. // FALL THROUGH to ByteToCharConverter, for compatibility
  188. }
  189. if (sd == null)
  190. sd = new ConverterSD(ByteToCharConverter.getConverter(csn),
  191. csn);
  192. set(decoder, sd);
  193. }
  194. return sd.decode(ba, off, len);
  195. }
  196. static char[] decode(byte[] ba, int off, int len) {
  197. String csn = Converters.getDefaultEncodingName();
  198. try {
  199. return decode(csn, ba, off, len);
  200. } catch (UnsupportedEncodingException x) {
  201. Converters.resetDefaultEncodingName();
  202. warnUnsupportedCharset(csn);
  203. }
  204. try {
  205. return decode("ISO-8859-1", ba, off, len);
  206. } catch (UnsupportedEncodingException x) {
  207. // If this code is hit during VM initialization, MessageUtils is
  208. // the only way we will be able to get any kind of error message.
  209. MessageUtils.err("ISO-8859-1 charset not available: "
  210. + x.toString());
  211. // If we can not find ISO-8859-1 (a required encoding) then things
  212. // are seriously wrong with the installation.
  213. System.exit(1);
  214. return null;
  215. }
  216. }
  217. // -- Encoding --
  218. // Encapsulates either a CharToByteConverter or a CharsetEncoder
  219. //
  220. private static abstract class StringEncoder {
  221. private final String requestedCharsetName;
  222. protected StringEncoder(String requestedCharsetName) {
  223. this.requestedCharsetName = requestedCharsetName;
  224. }
  225. final String requestedCharsetName() {
  226. return requestedCharsetName;
  227. }
  228. abstract String charsetName();
  229. abstract byte[] encode(char[] cs, int off, int len);
  230. }
  231. // A string encoder based upon a CharToByteConverter
  232. //
  233. private static class ConverterSE
  234. extends StringEncoder
  235. {
  236. private CharToByteConverter ctb;
  237. private ConverterSE(CharToByteConverter ctb, String rcn) {
  238. super(rcn);
  239. this.ctb = ctb;
  240. }
  241. String charsetName() {
  242. return ctb.getCharacterEncoding();
  243. }
  244. byte[] encode(char[] ca, int off, int len) {
  245. int en = ctb.getMaxBytesPerChar() * len;
  246. byte[] ba = new byte[en];
  247. if (len == 0)
  248. return ba;
  249. ctb.reset();
  250. int n;
  251. try {
  252. n = ctb.convertAny(ca, off, (off + len),
  253. ba, 0, en);
  254. n += ctb.flushAny(ba, ctb.nextByteIndex(), en);
  255. } catch (CharConversionException x) {
  256. throw new Error("Converter malfunction: " +
  257. ctb.getClass().getName(),
  258. x);
  259. }
  260. return trim(ba, n);
  261. }
  262. }
  263. // A string encoder based upon a CharsetEncoder
  264. //
  265. private static class CharsetSE
  266. extends StringEncoder
  267. {
  268. private Charset cs;
  269. private CharsetEncoder ce;
  270. private CharsetSE(Charset cs, String rcn) {
  271. super(rcn);
  272. this.cs = cs;
  273. this.ce = cs.newEncoder()
  274. .onMalformedInput(CodingErrorAction.REPLACE)
  275. .onUnmappableCharacter(CodingErrorAction.REPLACE);
  276. }
  277. String charsetName() {
  278. if (cs instanceof HistoricallyNamedCharset)
  279. return ((HistoricallyNamedCharset)cs).historicalName();
  280. return cs.name();
  281. }
  282. byte[] encode(char[] ca, int off, int len) {
  283. int en = (int)(ce.maxBytesPerChar() * len);
  284. byte[] ba = new byte[en];
  285. if (len == 0)
  286. return ba;
  287. ce.reset();
  288. ByteBuffer bb = ByteBuffer.wrap(ba);
  289. CharBuffer cb = CharBuffer.wrap(ca, off, len);
  290. try {
  291. CoderResult cr = ce.encode(cb, bb, true);
  292. if (!cr.isUnderflow())
  293. cr.throwException();
  294. cr = ce.flush(bb);
  295. if (!cr.isUnderflow())
  296. cr.throwException();
  297. } catch (CharacterCodingException x) {
  298. // Substitution is always enabled,
  299. // so this shouldn't happen
  300. throw new Error(x);
  301. }
  302. return trim(ba, bb.position());
  303. }
  304. }
  305. static byte[] encode(String charsetName, char[] ca, int off, int len)
  306. throws UnsupportedEncodingException
  307. {
  308. StringEncoder se = (StringEncoder)deref(encoder);
  309. String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
  310. if ((se == null) || !(csn.equals(se.requestedCharsetName())
  311. || csn.equals(se.charsetName()))) {
  312. se = null;
  313. try {
  314. Charset cs = lookupCharset(csn);
  315. if (cs != null)
  316. se = new CharsetSE(cs, csn);
  317. } catch (IllegalCharsetNameException x) {
  318. // FALL THROUGH to CharToByteConverter, for compatibility
  319. }
  320. if (se == null)
  321. se = new ConverterSE(CharToByteConverter.getConverter(csn),
  322. csn);
  323. set(encoder, se);
  324. }
  325. return se.encode(ca, off, len);
  326. }
  327. static byte[] encode(char[] ca, int off, int len) {
  328. String csn = Converters.getDefaultEncodingName();
  329. try {
  330. return encode(csn, ca, off, len);
  331. } catch (UnsupportedEncodingException x) {
  332. Converters.resetDefaultEncodingName();
  333. warnUnsupportedCharset(csn);
  334. }
  335. try {
  336. return encode("ISO-8859-1", ca, off, len);
  337. } catch (UnsupportedEncodingException x) {
  338. // If this code is hit during VM initialization, MessageUtils is
  339. // the only way we will be able to get any kind of error message.
  340. MessageUtils.err("ISO-8859-1 charset not available: "
  341. + x.toString());
  342. // If we can not find ISO-8859-1 (a required encoding) then things
  343. // are seriously wrong with the installation.
  344. System.exit(1);
  345. return null;
  346. }
  347. }
  348. }