1. /*
  2. * @(#)StringCoding.java 1.9 02/04/09
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.lang;
  8. import java.io.CharConversionException;
  9. import java.io.UnsupportedEncodingException;
  10. import java.lang.ref.SoftReference;
  11. import java.nio.ByteBuffer;
  12. import java.nio.CharBuffer;
  13. import java.nio.BufferOverflowException;
  14. import java.nio.BufferUnderflowException;
  15. import java.nio.charset.Charset;
  16. import java.nio.charset.CharsetDecoder;
  17. import java.nio.charset.CharsetEncoder;
  18. import java.nio.charset.CharacterCodingException;
  19. import java.nio.charset.CoderResult;
  20. import java.nio.charset.CodingErrorAction;
  21. import java.nio.charset.IllegalCharsetNameException;
  22. import java.nio.charset.MalformedInputException;
  23. import java.nio.charset.UnsupportedCharsetException;
  24. import sun.io.ByteToCharConverter;
  25. import sun.io.CharToByteConverter;
  26. import sun.io.Converters;
  27. import sun.misc.MessageUtils;
  28. import sun.nio.cs.HistoricallyNamedCharset;
  29. /**
  30. * Utility class for string encoding and decoding.
  31. */
  32. class StringCoding {
  33. private StringCoding() { }
  34. /* The cached coders for each thread
  35. */
  36. private static ThreadLocal decoder = new ThreadLocal();
  37. private static ThreadLocal encoder = new ThreadLocal();
  38. private static boolean warnUnsupportedCharset = true;
  39. private static Object deref(ThreadLocal tl) {
  40. SoftReference sr = (SoftReference)tl.get();
  41. if (sr == null)
  42. return null;
  43. return sr.get();
  44. }
  45. private static void set(ThreadLocal tl, Object ob) {
  46. tl.set(new SoftReference(ob));
  47. }
  48. // Trim the given byte array to the given length
  49. //
  50. private static byte[] trim(byte[] ba, int len) {
  51. if (len == ba.length)
  52. return ba;
  53. byte[] tba = new byte[len];
  54. System.arraycopy(ba, 0, tba, 0, len);
  55. return tba;
  56. }
  57. // Trim the given char array to the given length
  58. //
  59. private static char[] trim(char[] ca, int len) {
  60. if (len == ca.length)
  61. return ca;
  62. char[] tca = new char[len];
  63. System.arraycopy(ca, 0, tca, 0, len);
  64. return tca;
  65. }
  66. private static Charset lookupCharset(String csn) {
  67. if (csn.equalsIgnoreCase("PCK"))
  68. return null;
  69. if (Charset.isSupported(csn)) {
  70. try {
  71. return Charset.forName(csn);
  72. } catch (UnsupportedCharsetException x) {
  73. throw new Error(x);
  74. }
  75. }
  76. return null;
  77. }
  78. private static void warnUnsupportedCharset(String csn) {
  79. if (warnUnsupportedCharset) {
  80. // Use sun.misc.MessageUtils rather than the Logging API or
  81. // System.err since this method may be called during VM
  82. // initialization before either is available.
  83. MessageUtils.err("WARNING: Default charset " + csn +
  84. " not supported, using ISO-8859-1 instead");
  85. warnUnsupportedCharset = false;
  86. }
  87. }
  88. // -- Decoding --
  89. // Encapsulates either a ByteToCharConverter or a CharsetDecoder
  90. //
  91. private static abstract class StringDecoder {
  92. private final String requestedCharsetName;
  93. protected StringDecoder(String requestedCharsetName) {
  94. this.requestedCharsetName = requestedCharsetName;
  95. }
  96. final String requestedCharsetName() {
  97. return requestedCharsetName;
  98. }
  99. abstract String charsetName();
  100. abstract char[] decode(byte[] ba, int off, int len);
  101. }
  102. // A string decoder based upon a ByteToCharConverter
  103. //
  104. private static class ConverterSD
  105. extends StringDecoder
  106. {
  107. private ByteToCharConverter btc;
  108. private ConverterSD(ByteToCharConverter btc, String rcn) {
  109. super(rcn);
  110. this.btc = btc;
  111. }
  112. String charsetName() {
  113. return btc.getCharacterEncoding();
  114. }
  115. char[] decode(byte[] ba, int off, int len) {
  116. int en = btc.getMaxCharsPerByte() * len;
  117. char[] ca = new char[en];
  118. if (len == 0)
  119. return ca;
  120. btc.reset();
  121. int n = 0;
  122. try {
  123. n = btc.convert(ba, off, off + len, ca, 0, en);
  124. n += btc.flush(ca, btc.nextCharIndex(), en);
  125. } catch (CharConversionException x) {
  126. // Yes, this is what we've always done
  127. n = btc.nextCharIndex();
  128. }
  129. return trim(ca, n);
  130. }
  131. }
  132. // A string decoder based upon a CharsetDecoder
  133. //
  134. private static class CharsetSD
  135. extends StringDecoder
  136. {
  137. private final Charset cs;
  138. private final CharsetDecoder cd;
  139. private CharsetSD(Charset cs, String rcn) {
  140. super(rcn);
  141. this.cs = cs;
  142. this.cd = cs.newDecoder()
  143. .onMalformedInput(CodingErrorAction.REPLACE)
  144. .onUnmappableCharacter(CodingErrorAction.REPLACE);
  145. }
  146. String charsetName() {
  147. if (cs instanceof HistoricallyNamedCharset)
  148. return ((HistoricallyNamedCharset)cs).historicalName();
  149. return cs.name();
  150. }
  151. char[] decode(byte[] ba, int off, int len) {
  152. int en = (int)(cd.maxCharsPerByte() * len);
  153. char[] ca = new char[en];
  154. if (len == 0)
  155. return ca;
  156. cd.reset();
  157. ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
  158. CharBuffer cb = CharBuffer.wrap(ca);
  159. try {
  160. CoderResult cr = cd.decode(bb, cb, true);
  161. if (!cr.isUnderflow())
  162. cr.throwException();
  163. cr = cd.flush(cb);
  164. if (!cr.isUnderflow())
  165. cr.throwException();
  166. } catch (CharacterCodingException x) {
  167. // Substitution is always enabled,
  168. // so this shouldn't happen
  169. throw new Error(x);
  170. }
  171. return trim(ca, cb.position());
  172. }
  173. }
  174. static char[] decode(String charsetName, byte[] ba, int off, int len)
  175. throws UnsupportedEncodingException
  176. {
  177. StringDecoder sd = (StringDecoder)deref(decoder);
  178. String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
  179. if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
  180. || csn.equals(sd.charsetName()))) {
  181. sd = null;
  182. try {
  183. Charset cs = lookupCharset(csn);
  184. if (cs != null)
  185. sd = new CharsetSD(cs, csn);
  186. else
  187. sd = null;
  188. } catch (IllegalCharsetNameException x) {
  189. // FALL THROUGH to ByteToCharConverter, for compatibility
  190. }
  191. if (sd == null)
  192. sd = new ConverterSD(ByteToCharConverter.getConverter(csn),
  193. csn);
  194. set(decoder, sd);
  195. }
  196. return sd.decode(ba, off, len);
  197. }
  198. static char[] decode(byte[] ba, int off, int len) {
  199. String csn = Converters.getDefaultEncodingName();
  200. try {
  201. return decode(csn, ba, off, len);
  202. } catch (UnsupportedEncodingException x) {
  203. Converters.resetDefaultEncodingName();
  204. warnUnsupportedCharset(csn);
  205. }
  206. try {
  207. return decode("ISO-8859-1", ba, off, len);
  208. } catch (UnsupportedEncodingException x) {
  209. // If this code is hit during VM initialization, MessageUtils is
  210. // the only way we will be able to get any kind of error message.
  211. MessageUtils.err("ISO-8859-1 charset not available: "
  212. + x.toString());
  213. // If we can not find ISO-8859-1 (a required encoding) then things
  214. // are seriously wrong with the installation.
  215. System.exit(1);
  216. return null;
  217. }
  218. }
  219. // -- Encoding --
  220. // Encapsulates either a CharToByteConverter or a CharsetEncoder
  221. //
  222. private static abstract class StringEncoder {
  223. private final String requestedCharsetName;
  224. protected StringEncoder(String requestedCharsetName) {
  225. this.requestedCharsetName = requestedCharsetName;
  226. }
  227. final String requestedCharsetName() {
  228. return requestedCharsetName;
  229. }
  230. abstract String charsetName();
  231. abstract byte[] encode(char[] cs, int off, int len);
  232. }
  233. // A string encoder based upon a CharToByteConverter
  234. //
  235. private static class ConverterSE
  236. extends StringEncoder
  237. {
  238. private CharToByteConverter ctb;
  239. private ConverterSE(CharToByteConverter ctb, String rcn) {
  240. super(rcn);
  241. this.ctb = ctb;
  242. }
  243. String charsetName() {
  244. return ctb.getCharacterEncoding();
  245. }
  246. byte[] encode(char[] ca, int off, int len) {
  247. int en = ctb.getMaxBytesPerChar() * len;
  248. byte[] ba = new byte[en];
  249. if (len == 0)
  250. return ba;
  251. ctb.reset();
  252. int n;
  253. try {
  254. n = ctb.convertAny(ca, off, (off + len),
  255. ba, 0, en);
  256. n += ctb.flushAny(ba, ctb.nextByteIndex(), en);
  257. } catch (CharConversionException x) {
  258. throw new Error("Converter malfunction: " +
  259. ctb.getClass().getName(),
  260. x);
  261. }
  262. return trim(ba, n);
  263. }
  264. }
  265. // A string encoder based upon a CharsetEncoder
  266. //
  267. private static class CharsetSE
  268. extends StringEncoder
  269. {
  270. private Charset cs;
  271. private CharsetEncoder ce;
  272. private CharsetSE(Charset cs, String rcn) {
  273. super(rcn);
  274. this.cs = cs;
  275. this.ce = cs.newEncoder()
  276. .onMalformedInput(CodingErrorAction.REPLACE)
  277. .onUnmappableCharacter(CodingErrorAction.REPLACE);
  278. }
  279. String charsetName() {
  280. if (cs instanceof HistoricallyNamedCharset)
  281. return ((HistoricallyNamedCharset)cs).historicalName();
  282. return cs.name();
  283. }
  284. byte[] encode(char[] ca, int off, int len) {
  285. int en = (int)(ce.maxBytesPerChar() * len);
  286. byte[] ba = new byte[en];
  287. if (len == 0)
  288. return ba;
  289. ce.reset();
  290. ByteBuffer bb = ByteBuffer.wrap(ba);
  291. CharBuffer cb = CharBuffer.wrap(ca, off, len);
  292. try {
  293. CoderResult cr = ce.encode(cb, bb, true);
  294. if (!cr.isUnderflow())
  295. cr.throwException();
  296. cr = ce.flush(bb);
  297. if (!cr.isUnderflow())
  298. cr.throwException();
  299. } catch (CharacterCodingException x) {
  300. // Substitution is always enabled,
  301. // so this shouldn't happen
  302. throw new Error(x);
  303. }
  304. return trim(ba, bb.position());
  305. }
  306. }
  307. static byte[] encode(String charsetName, char[] ca, int off, int len)
  308. throws UnsupportedEncodingException
  309. {
  310. StringEncoder se = (StringEncoder)deref(encoder);
  311. String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
  312. if ((se == null) || !(csn.equals(se.requestedCharsetName())
  313. || csn.equals(se.charsetName()))) {
  314. se = null;
  315. try {
  316. Charset cs = lookupCharset(csn);
  317. if (cs != null)
  318. se = new CharsetSE(cs, csn);
  319. } catch (IllegalCharsetNameException x) {
  320. // FALL THROUGH to CharToByteConverter, for compatibility
  321. }
  322. if (se == null)
  323. se = new ConverterSE(CharToByteConverter.getConverter(csn),
  324. csn);
  325. set(encoder, se);
  326. }
  327. return se.encode(ca, off, len);
  328. }
  329. static byte[] encode(char[] ca, int off, int len) {
  330. String csn = Converters.getDefaultEncodingName();
  331. try {
  332. return encode(csn, ca, off, len);
  333. } catch (UnsupportedEncodingException x) {
  334. Converters.resetDefaultEncodingName();
  335. warnUnsupportedCharset(csn);
  336. }
  337. try {
  338. return encode("ISO-8859-1", ca, off, len);
  339. } catch (UnsupportedEncodingException x) {
  340. // If this code is hit during VM initialization, MessageUtils is
  341. // the only way we will be able to get any kind of error message.
  342. MessageUtils.err("ISO-8859-1 charset not available: "
  343. + x.toString());
  344. // If we can not find ISO-8859-1 (a required encoding) then things
  345. // are seriously wrong with the installation.
  346. System.exit(1);
  347. return null;
  348. }
  349. }
  350. }