1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: WriterToUTF8Buffered.java,v 1.5 2004/02/17 04:18:18 minchau Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.serializer;
  20. import java.io.IOException;
  21. import java.io.OutputStream;
  22. import java.io.UnsupportedEncodingException;
  23. import java.io.Writer;
  24. /**
  25. * This class writes unicode characters to a byte stream (java.io.OutputStream)
  26. * as quickly as possible. It buffers the output in an internal
  27. * buffer which must be flushed to the OutputStream when done. This flushing
  28. * is done via the close() flush() or flushBuffer() method.
  29. */
  30. public final class WriterToUTF8Buffered extends Writer
  31. {
  32. /** number of bytes that the byte buffer can hold.
  33. * This is a fixed constant is used rather than m_outputBytes.lenght for performance.
  34. */
  35. private static final int BYTES_MAX=16*1024;
  36. /** number of characters that the character buffer can hold.
  37. * This is 1/3 of the number of bytes because UTF-8 encoding
  38. * can expand one unicode character by up to 3 bytes.
  39. */
  40. private static final int CHARS_MAX=(BYTES_MAX3);
  41. // private static final int
  42. /** The byte stream to write to. (sc & sb remove final to compile in JDK 1.1.8) */
  43. private final OutputStream m_os;
  44. /**
  45. * The internal buffer where data is stored.
  46. * (sc & sb remove final to compile in JDK 1.1.8)
  47. */
  48. private final byte m_outputBytes[];
  49. private final char m_inputChars[];
  50. /**
  51. * The number of valid bytes in the buffer. This value is always
  52. * in the range <tt>0</tt> through <tt>m_outputBytes.length</tt> elements
  53. * <tt>m_outputBytes[0]</tt> through <tt>m_outputBytes[count-1]</tt> contain valid
  54. * byte data.
  55. */
  56. private int count;
  57. /**
  58. * Create an buffered UTF-8 writer.
  59. *
  60. *
  61. * @param out the underlying output stream.
  62. *
  63. * @throws UnsupportedEncodingException
  64. */
  65. public WriterToUTF8Buffered(OutputStream out)
  66. throws UnsupportedEncodingException
  67. {
  68. m_os = out;
  69. // get 3 extra bytes to make buffer overflow checking simpler and faster
  70. // we won't have to keep checking for a few extra characters
  71. m_outputBytes = new byte[BYTES_MAX + 3];
  72. // Big enough to hold the input chars that will be transformed
  73. // into output bytes in m_ouputBytes.
  74. m_inputChars = new char[CHARS_MAX + 1];
  75. count = 0;
  76. // the old body of this constructor, before the buffersize was changed to a constant
  77. // this(out, 8*1024);
  78. }
  79. /**
  80. * Create an buffered UTF-8 writer to write data to the
  81. * specified underlying output stream with the specified buffer
  82. * size.
  83. *
  84. * @param out the underlying output stream.
  85. * @param size the buffer size.
  86. * @exception IllegalArgumentException if size <= 0.
  87. */
  88. // public WriterToUTF8Buffered(final OutputStream out, final int size)
  89. // {
  90. //
  91. // m_os = out;
  92. //
  93. // if (size <= 0)
  94. // {
  95. // throw new IllegalArgumentException(
  96. // SerializerMessages.createMessage(SerializerErrorResources.ER_BUFFER_SIZE_LESSTHAN_ZERO, null)); //"Buffer size <= 0");
  97. // }
  98. //
  99. // m_outputBytes = new byte[size];
  100. // count = 0;
  101. // }
  102. /**
  103. * Write a single character. The character to be written is contained in
  104. * the 16 low-order bits of the given integer value; the 16 high-order bits
  105. * are ignored.
  106. *
  107. * <p> Subclasses that intend to support efficient single-character output
  108. * should override this method.
  109. *
  110. * @param c int specifying a character to be written.
  111. * @exception IOException If an I/O error occurs
  112. */
  113. public void write(final int c) throws IOException
  114. {
  115. /* If we are close to the end of the buffer then flush it.
  116. * Remember the buffer can hold a few more bytes than BYTES_MAX
  117. */
  118. if (count >= BYTES_MAX)
  119. flushBuffer();
  120. if (c < 0x80)
  121. {
  122. m_outputBytes[count++] = (byte) (c);
  123. }
  124. else if (c < 0x800)
  125. {
  126. m_outputBytes[count++] = (byte) (0xc0 + (c >> 6));
  127. m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
  128. }
  129. else
  130. {
  131. m_outputBytes[count++] = (byte) (0xe0 + (c >> 12));
  132. m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  133. m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
  134. }
  135. }
  136. /**
  137. * Write a portion of an array of characters.
  138. *
  139. * @param chars Array of characters
  140. * @param start Offset from which to start writing characters
  141. * @param length Number of characters to write
  142. *
  143. * @exception IOException If an I/O error occurs
  144. *
  145. * @throws java.io.IOException
  146. */
  147. public void write(final char chars[], final int start, final int length)
  148. throws java.io.IOException
  149. {
  150. // We multiply the length by three since this is the maximum length
  151. // of the characters that we can put into the buffer. It is possible
  152. // for each Unicode character to expand to three bytes.
  153. int lengthx3 = 3*length;
  154. if (lengthx3 >= BYTES_MAX - count)
  155. {
  156. // The requested length is greater than the unused part of the buffer
  157. flushBuffer();
  158. if (lengthx3 >= BYTES_MAX)
  159. {
  160. /*
  161. * The requested length exceeds the size of the buffer.
  162. * Cut the buffer up into chunks, each of which will
  163. * not cause an overflow to the output buffer m_outputBytes,
  164. * and make multiple recursive calls.
  165. * Be careful about integer overflows in multiplication.
  166. */
  167. final int chunks = 1 + lengthCHARS_MAX;
  168. int end_chunk = start;
  169. for (int chunk = 1; chunk <= chunks; chunk++)
  170. {
  171. int start_chunk = end_chunk;
  172. end_chunk = start + (int) ((((long) length) * chunk) / chunks);
  173. int len_chunk = (end_chunk - start_chunk);
  174. this.write(chars,start_chunk, len_chunk);
  175. }
  176. return;
  177. }
  178. }
  179. final int n = length+start;
  180. final byte[] buf_loc = m_outputBytes; // local reference for faster access
  181. int count_loc = count; // local integer for faster access
  182. int i = start;
  183. {
  184. /* This block could be omitted and the code would produce
  185. * the same result. But this block exists to give the JIT
  186. * a better chance of optimizing a tight and common loop which
  187. * occurs when writing out ASCII characters.
  188. */
  189. char c;
  190. for(; i < n && (c = chars[i])< 0x80 ; i++ )
  191. buf_loc[count_loc++] = (byte)c;
  192. }
  193. for (; i < n; i++)
  194. {
  195. final char c = chars[i];
  196. if (c < 0x80)
  197. buf_loc[count_loc++] = (byte) (c);
  198. else if (c < 0x800)
  199. {
  200. buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
  201. buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
  202. }
  203. else
  204. {
  205. buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
  206. buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  207. buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
  208. }
  209. }
  210. // Store the local integer back into the instance variable
  211. count = count_loc;
  212. }
  213. /**
  214. * Writes out the character array
  215. * @param chars a character array with only ASCII characters, so
  216. * the UTF-8 encoding is optimized.
  217. * @param start the first character in the input array
  218. * @param length the number of characters in the input array
  219. */
  220. private void directWrite(final char chars[], final int start, final int length)
  221. throws java.io.IOException
  222. {
  223. if (length >= BYTES_MAX - count)
  224. {
  225. // The requested length is greater than the unused part of the buffer
  226. flushBuffer();
  227. if (length >= BYTES_MAX)
  228. {
  229. /*
  230. * The requested length exceeds the size of the buffer.
  231. * Cut the buffer up into chunks, each of which will
  232. * not cause an overflow to the output buffer m_outputBytes,
  233. * and make multiple recursive calls.
  234. */
  235. int chunks = 1 + lengthCHARS_MAX;
  236. for (int chunk =0 ; chunk < chunks; chunk++)
  237. {
  238. int start_chunk = start + ((length*chunk)/chunks);
  239. int end_chunk = start + ((length*(chunk+1))/chunks);
  240. int len_chunk = (end_chunk - start_chunk);
  241. this.directWrite(chars,start_chunk, len_chunk);
  242. }
  243. return;
  244. }
  245. }
  246. final int n = length+start;
  247. final byte[] buf_loc = m_outputBytes; // local reference for faster access
  248. int count_loc = count; // local integer for faster access
  249. for(int i=start; i < n ; i++ )
  250. buf_loc[count_loc++] = (byte) buf_loc[i];
  251. // Store the local integer back into the instance variable
  252. count = count_loc;
  253. }
  254. /**
  255. * Write a string.
  256. *
  257. * @param s String to be written
  258. *
  259. * @exception IOException If an I/O error occurs
  260. */
  261. public void write(final String s) throws IOException
  262. {
  263. // We multiply the length by three since this is the maximum length
  264. // of the characters that we can put into the buffer. It is possible
  265. // for each Unicode character to expand to three bytes.
  266. final int length = s.length();
  267. int lengthx3 = 3*length;
  268. if (lengthx3 >= BYTES_MAX - count)
  269. {
  270. // The requested length is greater than the unused part of the buffer
  271. flushBuffer();
  272. if (lengthx3 >= BYTES_MAX)
  273. {
  274. /*
  275. * The requested length exceeds the size of the buffer,
  276. * so break it up in chunks that don't exceed the buffer size.
  277. */
  278. final int start = 0;
  279. int chunks = 1 + lengthCHARS_MAX;
  280. for (int chunk =0 ; chunk < chunks; chunk++)
  281. {
  282. int start_chunk = start + ((length*chunk)/chunks);
  283. int end_chunk = start + ((length*(chunk+1))/chunks);
  284. int len_chunk = (end_chunk - start_chunk);
  285. s.getChars(start_chunk,end_chunk, m_inputChars,0);
  286. this.write(m_inputChars,0, len_chunk);
  287. }
  288. return;
  289. }
  290. }
  291. s.getChars(0, length , m_inputChars, 0);
  292. final char[] chars = m_inputChars;
  293. final int n = length;
  294. final byte[] buf_loc = m_outputBytes; // local reference for faster access
  295. int count_loc = count; // local integer for faster access
  296. int i = 0;
  297. {
  298. /* This block could be omitted and the code would produce
  299. * the same result. But this block exists to give the JIT
  300. * a better chance of optimizing a tight and common loop which
  301. * occurs when writing out ASCII characters.
  302. */
  303. char c;
  304. for(; i < n && (c = chars[i])< 0x80 ; i++ )
  305. buf_loc[count_loc++] = (byte)c;
  306. }
  307. for (; i < n; i++)
  308. {
  309. final char c = chars[i];
  310. if (c < 0x80)
  311. buf_loc[count_loc++] = (byte) (c);
  312. else if (c < 0x800)
  313. {
  314. buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
  315. buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
  316. }
  317. else
  318. {
  319. buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
  320. buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  321. buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
  322. }
  323. }
  324. // Store the local integer back into the instance variable
  325. count = count_loc;
  326. }
  327. /**
  328. * Flush the internal buffer
  329. *
  330. * @throws IOException
  331. */
  332. public void flushBuffer() throws IOException
  333. {
  334. if (count > 0)
  335. {
  336. m_os.write(m_outputBytes, 0, count);
  337. count = 0;
  338. }
  339. }
  340. /**
  341. * Flush the stream. If the stream has saved any characters from the
  342. * various write() methods in a buffer, write them immediately to their
  343. * intended destination. Then, if that destination is another character or
  344. * byte stream, flush it. Thus one flush() invocation will flush all the
  345. * buffers in a chain of Writers and OutputStreams.
  346. *
  347. * @exception IOException If an I/O error occurs
  348. *
  349. * @throws java.io.IOException
  350. */
  351. public void flush() throws java.io.IOException
  352. {
  353. flushBuffer();
  354. m_os.flush();
  355. }
  356. /**
  357. * Close the stream, flushing it first. Once a stream has been closed,
  358. * further write() or flush() invocations will cause an IOException to be
  359. * thrown. Closing a previously-closed stream, however, has no effect.
  360. *
  361. * @exception IOException If an I/O error occurs
  362. *
  363. * @throws java.io.IOException
  364. */
  365. public void close() throws java.io.IOException
  366. {
  367. flushBuffer();
  368. m_os.close();
  369. }
  370. /**
  371. * Get the output stream where the events will be serialized to.
  372. *
  373. * @return reference to the result stream, or null of only a writer was
  374. * set.
  375. */
  376. public OutputStream getOutputStream()
  377. {
  378. return m_os;
  379. }
  380. /**
  381. *
  382. * @param s A string with only ASCII characters
  383. * @throws IOException
  384. */
  385. public void directWrite(final String s) throws IOException
  386. {
  387. final int length = s.length();
  388. if (length >= BYTES_MAX - count)
  389. {
  390. // The requested length is greater than the unused part of the buffer
  391. flushBuffer();
  392. if (length >= BYTES_MAX)
  393. {
  394. /*
  395. * The requested length exceeds the size of the buffer,
  396. * so don't bother to buffer this one, just write it out
  397. * directly. The buffer is already flushed so this is a
  398. * safe thing to do.
  399. */
  400. final int start = 0;
  401. int chunks = 1 + lengthCHARS_MAX;
  402. for (int chunk =0 ; chunk < chunks; chunk++)
  403. {
  404. int start_chunk = start + ((length*chunk)/chunks);
  405. int end_chunk = start + ((length*(chunk+1))/chunks);
  406. int len_chunk = (end_chunk - start_chunk);
  407. s.getChars(start_chunk,end_chunk, m_inputChars,0);
  408. this.directWrite(m_inputChars,0, len_chunk);
  409. }
  410. return;
  411. }
  412. }
  413. s.getChars(0, length , m_inputChars, 0);
  414. final char[] chars = m_inputChars;
  415. final byte[] buf_loc = m_outputBytes; // local reference for faster access
  416. int count_loc = count; // local integer for faster access
  417. int i = 0;
  418. while( i < length)
  419. buf_loc[count_loc++] = (byte)chars[i++];
  420. // Store the local integer back into the instance variable
  421. count = count_loc;
  422. }
  423. }