1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 2000-2004 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.io;
  58. import java.io.InputStream;
  59. import java.io.IOException;
  60. import java.io.Reader;
  61. import java.util.Locale;
  62. import com.sun.org.apache.xerces.internal.util.MessageFormatter;
  63. import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  64. /**
  65. * <p>A UTF-8 reader.</p>
  66. *
  67. * @author Andy Clark, IBM
  68. *
  69. * @version $Id: UTF8Reader.java,v 1.10 2004/03/04 19:27:13 mrglavas Exp $
  70. */
  71. public class UTF8Reader
  72. extends Reader {
  73. //
  74. // Constants
  75. //
  76. /** Default byte buffer size (2048). */
  77. public static final int DEFAULT_BUFFER_SIZE = 2048;
  78. // debugging
  79. /** Debug read. */
  80. private static final boolean DEBUG_READ = false;
  81. //
  82. // Data
  83. //
  84. /** Input stream. */
  85. protected InputStream fInputStream;
  86. /** Byte buffer. */
  87. protected byte[] fBuffer;
  88. /** Offset into buffer. */
  89. protected int fOffset;
  90. /** Surrogate character. */
  91. private int fSurrogate = -1;
  92. // message formatter; used to produce localized
  93. // exception messages
  94. private MessageFormatter fFormatter = null;
  95. //Locale to use for messages
  96. private Locale fLocale = null;
  97. //
  98. // Constructors
  99. //
  100. /**
  101. * Constructs a UTF-8 reader from the specified input stream
  102. * using the default buffer size. Primarily for testing.
  103. *
  104. * @param inputStream The input stream.
  105. */
  106. public UTF8Reader(InputStream inputStream) {
  107. this(inputStream, DEFAULT_BUFFER_SIZE, new XMLMessageFormatter(), Locale.getDefault());
  108. } // <init>(InputStream, MessageFormatter)
  109. /**
  110. * Constructs a UTF-8 reader from the specified input stream
  111. * using the default buffer size and the given MessageFormatter.
  112. *
  113. * @param inputStream The input stream.
  114. * @param messageFormatter given MessageFormatter
  115. * @param locale Locale to use for messages
  116. */
  117. public UTF8Reader(InputStream inputStream, MessageFormatter messageFormatter,
  118. Locale locale) {
  119. this(inputStream, DEFAULT_BUFFER_SIZE, messageFormatter, locale);
  120. } // <init>(InputStream, MessageFormatter, Locale)
  121. /**
  122. * Constructs a UTF-8 reader from the specified input stream,
  123. * buffer size and MessageFormatter.
  124. *
  125. * @param inputStream The input stream.
  126. * @param size The initial buffer size.
  127. * @param messageFormatter the formatter for localizing/formatting errors.
  128. * @param locale the Locale to use for messages
  129. */
  130. public UTF8Reader(InputStream inputStream, int size,
  131. MessageFormatter messageFormatter, Locale locale) {
  132. fInputStream = inputStream;
  133. fBuffer = new byte[size];
  134. fFormatter = messageFormatter;
  135. fLocale = locale;
  136. } // <init>(InputStream, int, MessageFormatter, Locale)
  137. //
  138. // Reader methods
  139. //
  140. /**
  141. * Read a single character. This method will block until a character is
  142. * available, an I/O error occurs, or the end of the stream is reached.
  143. *
  144. * <p> Subclasses that intend to support efficient single-character input
  145. * should override this method.
  146. *
  147. * @return The character read, as an integer in the range 0 to 16383
  148. * (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
  149. * been reached
  150. *
  151. * @exception IOException If an I/O error occurs
  152. */
  153. public int read() throws IOException {
  154. // decode character
  155. int c = fSurrogate;
  156. if (fSurrogate == -1) {
  157. // NOTE: We use the index into the buffer if there are remaining
  158. // bytes from the last block read. -Ac
  159. int index = 0;
  160. // get first byte
  161. int b0 = index == fOffset
  162. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  163. if (b0 == -1) {
  164. return -1;
  165. }
  166. // UTF-8: [0xxx xxxx]
  167. // Unicode: [0000 0000] [0xxx xxxx]
  168. if (b0 < 0x80) {
  169. c = (char)b0;
  170. }
  171. // UTF-8: [110y yyyy] [10xx xxxx]
  172. // Unicode: [0000 0yyy] [yyxx xxxx]
  173. else if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
  174. int b1 = index == fOffset
  175. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  176. if (b1 == -1) {
  177. expectedByte(2, 2);
  178. }
  179. if ((b1 & 0xC0) != 0x80) {
  180. invalidByte(2, 2, b1);
  181. }
  182. c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
  183. }
  184. // UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
  185. // Unicode: [zzzz yyyy] [yyxx xxxx]
  186. else if ((b0 & 0xF0) == 0xE0) {
  187. int b1 = index == fOffset
  188. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  189. if (b1 == -1) {
  190. expectedByte(2, 3);
  191. }
  192. if ((b1 & 0xC0) != 0x80
  193. || (b0 == 0xED && b1 >= 0xA0)
  194. || ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
  195. invalidByte(2, 3, b1);
  196. }
  197. int b2 = index == fOffset
  198. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  199. if (b2 == -1) {
  200. expectedByte(3, 3);
  201. }
  202. if ((b2 & 0xC0) != 0x80) {
  203. invalidByte(3, 3, b2);
  204. }
  205. c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
  206. (b2 & 0x003F);
  207. }
  208. // UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
  209. // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
  210. // [1101 11yy] [yyxx xxxx] (low surrogate)
  211. // * uuuuu = wwww + 1
  212. else if ((b0 & 0xF8) == 0xF0) {
  213. int b1 = index == fOffset
  214. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  215. if (b1 == -1) {
  216. expectedByte(2, 4);
  217. }
  218. if ((b1 & 0xC0) != 0x80
  219. || ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
  220. invalidByte(2, 3, b1);
  221. }
  222. int b2 = index == fOffset
  223. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  224. if (b2 == -1) {
  225. expectedByte(3, 4);
  226. }
  227. if ((b2 & 0xC0) != 0x80) {
  228. invalidByte(3, 3, b2);
  229. }
  230. int b3 = index == fOffset
  231. ? fInputStream.read() : fBuffer[index++] & 0x00FF;
  232. if (b3 == -1) {
  233. expectedByte(4, 4);
  234. }
  235. if ((b3 & 0xC0) != 0x80) {
  236. invalidByte(4, 4, b3);
  237. }
  238. int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
  239. if (uuuuu > 0x10) {
  240. invalidSurrogate(uuuuu);
  241. }
  242. int wwww = uuuuu - 1;
  243. int hs = 0xD800 |
  244. ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
  245. ((b2 >> 4) & 0x0003);
  246. int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
  247. c = hs;
  248. fSurrogate = ls;
  249. }
  250. // error
  251. else {
  252. invalidByte(1, 1, b0);
  253. }
  254. }
  255. // use surrogate
  256. else {
  257. fSurrogate = -1;
  258. }
  259. // return character
  260. if (DEBUG_READ) {
  261. System.out.println("read(): 0x"+Integer.toHexString(c));
  262. }
  263. return c;
  264. } // read():int
  265. /**
  266. * Read characters into a portion of an array. This method will block
  267. * until some input is available, an I/O error occurs, or the end of the
  268. * stream is reached.
  269. *
  270. * @param ch Destination buffer
  271. * @param offset Offset at which to start storing characters
  272. * @param length Maximum number of characters to read
  273. *
  274. * @return The number of characters read, or -1 if the end of the
  275. * stream has been reached
  276. *
  277. * @exception IOException If an I/O error occurs
  278. */
  279. public int read(char ch[], int offset, int length) throws IOException {
  280. // handle surrogate
  281. int out = offset;
  282. if (fSurrogate != -1) {
  283. ch[offset + 1] = (char)fSurrogate;
  284. fSurrogate = -1;
  285. length--;
  286. out++;
  287. }
  288. // read bytes
  289. int count = 0;
  290. if (fOffset == 0) {
  291. // adjust length to read
  292. if (length > fBuffer.length) {
  293. length = fBuffer.length;
  294. }
  295. // perform read operation
  296. count = fInputStream.read(fBuffer, 0, length);
  297. if (count == -1) {
  298. return -1;
  299. }
  300. count += out - offset;
  301. }
  302. // skip read; last character was in error
  303. // NOTE: Having an offset value other than zero means that there was
  304. // an error in the last character read. In this case, we have
  305. // skipped the read so we don't consume any bytes past the
  306. // error. By signalling the error on the next block read we
  307. // allow the method to return the most valid characters that
  308. // it can on the previous block read. -Ac
  309. else {
  310. count = fOffset;
  311. fOffset = 0;
  312. }
  313. // convert bytes to characters
  314. final int total = count;
  315. int in;
  316. byte byte1;
  317. final byte byte0 = 0;
  318. for (in = 0; in < total; in++) {
  319. byte1 = fBuffer[in];
  320. if (byte1 >= byte0) {
  321. ch[out++] = (char)byte1;
  322. }
  323. else {
  324. break;
  325. }
  326. }
  327. for ( ; in < total; in++) {
  328. byte1 = fBuffer[in];
  329. // UTF-8: [0xxx xxxx]
  330. // Unicode: [0000 0000] [0xxx xxxx]
  331. if (byte1 >= byte0) {
  332. ch[out++] = (char)byte1;
  333. continue;
  334. }
  335. // UTF-8: [110y yyyy] [10xx xxxx]
  336. // Unicode: [0000 0yyy] [yyxx xxxx]
  337. int b0 = byte1 & 0x0FF;
  338. if ((b0 & 0xE0) == 0xC0 && (b0 & 0x1E) != 0) {
  339. int b1 = -1;
  340. if (++in < total) {
  341. b1 = fBuffer[in] & 0x00FF;
  342. }
  343. else {
  344. b1 = fInputStream.read();
  345. if (b1 == -1) {
  346. if (out > offset) {
  347. fBuffer[0] = (byte)b0;
  348. fOffset = 1;
  349. return out - offset;
  350. }
  351. expectedByte(2, 2);
  352. }
  353. count++;
  354. }
  355. if ((b1 & 0xC0) != 0x80) {
  356. if (out > offset) {
  357. fBuffer[0] = (byte)b0;
  358. fBuffer[1] = (byte)b1;
  359. fOffset = 2;
  360. return out - offset;
  361. }
  362. invalidByte(2, 2, b1);
  363. }
  364. int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
  365. ch[out++] = (char)c;
  366. count -= 1;
  367. continue;
  368. }
  369. // UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
  370. // Unicode: [zzzz yyyy] [yyxx xxxx]
  371. if ((b0 & 0xF0) == 0xE0) {
  372. int b1 = -1;
  373. if (++in < total) {
  374. b1 = fBuffer[in] & 0x00FF;
  375. }
  376. else {
  377. b1 = fInputStream.read();
  378. if (b1 == -1) {
  379. if (out > offset) {
  380. fBuffer[0] = (byte)b0;
  381. fOffset = 1;
  382. return out - offset;
  383. }
  384. expectedByte(2, 3);
  385. }
  386. count++;
  387. }
  388. if ((b1 & 0xC0) != 0x80
  389. || (b0 == 0xED && b1 >= 0xA0)
  390. || ((b0 & 0x0F) == 0 && (b1 & 0x20) == 0)) {
  391. if (out > offset) {
  392. fBuffer[0] = (byte)b0;
  393. fBuffer[1] = (byte)b1;
  394. fOffset = 2;
  395. return out - offset;
  396. }
  397. invalidByte(2, 3, b1);
  398. }
  399. int b2 = -1;
  400. if (++in < total) {
  401. b2 = fBuffer[in] & 0x00FF;
  402. }
  403. else {
  404. b2 = fInputStream.read();
  405. if (b2 == -1) {
  406. if (out > offset) {
  407. fBuffer[0] = (byte)b0;
  408. fBuffer[1] = (byte)b1;
  409. fOffset = 2;
  410. return out - offset;
  411. }
  412. expectedByte(3, 3);
  413. }
  414. count++;
  415. }
  416. if ((b2 & 0xC0) != 0x80) {
  417. if (out > offset) {
  418. fBuffer[0] = (byte)b0;
  419. fBuffer[1] = (byte)b1;
  420. fBuffer[2] = (byte)b2;
  421. fOffset = 3;
  422. return out - offset;
  423. }
  424. invalidByte(3, 3, b2);
  425. }
  426. int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
  427. (b2 & 0x003F);
  428. ch[out++] = (char)c;
  429. count -= 2;
  430. continue;
  431. }
  432. // UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
  433. // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
  434. // [1101 11yy] [yyxx xxxx] (low surrogate)
  435. // * uuuuu = wwww + 1
  436. if ((b0 & 0xF8) == 0xF0) {
  437. int b1 = -1;
  438. if (++in < total) {
  439. b1 = fBuffer[in] & 0x00FF;
  440. }
  441. else {
  442. b1 = fInputStream.read();
  443. if (b1 == -1) {
  444. if (out > offset) {
  445. fBuffer[0] = (byte)b0;
  446. fOffset = 1;
  447. return out - offset;
  448. }
  449. expectedByte(2, 4);
  450. }
  451. count++;
  452. }
  453. if ((b1 & 0xC0) != 0x80
  454. || ((b1 & 0x30) == 0 && (b0 & 0x07) == 0)) {
  455. if (out > offset) {
  456. fBuffer[0] = (byte)b0;
  457. fBuffer[1] = (byte)b1;
  458. fOffset = 2;
  459. return out - offset;
  460. }
  461. invalidByte(2, 4, b1);
  462. }
  463. int b2 = -1;
  464. if (++in < total) {
  465. b2 = fBuffer[in] & 0x00FF;
  466. }
  467. else {
  468. b2 = fInputStream.read();
  469. if (b2 == -1) {
  470. if (out > offset) {
  471. fBuffer[0] = (byte)b0;
  472. fBuffer[1] = (byte)b1;
  473. fOffset = 2;
  474. return out - offset;
  475. }
  476. expectedByte(3, 4);
  477. }
  478. count++;
  479. }
  480. if ((b2 & 0xC0) != 0x80) {
  481. if (out > offset) {
  482. fBuffer[0] = (byte)b0;
  483. fBuffer[1] = (byte)b1;
  484. fBuffer[2] = (byte)b2;
  485. fOffset = 3;
  486. return out - offset;
  487. }
  488. invalidByte(3, 4, b2);
  489. }
  490. int b3 = -1;
  491. if (++in < total) {
  492. b3 = fBuffer[in] & 0x00FF;
  493. }
  494. else {
  495. b3 = fInputStream.read();
  496. if (b3 == -1) {
  497. if (out > offset) {
  498. fBuffer[0] = (byte)b0;
  499. fBuffer[1] = (byte)b1;
  500. fBuffer[2] = (byte)b2;
  501. fOffset = 3;
  502. return out - offset;
  503. }
  504. expectedByte(4, 4);
  505. }
  506. count++;
  507. }
  508. if ((b3 & 0xC0) != 0x80) {
  509. if (out > offset) {
  510. fBuffer[0] = (byte)b0;
  511. fBuffer[1] = (byte)b1;
  512. fBuffer[2] = (byte)b2;
  513. fBuffer[3] = (byte)b3;
  514. fOffset = 4;
  515. return out - offset;
  516. }
  517. invalidByte(4, 4, b2);
  518. }
  519. // decode bytes into surrogate characters
  520. int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
  521. if (uuuuu > 0x10) {
  522. invalidSurrogate(uuuuu);
  523. }
  524. int wwww = uuuuu - 1;
  525. int zzzz = b1 & 0x000F;
  526. int yyyyyy = b2 & 0x003F;
  527. int xxxxxx = b3 & 0x003F;
  528. int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
  529. int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
  530. // set characters
  531. ch[out++] = (char)hs;
  532. ch[out++] = (char)ls;
  533. count -= 2;
  534. continue;
  535. }
  536. // error
  537. if (out > offset) {
  538. fBuffer[0] = (byte)b0;
  539. fOffset = 1;
  540. return out - offset;
  541. }
  542. invalidByte(1, 1, b0);
  543. }
  544. // return number of characters converted
  545. if (DEBUG_READ) {
  546. System.out.println("read(char[],"+offset+','+length+"): count="+count);
  547. }
  548. return count;
  549. } // read(char[],int,int)
  550. /**
  551. * Skip characters. This method will block until some characters are
  552. * available, an I/O error occurs, or the end of the stream is reached.
  553. *
  554. * @param n The number of characters to skip
  555. *
  556. * @return The number of characters actually skipped
  557. *
  558. * @exception IOException If an I/O error occurs
  559. */
  560. public long skip(long n) throws IOException {
  561. long remaining = n;
  562. final char[] ch = new char[fBuffer.length];
  563. do {
  564. int length = ch.length < remaining ? ch.length : (int)remaining;
  565. int count = read(ch, 0, length);
  566. if (count > 0) {
  567. remaining -= count;
  568. }
  569. else {
  570. break;
  571. }
  572. } while (remaining > 0);
  573. long skipped = n - remaining;
  574. return skipped;
  575. } // skip(long):long
  576. /**
  577. * Tell whether this stream is ready to be read.
  578. *
  579. * @return True if the next read() is guaranteed not to block for input,
  580. * false otherwise. Note that returning false does not guarantee that the
  581. * next read will block.
  582. *
  583. * @exception IOException If an I/O error occurs
  584. */
  585. public boolean ready() throws IOException {
  586. return false;
  587. } // ready()
  588. /**
  589. * Tell whether this stream supports the mark() operation.
  590. */
  591. public boolean markSupported() {
  592. return false;
  593. } // markSupported()
  594. /**
  595. * Mark the present position in the stream. Subsequent calls to reset()
  596. * will attempt to reposition the stream to this point. Not all
  597. * character-input streams support the mark() operation.
  598. *
  599. * @param readAheadLimit Limit on the number of characters that may be
  600. * read while still preserving the mark. After
  601. * reading this many characters, attempting to
  602. * reset the stream may fail.
  603. *
  604. * @exception IOException If the stream does not support mark(),
  605. * or if some other I/O error occurs
  606. */
  607. public void mark(int readAheadLimit) throws IOException {
  608. throw new IOException(fFormatter.formatMessage(fLocale, "OperationNotSupported", new Object[]{"mark()", "UTF-8"}));
  609. } // mark(int)
  610. /**
  611. * Reset the stream. If the stream has been marked, then attempt to
  612. * reposition it at the mark. If the stream has not been marked, then
  613. * attempt to reset it in some way appropriate to the particular stream,
  614. * for example by repositioning it to its starting point. Not all
  615. * character-input streams support the reset() operation, and some support
  616. * reset() without supporting mark().
  617. *
  618. * @exception IOException If the stream has not been marked,
  619. * or if the mark has been invalidated,
  620. * or if the stream does not support reset(),
  621. * or if some other I/O error occurs
  622. */
  623. public void reset() throws IOException {
  624. fOffset = 0;
  625. fSurrogate = -1;
  626. } // reset()
  627. /**
  628. * Close the stream. Once a stream has been closed, further read(),
  629. * ready(), mark(), or reset() invocations will throw an IOException.
  630. * Closing a previously-closed stream, however, has no effect.
  631. *
  632. * @exception IOException If an I/O error occurs
  633. */
  634. public void close() throws IOException {
  635. fInputStream.close();
  636. } // close()
  637. //
  638. // Private methods
  639. //
  640. /** Throws an exception for expected byte. */
  641. private void expectedByte(int position, int count)
  642. throws MalformedByteSequenceException {
  643. throw new MalformedByteSequenceException(fFormatter,
  644. fLocale,
  645. XMLMessageFormatter.XML_DOMAIN,
  646. "ExpectedByte",
  647. new Object[] {Integer.toString(position), Integer.toString(count)});
  648. } // expectedByte(int,int)
  649. /** Throws an exception for invalid byte. */
  650. private void invalidByte(int position, int count, int c)
  651. throws MalformedByteSequenceException {
  652. throw new MalformedByteSequenceException(fFormatter,
  653. fLocale,
  654. XMLMessageFormatter.XML_DOMAIN,
  655. "InvalidByte",
  656. new Object [] {Integer.toString(position), Integer.toString(count)});
  657. } // invalidByte(int,int,int)
  658. /** Throws an exception for invalid surrogate bits. */
  659. private void invalidSurrogate(int uuuuu) throws MalformedByteSequenceException {
  660. throw new MalformedByteSequenceException(fFormatter,
  661. fLocale,
  662. XMLMessageFormatter.XML_DOMAIN,
  663. "InvalidHighSurrogate",
  664. new Object[] {Integer.toHexString(uuuuu)});
  665. } // invalidSurrogate(int)
  666. } // class UTF8Reader