1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: ToTextStream.java,v 1.15 2004/02/17 04:18:18 minchau Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.serializer;
  20. import java.io.IOException;
  21. import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  22. import com.sun.org.apache.xml.internal.res.XMLMessages;
  23. import org.xml.sax.Attributes;
  24. import org.xml.sax.SAXException;
  25. /**
  26. * @author Santiago Pericas-Geertsen
  27. */
  28. public class ToTextStream extends ToStream
  29. {
  30. /**
  31. * Default constructor.
  32. */
  33. public ToTextStream()
  34. {
  35. super();
  36. }
  37. /**
  38. * Receive notification of the beginning of a document.
  39. *
  40. * <p>The SAX parser will invoke this method only once, before any
  41. * other methods in this interface or in DTDHandler (except for
  42. * setDocumentLocator).</p>
  43. *
  44. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  45. * wrapping another exception.
  46. *
  47. * @throws org.xml.sax.SAXException
  48. */
  49. protected void startDocumentInternal() throws org.xml.sax.SAXException
  50. {
  51. super.startDocumentInternal();
  52. m_needToCallStartDocument = false;
  53. // No action for the moment.
  54. }
  55. /**
  56. * Receive notification of the end of a document.
  57. *
  58. * <p>The SAX parser will invoke this method only once, and it will
  59. * be the last method invoked during the parse. The parser shall
  60. * not invoke this method until it has either abandoned parsing
  61. * (because of an unrecoverable error) or reached the end of
  62. * input.</p>
  63. *
  64. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  65. * wrapping another exception.
  66. *
  67. * @throws org.xml.sax.SAXException
  68. */
  69. public void endDocument() throws org.xml.sax.SAXException
  70. {
  71. flushPending();
  72. flushWriter();
  73. if (m_tracer != null)
  74. super.fireEndDoc();
  75. }
  76. /**
  77. * Receive notification of the beginning of an element.
  78. *
  79. * <p>The Parser will invoke this method at the beginning of every
  80. * element in the XML document; there will be a corresponding
  81. * endElement() event for every startElement() event (even when the
  82. * element is empty). All of the element's content will be
  83. * reported, in order, before the corresponding endElement()
  84. * event.</p>
  85. *
  86. * <p>If the element name has a namespace prefix, the prefix will
  87. * still be attached. Note that the attribute list provided will
  88. * contain only attributes with explicit values (specified or
  89. * defaulted): #IMPLIED attributes will be omitted.</p>
  90. *
  91. *
  92. * @param namespaceURI The Namespace URI, or the empty string if the
  93. * element has no Namespace URI or if Namespace
  94. * processing is not being performed.
  95. * @param localName The local name (without prefix), or the
  96. * empty string if Namespace processing is not being
  97. * performed.
  98. * @param name The qualified name (with prefix), or the
  99. * empty string if qualified names are not available.
  100. * @param atts The attributes attached to the element, if any.
  101. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  102. * wrapping another exception.
  103. * @see #endElement
  104. * @see org.xml.sax.AttributeList
  105. *
  106. * @throws org.xml.sax.SAXException
  107. */
  108. public void startElement(
  109. String namespaceURI, String localName, String name, Attributes atts)
  110. throws org.xml.sax.SAXException
  111. {
  112. // time to fire off startElement event
  113. if (m_tracer != null) {
  114. super.fireStartElem(name);
  115. this.firePseudoAttributes();
  116. }
  117. return;
  118. }
  119. /**
  120. * Receive notification of the end of an element.
  121. *
  122. * <p>The SAX parser will invoke this method at the end of every
  123. * element in the XML document; there will be a corresponding
  124. * startElement() event for every endElement() event (even when the
  125. * element is empty).</p>
  126. *
  127. * <p>If the element name has a namespace prefix, the prefix will
  128. * still be attached to the name.</p>
  129. *
  130. *
  131. * @param namespaceURI The Namespace URI, or the empty string if the
  132. * element has no Namespace URI or if Namespace
  133. * processing is not being performed.
  134. * @param localName The local name (without prefix), or the
  135. * empty string if Namespace processing is not being
  136. * performed.
  137. * @param name The qualified name (with prefix), or the
  138. * empty string if qualified names are not available.
  139. * @param name The element type name
  140. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  141. * wrapping another exception.
  142. *
  143. * @throws org.xml.sax.SAXException
  144. */
  145. public void endElement(String namespaceURI, String localName, String name)
  146. throws org.xml.sax.SAXException
  147. {
  148. if (m_tracer != null)
  149. super.fireEndElem(name);
  150. }
  151. /**
  152. * Receive notification of character data.
  153. *
  154. * <p>The Parser will call this method to report each chunk of
  155. * character data. SAX parsers may return all contiguous character
  156. * data in a single chunk, or they may split it into several
  157. * chunks; however, all of the characters in any single event
  158. * must come from the same external entity, so that the Locator
  159. * provides useful information.</p>
  160. *
  161. * <p>The application must not attempt to read from the array
  162. * outside of the specified range.</p>
  163. *
  164. * <p>Note that some parsers will report whitespace using the
  165. * ignorableWhitespace() method rather than this one (validating
  166. * parsers must do so).</p>
  167. *
  168. * @param ch The characters from the XML document.
  169. * @param start The start position in the array.
  170. * @param length The number of characters to read from the array.
  171. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  172. * wrapping another exception.
  173. * @see #ignorableWhitespace
  174. * @see org.xml.sax.Locator
  175. */
  176. public void characters(char ch[], int start, int length)
  177. throws org.xml.sax.SAXException
  178. {
  179. // this.accum(ch, start, length);
  180. flushPending();
  181. try
  182. {
  183. writeNormalizedChars(ch, start, length, false, m_lineSepUse);
  184. if (m_tracer != null)
  185. super.fireCharEvent(ch, start, length);
  186. }
  187. catch(IOException ioe)
  188. {
  189. throw new SAXException(ioe);
  190. }
  191. }
  192. /**
  193. * If available, when the disable-output-escaping attribute is used,
  194. * output raw text without escaping.
  195. *
  196. * @param ch The characters from the XML document.
  197. * @param start The start position in the array.
  198. * @param length The number of characters to read from the array.
  199. *
  200. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  201. * wrapping another exception.
  202. */
  203. public void charactersRaw(char ch[], int start, int length)
  204. throws org.xml.sax.SAXException
  205. {
  206. try
  207. {
  208. writeNormalizedChars(ch, start, length, false, m_lineSepUse);
  209. }
  210. catch(IOException ioe)
  211. {
  212. throw new SAXException(ioe);
  213. }
  214. }
  215. /**
  216. * Normalize the characters, but don't escape. Different from
  217. * SerializerToXML#writeNormalizedChars because it does not attempt to do
  218. * XML escaping at all.
  219. *
  220. * @param ch The characters from the XML document.
  221. * @param start The start position in the array.
  222. * @param length The number of characters to read from the array.
  223. * @param isCData true if a CDATA block should be built around the characters.
  224. * @param useLineSep true if the operating systems
  225. * end-of-line separator should be output rather than a new-line character.
  226. *
  227. * @throws IOException
  228. * @throws org.xml.sax.SAXException
  229. */
  230. void writeNormalizedChars(
  231. final char ch[],
  232. final int start,
  233. final int length,
  234. final boolean isCData,
  235. final boolean useLineSep)
  236. throws IOException, org.xml.sax.SAXException
  237. {
  238. final java.io.Writer writer = m_writer;
  239. final int end = start + length;
  240. /* copy a few "constants" before the loop for performance */
  241. final char S_LINEFEED = CharInfo.S_LINEFEED;
  242. final int M_MAXCHARACTER = this.m_maxCharacter;
  243. if (isCData)
  244. {
  245. // This for() loop always increments i by one at the end
  246. // of the loop. Additional increments of i adjust for when
  247. // two input characters are processed.
  248. for (int i = start; i < end; i++)
  249. {
  250. final char c = ch[i];
  251. if (S_LINEFEED == c && useLineSep)
  252. {
  253. writer.write(m_lineSep, 0, m_lineSepLen);
  254. }
  255. else if (c > M_MAXCHARACTER)
  256. {
  257. if (i != 0)
  258. closeCDATA();
  259. // This needs to go into a function...
  260. if (isUTF16Surrogate(c))
  261. {
  262. writeUTF16Surrogate(c, ch, i, end);
  263. i++; // two input characters processed
  264. }
  265. else
  266. {
  267. writer.write(c);
  268. }
  269. if ((i != 0) && (i < (end - 1)))
  270. {
  271. writer.write(CDATA_DELIMITER_OPEN);
  272. m_cdataTagOpen = true;
  273. }
  274. }
  275. else if (
  276. ((i < (end - 2))
  277. && (']' == c)
  278. && (']' == ch[i + 1])
  279. && ('>' == ch[i + 2])))
  280. {
  281. writer.write(CDATA_CONTINUE);
  282. i += 2;
  283. }
  284. else
  285. {
  286. if (c <= M_MAXCHARACTER)
  287. {
  288. writer.write(c);
  289. }
  290. else if (isUTF16Surrogate(c))
  291. {
  292. writeUTF16Surrogate(c, ch, i, end);
  293. i++; // two input characters processed
  294. }
  295. else
  296. {
  297. /* The character is greater than the allowed
  298. * maximum value and it is not part of a UTF-16
  299. * pair that would be put out as a character reference.
  300. */
  301. String encoding = getEncoding();
  302. if (encoding != null)
  303. {
  304. /* The output encoding is known,
  305. * so somthing is wrong.
  306. */
  307. String integralValue = Integer.toString(c);
  308. throw new SAXException(XMLMessages.createXMLMessage(
  309. XMLErrorResources.ER_ILLEGAL_CHARACTER,
  310. new Object[]{ integralValue, encoding}));
  311. }
  312. else
  313. {
  314. /* The output encoding is not known,
  315. * so just write it out as-is.
  316. */
  317. writer.write(c);
  318. }
  319. }
  320. }
  321. }
  322. }
  323. else
  324. {
  325. // not in CDATA section
  326. for (int i = start; i < end; i++)
  327. {
  328. final char c = ch[i];
  329. if (S_LINEFEED == c && useLineSep)
  330. {
  331. writer.write(m_lineSep, 0, m_lineSepLen);
  332. }
  333. else if (c <= M_MAXCHARACTER)
  334. {
  335. writer.write(c);
  336. }
  337. else if (isUTF16Surrogate(c))
  338. {
  339. writeUTF16Surrogate(c, ch, i, end);
  340. i++; // two input characters processed
  341. }
  342. else
  343. {
  344. /* The character is greater than the allowed
  345. * maximum value and it is not part of a UTF-16
  346. * pair that would be put out as a character reference.
  347. */
  348. String encoding = getEncoding();
  349. if (encoding != null)
  350. {
  351. /* The output encoding is known,
  352. * so somthing is wrong.
  353. */
  354. String integralValue = Integer.toString(c);
  355. throw new SAXException(XMLMessages.createXMLMessage(
  356. XMLErrorResources.ER_ILLEGAL_CHARACTER,
  357. new Object[]{ integralValue, encoding}));
  358. }
  359. else
  360. {
  361. /* The output encoding is not known,
  362. * so just write it out as-is.
  363. */
  364. writer.write(c);
  365. }
  366. }
  367. }
  368. }
  369. }
  370. /**
  371. * Receive notification of cdata.
  372. *
  373. * <p>The Parser will call this method to report each chunk of
  374. * character data. SAX parsers may return all contiguous character
  375. * data in a single chunk, or they may split it into several
  376. * chunks; however, all of the characters in any single event
  377. * must come from the same external entity, so that the Locator
  378. * provides useful information.</p>
  379. *
  380. * <p>The application must not attempt to read from the array
  381. * outside of the specified range.</p>
  382. *
  383. * <p>Note that some parsers will report whitespace using the
  384. * ignorableWhitespace() method rather than this one (validating
  385. * parsers must do so).</p>
  386. *
  387. * @param ch The characters from the XML document.
  388. * @param start The start position in the array.
  389. * @param length The number of characters to read from the array.
  390. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  391. * wrapping another exception.
  392. * @see #ignorableWhitespace
  393. * @see org.xml.sax.Locator
  394. */
  395. public void cdata(char ch[], int start, int length)
  396. throws org.xml.sax.SAXException
  397. {
  398. try
  399. {
  400. writeNormalizedChars(ch, start, length, false, m_lineSepUse);
  401. if (m_tracer != null)
  402. super.fireCDATAEvent(ch, start, length);
  403. }
  404. catch(IOException ioe)
  405. {
  406. throw new SAXException(ioe);
  407. }
  408. }
  409. /**
  410. * Receive notification of ignorable whitespace in element content.
  411. *
  412. * <p>Validating Parsers must use this method to report each chunk
  413. * of ignorable whitespace (see the W3C XML 1.0 recommendation,
  414. * section 2.10): non-validating parsers may also use this method
  415. * if they are capable of parsing and using content models.</p>
  416. *
  417. * <p>SAX parsers may return all contiguous whitespace in a single
  418. * chunk, or they may split it into several chunks; however, all of
  419. * the characters in any single event must come from the same
  420. * external entity, so that the Locator provides useful
  421. * information.</p>
  422. *
  423. * <p>The application must not attempt to read from the array
  424. * outside of the specified range.</p>
  425. *
  426. * @param ch The characters from the XML document.
  427. * @param start The start position in the array.
  428. * @param length The number of characters to read from the array.
  429. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  430. * wrapping another exception.
  431. * @see #characters
  432. *
  433. * @throws org.xml.sax.SAXException
  434. */
  435. public void ignorableWhitespace(char ch[], int start, int length)
  436. throws org.xml.sax.SAXException
  437. {
  438. try
  439. {
  440. writeNormalizedChars(ch, start, length, false, m_lineSepUse);
  441. }
  442. catch(IOException ioe)
  443. {
  444. throw new SAXException(ioe);
  445. }
  446. }
  447. /**
  448. * Receive notification of a processing instruction.
  449. *
  450. * <p>The Parser will invoke this method once for each processing
  451. * instruction found: note that processing instructions may occur
  452. * before or after the main document element.</p>
  453. *
  454. * <p>A SAX parser should never report an XML declaration (XML 1.0,
  455. * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
  456. * using this method.</p>
  457. *
  458. * @param target The processing instruction target.
  459. * @param data The processing instruction data, or null if
  460. * none was supplied.
  461. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  462. * wrapping another exception.
  463. *
  464. * @throws org.xml.sax.SAXException
  465. */
  466. public void processingInstruction(String target, String data)
  467. throws org.xml.sax.SAXException
  468. {
  469. // flush anything pending first
  470. flushPending();
  471. if (m_tracer != null)
  472. super.fireEscapingEvent(target, data);
  473. }
  474. /**
  475. * Called when a Comment is to be constructed.
  476. * Note that Xalan will normally invoke the other version of this method.
  477. * %REVIEW% In fact, is this one ever needed, or was it a mistake?
  478. *
  479. * @param data The comment data.
  480. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  481. * wrapping another exception.
  482. */
  483. public void comment(String data) throws org.xml.sax.SAXException
  484. {
  485. final int length = data.length();
  486. if (length > m_charsBuff.length)
  487. {
  488. m_charsBuff = new char[length*2 + 1];
  489. }
  490. data.getChars(0, length, m_charsBuff, 0);
  491. comment(m_charsBuff, 0, length);
  492. }
  493. /**
  494. * Report an XML comment anywhere in the document.
  495. *
  496. * This callback will be used for comments inside or outside the
  497. * document element, including comments in the external DTD
  498. * subset (if read).
  499. *
  500. * @param ch An array holding the characters in the comment.
  501. * @param start The starting position in the array.
  502. * @param length The number of characters to use from the array.
  503. * @throws org.xml.sax.SAXException The application may raise an exception.
  504. */
  505. public void comment(char ch[], int start, int length)
  506. throws org.xml.sax.SAXException
  507. {
  508. flushPending();
  509. if (m_tracer != null)
  510. super.fireCommentEvent(ch, start, length);
  511. }
  512. /**
  513. * Receive notivication of a entityReference.
  514. *
  515. * @param name non-null reference to the name of the entity.
  516. *
  517. * @throws org.xml.sax.SAXException
  518. */
  519. public void entityReference(String name) throws org.xml.sax.SAXException
  520. {
  521. if (m_tracer != null)
  522. super.fireEntityReference(name);
  523. }
  524. /**
  525. * @see com.sun.org.apache.xml.internal.serializer.ExtendedContentHandler#addAttribute(String, String, String, String, String)
  526. */
  527. public void addAttribute(
  528. String uri,
  529. String localName,
  530. String rawName,
  531. String type,
  532. String value)
  533. {
  534. // do nothing, just forget all about the attribute
  535. }
  536. /**
  537. * @see org.xml.sax.ext.LexicalHandler#endCDATA()
  538. */
  539. public void endCDATA() throws SAXException
  540. {
  541. // do nothing
  542. }
  543. /**
  544. * @see com.sun.org.apache.xml.internal.serializer.ExtendedContentHandler#endElement(String)
  545. */
  546. public void endElement(String elemName) throws SAXException
  547. {
  548. if (m_tracer != null)
  549. super.fireEndElem(elemName);
  550. }
  551. /**
  552. * From XSLTC
  553. */
  554. public void startElement(
  555. String elementNamespaceURI,
  556. String elementLocalName,
  557. String elementName)
  558. throws SAXException
  559. {
  560. if (m_needToCallStartDocument)
  561. startDocumentInternal();
  562. // time to fire off startlement event.
  563. if (m_tracer != null) {
  564. super.fireStartElem(elementName);
  565. this.firePseudoAttributes();
  566. }
  567. return;
  568. }
  569. /**
  570. * From XSLTC
  571. */
  572. public void characters(String characters)
  573. throws SAXException
  574. {
  575. final int length = characters.length();
  576. if (length > m_charsBuff.length)
  577. {
  578. m_charsBuff = new char[length*2 + 1];
  579. }
  580. characters.getChars(0, length, m_charsBuff, 0);
  581. characters(m_charsBuff, 0, length);
  582. }
  583. /**
  584. * From XSLTC
  585. */
  586. public void addAttribute(String name, String value)
  587. {
  588. // do nothing, forget about the attribute
  589. }
  590. /**
  591. * Add a unique attribute
  592. */
  593. public void addUniqueAttribute(String qName, String value, int flags)
  594. throws SAXException
  595. {
  596. // do nothing, forget about the attribute
  597. }
  598. public boolean startPrefixMapping(
  599. String prefix,
  600. String uri,
  601. boolean shouldFlush)
  602. throws SAXException
  603. {
  604. // no namespace support for HTML
  605. return false;
  606. }
  607. public void startPrefixMapping(String prefix, String uri)
  608. throws org.xml.sax.SAXException
  609. {
  610. // no namespace support for HTML
  611. }
  612. public void namespaceAfterStartElement(
  613. final String prefix,
  614. final String uri)
  615. throws SAXException
  616. {
  617. // no namespace support for HTML
  618. }
  619. public void flushPending() throws org.xml.sax.SAXException
  620. {
  621. if (m_needToCallStartDocument)
  622. {
  623. startDocumentInternal();
  624. m_needToCallStartDocument = false;
  625. }
  626. }
  627. }