1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xalan" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, Lotus
  53. * Development Corporation., http://www.lotus.com. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xalan.serialize;
  58. import org.xml.sax.*;
  59. import java.util.*;
  60. import java.io.*;
  61. import org.apache.xpath.res.XPATHErrorResources;
  62. import org.apache.xalan.res.XSLMessages;
  63. import org.apache.xalan.templates.OutputProperties;
  64. import javax.xml.transform.OutputKeys;
  65. /**
  66. * <meta name="usage" content="general"/>
  67. * This class takes SAX events (in addition to some extra events
  68. * that SAX doesn't handle yet) and produces simple text only.
  69. */
  70. public class SerializerToText extends SerializerToXML
  71. {
  72. /**
  73. * Default constructor.
  74. */
  75. public SerializerToText()
  76. {
  77. super();
  78. }
  79. /**
  80. * Receive an object for locating the origin of SAX document events.
  81. *
  82. * <p>SAX parsers are strongly encouraged (though not absolutely
  83. * required) to supply a locator: if it does so, it must supply
  84. * the locator to the application by invoking this method before
  85. * invoking any of the other methods in the DocumentHandler
  86. * interface.</p>
  87. *
  88. * <p>The locator allows the application to determine the end
  89. * position of any document-related event, even if the parser is
  90. * not reporting an error. Typically, the application will
  91. * use this information for reporting its own errors (such as
  92. * character content that does not match an application's
  93. * business rules). The information returned by the locator
  94. * is probably not sufficient for use with a search engine.</p>
  95. *
  96. * <p>Note that the locator will return correct information only
  97. * during the invocation of the events in this interface. The
  98. * application should not attempt to use it at any other time.</p>
  99. *
  100. * @param locator An object that can return the location of
  101. * any SAX document event.
  102. * @see org.xml.sax.Locator
  103. */
  104. public void setDocumentLocator(Locator locator)
  105. {
  106. // No action for the moment.
  107. }
  108. /**
  109. * Receive notification of the beginning of a document.
  110. *
  111. * <p>The SAX parser will invoke this method only once, before any
  112. * other methods in this interface or in DTDHandler (except for
  113. * setDocumentLocator).</p>
  114. *
  115. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  116. * wrapping another exception.
  117. *
  118. * @throws org.xml.sax.SAXException
  119. */
  120. public void startDocument() throws org.xml.sax.SAXException
  121. {
  122. // No action for the moment.
  123. }
  124. /**
  125. * Receive notification of the end of a document.
  126. *
  127. * <p>The SAX parser will invoke this method only once, and it will
  128. * be the last method invoked during the parse. The parser shall
  129. * not invoke this method until it has either abandoned parsing
  130. * (because of an unrecoverable error) or reached the end of
  131. * input.</p>
  132. *
  133. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  134. * wrapping another exception.
  135. *
  136. * @throws org.xml.sax.SAXException
  137. */
  138. public void endDocument() throws org.xml.sax.SAXException
  139. {
  140. flushWriter();
  141. }
  142. /**
  143. * Receive notification of the beginning of an element.
  144. *
  145. * <p>The Parser will invoke this method at the beginning of every
  146. * element in the XML document; there will be a corresponding
  147. * endElement() event for every startElement() event (even when the
  148. * element is empty). All of the element's content will be
  149. * reported, in order, before the corresponding endElement()
  150. * event.</p>
  151. *
  152. * <p>If the element name has a namespace prefix, the prefix will
  153. * still be attached. Note that the attribute list provided will
  154. * contain only attributes with explicit values (specified or
  155. * defaulted): #IMPLIED attributes will be omitted.</p>
  156. *
  157. *
  158. * @param namespaceURI The Namespace URI, or the empty string if the
  159. * element has no Namespace URI or if Namespace
  160. * processing is not being performed.
  161. * @param localName The local name (without prefix), or the
  162. * empty string if Namespace processing is not being
  163. * performed.
  164. * @param name The qualified name (with prefix), or the
  165. * empty string if qualified names are not available.
  166. * @param atts The attributes attached to the element, if any.
  167. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  168. * wrapping another exception.
  169. * @see #endElement
  170. * @see org.xml.sax.AttributeList
  171. *
  172. * @throws org.xml.sax.SAXException
  173. */
  174. public void startElement(
  175. String namespaceURI, String localName, String name, Attributes atts)
  176. throws org.xml.sax.SAXException
  177. {
  178. // No action for the moment.
  179. }
  180. /**
  181. * Receive notification of the end of an element.
  182. *
  183. * <p>The SAX parser will invoke this method at the end of every
  184. * element in the XML document; there will be a corresponding
  185. * startElement() event for every endElement() event (even when the
  186. * element is empty).</p>
  187. *
  188. * <p>If the element name has a namespace prefix, the prefix will
  189. * still be attached to the name.</p>
  190. *
  191. *
  192. * @param namespaceURI The Namespace URI, or the empty string if the
  193. * element has no Namespace URI or if Namespace
  194. * processing is not being performed.
  195. * @param localName The local name (without prefix), or the
  196. * empty string if Namespace processing is not being
  197. * performed.
  198. * @param name The qualified name (with prefix), or the
  199. * empty string if qualified names are not available.
  200. * @param name The element type name
  201. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  202. * wrapping another exception.
  203. *
  204. * @throws org.xml.sax.SAXException
  205. */
  206. public void endElement(String namespaceURI, String localName, String name)
  207. throws org.xml.sax.SAXException
  208. {
  209. // No action for the moment.
  210. }
  211. /**
  212. * Receive notification of character data.
  213. *
  214. * <p>The Parser will call this method to report each chunk of
  215. * character data. SAX parsers may return all contiguous character
  216. * data in a single chunk, or they may split it into several
  217. * chunks; however, all of the characters in any single event
  218. * must come from the same external entity, so that the Locator
  219. * provides useful information.</p>
  220. *
  221. * <p>The application must not attempt to read from the array
  222. * outside of the specified range.</p>
  223. *
  224. * <p>Note that some parsers will report whitespace using the
  225. * ignorableWhitespace() method rather than this one (validating
  226. * parsers must do so).</p>
  227. *
  228. * @param ch The characters from the XML document.
  229. * @param start The start position in the array.
  230. * @param length The number of characters to read from the array.
  231. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  232. * wrapping another exception.
  233. * @see #ignorableWhitespace
  234. * @see org.xml.sax.Locator
  235. */
  236. public void characters(char ch[], int start, int length)
  237. throws org.xml.sax.SAXException
  238. {
  239. // this.accum(ch, start, length);
  240. try
  241. {
  242. writeNormalizedChars(ch, start, length, false);
  243. }
  244. catch(IOException ioe)
  245. {
  246. throw new SAXException(ioe);
  247. }
  248. this.flushWriter();
  249. // flushWriter();
  250. }
  251. /**
  252. * If available, when the disable-output-escaping attribute is used,
  253. * output raw text without escaping.
  254. *
  255. * @param ch The characters from the XML document.
  256. * @param start The start position in the array.
  257. * @param length The number of characters to read from the array.
  258. *
  259. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  260. * wrapping another exception.
  261. */
  262. public void charactersRaw(char ch[], int start, int length)
  263. throws org.xml.sax.SAXException
  264. {
  265. // accum(ch, start, length);
  266. try
  267. {
  268. writeNormalizedChars(ch, start, length, false);
  269. }
  270. catch(IOException ioe)
  271. {
  272. throw new SAXException(ioe);
  273. }
  274. flushWriter();
  275. // flushWriter();
  276. }
  277. /**
  278. * Once a surrogate has been detected, write the pair as a single
  279. * character reference.
  280. *
  281. * @param c the first part of the surrogate.
  282. * @param ch Character array.
  283. * @param i position Where the surrogate was detected.
  284. * @param end The end index of the significant characters.
  285. * @return i+1.
  286. * @throws IOException
  287. * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
  288. */
  289. protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
  290. throws IOException, org.xml.sax.SAXException
  291. {
  292. // UTF-16 surrogate
  293. int surrogateValue = getURF16SurrogateValue(c, ch, i, end);
  294. i++;
  295. // m_writer.write('x');
  296. m_writer.write(surrogateValue);
  297. return i;
  298. }
  299. /**
  300. * Normalize the characters, but don't escape. Different from
  301. * SerializerToXML#writeNormalizedChars because it does not attempt to do
  302. * XML escaping at all.
  303. *
  304. * @param ch The characters from the XML document.
  305. * @param start The start position in the array.
  306. * @param length The number of characters to read from the array.
  307. * @param isCData true if a CDATA block should be built around the characters.
  308. *
  309. * @throws IOException
  310. * @throws org.xml.sax.SAXException
  311. */
  312. void writeNormalizedChars(char ch[], int start, int length, boolean isCData)
  313. throws IOException, org.xml.sax.SAXException
  314. {
  315. int end = start + length;
  316. for (int i = start; i < end; i++)
  317. {
  318. char c = ch[i];
  319. if (CharInfo.S_LINEFEED == c)
  320. {
  321. m_writer.write(m_lineSep, 0, m_lineSepLen);
  322. }
  323. else if (isCData && (c > m_maxCharacter))
  324. {
  325. if (i != 0)
  326. m_writer.write("]]>");
  327. // This needs to go into a function...
  328. if (isUTF16Surrogate(c))
  329. {
  330. i = writeUTF16Surrogate(c, ch, i, end);
  331. }
  332. else
  333. {
  334. m_writer.write(c);
  335. }
  336. if ((i != 0) && (i < (end - 1)))
  337. m_writer.write("<![CDATA[");
  338. }
  339. else if (isCData
  340. && ((i < (end - 2)) && (']' == c) && (']' == ch[i + 1])
  341. && ('>' == ch[i + 2])))
  342. {
  343. m_writer.write("]]]]><![CDATA[>");
  344. i += 2;
  345. }
  346. else
  347. {
  348. if (c <= m_maxCharacter)
  349. {
  350. m_writer.write(c);
  351. }
  352. else if (isUTF16Surrogate(c))
  353. {
  354. i = writeUTF16Surrogate(c, ch, i, end);
  355. }
  356. else
  357. {
  358. m_writer.write(c);
  359. }
  360. }
  361. }
  362. }
  363. /**
  364. * Receive notification of cdata.
  365. *
  366. * <p>The Parser will call this method to report each chunk of
  367. * character data. SAX parsers may return all contiguous character
  368. * data in a single chunk, or they may split it into several
  369. * chunks; however, all of the characters in any single event
  370. * must come from the same external entity, so that the Locator
  371. * provides useful information.</p>
  372. *
  373. * <p>The application must not attempt to read from the array
  374. * outside of the specified range.</p>
  375. *
  376. * <p>Note that some parsers will report whitespace using the
  377. * ignorableWhitespace() method rather than this one (validating
  378. * parsers must do so).</p>
  379. *
  380. * @param ch The characters from the XML document.
  381. * @param start The start position in the array.
  382. * @param length The number of characters to read from the array.
  383. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  384. * wrapping another exception.
  385. * @see #ignorableWhitespace
  386. * @see org.xml.sax.Locator
  387. */
  388. public void cdata(char ch[], int start, int length)
  389. throws org.xml.sax.SAXException
  390. {
  391. // accum(ch, start, length);
  392. try
  393. {
  394. writeNormalizedChars(ch, start, length, false);
  395. }
  396. catch(IOException ioe)
  397. {
  398. throw new SAXException(ioe);
  399. }
  400. flushWriter();
  401. // flushWriter();
  402. }
  403. /**
  404. * Receive notification of ignorable whitespace in element content.
  405. *
  406. * <p>Validating Parsers must use this method to report each chunk
  407. * of ignorable whitespace (see the W3C XML 1.0 recommendation,
  408. * section 2.10): non-validating parsers may also use this method
  409. * if they are capable of parsing and using content models.</p>
  410. *
  411. * <p>SAX parsers may return all contiguous whitespace in a single
  412. * chunk, or they may split it into several chunks; however, all of
  413. * the characters in any single event must come from the same
  414. * external entity, so that the Locator provides useful
  415. * information.</p>
  416. *
  417. * <p>The application must not attempt to read from the array
  418. * outside of the specified range.</p>
  419. *
  420. * @param ch The characters from the XML document.
  421. * @param start The start position in the array.
  422. * @param length The number of characters to read from the array.
  423. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  424. * wrapping another exception.
  425. * @see #characters
  426. *
  427. * @throws org.xml.sax.SAXException
  428. */
  429. public void ignorableWhitespace(char ch[], int start, int length)
  430. throws org.xml.sax.SAXException
  431. {
  432. try
  433. {
  434. writeNormalizedChars(ch, start, length, false);
  435. }
  436. catch(IOException ioe)
  437. {
  438. throw new SAXException(ioe);
  439. }
  440. flushWriter();
  441. }
  442. /**
  443. * Receive notification of a processing instruction.
  444. *
  445. * <p>The Parser will invoke this method once for each processing
  446. * instruction found: note that processing instructions may occur
  447. * before or after the main document element.</p>
  448. *
  449. * <p>A SAX parser should never report an XML declaration (XML 1.0,
  450. * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
  451. * using this method.</p>
  452. *
  453. * @param target The processing instruction target.
  454. * @param data The processing instruction data, or null if
  455. * none was supplied.
  456. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  457. * wrapping another exception.
  458. *
  459. * @throws org.xml.sax.SAXException
  460. */
  461. public void processingInstruction(String target, String data)
  462. throws org.xml.sax.SAXException
  463. {
  464. // No action for the moment.
  465. }
  466. /**
  467. * Called when a Comment is to be constructed.
  468. * Note that Xalan will normally invoke the other version of this method.
  469. * %REVIEW% In fact, is this one ever needed, or was it a mistake?
  470. *
  471. * @param data The comment data.
  472. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  473. * wrapping another exception.
  474. */
  475. public void comment(String data) throws org.xml.sax.SAXException
  476. {
  477. // No action for the moment.
  478. }
  479. /**
  480. * Report an XML comment anywhere in the document.
  481. *
  482. * This callback will be used for comments inside or outside the
  483. * document element, including comments in the external DTD
  484. * subset (if read).
  485. *
  486. * @param ch An array holding the characters in the comment.
  487. * @param start The starting position in the array.
  488. * @param length The number of characters to use from the array.
  489. * @throws org.xml.sax.SAXException The application may raise an exception.
  490. */
  491. public void comment(char ch[], int start, int length)
  492. throws org.xml.sax.SAXException
  493. {
  494. // No action for the moment.
  495. }
  496. /**
  497. * Receive notivication of a entityReference.
  498. *
  499. * @param name non-null reference to the name of the entity.
  500. *
  501. * @throws org.xml.sax.SAXException
  502. */
  503. public void entityReference(String name) throws org.xml.sax.SAXException
  504. {
  505. // No action for the moment.
  506. }
  507. }