1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: ToHTMLStream.java,v 1.28 2004/02/18 22:57:44 minchau Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.serializer;
  20. import java.io.IOException;
  21. import java.io.OutputStream;
  22. import java.io.UnsupportedEncodingException;
  23. import java.util.Properties;
  24. import javax.xml.transform.Result;
  25. import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  26. import com.sun.org.apache.xml.internal.res.XMLMessages;
  27. import com.sun.org.apache.xml.internal.utils.BoolStack;
  28. import com.sun.org.apache.xml.internal.utils.Trie;
  29. import org.xml.sax.Attributes;
  30. import org.xml.sax.SAXException;
  31. /**
  32. * @author Santiago Pericas-Geertsen
  33. * @author G. Todd Miller
  34. */
  35. public class ToHTMLStream extends ToStream
  36. {
  /** This flag is set while receiving events from the DTD. */
  protected boolean m_inDTD = false;
  /**
   * Indentation state shared by {@code startElement} and {@code endElement}.
   * Despite the name, both methods assign it the <em>negation</em> of the
   * BLOCK flag of the element they just handled, so it is true after a
   * non-block element and false after a block element.
   * (seems like this needs to be a stack. -sb).
   */
  private boolean m_inBlockElem = false;
  /**
   * Map that tells which XML characters should have special treatment, and it
   * provides character to entity name lookup. Obtained from
   * {@code CharInfo.getCharInfo} for the HTML entities resource and the HTML
   * output method; the constructor installs it as {@code m_charInfo}.
   */
  protected static final CharInfo m_htmlcharInfo =
    // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
    CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
  /**
   * A digital search trie for fast, case insensitive lookup of ElemDesc objects.
   * Populated once by the static initializer below and queried by
   * {@code getElemDesc(String)}.
   */
  static final Trie m_elementFlags = new Trie();
  51. static {
  52. // HTML 4.0 loose DTD
  53. m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  54. m_elementFlags.put(
  55. "FRAME",
  56. new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  57. m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  58. m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  59. m_elementFlags.put(
  60. "ISINDEX",
  61. new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  62. m_elementFlags.put(
  63. "APPLET",
  64. new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  65. m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  66. m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  67. m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  68. // HTML 4.0 strict DTD
  69. m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  70. m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  71. m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  72. m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  73. m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  74. m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  75. m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
  76. m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
  77. m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
  78. m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
  79. m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
  80. m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
  81. m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
  82. m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
  83. m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
  84. m_elementFlags.put(
  85. "SUP",
  86. new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
  87. m_elementFlags.put(
  88. "SUB",
  89. new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
  90. m_elementFlags.put(
  91. "SPAN",
  92. new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
  93. m_elementFlags.put(
  94. "BDO",
  95. new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
  96. m_elementFlags.put(
  97. "BR",
  98. new ElemDesc(
  99. 0
  100. | ElemDesc.SPECIAL
  101. | ElemDesc.ASPECIAL
  102. | ElemDesc.EMPTY
  103. | ElemDesc.BLOCK));
  104. m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
  105. m_elementFlags.put(
  106. "ADDRESS",
  107. new ElemDesc(
  108. 0
  109. | ElemDesc.BLOCK
  110. | ElemDesc.BLOCKFORM
  111. | ElemDesc.BLOCKFORMFIELDSET));
  112. m_elementFlags.put(
  113. "DIV",
  114. new ElemDesc(
  115. 0
  116. | ElemDesc.BLOCK
  117. | ElemDesc.BLOCKFORM
  118. | ElemDesc.BLOCKFORMFIELDSET));
  119. m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
  120. m_elementFlags.put(
  121. "MAP",
  122. new ElemDesc(
  123. 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
  124. m_elementFlags.put(
  125. "AREA",
  126. new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  127. m_elementFlags.put(
  128. "LINK",
  129. new ElemDesc(
  130. 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
  131. m_elementFlags.put(
  132. "IMG",
  133. new ElemDesc(
  134. 0
  135. | ElemDesc.SPECIAL
  136. | ElemDesc.ASPECIAL
  137. | ElemDesc.EMPTY
  138. | ElemDesc.WHITESPACESENSITIVE));
  139. m_elementFlags.put(
  140. "OBJECT",
  141. new ElemDesc(
  142. 0
  143. | ElemDesc.SPECIAL
  144. | ElemDesc.ASPECIAL
  145. | ElemDesc.HEADMISC
  146. | ElemDesc.WHITESPACESENSITIVE));
  147. m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
  148. m_elementFlags.put(
  149. "HR",
  150. new ElemDesc(
  151. 0
  152. | ElemDesc.BLOCK
  153. | ElemDesc.BLOCKFORM
  154. | ElemDesc.BLOCKFORMFIELDSET
  155. | ElemDesc.EMPTY));
  156. m_elementFlags.put(
  157. "P",
  158. new ElemDesc(
  159. 0
  160. | ElemDesc.BLOCK
  161. | ElemDesc.BLOCKFORM
  162. | ElemDesc.BLOCKFORMFIELDSET));
  163. m_elementFlags.put(
  164. "H1",
  165. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  166. m_elementFlags.put(
  167. "H2",
  168. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  169. m_elementFlags.put(
  170. "H3",
  171. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  172. m_elementFlags.put(
  173. "H4",
  174. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  175. m_elementFlags.put(
  176. "H5",
  177. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  178. m_elementFlags.put(
  179. "H6",
  180. new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
  181. m_elementFlags.put(
  182. "PRE",
  183. new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
  184. m_elementFlags.put(
  185. "Q",
  186. new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
  187. m_elementFlags.put(
  188. "BLOCKQUOTE",
  189. new ElemDesc(
  190. 0
  191. | ElemDesc.BLOCK
  192. | ElemDesc.BLOCKFORM
  193. | ElemDesc.BLOCKFORMFIELDSET));
  194. m_elementFlags.put("INS", new ElemDesc(0));
  195. m_elementFlags.put("DEL", new ElemDesc(0));
  196. m_elementFlags.put(
  197. "DL",
  198. new ElemDesc(
  199. 0
  200. | ElemDesc.BLOCK
  201. | ElemDesc.BLOCKFORM
  202. | ElemDesc.BLOCKFORMFIELDSET));
  203. m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
  204. m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
  205. m_elementFlags.put(
  206. "OL",
  207. new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
  208. m_elementFlags.put(
  209. "UL",
  210. new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
  211. m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
  212. m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
  213. m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
  214. m_elementFlags.put(
  215. "INPUT",
  216. new ElemDesc(
  217. 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
  218. m_elementFlags.put(
  219. "SELECT",
  220. new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
  221. m_elementFlags.put("OPTGROUP", new ElemDesc(0));
  222. m_elementFlags.put("OPTION", new ElemDesc(0));
  223. m_elementFlags.put(
  224. "TEXTAREA",
  225. new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
  226. m_elementFlags.put(
  227. "FIELDSET",
  228. new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
  229. m_elementFlags.put("LEGEND", new ElemDesc(0));
  230. m_elementFlags.put(
  231. "BUTTON",
  232. new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
  233. m_elementFlags.put(
  234. "TABLE",
  235. new ElemDesc(
  236. 0
  237. | ElemDesc.BLOCK
  238. | ElemDesc.BLOCKFORM
  239. | ElemDesc.BLOCKFORMFIELDSET));
  240. m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
  241. m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
  242. m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
  243. m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
  244. m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
  245. m_elementFlags.put(
  246. "COL",
  247. new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  248. m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
  249. m_elementFlags.put("TH", new ElemDesc(0));
  250. m_elementFlags.put("TD", new ElemDesc(0));
  251. m_elementFlags.put(
  252. "HEAD",
  253. new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
  254. m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
  255. m_elementFlags.put(
  256. "BASE",
  257. new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  258. m_elementFlags.put(
  259. "META",
  260. new ElemDesc(
  261. 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
  262. m_elementFlags.put(
  263. "STYLE",
  264. new ElemDesc(
  265. 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
  266. m_elementFlags.put(
  267. "SCRIPT",
  268. new ElemDesc(
  269. 0
  270. | ElemDesc.SPECIAL
  271. | ElemDesc.ASPECIAL
  272. | ElemDesc.HEADMISC
  273. | ElemDesc.RAW));
  274. m_elementFlags.put(
  275. "NOSCRIPT",
  276. new ElemDesc(
  277. 0
  278. | ElemDesc.BLOCK
  279. | ElemDesc.BLOCKFORM
  280. | ElemDesc.BLOCKFORMFIELDSET));
  281. m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
  282. // From "John Ky" <hand@syd.speednet.com.au
  283. // Transitional Document Type Definition ()
  284. // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
  285. m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  286. // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
  287. m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  288. m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  289. // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
  290. m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  291. // From "John Ky" <hand@syd.speednet.com.au
  292. m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  293. // HTML 4.0, section 16.5
  294. m_elementFlags.put(
  295. "IFRAME",
  296. new ElemDesc(
  297. 0
  298. | ElemDesc.BLOCK
  299. | ElemDesc.BLOCKFORM
  300. | ElemDesc.BLOCKFORMFIELDSET));
  301. // NS4 extensions
  302. m_elementFlags.put(
  303. "LAYER",
  304. new ElemDesc(
  305. 0
  306. | ElemDesc.BLOCK
  307. | ElemDesc.BLOCKFORM
  308. | ElemDesc.BLOCKFORMFIELDSET));
  309. m_elementFlags.put(
  310. "ILAYER",
  311. new ElemDesc(
  312. 0
  313. | ElemDesc.BLOCK
  314. | ElemDesc.BLOCKFORM
  315. | ElemDesc.BLOCKFORMFIELDSET));
  316. ElemDesc elemDesc;
  317. elemDesc = (ElemDesc) m_elementFlags.get("AREA");
  318. elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
  319. elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
  320. elemDesc = (ElemDesc) m_elementFlags.get("BASE");
  321. elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
  322. elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE");
  323. elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
  324. elemDesc = (ElemDesc) m_elementFlags.get("Q");
  325. elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
  326. elemDesc = (ElemDesc) m_elementFlags.get("INS");
  327. elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
  328. elemDesc = (ElemDesc) m_elementFlags.get("DEL");
  329. elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
  330. elemDesc = (ElemDesc) m_elementFlags.get("A");
  331. elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
  332. elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
  333. elemDesc = (ElemDesc) m_elementFlags.get("LINK");
  334. elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
  335. elemDesc = (ElemDesc) m_elementFlags.get("INPUT");
  336. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  337. elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
  338. elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
  339. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  340. elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
  341. elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
  342. elemDesc = (ElemDesc) m_elementFlags.get("SELECT");
  343. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  344. elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
  345. elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP");
  346. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  347. elemDesc = (ElemDesc) m_elementFlags.get("OPTION");
  348. elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
  349. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  350. elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA");
  351. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  352. elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
  353. elemDesc = (ElemDesc) m_elementFlags.get("BUTTON");
  354. elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
  355. elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT");
  356. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  357. elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
  358. elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
  359. elemDesc = (ElemDesc) m_elementFlags.get("IMG");
  360. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  361. elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
  362. elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
  363. elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
  364. elemDesc = (ElemDesc) m_elementFlags.get("OBJECT");
  365. elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
  366. elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
  367. elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
  368. elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
  369. elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
  370. elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
  371. elemDesc = (ElemDesc) m_elementFlags.get("FORM");
  372. elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
  373. elemDesc = (ElemDesc) m_elementFlags.get("HEAD");
  374. elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
  375. // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
  376. elemDesc = (ElemDesc) m_elementFlags.get("FRAME");
  377. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  378. elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
  379. // HTML 4.0, section 16.5
  380. elemDesc = (ElemDesc) m_elementFlags.get("IFRAME");
  381. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  382. elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
  383. // NS4 extensions
  384. elemDesc = (ElemDesc) m_elementFlags.get("LAYER");
  385. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  386. elemDesc = (ElemDesc) m_elementFlags.get("ILAYER");
  387. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  388. elemDesc = (ElemDesc) m_elementFlags.get("DIV");
  389. elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
  390. }
  /**
   * Dummy element for elements not found.
   * {@code getElemDesc(String)} returns this shared instance when the trie
   * has no entry for the requested name; it carries only the BLOCK flag.
   */
  static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
  /**
   * True if URLs should be specially escaped with the %xx form.
   * Passed to {@code writeAttrURI} by {@code processAttribute}; replaced by
   * {@code setSpecialEscapeURLs} and {@code setOutputFormat}.
   */
  private boolean m_specialEscapeURLs = true;
  /**
   * True if the META tag should be omitted.
   * Consulted by {@code startElement} when it handles the HEAD element.
   */
  private boolean m_omitMetaTag = false;
  399. /**
  400. * Tells if the formatter should use special URL escaping.
  401. *
  402. * @param bool True if URLs should be specially escaped with the %xx form.
  403. */
  404. public void setSpecialEscapeURLs(boolean bool)
  405. {
  406. m_specialEscapeURLs = bool;
  407. }
  408. /**
  409. * Tells if the formatter should omit the META tag.
  410. *
  411. * @param bool True if the META tag should be omitted.
  412. */
  413. public void setOmitMetaTag(boolean bool)
  414. {
  415. m_omitMetaTag = bool;
  416. }
  417. /**
  418. * Specifies an output format for this serializer. It the
  419. * serializer has already been associated with an output format,
  420. * it will switch to the new format. This method should not be
  421. * called while the serializer is in the process of serializing
  422. * a document.
  423. *
  424. * @param format The output format to use
  425. */
  426. public void setOutputFormat(Properties format)
  427. {
  428. m_specialEscapeURLs =
  429. OutputPropertyUtils.getBooleanProperty(
  430. OutputPropertiesFactory.S_USE_URL_ESCAPING,
  431. format);
  432. m_omitMetaTag =
  433. OutputPropertyUtils.getBooleanProperty(
  434. OutputPropertiesFactory.S_OMIT_META_TAG,
  435. format);
  436. super.setOutputFormat(format);
  437. }
  438. /**
  439. * Tells if the formatter should use special URL escaping.
  440. *
  441. * @return True if URLs should be specially escaped with the %xx form.
  442. */
  443. private final boolean getSpecialEscapeURLs()
  444. {
  445. return m_specialEscapeURLs;
  446. }
  447. /**
  448. * Tells if the formatter should omit the META tag.
  449. *
  450. * @return True if the META tag should be omitted.
  451. */
  452. private final boolean getOmitMetaTag()
  453. {
  454. return m_omitMetaTag;
  455. }
  456. /**
  457. * Get a description of the given element.
  458. *
  459. * @param name non-null name of element, case insensitive.
  460. *
  461. * @return non-null reference to ElemDesc, which may be m_dummy if no
  462. * element description matches the given name.
  463. */
  464. public static final ElemDesc getElemDesc(String name)
  465. {
  466. /* this method used to return m_dummy when name was null
  467. * but now it doesn't check and and requires non-null name.
  468. */
  469. Object obj = m_elementFlags.get(name);
  470. if (null != obj)
  471. return (ElemDesc)obj;
  472. return m_dummy;
  473. }
  /**
   * Creates an HTML serializer that uses the shared HTML character
   * information and starts with a fresh set of namespace mappings.
   */
  public ToHTMLStream()
  {
    // The superclass no-argument constructor runs implicitly.
    this.m_charInfo = m_htmlcharInfo;
    // initialize namespaces
    this.m_prefixMap = new NamespaceMappings();
  }
  484. /** The name of the current element. */
  485. // private String m_currentElementName = null;
  486. /**
  487. * Receive notification of the beginning of a document.
  488. *
  489. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  490. * wrapping another exception.
  491. *
  492. * @throws org.xml.sax.SAXException
  493. */
  494. protected void startDocumentInternal() throws org.xml.sax.SAXException
  495. {
  496. super.startDocumentInternal();
  497. m_needToCallStartDocument = false;
  498. m_needToOutputDocTypeDecl = true;
  499. m_startNewLine = false;
  500. setOmitXMLDeclaration(true);
  501. if (true == m_needToOutputDocTypeDecl)
  502. {
  503. String doctypeSystem = getDoctypeSystem();
  504. String doctypePublic = getDoctypePublic();
  505. if ((null != doctypeSystem) || (null != doctypePublic))
  506. {
  507. final java.io.Writer writer = m_writer;
  508. try
  509. {
  510. writer.write("<!DOCTYPE HTML");
  511. if (null != doctypePublic)
  512. {
  513. writer.write(" PUBLIC \"");
  514. writer.write(doctypePublic);
  515. writer.write('"');
  516. }
  517. if (null != doctypeSystem)
  518. {
  519. if (null == doctypePublic)
  520. writer.write(" SYSTEM \"");
  521. else
  522. writer.write('"');
  523. writer.write(doctypeSystem);
  524. writer.write('"');
  525. }
  526. writer.write('>');
  527. outputLineSep();
  528. }
  529. catch(IOException e)
  530. {
  531. throw new SAXException(e);
  532. }
  533. }
  534. }
  535. m_needToOutputDocTypeDecl = false;
  536. }
  537. /**
  538. * Receive notification of the end of a document.
  539. *
  540. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  541. * wrapping another exception.
  542. *
  543. * @throws org.xml.sax.SAXException
  544. */
  545. public final void endDocument() throws org.xml.sax.SAXException
  546. {
  547. flushPending();
  548. if (m_doIndent && !m_isprevtext)
  549. {
  550. try
  551. {
  552. outputLineSep();
  553. }
  554. catch(IOException e)
  555. {
  556. throw new SAXException(e);
  557. }
  558. }
  559. flushWriter();
  560. if (m_tracer != null)
  561. super.fireEndDoc();
  562. }
  /**
   * Receive notification of the beginning of an element.
   *
   * <p>Elements with a non-empty namespace URI are delegated to the
   * superclass and serialized like XML. For all other elements the
   * description is looked up by {@code name}, indentation is handled, the
   * attributes are saved for later output, and {@code <name} is written;
   * nothing in this method's own code writes the closing {@code >} of the
   * start tag except for the HEAD element.
   *
   * @param namespaceURI the namespace URI of the element; null or empty
   *                     selects HTML handling.
   * @param localName the local name, passed on to the superclass or the
   *                  element context.
   * @param name The element type name.
   * @param atts The attributes attached to the element, if any.
   * @throws org.xml.sax.SAXException Any SAX exception, possibly
   * wrapping another exception.
   * @see #endElement
   * @see org.xml.sax.Attributes
   */
  public void startElement(
    String namespaceURI,
    String localName,
    String name,
    Attributes atts)
    throws org.xml.sax.SAXException
  {
    ElemContext elemContext = m_elemContext;
    // clean up any pending things first
    // (the branches are mutually exclusive: at most one of them runs per call)
    if (elemContext.m_startTagOpen)
    {
      closeStartTag();
      elemContext.m_startTagOpen = false;
    }
    else if (m_cdataTagOpen)
    {
      closeCDATA();
      m_cdataTagOpen = false;
    }
    else if (m_needToCallStartDocument)
    {
      startDocumentInternal();
      m_needToCallStartDocument = false;
    }
    // if this element has a namespace then treat it like XML
    if (null != namespaceURI && namespaceURI.length() > 0)
    {
      super.startElement(namespaceURI, localName, name, atts);
      return;
    }
    try
    {
      // Unknown names yield the shared dummy description (BLOCK only).
      ElemDesc elemDesc = getElemDesc(name);
      int elemFlags = elemDesc.getFlags();
      // deal with indentation issues first
      if (m_doIndent)
      {
        boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
        if (m_ispreserve)
          // A pending "preserve" request suppresses indentation once.
          m_ispreserve = false;
        else if (
          (null != elemContext.m_elementName)
            && (!m_inBlockElem
              || isBlockElement) /* && !isWhiteSpaceSensitive */
        )
        {
          // Only indent when there is an enclosing element.
          m_startNewLine = true;
          indent();
        }
        // Note the negation: the flag ends up true after a non-block element.
        m_inBlockElem = !isBlockElement;
      }
      // save any attributes for later processing
      if (atts != null)
        addAttributes(atts);
      m_isprevtext = false;
      final java.io.Writer writer = m_writer;
      writer.write('<');
      writer.write(name);
      if (m_tracer != null)
        firePseudoAttributes();
      // OPTIMIZE-EMPTY (counterpart of the block with the same label in endElement())
      if ((elemFlags & ElemDesc.EMPTY) != 0)
      {
        // an optimization for elements which are expected
        // to be empty.
        m_elemContext = elemContext.push();
        /* XSLTC sometimes calls namespaceAfterStartElement()
         * so we need to remember the name
         */
        m_elemContext.m_elementName = name;
        m_elemContext.m_elementDesc = elemDesc;
        // Returning here skips the HEAD handling below.
        return;
      }
      else
      {
        elemContext = elemContext.push(namespaceURI,localName,name);
        m_elemContext = elemContext;
        elemContext.m_elementDesc = elemDesc;
        elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
      }
      if ((elemFlags & ElemDesc.HEADELEM) != 0)
      {
        // This is the <HEAD> element, do some special processing
        // The start tag is closed right away so the META element can be
        // written as the first thing inside HEAD.
        closeStartTag();
        elemContext.m_startTagOpen = false;
        if (!m_omitMetaTag)
        {
          if (m_doIndent)
            indent();
          writer.write(
            "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
          // The charset written is the MIME name for the current encoding.
          String encoding = getEncoding();
          String encode = Encodings.getMimeEncoding(encoding);
          writer.write(encode);
          writer.write("\">");
        }
      }
    }
    catch (IOException e)
    {
      // Writer failures are reported to the SAX caller.
      throw new SAXException(e);
    }
  }
  /**
   * Receive notification of the end of an element.
   *
   * <p>Elements with a non-empty namespace URI are delegated to the
   * superclass. For all other elements the description stored in the current
   * element context is used (no new lookup by name is made). If the start
   * tag has already been closed, {@code </name>} is written. If it is still
   * open, the collected attributes are written first and the tag is then
   * finished as {@code ></name>}, or as a bare {@code >} for elements
   * flagged EMPTY. Finally the element context is popped.
   *
   * @param namespaceURI the namespace URI of the element; null or empty
   *                     selects HTML handling.
   * @param localName the local name, passed on to the superclass for
   *                  namespaced elements.
   * @param name The element type name
   * @throws org.xml.sax.SAXException Any SAX exception, possibly
   * wrapping another exception.
   */
  public final void endElement(
    final String namespaceURI,
    final String localName,
    final String name)
    throws org.xml.sax.SAXException
  {
    // deal with any pending issues
    if (m_cdataTagOpen)
      closeCDATA();
    // if the element has a namespace, treat it like XML, not HTML
    if (null != namespaceURI && namespaceURI.length() > 0)
    {
      super.endElement(namespaceURI, localName, name);
      return;
    }
    try
    {
      ElemContext elemContext = m_elemContext;
      // The description was recorded on the context by startElement().
      final ElemDesc elemDesc = elemContext.m_elementDesc;
      final int elemFlags = elemDesc.getFlags();
      final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
      // deal with any indentation issues
      if (m_doIndent)
      {
        final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
        boolean shouldIndent = false;
        if (m_ispreserve)
        {
          // A pending "preserve" request suppresses indentation once.
          m_ispreserve = false;
        }
        else if (m_doIndent && (!m_inBlockElem || isBlockElement))
        {
          m_startNewLine = true;
          shouldIndent = true;
        }
        // Indent the end tag only when it is written on its own, i.e. the
        // start tag is not still open.
        if (!elemContext.m_startTagOpen && shouldIndent)
          indent(elemContext.m_currentElemDepth - 1);
        // Note the negation: the flag ends up true after a non-block element.
        m_inBlockElem = !isBlockElement;
      }
      final java.io.Writer writer = m_writer;
      if (!elemContext.m_startTagOpen)
      {
        writer.write("</");
        writer.write(name);
        writer.write('>');
      }
      else
      {
        // the start-tag open when this method was called,
        // so we need to process it now.
        if (m_tracer != null)
          super.fireStartElem(name);
        // the starting tag was still open when we received this endElement() call
        // so we need to process any gathered attributes NOW, before they go away.
        int nAttrs = m_attributes.getLength();
        if (nAttrs > 0)
        {
          processAttributes(m_writer, nAttrs);
          // clear attributes object for re-use with next element
          m_attributes.clear();
        }
        if (!elemEmpty)
        {
          // As per Dave/Paul recommendation 12/06/2000
          // if (shouldIndent)
          // writer.write('>');
          // indent(m_currentIndent);
          // Close the start tag and write the end tag in one go.
          writer.write("></");
          writer.write(name);
          writer.write('>');
        }
        else
        {
          // EMPTY elements get no end tag, only the closing '>'.
          writer.write('>');
        }
      }
      // clean up because the element has ended
      if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
        m_ispreserve = true;
      m_isprevtext = false;
      // fire off the end element event
      if (m_tracer != null)
        super.fireEndElem(name);
      // OPTIMIZE-EMPTY
      if (elemEmpty)
      {
        // a quick exit if the HTML element had no children.
        // This block of code can be removed if the corresponding block of code
        // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
        m_elemContext = elemContext.m_prev;
        return;
      }
      // some more clean because the element has ended.
      if (!elemContext.m_startTagOpen)
      {
        if (m_doIndent && !m_preserves.isEmpty())
          m_preserves.pop();
      }
      // Pop back to the enclosing element's context.
      m_elemContext = elemContext.m_prev;
      // m_isRawStack.pop();
    }
    catch (IOException e)
    {
      // Writer failures are reported to the SAX caller.
      throw new SAXException(e);
    }
  }
  794. /**
  795. * Process an attribute.
  796. * @param writer The writer to write the processed output to.
  797. * @param name The name of the attribute.
  798. * @param value The value of the attribute.
  799. * @param elemDesc The description of the HTML element
  800. * that has this attribute.
  801. *
  802. * @throws org.xml.sax.SAXException
  803. */
  804. protected void processAttribute(
  805. java.io.Writer writer,
  806. String name,
  807. String value,
  808. ElemDesc elemDesc)
  809. throws IOException
  810. {
  811. writer.write(' ');
  812. if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
  813. && elemDesc != null
  814. && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
  815. {
  816. writer.write(name);
  817. }
  818. else
  819. {
  820. // %REVIEW% %OPT%
  821. // Two calls to single-char write may NOT
  822. // be more efficient than one to string-write...
  823. writer.write(name);
  824. writer.write("=\"");
  825. if ( elemDesc != null
  826. && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
  827. writeAttrURI(writer, value, m_specialEscapeURLs);
  828. else
  829. writeAttrString(writer, value, this.getEncoding());
  830. writer.write('"');
  831. }
  832. }
  833. /**
  834. * Tell if a character is an ASCII digit.
  835. */
  836. private boolean isASCIIDigit(char c)
  837. {
  838. return (c >= '0' && c <= '9');
  839. }
  840. /**
  841. * Make an integer into an HH hex value.
  842. * Does no checking on the size of the input, since this
  843. * is only meant to be used locally by writeAttrURI.
  844. *
  845. * @param i must be a value less than 255.
  846. *
  847. * @return should be a two character string.
  848. */
  849. private static String makeHHString(int i)
  850. {
  851. String s = Integer.toHexString(i).toUpperCase();
  852. if (s.length() == 1)
  853. {
  854. s = "0" + s;
  855. }
  856. return s;
  857. }
  858. /**
  859. * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
  860. * @param str must be 2 characters long
  861. *
  862. * @return true or false
  863. */
  864. private boolean isHHSign(String str)
  865. {
  866. boolean sign = true;
  867. try
  868. {
  869. char r = (char) Integer.parseInt(str, 16);
  870. }
  871. catch (NumberFormatException e)
  872. {
  873. sign = false;
  874. }
  875. return sign;
  876. }
  877. /**
  878. * Write the specified <var>string</var> after substituting non ASCII characters,
  879. * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
  880. *
  881. * @param string String to convert to XML format.
  882. * @param doURLEscaping True if we should try to encode as
  883. * per http://www.ietf.org/rfc/rfc2396.txt.
  884. *
  885. * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
  886. */
  887. public void writeAttrURI(
  888. final java.io.Writer writer, String string, boolean doURLEscaping)
  889. throws IOException
  890. {
  891. // http://www.ietf.org/rfc/rfc2396.txt says:
  892. // A URI is always in an "escaped" form, since escaping or unescaping a
  893. // completed URI might change its semantics. Normally, the only time
  894. // escape encodings can safely be made is when the URI is being created
  895. // from its component parts; each component may have its own set of
  896. // characters that are reserved, so only the mechanism responsible for
  897. // generating or interpreting that component can determine whether or
  898. // not escaping a character will change its semantics. Likewise, a URI
  899. // must be separated into its components before the escaped characters
  900. // within those components can be safely decoded.
  901. //
  902. // ...So we do our best to do limited escaping of the URL, without
  903. // causing damage. If the URL is already properly escaped, in theory, this
  904. // function should not change the string value.
  905. final int end = string.length();
  906. if (end > m_attrBuff.length)
  907. {
  908. m_attrBuff = new char[end*2 + 1];
  909. }
  910. string.getChars(0,end, m_attrBuff, 0);
  911. final char[] chars = m_attrBuff;
  912. int cleanStart = 0;
  913. int cleanLength = 0;
  914. char ch = 0;
  915. for (int i = 0; i < end; i++)
  916. {
  917. ch = chars[i];
  918. if ((ch < 32) || (ch > 126))
  919. {
  920. if (cleanLength > 0)
  921. {
  922. writer.write(chars, cleanStart, cleanLength);
  923. cleanLength = 0;
  924. }
  925. if (doURLEscaping)
  926. {
  927. // Encode UTF16 to UTF8.
  928. // Reference is Unicode, A Primer, by Tony Graham.
  929. // Page 92.
  930. // Note that Kay doesn't escape 0x20...
  931. // if(ch == 0x20) // Not sure about this... -sb
  932. // {
  933. // writer.write(ch);
  934. // }
  935. // else
  936. if (ch <= 0x7F)
  937. {
  938. writer.write('%');
  939. writer.write(makeHHString(ch));
  940. }
  941. else if (ch <= 0x7FF)
  942. {
  943. // Clear low 6 bits before rotate, put high 4 bits in low byte,
  944. // and set two high bits.
  945. int high = (ch >> 6) | 0xC0;
  946. int low = (ch & 0x3F) | 0x80;
  947. // First 6 bits, + high bit
  948. writer.write('%');
  949. writer.write(makeHHString(high));
  950. writer.write('%');
  951. writer.write(makeHHString(low));
  952. }
  953. else if (isUTF16Surrogate(ch)) // high surrogate
  954. {
  955. // I'm sure this can be done in 3 instructions, but I choose
  956. // to try and do it exactly like it is done in the book, at least
  957. // until we are sure this is totally clean. I don't think performance
  958. // is a big issue with this particular function, though I could be
  959. // wrong. Also, the stuff below clearly does more masking than
  960. // it needs to do.
  961. // Clear high 6 bits.
  962. int highSurrogate = ((int) ch) & 0x03FF;
  963. // Middle 4 bits (wwww) + 1
  964. // "Note that the value of wwww from the high surrogate bit pattern
  965. // is incremented to make the uuuuu bit pattern in the scalar value
  966. // so the surrogate pair don't address the BMP."
  967. int wwww = ((highSurrogate & 0x03C0) >> 6);
  968. int uuuuu = wwww + 1;
  969. // next 4 bits
  970. int zzzz = (highSurrogate & 0x003C) >> 2;
  971. // low 2 bits
  972. int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
  973. // Get low surrogate character.
  974. ch = chars[++i];
  975. // Clear high 6 bits.
  976. int lowSurrogate = ((int) ch) & 0x03FF;
  977. // put the middle 4 bits into the bottom of yyyyyy (byte 3)
  978. yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
  979. // bottom 6 bits.
  980. int xxxxxx = (lowSurrogate & 0x003F);
  981. int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
  982. int byte2 =
  983. 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
  984. int byte3 = 0x80 | yyyyyy;
  985. int byte4 = 0x80 | xxxxxx;
  986. writer.write('%');
  987. writer.write(makeHHString(byte1));
  988. writer.write('%');
  989. writer.write(makeHHString(byte2));
  990. writer.write('%');
  991. writer.write(makeHHString(byte3));
  992. writer.write('%');
  993. writer.write(makeHHString(byte4));
  994. }
  995. else
  996. {
  997. int high = (ch >> 12) | 0xE0; // top 4 bits
  998. int middle = ((ch & 0x0FC0) >> 6) | 0x80;
  999. // middle 6 bits
  1000. int low = (ch & 0x3F) | 0x80;
  1001. // First 6 bits, + high bit
  1002. writer.write('%');
  1003. writer.write(makeHHString(high));
  1004. writer.write('%');
  1005. writer.write(makeHHString(middle));
  1006. writer.write('%');
  1007. writer.write(makeHHString(low));
  1008. }
  1009. }
  1010. else if (escapingNotNeeded(ch))
  1011. {
  1012. writer.write(ch);
  1013. }
  1014. else
  1015. {
  1016. writer.write("&#");
  1017. writer.write(Integer.toString(ch));
  1018. writer.write(';');
  1019. }
  1020. // In this character range we have first written out any previously accumulated
  1021. // "clean" characters, then processed the current more complicated character,
  1022. // which may have incremented "i".
  1023. // We now we reset the next possible clean character.
  1024. cleanStart = i + 1;
  1025. }
  1026. // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
  1027. // not allowing quotes in the URI proper syntax, nor in the fragment
  1028. // identifier, we believe that it's OK to double escape quotes.
  1029. else if (ch == '"')
  1030. {
  1031. // If the character is a '%' number number, try to avoid double-escaping.
  1032. // There is a question if this is legal behavior.
  1033. // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
  1034. // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
  1035. // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
  1036. // We are no longer escaping '%'
  1037. if (cleanLength > 0)
  1038. {
  1039. writer.write(chars, cleanStart, cleanLength);
  1040. cleanLength = 0;
  1041. }
  1042. // Mike Kay encodes this as ", so he may know something I don't?
  1043. if (doURLEscaping)
  1044. writer.write("%22");
  1045. else
  1046. writer.write("""); // we have to escape this, I guess.
  1047. // We have written out any clean characters, then the escaped '%' and now we
  1048. // We now we reset the next possible clean character.
  1049. cleanStart = i + 1;
  1050. }
  1051. else
  1052. {
  1053. // no processing for this character, just count how
  1054. // many characters in a row that we have that need no processing
  1055. cleanLength++;
  1056. }
  1057. }
  1058. // are there any clean characters at the end of the array
  1059. // that we haven't processed yet?
  1060. if (cleanLength > 1)
  1061. {
  1062. // if the whole string can be written out as-is do so
  1063. // otherwise write out the clean chars at the end of the
  1064. // array
  1065. if (cleanStart == 0)
  1066. writer.write(string);
  1067. else
  1068. writer.write(chars, cleanStart, cleanLength);
  1069. }
  1070. else if (cleanLength == 1)
  1071. {
  1072. // a little optimization for 1 clean character
  1073. // (we could have let the previous if(...) handle them all)
  1074. writer.write(ch);
  1075. }
  1076. }
  1077. /**
  1078. * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
  1079. * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>.
  1080. *
  1081. * @param string String to convert to XML format.
  1082. * @param encoding CURRENTLY NOT IMPLEMENTED.
  1083. *
  1084. * @throws org.xml.sax.SAXException
  1085. */
  1086. public void writeAttrString(
  1087. final java.io.Writer writer, String string, String encoding)
  1088. throws IOException
  1089. {
  1090. final int end = string.length();
  1091. if (end > m_attrBuff.length)
  1092. {
  1093. m_attrBuff = new char[end * 2 + 1];
  1094. }
  1095. string.getChars(0, end, m_attrBuff, 0);
  1096. final char[] chars = m_attrBuff;
  1097. int cleanStart = 0;
  1098. int cleanLength = 0;
  1099. char ch = 0;
  1100. for (int i = 0; i < end; i++)
  1101. {
  1102. ch = chars[i];
  1103. // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
  1104. // System.out.println("ch: "+(int)ch);
  1105. // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
  1106. // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
  1107. if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
  1108. {
  1109. cleanLength++;
  1110. }
  1111. else if ('<' == ch || '>' == ch)
  1112. {
  1113. cleanLength++; // no escaping in this case, as specified in 15.2
  1114. }
  1115. else if (
  1116. ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
  1117. {
  1118. cleanLength++; // no escaping in this case, as specified in 15.2
  1119. }
  1120. else
  1121. {
  1122. if (cleanLength > 0)
  1123. {
  1124. writer.write(chars,cleanStart,cleanLength);
  1125. cleanLength = 0;
  1126. }
  1127. int pos = accumDefaultEntity(writer, ch, i, chars, end, false, false);
  1128. if (i != pos)
  1129. {
  1130. i = pos - 1;
  1131. }
  1132. else
  1133. {
  1134. if (isUTF16Surrogate(ch))
  1135. {
  1136. writeUTF16Surrogate(ch, chars, i, end);
  1137. i++; // two input characters processed
  1138. // this increments by one and the for()
  1139. // loop itself increments by another one.
  1140. }
  1141. // The next is kind of a hack to keep from escaping in the case
  1142. // of Shift_JIS and the like.
  1143. /*
  1144. else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
  1145. && (ch != 160))
  1146. {
  1147. writer.write(ch); // no escaping in this case
  1148. }
  1149. else
  1150. */
  1151. String entityName = m_charInfo.getEntityNameForChar(ch);
  1152. if (null != entityName)
  1153. {
  1154. writer.write('&');
  1155. writer.write(entityName);
  1156. writer.write(';');
  1157. }
  1158. else if (escapingNotNeeded(ch))
  1159. {
  1160. writer.write(ch); // no escaping in this case
  1161. }
  1162. else
  1163. {
  1164. writer.write("&#");
  1165. writer.write(Integer.toString(ch));
  1166. writer.write(';');
  1167. }
  1168. }
  1169. cleanStart = i + 1;
  1170. }
  1171. } // end of for()
  1172. // are there any clean characters at the end of the array
  1173. // that we haven't processed yet?
  1174. if (cleanLength > 1)
  1175. {
  1176. // if the whole string can be written out as-is do so
  1177. // otherwise write out the clean chars at the end of the
  1178. // array
  1179. if (cleanStart == 0)
  1180. writer.write(string);
  1181. else
  1182. writer.write(chars, cleanStart, cleanLength);
  1183. }
  1184. else if (cleanLength == 1)
  1185. {
  1186. // a little optimization for 1 clean character
  1187. // (we could have let the previous if(...) handle them all)
  1188. writer.write(ch);
  1189. }
  1190. }
  1191. /**
  1192. * Receive notification of character data.
  1193. *
  1194. * <p>The Parser will call this method to report each chunk of
  1195. * character data. SAX parsers may return all contiguous character
  1196. * data in a single chunk, or they may split it into several
  1197. * chunks; however, all of the characters in any single event
  1198. * must come from the same external entity, so that the Locator
  1199. * provides useful information.</p>
  1200. *
  1201. * <p>The application must not attempt to read from the array
  1202. * outside of the specified range.</p>
  1203. *
  1204. * <p>Note that some parsers will report whitespace using the
  1205. * ignorableWhitespace() method rather than this one (validating
  1206. * parsers must do so).</p>
  1207. *
  1208. * @param chars The characters from the XML document.
  1209. * @param start The start position in the array.
  1210. * @param length The number of characters to read from the array.
  1211. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  1212. * wrapping another exception.
  1213. * @see #ignorableWhitespace
  1214. * @see org.xml.sax.Locator
  1215. *
  1216. * @throws org.xml.sax.SAXException
  1217. */
  1218. public final void characters(char chars[], int start, int length)
  1219. throws org.xml.sax.SAXException
  1220. {
  1221. if (m_elemContext.m_isRaw)
  1222. {
  1223. try
  1224. {
  1225. if (m_elemContext.m_startTagOpen)
  1226. {
  1227. closeStartTag();
  1228. m_elemContext.m_startTagOpen = false;
  1229. }
  1230. m_ispreserve = true;
  1231. // With m_ispreserve just set true it looks like shouldIndent()
  1232. // will always return false, so drop any possible indentation.
  1233. // if (shouldIndent())
  1234. // indent();
  1235. // writer.write("<![CDATA[");
  1236. // writer.write(chars, start, length);
  1237. writeNormalizedChars(chars, start, length, false, m_lineSepUse);
  1238. // writer.write("]]>");
  1239. // time to generate characters event
  1240. if (m_tracer != null)
  1241. super.fireCharEvent(chars, start, length);
  1242. return;
  1243. }
  1244. catch (IOException ioe)
  1245. {
  1246. throw new org.xml.sax.SAXException(
  1247. XMLMessages.createXMLMessage(
  1248. XMLErrorResources.ER_OIERROR,
  1249. null),
  1250. ioe);
  1251. //"IO error", ioe);
  1252. }
  1253. }
  1254. else
  1255. {
  1256. super.characters(chars, start, length);
  1257. }
  1258. }
  1259. /**
  1260. * Receive notification of cdata.
  1261. *
  1262. * <p>The Parser will call this method to report each chunk of
  1263. * character data. SAX parsers may return all contiguous character
  1264. * data in a single chunk, or they may split it into several
  1265. * chunks; however, all of the characters in any single event
  1266. * must come from the same external entity, so that the Locator
  1267. * provides useful information.</p>
  1268. *
  1269. * <p>The application must not attempt to read from the array
  1270. * outside of the specified range.</p>
  1271. *
  1272. * <p>Note that some parsers will report whitespace using the
  1273. * ignorableWhitespace() method rather than this one (validating
  1274. * parsers must do so).</p>
  1275. *
  1276. * @param ch The characters from the XML document.
  1277. * @param start The start position in the array.
  1278. * @param length The number of characters to read from the array.
  1279. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  1280. * wrapping another exception.
  1281. * @see #ignorableWhitespace
  1282. * @see org.xml.sax.Locator
  1283. *
  1284. * @throws org.xml.sax.SAXException
  1285. */
  1286. public final void cdata(char ch[], int start, int length)
  1287. throws org.xml.sax.SAXException
  1288. {
  1289. if ((null != m_elemContext.m_elementName)
  1290. && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
  1291. || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
  1292. {
  1293. try
  1294. {
  1295. if (m_elemContext.m_startTagOpen)
  1296. {
  1297. closeStartTag();
  1298. m_elemContext.m_startTagOpen = false;
  1299. }
  1300. m_ispreserve = true;
  1301. if (shouldIndent())
  1302. indent();
  1303. // writer.write(ch, start, length);
  1304. writeNormalizedChars(ch, start, length, true, m_lineSepUse);
  1305. }
  1306. catch (IOException ioe)
  1307. {
  1308. throw new org.xml.sax.SAXException(
  1309. XMLMessages.createXMLMessage(
  1310. XMLErrorResources.ER_OIERROR,
  1311. null),
  1312. ioe);
  1313. //"IO error", ioe);
  1314. }
  1315. }
  1316. else
  1317. {
  1318. super.cdata(ch, start, length);
  1319. }
  1320. }
  1321. /**
  1322. * Receive notification of a processing instruction.
  1323. *
  1324. * @param target The processing instruction target.
  1325. * @param data The processing instruction data, or null if
  1326. * none was supplied.
  1327. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  1328. * wrapping another exception.
  1329. *
  1330. * @throws org.xml.sax.SAXException
  1331. */
  1332. public void processingInstruction(String target, String data)
  1333. throws org.xml.sax.SAXException
  1334. {
  1335. // Process any pending starDocument and startElement first.
  1336. flushPending();
  1337. // Use a fairly nasty hack to tell if the next node is supposed to be
  1338. // unescaped text.
  1339. if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
  1340. {
  1341. startNonEscaping();
  1342. }
  1343. else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
  1344. {
  1345. endNonEscaping();
  1346. }
  1347. else
  1348. {
  1349. try
  1350. {
  1351. if (m_elemContext.m_startTagOpen)
  1352. {
  1353. closeStartTag();
  1354. m_elemContext.m_startTagOpen = false;
  1355. }
  1356. else if (m_needToCallStartDocument)
  1357. startDocumentInternal();
  1358. if (shouldIndent())
  1359. indent();
  1360. final java.io.Writer writer = m_writer;
  1361. //writer.write("<?" + target);
  1362. writer.write("<?");
  1363. writer.write(target);
  1364. if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
  1365. writer.write(' ');
  1366. //writer.write(data + ">"); // different from XML
  1367. writer.write(data); // different from XML
  1368. writer.write('>'); // different from XML
  1369. // Always output a newline char if not inside of an
  1370. // element. The whitespace is not significant in that
  1371. // case.
  1372. if (m_elemContext.m_currentElemDepth <= 0)
  1373. outputLineSep();
  1374. m_startNewLine = true;
  1375. }
  1376. catch(IOException e)
  1377. {
  1378. throw new SAXException(e);
  1379. }
  1380. }
  1381. // now generate the PI event
  1382. if (m_tracer != null)
  1383. super.fireEscapingEvent(target, data);
  1384. }
  1385. /**
  1386. * Receive notivication of a entityReference.
  1387. *
  1388. * @param name non-null reference to entity name string.
  1389. *
  1390. * @throws org.xml.sax.SAXException
  1391. */
  1392. public final void entityReference(String name)
  1393. throws org.xml.sax.SAXException
  1394. {
  1395. try
  1396. {
  1397. final java.io.Writer writer = m_writer;
  1398. writer.write('&');
  1399. writer.write(name);
  1400. writer.write(';');
  1401. } catch(IOException e)
  1402. {
  1403. throw new SAXException(e);
  1404. }
  1405. }
  1406. /**
  1407. * @see com.sun.org.apache.xml.internal.serializer.ExtendedContentHandler#endElement(String)
  1408. */
  1409. public final void endElement(String elemName) throws SAXException
  1410. {
  1411. endElement(null, null, elemName);
  1412. }
  1413. /**
  1414. * Process the attributes, which means to write out the currently
  1415. * collected attributes to the writer. The attributes are not
  1416. * cleared by this method
  1417. *
  1418. * @param writer the writer to write processed attributes to.
  1419. * @param nAttrs the number of attributes in m_attributes
  1420. * to be processed
  1421. *
  1422. * @throws org.xml.sax.SAXException
  1423. */
  1424. public void processAttributes(java.io.Writer writer, int nAttrs)
  1425. throws IOException,SAXException
  1426. {
  1427. /*
  1428. * process the collected attributes
  1429. */
  1430. for (int i = 0; i < nAttrs; i++)
  1431. {
  1432. processAttribute(
  1433. writer,
  1434. m_attributes.getQName(i),
  1435. m_attributes.getValue(i),
  1436. m_elemContext.m_elementDesc);
  1437. }
  1438. }
  1439. /**
  1440. * For the enclosing elements starting tag write out out any attributes
  1441. * followed by ">"
  1442. *
  1443. *@throws org.xml.sax.SAXException
  1444. */
  1445. protected void closeStartTag() throws SAXException
  1446. {
  1447. try
  1448. {
  1449. // finish processing attributes, time to fire off the start element event
  1450. if (m_tracer != null)
  1451. super.fireStartElem(m_elemContext.m_elementName);
  1452. int nAttrs = m_attributes.getLength();
  1453. if (nAttrs>0)
  1454. {
  1455. processAttributes(m_writer, nAttrs);
  1456. // clear attributes object for re-use with next element
  1457. m_attributes.clear();
  1458. }
  1459. m_writer.write('>');
  1460. /* whether Xalan or XSLTC, we have the prefix mappings now, so
  1461. * lets determine if the current element is specified in the cdata-
  1462. * section-elements list.
  1463. */
  1464. if (m_cdataSectionElements != null)
  1465. m_elemContext.m_isCdataSection = isCdataSection();
  1466. if (m_doIndent)
  1467. {
  1468. m_isprevtext = false;
  1469. m_preserves.push(m_ispreserve);
  1470. }
  1471. }
  1472. catch(IOException e)
  1473. {
  1474. throw new SAXException(e);
  1475. }
  1476. }
  1477. /**
  1478. * Initialize the serializer with the specified output stream and output
  1479. * format. Must be called before calling any of the serialize methods.
  1480. *
  1481. * @param output The output stream to use
  1482. * @param format The output format
  1483. * @throws UnsupportedEncodingException The encoding specified in the
  1484. * output format is not supported
  1485. */
  1486. protected synchronized void init(OutputStream output, Properties format)
  1487. throws UnsupportedEncodingException
  1488. {
  1489. if (null == format)
  1490. {
  1491. format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
  1492. }
  1493. super.init(output,format, false);
  1494. }
  1495. /**
  1496. * Specifies an output stream to which the document should be
  1497. * serialized. This method should not be called while the
  1498. * serializer is in the process of serializing a document.
  1499. * <p>
  1500. * The encoding specified in the output properties is used, or
  1501. * if no encoding was specified, the default for the selected
  1502. * output method.
  1503. *
  1504. * @param output The output stream
  1505. */
  1506. public void setOutputStream(OutputStream output)
  1507. {
  1508. try
  1509. {
  1510. Properties format;
  1511. if (null == m_format)
  1512. format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
  1513. else
  1514. format = m_format;
  1515. init(output, format, true);
  1516. }
  1517. catch (UnsupportedEncodingException uee)
  1518. {
  1519. // Should have been warned in init, I guess...
  1520. }
  1521. }
  1522. /**
  1523. * This method is used when a prefix/uri namespace mapping
  1524. * is indicated after the element was started with a
  1525. * startElement() and before and endElement().
  1526. * startPrefixMapping(prefix,uri) would be used before the
  1527. * startElement() call.
  1528. * @param uri the URI of the namespace
  1529. * @param prefix the prefix associated with the given URI.
  1530. *
  1531. * @see com.sun.org.apache.xml.internal.serializer.ExtendedContentHandler#namespaceAfterStartElement(String, String)
  1532. */
  1533. public void namespaceAfterStartElement(String prefix, String uri)
  1534. throws SAXException
  1535. {
  1536. // hack for XSLTC with finding URI for default namespace
  1537. if (m_elemContext.m_elementURI == null)
  1538. {
  1539. String prefix1 = getPrefixPart(m_elemContext.m_elementName);
  1540. if (prefix1 == null && EMPTYSTRING.equals(prefix))
  1541. {
  1542. // the elements URI is not known yet, and it
  1543. // doesn't have a prefix, and we are currently
  1544. // setting the uri for prefix "", so we have
  1545. // the uri for the element... lets remember it
  1546. m_elemContext.m_elementURI = uri;
  1547. }
  1548. }
  1549. startPrefixMapping(prefix,uri,false);
  1550. }
  1551. public void startDTD(String name, String publicId, String systemId)
  1552. throws SAXException
  1553. {
  1554. m_inDTD = true;
  1555. super.startDTD(name, publicId, systemId);
  1556. }
  1557. /**
  1558. * Report the end of DTD declarations.
  1559. * @throws org.xml.sax.SAXException The application may raise an exception.
  1560. * @see #startDTD
  1561. */
  1562. public void endDTD() throws org.xml.sax.SAXException
  1563. {
  1564. m_inDTD = false;
  1565. /* for ToHTMLStream the DOCTYPE is entirely output in the
  1566. * startDocumentInternal() method, so don't do anything here
  1567. */
  1568. }
  1569. /**
  1570. * This method does nothing.
  1571. */
  1572. public void attributeDecl(
  1573. String eName,
  1574. String aName,
  1575. String type,
  1576. String valueDefault,
  1577. String value)
  1578. throws SAXException
  1579. {
  1580. // The internal DTD subset is not serialized by the ToHTMLStream serializer
  1581. }
  1582. /**
  1583. * This method does nothing.
  1584. */
  1585. public void elementDecl(String name, String model) throws SAXException
  1586. {
  1587. // The internal DTD subset is not serialized by the ToHTMLStream serializer
  1588. }
  1589. /**
  1590. * This method does nothing.
  1591. */
  1592. public void internalEntityDecl(String name, String value)
  1593. throws SAXException
  1594. {
  1595. // The internal DTD subset is not serialized by the ToHTMLStream serializer
  1596. }
  1597. /**
  1598. * This method does nothing.
  1599. */
  1600. public void externalEntityDecl(
  1601. String name,
  1602. String publicId,
  1603. String systemId)
  1604. throws SAXException
  1605. {
  1606. // The internal DTD subset is not serialized by the ToHTMLStream serializer
  1607. }
  1608. /**
  1609. * This method is used to add an attribute to the currently open element.
  1610. * The caller has guaranted that this attribute is unique, which means that it
  1611. * not been seen before and will not be seen again.
  1612. *
  1613. * @param name the qualified name of the attribute
  1614. * @param value the value of the attribute which can contain only
  1615. * ASCII printable characters characters in the range 32 to 127 inclusive.
  1616. * @param flags the bit values of this integer give optimization information.
  1617. */
  1618. public void addUniqueAttribute(String name, String value, int flags)
  1619. throws SAXException
  1620. {
  1621. try
  1622. {
  1623. final java.io.Writer writer = m_writer;
  1624. if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
  1625. {
  1626. // "flags" has indicated that the characters
  1627. // '>' '<' '&' and '"' are not in the value and
  1628. // m_htmlcharInfo has recorded that there are no other
  1629. // entities in the range 0 to 127 so we write out the
  1630. // value directly
  1631. writer.write(' ');
  1632. writer.write(name);
  1633. writer.write("=\"");
  1634. writer.write(value);
  1635. writer.write('"');
  1636. }
  1637. else if (
  1638. (flags & HTML_ATTREMPTY) > 0
  1639. && (value.length() == 0 || value.equalsIgnoreCase(name)))
  1640. {
  1641. writer.write(' ');
  1642. writer.write(name);
  1643. }
  1644. else
  1645. {
  1646. writer.write(' ');
  1647. writer.write(name);
  1648. writer.write("=\"");
  1649. if ((flags & HTML_ATTRURL) > 0)
  1650. {
  1651. writeAttrURI(writer, value, m_specialEscapeURLs);
  1652. }
  1653. else
  1654. {
  1655. writeAttrString(writer, value, this.getEncoding());
  1656. }
  1657. writer.write('"');
  1658. }
  1659. } catch (IOException e) {
  1660. throw new SAXException(e);
  1661. }
  1662. }
  1663. public void comment(char ch[], int start, int length)
  1664. throws SAXException
  1665. {
  1666. // The internal DTD subset is not serialized by the ToHTMLStream serializer
  1667. if (m_inDTD)
  1668. return;
  1669. super.comment(ch, start, length);
  1670. }
  1671. public boolean reset()
  1672. {
  1673. boolean ret = super.reset();
  1674. if (!ret)
  1675. return false;
  1676. initToHTMLStream();
  1677. return true;
  1678. }
  1679. private void initToHTMLStream()
  1680. {
  1681. // m_elementDesc = null;
  1682. m_inBlockElem = false;
  1683. m_inDTD = false;
  1684. // m_isRawStack.clear();
  1685. m_omitMetaTag = false;
  1686. m_specialEscapeURLs = true;
  1687. }
  1688. }