1. /*
  2. * @(#)DocumentParser.java 1.16 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package javax.swing.text.html.parser;
  8. import javax.swing.text.SimpleAttributeSet;
  9. import javax.swing.text.html.HTMLEditorKit;
  10. import javax.swing.text.html.HTML;
  11. import javax.swing.text.ChangedCharSetException;
  12. import java.util.*;
  13. import java.io.*;
  14. import java.net.*;
  15. import sun.io.*;
  16. /**
  17. * A Parser for HTML Documents. Read an InputStream of HTML and
  18. * invoke the appropriate methods in the ParserCallback class.
  19. *
  20. * @version 1.16 11/29/01
  21. * @author Sunita Mani
  22. */
  23. public class DocumentParser extends javax.swing.text.html.parser.Parser {
  24. private int inbody;
  25. private int intitle;
  26. private int inhead;
  27. private int instyle;
  28. private boolean seentitle;
  29. private HTMLEditorKit.ParserCallback callback = null;
  30. private boolean ignoreCharSet = false;
  31. private static final boolean debugFlag = false;
  32. private static HTML.UnknownTag EndOfLineTag;
  33. static {
  34. EndOfLineTag = new HTML.UnknownTag("__EndOfLineTag__");
  35. }
  36. public DocumentParser(DTD dtd) {
  37. super(dtd);
  38. }
  39. public void parse(Reader in, HTMLEditorKit.ParserCallback callback, boolean ignoreCharSet) throws IOException {
  40. this.ignoreCharSet = ignoreCharSet;
  41. this.callback = callback;
  42. parse(in);
  43. // This is a temporary way to notify the callback of the end of line
  44. // string.
  45. // In the future there will be a public way to determine this.
  46. SimpleAttributeSet attr = new SimpleAttributeSet();
  47. attr.addAttribute("__EndOfLineString__", getEndOfLineString());
  48. callback.handleSimpleTag(EndOfLineTag, attr, getCurrentPos());
  49. }
  50. /**
  51. * Handle Start Tag.
  52. */
  53. protected void handleStartTag(TagElement tag) {
  54. Element elem = tag.getElement();
  55. if (elem == dtd.body) {
  56. inbody++;
  57. } else if (elem == dtd.html) {
  58. } else if (elem == dtd.head) {
  59. inhead++;
  60. } else if (elem == dtd.title) {
  61. intitle++;
  62. } else if (elem == dtd.style) {
  63. instyle++;
  64. }
  65. if (debugFlag) {
  66. if (tag.fictional()) {
  67. debug("Start Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
  68. } else {
  69. debug("Start Tag: " + tag.getHTMLTag() + " attributes: " +
  70. getAttributes() + " pos: " + getCurrentPos());
  71. }
  72. }
  73. if (tag.fictional()) {
  74. callback.handleStartTag(tag.getHTMLTag(), new SimpleAttributeSet(), getCurrentPos());
  75. } else {
  76. callback.handleStartTag(tag.getHTMLTag(), getAttributes(), getCurrentPos());
  77. flushAttributes();
  78. }
  79. }
  80. protected void handleComment(char text[]) {
  81. if (debugFlag) {
  82. debug("comment: ->" + new String(text) + "<-"
  83. + " pos: " + getCurrentPos());
  84. }
  85. callback.handleComment(text, getCurrentPos());
  86. }
  87. /**
  88. * Handle Empty Tag.
  89. */
  90. protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
  91. Element elem = tag.getElement();
  92. if (elem == dtd.meta && !ignoreCharSet) {
  93. SimpleAttributeSet atts = getAttributes();
  94. if (atts != null) {
  95. String content = (String)atts.getAttribute(HTML.Attribute.CONTENT);
  96. if (content != null) {
  97. if ("content-type".equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
  98. throw new ChangedCharSetException(content, false);
  99. } else if ("charset" .equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
  100. throw new ChangedCharSetException(content, true);
  101. }
  102. }
  103. }
  104. }
  105. if (inbody != 0 || elem == dtd.meta || elem == dtd.base || elem == dtd.isindex || elem == dtd.style || elem == dtd.link) {
  106. if (debugFlag) {
  107. if (tag.fictional()) {
  108. debug("Empty Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
  109. } else {
  110. debug("Empty Tag: " + tag.getHTMLTag() + " attributes: "
  111. + getAttributes() + " pos: " + getCurrentPos());
  112. }
  113. }
  114. if (tag.fictional()) {
  115. callback.handleSimpleTag(tag.getHTMLTag(), new SimpleAttributeSet(), getCurrentPos());
  116. } else {
  117. callback.handleSimpleTag(tag.getHTMLTag(), getAttributes(), getCurrentPos());
  118. flushAttributes();
  119. }
  120. }
  121. }
  122. /**
  123. * Handle End Tag.
  124. */
  125. protected void handleEndTag(TagElement tag) {
  126. Element elem = tag.getElement();
  127. if (elem == dtd.body) {
  128. inbody--;
  129. } else if (elem == dtd.title) {
  130. intitle--;
  131. seentitle = true;
  132. } else if (elem == dtd.head) {
  133. inhead--;
  134. } else if (elem == dtd.style) {
  135. instyle--;
  136. }
  137. if (debugFlag) {
  138. debug("End Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
  139. }
  140. callback.handleEndTag(tag.getHTMLTag(), getCurrentPos());
  141. }
  142. /**
  143. * Handle Text.
  144. */
  145. protected void handleText(char data[]) {
  146. if (data != null) {
  147. if (inbody != 0 || ((instyle != 0) ||
  148. ((intitle != 0) && !seentitle))) {
  149. if (debugFlag) {
  150. debug("text: ->" + new String(data) + "<-" + " pos: " + getCurrentPos());
  151. }
  152. callback.handleText(data, getCurrentPos());
  153. }
  154. }
  155. }
  156. /*
  157. * Error handling.
  158. */
  159. protected void handleError(int ln, String errorMsg) {
  160. if (debugFlag) {
  161. debug("Error: ->" + errorMsg + "<-" + " pos: " + getCurrentPos());
  162. }
  163. /* PENDING: need to improve the error string. */
  164. callback.handleError(errorMsg, getCurrentPos());
  165. }
  166. /*
  167. * debug messages
  168. */
  169. private void debug(String msg) {
  170. System.out.println(msg);
  171. }
  172. }