- /*
 - * @(#)Parser.java 1.24 00/02/02
 - *
 - * Copyright 1998-2000 Sun Microsystems, Inc. All Rights Reserved.
 - *
 - * This software is the proprietary information of Sun Microsystems, Inc.
 - * Use is subject to license terms.
 - *
 - */
 - package javax.swing.text.html.parser;
 - import javax.swing.text.SimpleAttributeSet;
 - import javax.swing.text.html.HTML;
 - import javax.swing.text.ChangedCharSetException;
 - import java.io.*;
 - import java.util.Hashtable;
 - import java.util.Properties;
 - import java.util.Vector;
 - import java.util.Enumeration;
 - import java.net.URL;
 - import sun.misc.MessageUtils;
 - /**
 - * A simple DTD-driven HTML parser. The parser reads an
 - * HTML file from an InputStream and calls various methods
 - * (which should be overridden in a subclass) when tags and
 - * data are encountered.
 - * <p>
 - * Unfortunately there are many badly implemented HTML parsers
 - * out there, and as a result there are many badly formatted
 - * HTML files. This parser attempts to parse most HTML files.
 - * This means that the implementation sometimes deviates from
 - * the SGML specification in favor of HTML.
 - * <p>
 - * The parser treats \r and \r\n as \n. Newlines after starttags
 - * and before end tags are ignored just as specified in the SGML/HTML
 - * specification.
 - *
 - * @see DTD
 - * @see TagElement
 - * @see SimpleAttributeSet
 - * @version 1.24, 02/02/00
 - * @author Arthur van Hoff
 - * @author Sunita Mani
 - */
 - public
 - class Parser implements DTDConstants {
 - /** Buffer of character data collected for the next text callback; only the first textpos chars are valid. */
 - private char text[] = new char[1024];
 - /** Number of valid characters currently held in text. */
 - private int textpos = 0;
 - /** The tag most recently recorded by handleText(TagElement) or startTag. */
 - private TagElement last;
 - /** True while a single space is pending and has not yet been written into text. */
 - private boolean space;
 - /** Scratch buffer filled by addString and drained by getString/getChars. */
 - private char str[] = new char[128];
 - /** Number of valid characters currently held in str. */
 - private int strpos = 0;
 - /** The DTD supplied to the constructor; consulted for elements and entities. */
 - protected DTD dtd = null;
 - /** The current input character; the parse methods treat -1 as end of input. */
 - private int ch;
 - /** Current line number, incremented for each line break consumed; see getCurrentLine. */
 - private int ln;
 - /** The character source being parsed. */
 - private Reader in;
 - /** Element of the tag stack entry most recently pushed or exposed; maintained by startTag and endTag. */
 - private Element recent;
 - /** Stack of currently open tags; null when no tag is open. */
 - private TagStack stack;
 - /** Set by legalElementContext to the result of ignoreElement. */
 - private boolean skipTag = false;
 - // NOTE(review): lastFormSent is not referenced in the part of the file
 - // reviewed here; presumably it tracks the last form tag delivered - confirm.
 - private TagElement lastFormSent = null;
 - /** Attributes collected for the tag being parsed; see getAttributes and flushAttributes. */
 - private SimpleAttributeSet attributes = new SimpleAttributeSet();
 - // State for <html>, <head> and <body>. Since people like to slap
 - // together HTML documents without thinking, occasionally they
 - // have multiple instances of these tags. These booleans track
 - // the first sightings of these tags so they can be safely ignored
 - // by the parser if repeated.
 - private boolean seenHtml = false;
 - private boolean seenHead = false;
 - private boolean seenBody = false;
 - /**
 - * This flag determines whether or not the Parser will be strict
 - * in enforcing SGML compatibility. If false, it will be lenient
 - * with certain common classes of erroneous HTML constructs.
 - * Strict or not, in either case an error will be recorded.
 - *
 - */
 - protected boolean strict = false;
 - /** Number of \r\n's encountered. */
 - private int crlfCount;
 - /** Number of \r's encountered. A \r\n will not increment this. */
 - private int crCount;
 - /** Number of \n's encountered. A \r\n will not increment this. */
 - private int lfCount;
 - //
 - // To correctly identify the start of a tag/comment/text we need two
 - // ivars. Two are needed as handleText isn't invoked until the tag
 - // after the text has been parsed, that is the parser parses the text,
 - // then a tag, then invokes handleText followed by handleStart.
 - //
 - /** The start position of the current block. Block is overloaded here,
 - * it really means the current start position for the current comment,
 - * tag, text. Use getBlockStartPosition to access this. */
 - private int currentBlockStartPos;
 - /** Start position of the last block. */
 - private int lastBlockStartPos;
 - /**
 -  * Creates a parser driven by the given DTD.
 -  *
 -  * @param dtd the DTD this parser consults while parsing
 -  */
 - public Parser(DTD dtd) {
 -     this.dtd = dtd;
 - }
 - /**
 -  * Reports the value of the parser's line counter.
 -  *
 -  * @return the line number of the line currently being parsed
 -  */
 - protected int getCurrentLine() {
 -     return this.ln;
 - }
 - /**
 -  * Returns the start position of the current block, where "block"
 -  * loosely means the comment, tag or text currently being reported.
 -  * Intended for subclasses that need the start offset while inside
 -  * one of the handleXXX callbacks.
 -  *
 -  * @return the recorded start position of the last block minus one,
 -  *         never less than zero
 -  */
 - int getBlockStartPosition() {
 -     int position = lastBlockStartPos - 1;
 -     return (position < 0) ? 0 : position;
 - }
 - /**
 -  * Creates the TagElement used to represent an occurrence of an element.
 -  *
 -  * @param elem      the element the tag stands for
 -  * @param fictional flag passed through unchanged to the TagElement
 -  *                  constructor; the parser passes true for tags it
 -  *                  inserts itself during error recovery
 -  * @return a new TagElement
 -  */
 - protected TagElement makeTag(Element elem, boolean fictional) {
 -     TagElement created = new TagElement(elem, fictional);
 -     return created;
 - }
 - protected TagElement makeTag(Element elem) {
 - return makeTag(elem, false);
 - }
 - /**
 -  * Gives access to the parser's attribute set. The set itself is
 -  * returned, not a copy.
 -  *
 -  * @return the attribute set held by this parser
 -  */
 - protected SimpleAttributeSet getAttributes() {
 -     return this.attributes;
 - }
 - /**
 -  * Empties the parser's attribute set by asking the set to remove
 -  * its own contents.
 -  */
 - protected void flushAttributes() {
 -     SimpleAttributeSet current = attributes;
 -     current.removeAttributes(current);
 - }
 - /**
 -  * Callback invoked when PCDATA is encountered. The default
 -  * implementation does nothing; subclasses override it to consume text.
 -  *
 -  * @param text the characters of the text run
 -  */
 - protected void handleText(char text[]) {
 -     // intentionally empty
 - }
 - /**
 -  * Callback invoked with the text found under an HTML title tag.
 -  * Unless overridden, title text is simply forwarded to
 -  * handleText(char[]).
 -  *
 -  * @param text the characters of the title
 -  */
 - protected void handleTitle(char text[]) {
 -     // Title text is treated like ordinary text by default.
 -     this.handleText(text);
 - }
 - /**
 -  * Callback invoked when an HTML comment is encountered. The default
 -  * implementation does nothing.
 -  *
 -  * @param text the characters of the comment
 -  */
 - protected void handleComment(char text[]) {
 -     // intentionally empty
 - }
 - /**
 -  * Recovers from reaching end of input inside a comment.
 -  * <p>
 -  * If the buffered comment contains a newline, the first line is
 -  * reported as a complete (unterminated single-line) comment and the
 -  * remaining buffered characters are re-read as normal HTML content
 -  * by swapping the input for a reader over them. If there is no
 -  * newline, an "eof.comment" error is reported instead.
 -  */
 - protected void handleEOFInComment() {
 -     int firstNewline = strIndexOf('\n');
 -     if (firstNewline < 0) {
 -         // a single-line comment cannot be split, so signal an error
 -         error("eof.comment");
 -         return;
 -     }
 -     // Everything before the first newline is reported as the comment.
 -     handleComment(getChars(0, firstNewline));
 -     try {
 -         in.close();
 -         // Re-parse the rest of the buffer; priming ch with '>' makes
 -         // the caller see the comment as closed.
 -         in = new CharArrayReader(getChars(firstNewline + 1));
 -         ch = '>';
 -     } catch (IOException e) {
 -         error("ioexception");
 -     }
 -     resetStrBuffer();
 - }
 - /**
 -  * Callback invoked when an empty tag is encountered. The default
 -  * implementation does nothing.
 -  *
 -  * @param tag the empty tag
 -  * @throws ChangedCharSetException declared so that overriding
 -  *         implementations may throw it
 -  */
 - protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
 -     // intentionally empty
 - }
 - /**
 -  * Callback invoked when a start tag is encountered. The default
 -  * implementation does nothing.
 -  *
 -  * @param tag the start tag
 -  */
 - protected void handleStartTag(TagElement tag) {
 -     // intentionally empty
 - }
 - /**
 -  * Callback invoked when an end tag is encountered. The default
 -  * implementation does nothing.
 -  *
 -  * @param tag the tag being closed
 -  */
 - protected void handleEndTag(TagElement tag) {
 -     // intentionally empty
 - }
 - /**
 -  * Callback invoked when the parser reports an error. The default
 -  * implementation ignores the error.
 -  *
 -  * @param ln  the line number the error was reported for
 -  * @param msg the error message
 -  */
 - protected void handleError(int ln, String msg) {
 -     // Deliberately a no-op. For debugging, dumping the thread stack,
 -     // the tag stack and "line <ln>: error: <msg>" here is helpful.
 - }
 - /**
 -  * Flushes the buffered text, if any, ahead of the given tag.
 -  * <p>
 -  * The buffered characters (plus a pending space, when one is owed)
 -  * are copied into a fresh array and delivered to handleTitle when
 -  * the tag's element is named "title", otherwise to
 -  * handleText(char[]). Afterwards the text buffer is empty and
 -  * the tag is recorded as the last tag seen.
 -  *
 -  * @param tag the tag that follows the text
 -  */
 - void handleText(TagElement tag) {
 -     // A flow-breaking tag cancels any pending space.
 -     if (tag.breaksFlow()) {
 -         space = false;
 -     }
 -     // Nothing buffered: unless a lone pending space can be emitted
 -     // as PCDATA here, just record the tag and leave.
 -     if (textpos == 0
 -             && (!space || stack == null || last.breaksFlow()
 -                 || !stack.advance(dtd.pcdata))) {
 -         last = tag;
 -         space = false;
 -         lastBlockStartPos = currentBlockStartPos;
 -         return;
 -     }
 -     if (space) {
 -         // make room for, then append, the pending space
 -         if (textpos >= text.length) {
 -             char grown[] = new char[text.length + 200];
 -             System.arraycopy(text, 0, grown, 0, text.length);
 -             text = grown;
 -         }
 -         text[textpos] = ' ';
 -         textpos++;
 -         space = false;
 -     }
 -     char flushed[] = new char[textpos];
 -     System.arraycopy(text, 0, flushed, 0, textpos);
 -     // Title text is routed separately so that it is not lost when
 -     // bad HTML forces error recovery.
 -     boolean isTitle = tag.getElement().getName().equals("title");
 -     if (!isTitle) {
 -         handleText(flushed);
 -     } else {
 -         handleTitle(flushed);
 -     }
 -     last = tag;
 -     space = false;
 -     textpos = 0;
 -     lastBlockStartPos = currentBlockStartPos;
 - }
 - /**
 -  * Reports an error to handleError for the current line. The message
 -  * is the plain concatenation of the error key and its three arguments.
 -  *
 -  * @param err  the error key
 -  * @param arg1 first argument
 -  * @param arg2 second argument
 -  * @param arg3 third argument
 -  */
 - protected void error(String err, String arg1, String arg2,
 -                      String arg3) {
 -     // crude formatting, but this is not expected to be relied upon
 -     String message = err + arg1 + arg2 + arg3;
 -     handleError(ln, message);
 - }
 - protected void error(String err, String arg1, String arg2) {
 - error(err, arg1, arg2, "?");
 - }
 - protected void error(String err, String arg1) {
 - error(err, arg1, "?", "?");
 - }
 - protected void error(String err) {
 - error(err, "?", "?", "?");
 - }
 - /**
 -  * Handles a start tag. Pending text is flushed, required attributes
 -  * are checked, and the tag is reported through handleEmptyTag or
 -  * handleStartTag. Non-empty elements other than "form" are also
 -  * pushed onto the tag stack.
 -  *
 -  * @param tag the start tag to process
 -  * @throws ChangedCharSetException propagated from handleEmptyTag
 -  */
 - protected void startTag(TagElement tag) throws ChangedCharSetException {
 -     Element elem = tag.getElement();
 -     // If the tag is an empty tag and textpos != 0 there is text
 -     // before the start tag that must be processed before the tag.
 -     if (!elem.isEmpty() || textpos != 0) {
 -         handleText(tag);
 -     } else {
 -         // handleText() normally updates these; since it is not
 -         // called in this case they are updated here.
 -         last = tag;
 -         // Note that we should really check last.breaksFlow before
 -         // assuming this should be false.
 -         space = false;
 -     }
 -     lastBlockStartPos = currentBlockStartPos;
 -     // Check required attributes. Attributes with a known
 -     // HTML.Attribute key are stored under that key rather than under
 -     // their String name, so both forms must be looked up; checking
 -     // the String alone reports present attributes as missing.
 -     for (AttributeList a = elem.atts ; a != null ; a = a.next) {
 -         if (a.modifier != REQUIRED) {
 -             continue;
 -         }
 -         boolean defined = false;
 -         if (!attributes.isEmpty()) {
 -             defined = attributes.isDefined(a.name);
 -             if (!defined) {
 -                 HTML.Attribute key = HTML.getAttributeKey(a.name);
 -                 defined = (key != null) && attributes.isDefined(key);
 -             }
 -         }
 -         if (!defined) {
 -             error("req.att ", a.getName(), elem.getName());
 -         }
 -     }
 -     if (elem.isEmpty()) {
 -         handleEmptyTag(tag);
 -     } else if (elem.getName().equals("form")) {
 -         // form tags are reported but never pushed onto the stack
 -         handleStartTag(tag);
 -     } else {
 -         recent = elem;
 -         stack = new TagStack(tag, stack);
 -         handleStartTag(tag);
 -     }
 - }
 - /**
 -  * Handles an end tag for the element on top of the tag stack.
 -  * Pending text is flushed, an error is reported if the end is not
 -  * legitimate, handleEndTag is invoked, and the entry is popped.
 -  *
 -  * @param omitted true when the end tag was not present in the input
 -  *                and is being supplied by the parser
 -  */
 - protected void endTag(boolean omitted) {
 -     handleText(stack.tag);
 -     if (omitted && !stack.elem.omitEnd()) {
 -         // the end tag was left out, but this element does not allow that
 -         error("end.missing", stack.elem.getName());
 -     } else if (!stack.terminate()) {
 -         // the element's content is not in a state where it may end
 -         error("end.unexpected", stack.elem.getName());
 -     }
 -     // report the tag, then pop it
 -     handleEndTag(stack.tag);
 -     stack = stack.next;
 -     if (stack == null) {
 -         recent = null;
 -     } else {
 -         recent = stack.elem;
 -     }
 - }
 - /**
 -  * Decides whether an element that is not legal in the current
 -  * context should simply be dropped. The following are ignored:
 -  * <ul>
 -  * <li>a repeated html, head or body tag;
 -  * <li>dt or dd with no enclosing dl on the stack;
 -  * <li>anything directly inside a table except #pcdata and input
 -  *     (td and th are dealt with by legalElementContext);
 -  * <li>font directly inside ul or ol;
 -  * <li>meta and style.
 -  * </ul>
 -  *
 -  * @param elem the element under consideration
 -  * @return true if the element should be ignored
 -  */
 - boolean ignoreElement(Element elem) {
 -     String enclosing = stack.elem.getName();
 -     String name = elem.getName();
 -     // Second and later sightings of the document-structure tags.
 -     if (name.equals("html") && seenHtml) {
 -         return true;
 -     }
 -     if (name.equals("head") && seenHead) {
 -         return true;
 -     }
 -     if (name.equals("body") && seenBody) {
 -         return true;
 -     }
 -     // Definition list items outside of any definition list.
 -     if (name.equals("dt") || name.equals("dd")) {
 -         TagStack entry = stack;
 -         while (entry != null && !entry.elem.getName().equals("dl")) {
 -             entry = entry.next;
 -         }
 -         if (entry == null) {
 -             return true;
 -         }
 -     }
 -     if (enclosing.equals("table")
 -             && !name.equals("#pcdata") && !name.equals("input")) {
 -         return true;
 -     }
 -     if (name.equals("font")
 -             && (enclosing.equals("ul") || enclosing.equals("ol"))) {
 -         return true;
 -     }
 -     if (name.equals("meta") && stack != null) {
 -         return true;
 -     }
 -     if (name.equals("style")) {
 -         return true;
 -     }
 -     // An <a> directly inside a table has already been caught by the
 -     // table test above, so nothing else is ignored.
 -     return false;
 - }
 - /**
 -  * Records that an html, head or body element has been seen, so that
 -  * later repetitions can be recognised. Other elements are not tracked.
 -  *
 -  * @param elem the element that has just been accepted
 -  */
 - protected void markFirstTime(Element elem) {
 -     String name = elem.getName();
 -     if (name.equals("html")) {
 -         seenHtml = true;
 -         return;
 -     }
 -     if (name.equals("head")) {
 -         seenHead = true;
 -         return;
 -     }
 -     if (name.equals("body")) {
 -         seenBody = true;
 -     }
 - }
 - /**
 -  * Creates a legal context for an element. When the element is not
 -  * allowed at the current position, the method tries, in order: to
 -  * ignore the element, to insert a missing tr in a table, to close
 -  * open elements until an enclosing one accepts it, to open the
 -  * element expected next, to open any element whose content may start
 -  * with it, and finally to close the current element. Most of these
 -  * recoveries apply only when strict is false or the DTD marks the
 -  * relevant tag as omissible.
 -  *
 -  * @param elem the element that is about to be started
 -  * @return true if elem is now legal at the top of the stack, or if
 -  *         it was ignored (in which case skipTag is also set);
 -  *         false if no legal context could be created
 -  * @throws ChangedCharSetException propagated from startTag
 -  */
 - boolean legalElementContext(Element elem) throws ChangedCharSetException {
 -     // Deal with the empty stack
 -     if (stack == null) {
 -         if (elem != dtd.html) {
 -             // implicitly open <html> and retry
 -             startTag(makeTag(dtd.html, true));
 -             return legalElementContext(elem);
 -         }
 -         return true;
 -     }
 -     // Is it allowed in the current context
 -     if (stack.advance(elem)) {
 -         markFirstTime(elem);
 -         return true;
 -     }
 -     boolean insertTag = false;
 -     // The use of all error recovery strategies are contingent
 -     // on the value of the strict property.
 -     //
 -     // These are commonly occurring errors. If insertTag is true,
 -     // then we want to adopt an error recovery strategy that
 -     // involves attempting to insert an additional tag to
 -     // legalize the context. The two errors addressed here
 -     // are:
 -     // 1) when a <td> or <th> is seen soon after a <table> tag.
 -     //    In this case we insert a <tr>.
 -     // 2) when any other tag apart from a <tr> is seen
 -     //    in the context of a <tr>. In this case we would
 -     //    like to add a <td>. If a <tr> is seen within a
 -     //    <tr> context, then we will close out the current
 -     //    <tr>.
 -     //
 -     // This insertion strategy is handled later in the method.
 -     // The reason for checking this now, is that in other cases
 -     // we would like to apply other error recovery strategies, for
 -     // example ignoring tags.
 -     //
 -     // In certain cases it is better to ignore a tag than try to
 -     // fix the situation. So the first test is to see if this
 -     // is what we need to do.
 -     //
 -     String stackElemName = stack.elem.getName();
 -     String elemName = elem.getName();
 -     if (!strict &&
 -         ((stackElemName.equals("table") && elemName.equals("td")) ||
 -          (stackElemName.equals("table") && elemName.equals("th")) ||
 -          (stackElemName.equals("tr") && !elemName.equals("tr")))) {
 -         insertTag = true;
 -     }
 -     // Names are compared by value; comparing the String references
 -     // would only be correct if both names were the same object.
 -     if (!strict && !insertTag && (!stackElemName.equals(elemName) ||
 -                                   elemName.equals("body"))) {
 -         skipTag = ignoreElement(elem);
 -         if (skipTag) {
 -             error("tag.ignore", elem.getName());
 -             return skipTag;
 -         }
 -     }
 -     // Check for anything after the start of the table besides tr, td, th
 -     // or caption, and if those aren't there, insert the <tr> and call
 -     // legalElementContext again.
 -     if (!strict && stackElemName.equals("table") &&
 -         !elemName.equals("tr") && !elemName.equals("td") &&
 -         !elemName.equals("th") && !elemName.equals("caption")) {
 -         Element e = dtd.getElement("tr");
 -         TagElement t = makeTag(e, true);
 -         legalTagContext(t);
 -         startTag(t);
 -         error("start.missing", elem.getName());
 -         return legalElementContext(elem);
 -     }
 -     // Try to find a legal context by checking if the current
 -     // tag is valid in an enclosing context. If so
 -     // close out the tags by outputting end tags and then
 -     // insert the current tag. If the tags that are
 -     // being closed out do not have an optional end tag
 -     // specification in the DTD then an html error is
 -     // reported.
 -     //
 -     if (!insertTag && stack.terminate() && (!strict || stack.elem.omitEnd())) {
 -         for (TagStack s = stack.next ; s != null ; s = s.next) {
 -             if (s.advance(elem)) {
 -                 while (stack != s) {
 -                     endTag(true);
 -                 }
 -                 return true;
 -             }
 -             if (!s.terminate() || (strict && !s.elem.omitEnd())) {
 -                 break;
 -             }
 -         }
 -     }
 -     // Check if we know what tag is expected next.
 -     // If so insert the tag. Report an error if the
 -     // tag does not have its start tag spec in the DTD as optional.
 -     //
 -     Element next = stack.first();
 -     if (next != null && (!strict || next.omitStart()) &&
 -         !(next == dtd.head && elem == dtd.pcdata)) {
 -         TagElement t = makeTag(next, true);
 -         legalTagContext(t);
 -         startTag(t);
 -         if (!next.omitStart()) {
 -             error("start.missing", elem.getName());
 -         }
 -         return legalElementContext(elem);
 -     }
 -     // Traverse the list of expected elements and determine if adding
 -     // any of these elements would make for a legal context.
 -     //
 -     if (!strict) {
 -         ContentModel content = stack.contentModel();
 -         Vector elemVec = new Vector();
 -         if (content != null) {
 -             content.getElements(elemVec);
 -             for (Enumeration v = elemVec.elements(); v.hasMoreElements();) {
 -                 Element e = (Element)v.nextElement();
 -                 // Ensure that this element has not been included as
 -                 // part of the exclusions in the DTD.
 -                 //
 -                 if (stack.excluded(e.getIndex())) {
 -                     continue;
 -                 }
 -                 boolean reqAtts = false;
 -                 for (AttributeList a = e.getAttributes(); a != null ; a = a.next) {
 -                     if (a.modifier == REQUIRED) {
 -                         reqAtts = true;
 -                         break;
 -                     }
 -                 }
 -                 // Ensure that no tag that has required attributes
 -                 // gets inserted.
 -                 //
 -                 if (reqAtts) {
 -                     continue;
 -                 }
 -                 ContentModel m = e.getContent();
 -                 if (m != null && m.first(elem)) {
 -                     TagElement t = makeTag(e, true);
 -                     legalTagContext(t);
 -                     startTag(t);
 -                     error("start.missing", e.getName());
 -                     return legalElementContext(elem);
 -                 }
 -             }
 -         }
 -     }
 -     // Check if the stack can be terminated. If so add the appropriate
 -     // end tag. Report an error if the tag being ended does not have its
 -     // end tag spec in the DTD as optional.
 -     //
 -     if (stack.terminate() && (stack.elem != dtd.body) && (!strict || stack.elem.omitEnd())) {
 -         if (!stack.elem.omitEnd()) {
 -             error("end.missing", elem.getName());
 -         }
 -         endTag(true);
 -         return legalElementContext(elem);
 -     }
 -     // At this point no recovery strategy applied.
 -     return false;
 - }
 - /**
 -  * Creates a legal context for a tag. If legalElementContext cannot
 -  * do so, two further recoveries are tried: closing a non-flow-breaking
 -  * parent when the tag breaks flow, and closing the document head when
 -  * it is still open. If both fail, "tag.unexpected" is reported.
 -  *
 -  * @param tag the tag that is about to be started
 -  * @throws ChangedCharSetException propagated from legalElementContext
 -  */
 - void legalTagContext(TagElement tag) throws ChangedCharSetException {
 -     if (legalElementContext(tag.getElement())) {
 -         markFirstTime(tag.getElement());
 -         return;
 -     }
 -     // Avoid putting a block tag in a flow tag.
 -     if (tag.breaksFlow() && (stack != null) && !stack.tag.breaksFlow()) {
 -         endTag(true);
 -         legalTagContext(tag);
 -         return;
 -     }
 -     // Avoid putting something weird in the head of the document:
 -     // look for an open head element ...
 -     TagStack headEntry = stack;
 -     while (headEntry != null && headEntry.tag.getElement() != dtd.head) {
 -         headEntry = headEntry.next;
 -     }
 -     if (headEntry != null) {
 -         // ... close everything above it, then the head itself, and retry
 -         while (stack != headEntry) {
 -             endTag(true);
 -         }
 -         endTag(true);
 -         legalTagContext(tag);
 -         return;
 -     }
 -     // Everything failed
 -     error("tag.unexpected", tag.getElement().getName());
 - }
 - /**
 -  * Error context. Something went wrong, so make sure parsing
 -  * continues in the document's body: every open tag above the body is
 -  * reported as ended and popped, and if no body is open one is started.
 -  *
 -  * @throws ChangedCharSetException propagated from startTag
 -  */
 - void errorContext() throws ChangedCharSetException {
 -     while ((stack != null) && (stack.tag.getElement() != dtd.body)) {
 -         handleEndTag(stack.tag);
 -         stack = stack.next;
 -     }
 -     if (stack != null) {
 -         return;
 -     }
 -     // The stack was emptied without finding a body; open one.
 -     legalElementContext(dtd.body);
 -     startTag(makeTag(dtd.body, true));
 - }
 - /**
 -  * Appends a character to the string buffer, growing the buffer by
 -  * 128 chars when it is full.
 -  *
 -  * @param c the character to append; it is narrowed to char
 -  */
 - void addString(int c) {
 -     int capacity = str.length;
 -     if (strpos == capacity) {
 -         char bigger[] = new char[capacity + 128];
 -         System.arraycopy(str, 0, bigger, 0, capacity);
 -         str = bigger;
 -     }
 -     str[strpos] = (char)c;
 -     strpos++;
 - }
 - /**
 -  * Removes and returns the tail of the string buffer as a String.
 -  *
 -  * @param pos index of the first buffered character to return; the
 -  *            buffer is truncated to this length
 -  * @return the characters from pos up to the end of the buffer
 -  */
 - String getString(int pos) {
 -     int count = strpos - pos;
 -     char tail[] = new char[count];
 -     System.arraycopy(str, pos, tail, 0, count);
 -     strpos = pos;
 -     return new String(tail);
 - }
 - /**
 -  * Removes and returns the tail of the string buffer as a char array.
 -  *
 -  * @param pos index of the first buffered character to return; the
 -  *            buffer is truncated to this length
 -  * @return a new array holding the characters from pos up to the end
 -  *         of the buffer
 -  */
 - char[] getChars(int pos) {
 -     int count = strpos - pos;
 -     char tail[] = new char[count];
 -     System.arraycopy(str, pos, tail, 0, count);
 -     strpos = pos;
 -     return tail;
 - }
 - /**
 -  * Copies a range of the string buffer without modifying the buffer.
 -  *
 -  * @param pos    index of the first character to copy
 -  * @param endPos index just past the last character to copy
 -  * @return a new array holding the characters in [pos, endPos)
 -  */
 - char[] getChars(int pos, int endPos) {
 -     int count = endPos - pos;
 -     char slice[] = new char[count];
 -     System.arraycopy(str, pos, slice, 0, count);
 -     // REMIND: it's not clear whether this version should reset strpos
 -     // to pos; it currently leaves the buffer length alone.
 -     return slice;
 - }
 - /**
 -  * Discards the contents of the string buffer by resetting its length
 -  * to zero; the backing array is kept.
 -  */
 - void resetStrBuffer() {
 -     this.strpos = 0;
 - }
 - /**
 -  * Finds the first occurrence of a character in the string buffer.
 -  *
 -  * @param target the character to look for
 -  * @return the index of the first match among the buffered
 -  *         characters, or -1 if there is none
 -  */
 - int strIndexOf(char target) {
 -     int index = 0;
 -     while (index < strpos) {
 -         if (str[index] == target) {
 -             return index;
 -         }
 -         index++;
 -     }
 -     return -1;
 - }
 - /**
 -  * Skip space.
 -  * [5] 297:5
 -  * <p>
 -  * Consumes blanks, tabs and line breaks, updating the line counter
 -  * and the per-style newline counters, and stops at the first
 -  * character that is none of these.
 -  *
 -  * @throws IOException if reading the next character fails
 -  */
 - void skipSpace() throws IOException {
 -     for (;;) {
 -         if (ch == '\n') {
 -             ln++;
 -             ch = readCh();
 -             lfCount++;
 -         } else if (ch == '\r') {
 -             ln++;
 -             ch = readCh();
 -             if (ch == '\n') {
 -                 // \r\n counts as a single line break
 -                 ch = readCh();
 -                 crlfCount++;
 -             } else {
 -                 crCount++;
 -             }
 -         } else if (ch == ' ' || ch == '\t') {
 -             ch = readCh();
 -         } else {
 -             return;
 -         }
 -     }
 - }
 - /**
 -  * Parse identifier. [55] 346:17
 -  * <p>
 -  * An identifier starts with an ASCII letter and continues with ASCII
 -  * letters, digits, '.', '-' or (not officially allowed) '_'. Its
 -  * characters are appended to the string buffer.
 -  *
 -  * @param lower when true, uppercase ASCII letters are folded to
 -  *              lowercase
 -  * @return false if the current character cannot start an
 -  *         identifier (nothing is consumed), true otherwise
 -  * @throws IOException if reading the next character fails
 -  */
 - boolean parseIdentifier(boolean lower) throws IOException {
 -     // first character: letters only
 -     if ((ch >= 'A') && (ch <= 'Z')) {
 -         if (lower) {
 -             ch = 'a' + (ch - 'A');
 -         }
 -     } else if ((ch < 'a') || (ch > 'z')) {
 -         return false;
 -     }
 -     for (;;) {
 -         addString(ch);
 -         ch = readCh();
 -         if ((ch >= 'A') && (ch <= 'Z')) {
 -             if (lower) {
 -                 ch = 'a' + (ch - 'A');
 -             }
 -             continue;
 -         }
 -         boolean nameChar = ((ch >= 'a') && (ch <= 'z'))
 -             || ((ch >= '0') && (ch <= '9'))
 -             || (ch == '.') || (ch == '-')
 -             || (ch == '_'); // not officially allowed
 -         if (!nameChar) {
 -             return true;
 -         }
 -     }
 - }
 - /**
 -  * Parse an entity reference. [59] 350:17
 -  * <p>
 -  * Called with ch on the '&amp;'. A decimal character reference
 -  * yields the referenced character; a named reference yields the
 -  * entity's data from the DTD. A name that does not resolve to a
 -  * general entity is handed back as literal text, including the
 -  * terminating ';' when one was present, so the source text is not
 -  * altered.
 -  *
 -  * @return the replacement characters for the reference
 -  * @throws IOException if reading the next character fails
 -  */
 - private char[] parseEntityReference() throws IOException {
 -     int pos = strpos;
 -     if ((ch = readCh()) == '#') {
 -         int n = 0;
 -         ch = readCh();
 -         if ((ch >= '0') && (ch <= '9')) {
 -             while ((ch >= '0') && (ch <= '9')) {
 -                 n = (n * 10) + ch - '0';
 -                 ch = readCh();
 -             }
 -             consumeEntityRefTerminator();
 -             char data[] = {(char)n};
 -             return data;
 -         }
 -         addString('#');
 -         if (!parseIdentifier(false)) {
 -             error("ident.expected");
 -             strpos = pos;
 -             char data[] = {'&', '#'};
 -             return data;
 -         }
 -     } else if (!parseIdentifier(false)) {
 -         // a bare ampersand
 -         char data[] = {'&'};
 -         return data;
 -     }
 -     boolean semicolon = consumeEntityRefTerminator();
 -     String nm = getString(pos);
 -     Entity ent = dtd.getEntity(nm);
 -     // entities are case sensitive - however if strict
 -     // is false then we will try to make a match by
 -     // converting the string to all lowercase.
 -     //
 -     if (!strict && (ent == null)) {
 -         ent = dtd.getEntity(nm.toLowerCase());
 -     }
 -     if ((ent == null) || !ent.isGeneral()) {
 -         if (nm.length() == 0) {
 -             error("invalid.entref", nm);
 -             return new char[0];
 -         }
 -         // No match: restore the reference exactly as written. The
 -         // ';' has already been consumed, so it is put back here.
 -         String restored = "&" + nm + (semicolon ? ";" : "");
 -         return restored.toCharArray();
 -     }
 -     return ent.getData();
 - }
 - /**
 -  * Consumes the character that ends an entity reference, if ch is
 -  * one: a line break (counted like any other line break) or ';'.
 -  * Any other character is left in place.
 -  *
 -  * @return true if the terminator consumed was a ';'
 -  */
 - private boolean consumeEntityRefTerminator() throws IOException {
 -     switch (ch) {
 -       case '\n':
 -         ln++;
 -         ch = readCh();
 -         lfCount++;
 -         break;
 -       case '\r':
 -         ln++;
 -         if ((ch = readCh()) == '\n') {
 -             ch = readCh();
 -             crlfCount++;
 -         }
 -         else {
 -             crCount++;
 -         }
 -         break;
 -       case ';':
 -         ch = readCh();
 -         return true;
 -     }
 -     return false;
 - }
 - /**
 - * Parse a comment. [92] 391:7
 - * <p>
 - * Reads characters starting at the current one and collects the
 - * comment text in the string buffer via addString. \r and \r\n are
 - * stored as \n. In strict mode the method returns as soon as "--" is
 - * seen; otherwise it returns on "-->" or "--!>", with ch still
 - * holding the closing '>'. At end of input handleEOFInComment is
 - * invoked.
 - */
 - void parseComment() throws IOException {
 - while (true) {
 - // c is what gets appended at the bottom of the loop; ch moves on
 - int c = ch;
 - switch (c) {
 - case '-':
 - /** Presuming that the start string of a comment "<!--" has
 - already been parsed, the '-' character is valid only as
 - part of a comment termination and further more it must
 - be present in even numbers. Hence if strict is true, we
 - presume the comment has been terminated and return.
 - However if strict is false, then there is no even number
 - requirement and this character can appear anywhere in the
 - comment. The parser reads on until it sees the following
 - pattern: "-->" or "--!>".
 - **/
 - // Lenient mode, and the previously buffered character is also
 - // a '-': this dash may be the second one of a terminator.
 - if (!strict && (strpos != 0) && (str[strpos - 1] == '-')) {
 - if ((ch = readCh()) == '>') {
 - return;
 - }
 - if (ch == '!') {
 - if ((ch = readCh()) == '>') {
 - return;
 - } else {
 - /* to account for extra read()'s that happened */
 - addString('-');
 - addString('!');
 - continue;
 - }
 - }
 - // not a terminator: fall out and buffer this dash
 - break;
 - }
 - // Otherwise look ahead for a second dash.
 - if ((ch = readCh()) == '-') {
 - ch = readCh();
 - if (strict || ch == '>') {
 - return;
 - }
 - if (ch == '!') {
 - if ((ch = readCh()) == '>') {
 - return;
 - } else {
 - /* to account for extra read()'s that happened */
 - addString('-');
 - addString('!');
 - continue;
 - }
 - }
 - /* to account for the extra read() */
 - addString('-');
 - }
 - break;
 - case -1:
 - // end of input inside the comment
 - handleEOFInComment();
 - return;
 - case '\n':
 - ln++;
 - ch = readCh();
 - lfCount++;
 - break;
 - case '>':
 - // a '>' not preceded by "--" is ordinary comment text
 - ch = readCh();
 - break;
 - case '\r':
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - // store the line break as \n
 - c = '\n';
 - break;
 - default:
 - ch = readCh();
 - break;
 - }
 - addString(c);
 - }
 - }
 - /**
 - * Parse literal content. [46] 343:1 and [47] 344:1
 - * <p>
 - * Appends input to the text buffer until the end tag of the element
 - * on top of the stack is recognised, or end of input is reached.
 - * Entity references are expanded through parseEntityReference, and
 - * \r and \r\n are stored as \n. On a matching end tag the tag text is
 - * removed from the buffer and endTag(false) is called; at end of input
 - * "eof.literal" is reported and endTag(true) is called.
 - * <p>
 - * NOTE(review): the replace parameter is not consulted anywhere in
 - * this method body.
 - */
 - void parseLiteral(boolean replace) throws IOException {
 - while (true) {
 - // c is what gets appended at the bottom of the loop; ch moves on
 - int c = ch;
 - switch (c) {
 - case -1:
 - error("eof.literal", stack.elem.getName());
 - endTag(true);
 - return;
 - case '>':
 - ch = readCh();
 - // i: where "</name" would have to begin if the buffer ends with it
 - int i = textpos - (stack.elem.name.length() + 2), j = 0;
 - // match end tag
 - if ((i >= 0) && (text[i++] == '<') && (text[i] == '/')) {
 - // compare the buffered name, lowercased, against the element name
 - while ((++i < textpos) &&
 - (Character.toLowerCase(text[i]) == stack.elem.name.charAt(j++)));
 - if (i == textpos) {
 - // whole name matched: drop "</name" from the buffer
 - textpos -= (stack.elem.name.length() + 2);
 - // and one newline immediately before the end tag
 - if ((textpos > 0) && (text[textpos-1] == '\n')) {
 - textpos--;
 - }
 - endTag(false);
 - return;
 - }
 - }
 - // no end tag here: the '>' is buffered as ordinary text
 - break;
 - case '&':
 - char data[] = parseEntityReference();
 - // grow the text buffer if the expansion does not fit
 - if (textpos + data.length > text.length) {
 - char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
 - System.arraycopy(text, 0, newtext, 0, text.length);
 - text = newtext;
 - }
 - System.arraycopy(data, 0, text, textpos, data.length);
 - textpos += data.length;
 - // already appended; skip the single-character append below
 - continue;
 - case '\n':
 - ln++;
 - ch = readCh();
 - lfCount++;
 - break;
 - case '\r':
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - // store the line break as \n
 - c = '\n';
 - break;
 - default:
 - ch = readCh();
 - break;
 - }
 - // output character
 - if (textpos == text.length) {
 - char newtext[] = new char[text.length + 128];
 - System.arraycopy(text, 0, newtext, 0, text.length);
 - text = newtext;
 - }
 - text[textpos++] = (char)c;
 - }
 - }
 - /**
 - * Parse attribute value. [33] 331:1
 - * <p>
 - * The value may be enclosed in single or double quotes. A quoted
 - * value ends at the matching quote. An unquoted value ends at a blank,
 - * tab, line break, '<', '>' or end of input. The value is collected in
 - * the string buffer and returned via getString(0), i.e. the whole
 - * buffer from its start is returned and the buffer is emptied.
 - *
 - * @param lower when true, uppercase ASCII letters in the value are
 - * folded to lowercase
 - * @return the attribute value
 - */
 - String parseAttributeValue(boolean lower) throws IOException {
 - // -1 means the value is not quoted
 - int delim = -1;
 - // Check for a delimiter
 - switch(ch) {
 - case '\'':
 - case '"':
 - delim = ch;
 - ch = readCh();
 - break;
 - }
 - // Parse the rest of the value
 - while (true) {
 - // c is what gets appended at the bottom of the loop; ch moves on
 - int c = ch;
 - switch (c) {
 - case '\n':
 - ln++;
 - ch = readCh();
 - lfCount++;
 - // a line break ends an unquoted value
 - if (delim < 0) {
 - return getString(0);
 - }
 - break;
 - case '\r':
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - if (delim < 0) {
 - return getString(0);
 - }
 - // inside quotes: c is still '\r' when it is appended below
 - break;
 - case '\t':
 - if (delim < 0)
 - c = ' ';
 - // falls through to the blank case
 - case ' ':
 - ch = readCh();
 - // white space ends an unquoted value
 - if (delim < 0) {
 - return getString(0);
 - }
 - break;
 - case '>':
 - case '<':
 - // a tag delimiter ends an unquoted value and is left unread
 - if (delim < 0) {
 - return getString(0);
 - }
 - ch = readCh();
 - break;
 - case '\'':
 - case '"':
 - ch = readCh();
 - if (c == delim) {
 - // closing quote
 - return getString(0);
 - } else if (delim == -1) {
 - // a quote in the middle of an unquoted value
 - error("attvalerr");
 - if (strict || ch == ' ') {
 - return getString(0);
 - } else {
 - // drop the quote and keep reading the value
 - continue;
 - }
 - }
 - // the other kind of quote inside a quoted value is kept
 - break;
 - case '=':
 - if (delim < 0) {
 - /* In SGML a construct like <img src=/cgi-bin/foo?x=1>
 - is considered invalid since an = sign can only be contained
 - in an attributes value if the string is quoted.
 - */
 - error("attvalerr");
 - /* If strict is true then we return with the string we have thus far.
 - Otherwise we accept the = sign as part of the attribute's value and
 - process the rest of the img tag. */
 - if (strict) {
 - return getString(0);
 - }
 - }
 - ch = readCh();
 - break;
 - case '&':
 - // strict mode does not expand references in unquoted values;
 - // the '&' is appended as is
 - if (strict && delim < 0) {
 - ch = readCh();
 - break;
 - }
 - char data[] = parseEntityReference();
 - for (int i = 0 ; i < data.length ; i++) {
 - c = data[i];
 - addString((lower && (c >= 'A') && (c <= 'Z')) ? 'a' + c - 'A' : c);
 - }
 - // already appended; skip the single-character append below
 - continue;
 - case -1:
 - // end of input ends the value, quoted or not
 - return getString(0);
 - default:
 - if (lower && (c >= 'A') && (c <= 'Z')) {
 - c = 'a' + c - 'A';
 - }
 - ch = readCh();
 - break;
 - }
 - addString(c);
 - }
 - }
 - /**
 -  * Parse attribute specification List. [31] 327:17
 -  * <p>
 -  * Reads attribute specifications until '/', '>', '<' or end of input
 -  * and adds each attribute to the parser's attribute set, keyed by its
 -  * HTML.Attribute when one exists for the name and by the name itself
 -  * otherwise. Unknown attributes, repeated attributes and values
 -  * outside an attribute's declared value list are reported as errors
 -  * but still stored. When strict is false several malformed forms are
 -  * tolerated: comma separators, quoted attribute names, and a leading
 -  * '=' (taken as a value for an attribute named like the element).
 -  *
 -  * @param elem the element whose tag is being parsed
 -  * @throws IOException if reading the next character fails
 -  */
 - void parseAttributeSpecificationList(Element elem) throws IOException {
 -     while (true) {
 -         skipSpace();
 -         switch (ch) {
 -           case '/':
 -           case '>':
 -           case '<':
 -           case -1:
 -             return;
 -           case '-':
 -             // "--" starts a comment inside the tag
 -             if ((ch = readCh()) == '-') {
 -                 ch = readCh();
 -                 parseComment();
 -                 strpos = 0;
 -             } else {
 -                 error("invalid.tagchar", "-", elem.getName());
 -                 ch = readCh();
 -             }
 -             continue;
 -         }
 -         AttributeList att = null;
 -         String attname = null;
 -         String attvalue = null;
 -         if (parseIdentifier(true)) {
 -             attname = getString(0);
 -             skipSpace();
 -             if (ch == '=') {
 -                 ch = readCh();
 -                 skipSpace();
 -                 att = elem.getAttribute(attname);
 -                 // Bug ID 4102750
 -                 // Load the NAME of an Attribute Case Sensitive
 -                 // The case of the NAME must be intact
 -                 // MG 021898
 -                 attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION) && (att.type != NAME));
 -             } else {
 -                 // No '=': the identifier is either one of an
 -                 // attribute's values or a bare attribute name.
 -                 attvalue = attname;
 -                 att = elem.getAttributeByValue(attvalue);
 -                 if (att == null) {
 -                     att = elem.getAttribute(attname);
 -                     if (att != null) {
 -                         attvalue = att.getValue();
 -                     }
 -                     else {
 -                         // Make it null so that NULL_ATTRIBUTE_VALUE is
 -                         // used
 -                         attvalue = null;
 -                     }
 -                 }
 -             }
 -         } else if (!strict && ch == ',') { // allows for comma separated attribute-value pairs
 -             ch = readCh();
 -             continue;
 -         } else if (!strict && ch == '"') { // allows for quoted attributes
 -             ch = readCh();
 -             skipSpace();
 -             if (parseIdentifier(true)) {
 -                 attname = getString(0);
 -                 if (ch == '"') {
 -                     ch = readCh();
 -                 }
 -                 skipSpace();
 -                 if (ch == '=') {
 -                     ch = readCh();
 -                     skipSpace();
 -                     att = elem.getAttribute(attname);
 -                     // NOTE(review): unlike the unquoted-name path this
 -                     // does not exempt NAME-typed attributes from case
 -                     // folding; confirm whether that is intended.
 -                     attvalue = parseAttributeValue((att != null) &&
 -                                                    (att.type != CDATA) &&
 -                                                    (att.type != NOTATION));
 -                 } else {
 -                     attvalue = attname;
 -                     att = elem.getAttributeByValue(attvalue);
 -                     if (att == null) {
 -                         att = elem.getAttribute(attname);
 -                         if (att != null) {
 -                             attvalue = att.getValue();
 -                         }
 -                     }
 -                 }
 -             } else {
 -                 char str[] = {(char)ch};
 -                 error("invalid.tagchar", new String(str), elem.getName());
 -                 ch = readCh();
 -                 continue;
 -             }
 -         } else if (!strict && (attributes.isEmpty()) && (ch == '=')) {
 -             // "<tag = value": treat the value as belonging to an
 -             // attribute named like the element
 -             ch = readCh();
 -             skipSpace();
 -             attname = elem.getName();
 -             att = elem.getAttribute(attname);
 -             attvalue = parseAttributeValue((att != null) &&
 -                                            (att.type != CDATA) &&
 -                                            (att.type != NOTATION));
 -         } else if (!strict && (ch == '=')) {
 -             // a stray "= value" after other attributes: read and drop it
 -             ch = readCh();
 -             skipSpace();
 -             attvalue = parseAttributeValue(true);
 -             error("attvalerr");
 -             return;
 -         } else {
 -             char str[] = {(char)ch};
 -             error("invalid.tagchar", new String(str), elem.getName());
 -             if (!strict) {
 -                 ch = readCh();
 -                 continue;
 -             } else {
 -                 return;
 -             }
 -         }
 -         if (att != null) {
 -             attname = att.getName();
 -         } else {
 -             error("invalid.tagatt", attname, elem.getName());
 -         }
 -         // The attribute is stored under its HTML.Attribute key when
 -         // there is one, so the duplicate check must use that same key;
 -         // looking up the plain name would miss repeated known attributes.
 -         HTML.Attribute attkey = HTML.getAttributeKey(attname);
 -         Object storedKey = attname;
 -         if (attkey != null) {
 -             storedKey = attkey;
 -         }
 -         // Check out the value
 -         if (attributes.isDefined(storedKey)) {
 -             error("multi.tagatt", attname, elem.getName());
 -         }
 -         if (attvalue == null) {
 -             attvalue = ((att != null) && (att.value != null)) ? att.value :
 -                 HTML.NULL_ATTRIBUTE_VALUE;
 -         } else if ((att != null) && (att.values != null) && !att.values.contains(attvalue)) {
 -             error("invalid.tagattval", attname, elem.getName());
 -         }
 -         attributes.addAttribute(storedKey, attvalue);
 -     }
 - }
 - /**
 - * Parses th Document Declaration Type markup declaration.
 - * Currently ignores it.
 - */
 - public String parseDTDMarkup() throws IOException {
 - StringBuffer strBuff = new StringBuffer();
 - ch = readCh();
 - while(true) {
 - switch (ch) {
 - case '>':
 - ch = readCh();
 - return strBuff.toString();
 - case -1:
 - error("invalid.markup");
 - return strBuff.toString();
 - case '\n':
 - ln++;
 - ch = readCh();
 - lfCount++;
 - break;
 - case '"':
 - ch = readCh();
 - break;
 - case '\r':
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - break;
 - default:
 - strBuff.append((char)(ch & 0xFF));
 - ch = readCh();
 - break;
 - }
 - }
 - }
 - /**
 - * Decides whether the text gathered so far names a markup declaration
 - * this parser understands and, if so, parses it.
 - * Only the Document Type Declaration (DOCTYPE) is recognised at present.
 - *
 - * @param strBuff the characters read so far after "&lt;!"
 - * @return true if a markup declaration was recognised and parsed,
 - *         false otherwise
 - * @throws IOException if reading the input fails
 - */
 - protected boolean parseMarkupDeclarations(StringBuffer strBuff) throws IOException {
 -     // Compare lengths first so that no string is built for input that
 -     // cannot possibly match.
 -     if (strBuff.length() != "DOCTYPE".length()) {
 -         return false;
 -     }
 -     String keyword = strBuff.toString().toUpperCase();
 -     if (!keyword.equals("DOCTYPE")) {
 -         return false;
 -     }
 -     parseDTDMarkup();
 -     return true;
 - }
 - /**
 - * Skips over an invalid tag.
 - * Input is discarded up to and including the closing '>' (or up to the
 - * end of input). A '&lt;' stops the scan without being consumed.
 - *
 - * @throws IOException if reading the input fails
 - */
 - void parseInvalidTag() throws IOException {
 -     for (;;) {
 -         skipSpace();
 -         if (ch == '<') {
 -             // start of the next tag: leave it for the caller
 -             return;
 -         }
 -         boolean finished = (ch == '>') || (ch == -1);
 -         ch = readCh();
 -         if (finished) {
 -             return;
 -         }
 -     }
 - }
 - /**
 - * Parse a start or end tag.
 - */
 - void parseTag() throws IOException {
 - Element elem = null;
 - boolean net = false;
 - boolean warned = false;
 - boolean unknown = false;
 - switch (ch = readCh()) {
 - case '!':
 - switch (ch = readCh()) {
 - case '-':
 - // Parse comment. [92] 391:7
 - while (true) {
 - if (ch == '-') {
 - if (!strict || ((ch = readCh()) == '-')) {
 - ch = readCh();
 - if (!strict && ch == '-') {
 - ch = readCh();
 - }
 - // send over any text you might see
 - // before parsing and sending the
 - // comment
 - if (textpos != 0) {
 - char newtext[] = new char[textpos];
 - System.arraycopy(text, 0, newtext, 0, textpos);
 - handleText(newtext);
 - lastBlockStartPos = currentBlockStartPos;
 - textpos = 0;
 - }
 - parseComment();
 - handleComment(getChars(0));
 - continue;
 - } else if (!warned) {
 - warned = true;
 - error("invalid.commentchar", "-");
 - }
 - }
 - skipSpace();
 - switch (ch) {
 - case '-':
 - continue;
 - case '>':
 - ch = readCh();
 - case -1:
 - return;
 - default:
 - ch = readCh();
 - if (!warned) {
 - warned = true;
 - error("invalid.commentchar",
 - String.valueOf((char)ch));
 - }
 - break;
 - }
 - }
 - default:
 - // deal with marked sections
 - StringBuffer strBuff = new StringBuffer();
 - while (true) {
 - strBuff.append((char)ch);
 - if (parseMarkupDeclarations(strBuff)) {
 - return;
 - }
 - switch(ch) {
 - case '>':
 - ch = readCh();
 - case -1:
 - error("invalid.markup");
 - return;
 - case '\n':
 - ln++;
 - ch = readCh();
 - lfCount++;
 - break;
 - case '\r':
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - break;
 - default:
 - ch = readCh();
 - break;
 - }
 - }
 - }
 - case '/':
 - // parse end tag [19] 317:4
 - switch (ch = readCh()) {
 - case '>':
 - ch = readCh();
 - case '<':
 - // empty end tag. either </> or </<
 - if (recent == null) {
 - error("invalid.shortend");
 - return;
 - }
 - elem = recent;
 - break;
 - default:
 - if (!parseIdentifier(true)) {
 - error("expected.endtagname");
 - return;
 - }
 - skipSpace();
 - switch (ch) {
 - case '>':
 - ch = readCh();
 - case '<':
 - break;
 - default:
 - error("expected", "'>'");
 - while ((ch != -1) && (ch != '\n') && (ch != '>')) {
 - ch = readCh();
 - }
 - if (ch == '>') {
 - ch = readCh();
 - }
 - break;
 - }
 - String elemStr = getString(0);
 - if (!dtd.elementExists(elemStr)) {
 - error("end.unrecognized", elemStr);
 - // Ignore RE before end tag
 - if ((textpos > 0) && (text[textpos-1] == '\n')) {
 - textpos--;
 - }
 - elem = dtd.getElement("unknown");
 - elem.name = elemStr;
 - unknown = true;
 - } else {
 - elem = dtd.getElement(elemStr);
 - }
 - break;
 - }
 - // If the stack is null, we're seeing end tags without any begin
 - // tags. Ignore them.
 - if (stack == null) {
 - error("end.extra.tag", elem.getName());
 - return;
 - }
 - // Ignore RE before end tag
 - if ((textpos > 0) && (text[textpos-1] == '\n')) {
 - // In a pre tag, if there are blank lines
 - // we do not want to remove the newline
 - // before the end tag. Hence this code.
 - //
 - if (stack.pre) {
 - if ((textpos > 1) && (text[textpos-2] != '\n')) {
 - textpos--;
 - }
 - } else {
 - textpos--;
 - }
 - }
 - // If the end tag is a form, since we did not put it
 - // on the tag stack, there is no corresponding start
 - // start tag to find. Hence do not touch the tag stack.
 - //
 - if (!strict && elem.getName().equals("form")) {
 - if (lastFormSent != null) {
 - handleEndTag(lastFormSent);
 - return;
 - } else {
 - // do nothing.
 - return;
 - }
 - }
 - if (unknown) {
 - // we will not see a corresponding start tag
 - // on the the stack. If we are seeing an
 - // end tag, lets send this on as an empty
 - // tag with the end tag attribute set to
 - // true.
 - TagElement t = makeTag(elem);
 - handleText(t);
 - attributes.addAttribute(HTML.Attribute.ENDTAG, "true");
 - handleEmptyTag(makeTag(elem));
 - unknown = false;
 - return;
 - }
 - // find the corresponding start tag
 - // A commonly occuring error appears to be the insertion
 - // of extra end tags in a table. The intent here is ignore
 - // such extra end tags.
 - //
 - if (!strict) {
 - String stackElem = stack.elem.getName();
 - if (stackElem.equals("table")) {
 - // If it isnt a valid end tag ignore it and return
 - //
 - if (!elem.getName().equals(stackElem)) {
 - error("tag.ignore", elem.getName());
 - return;
 - }
 - }
 - if (stackElem.equals("tr") ||
 - stackElem.equals("td")) {
 - if ((!elem.getName().equals("table")) &&
 - (!elem.getName().equals(stackElem))) {
 - error("tag.ignore", elem.getName());
 - return;
 - }
 - }
 - }
 - TagStack sp = stack;
 - while ((sp != null) && (elem != sp.elem)) {
 - sp = sp.next;
 - }
 - if (sp == null) {
 - error("unmatched.endtag", elem.getName());
 - return;
 - }
 - // People put font ending tags in the darndest places.
 - // Don't close other contexts based on them being between
 - // a font tag and the corresponding end tag. Instead,
 - // ignore the end tag like it doesn't exist and allow the end
 - // of the document to close us out.
 - String elemName = elem.getName();
 - if (stack != sp &&
 - (elemName.equals("font") ||
 - elemName.equals("center"))) {
 - // Since closing out a center tag can have real wierd
 - // effects on the formatting, make sure that tags
 - // for which omitting an end tag is legimitate
 - // get closed out.
 - //
 - if (elemName.equals("center")) {
 - while(stack.elem.omitEnd() && stack != sp) {
 - endTag(true);
 - }
 - if (stack.elem == elem) {
 - endTag(false);
 - }
 - }
 - return;
 - }
 - // People do the same thing with center tags. In this
 - // case we would like to close off the center tag but
 - // not necessarily all enclosing tags.
 - // end tags
 - while (stack != sp) {
 - endTag(true);
 - }
 - endTag(false);
 - return;
 - case -1:
 - error("eof");
 - return;
 - }
 - // start tag [14] 314:1
 - if (!parseIdentifier(true)) {
 - elem = recent;
 - if ((ch != '>') || (elem == null)) {
 - error("expected.tagname");
 - return;
 - }
 - } else {
 - String elemStr = getString(0);
 - if (elemStr.equals("image")) {
 - elemStr = new String("img");
 - }
 - /* determine if this element is part of the dtd. */
 - if (!dtd.elementExists(elemStr)) {
 - // parseInvalidTag();
 - error("tag.unrecognized ", elemStr);
 - elem = dtd.getElement("unknown");
 - elem.name = elemStr;
 - unknown = true;
 - } else {
 - elem = dtd.getElement(elemStr);
 - }
 - }
 - // Parse attributes
 - parseAttributeSpecificationList(elem);
 - switch (ch) {
 - case '/':
 - net = true;
 - case '>':
 - ch = readCh();
 - case '<':
 - break;
 - default:
 - error("expected", "'>'");
 - break;
 - }
 - if (!strict) {
 - if (elem.getName().equals("script")) {
 - error("javascript.unsupported");
 - }
 - }
 - // ignore RE after start tag
 - //
 - if (!elem.isEmpty()) {
 - if (ch == '\n') {
 - ln++;
 - lfCount++;
 - ch = readCh();
 - } else if (ch == '\r') {
 - ln++;
 - if ((ch = readCh()) == '\n') {
 - ch = readCh();
 - crlfCount++;
 - }
 - else {
 - crCount++;
 - }
 - }
 - }
 - // ensure a legal context for the tag
 - TagElement tag = makeTag(elem, false);
 - /** In dealing with forms, we have decided to treat
 - them as legal in any context. Also, even though
 - they do have a start and an end tag, we will
 - not put this tag on the stack. This is to deal
 - several pages in the web oasis that choose to
 - start and end forms in any possible location. **/
 - if (!strict && elem.getName().equals("form")) {
 - if (lastFormSent == null) {
 - lastFormSent = tag;
 - } else {
 - handleEndTag(lastFormSent);
 - lastFormSent = tag;
 - }
 - } else {
 - // Smlly, if a tag is unknown, we will apply
 - // no legalTagContext logic to it.
 - //
 - if (!unknown) {
 - legalTagContext(tag);
 - // If skip tag is true, this implies that
 - // the tag was illegal and that the error
 - // recovery strategy adopted is to ignore
 - // the tag.
 - if (!strict && skipTag) {
 - skipTag = false;
 - return;
 - }
 - }
 - }
 - startTag(tag);
 - if (!elem.isEmpty()) {
 - switch (elem.getType()) {
 - case CDATA:
 - parseLiteral(false);
 - break;
 - case RCDATA:
 - parseLiteral(true);
 - break;
 - default:
 - if (stack != null) {
 - stack.net = net;
 - }
 - break;
 - }
 - }
 - }
 - /**
 - * Parse Content. [24] 320:1
 - * <p>
 - * Main loop of the parser. Tags are handed to parseTag, entity
 - * references are expanded through parseEntityReference, and all other
 - * characters are accumulated in the text buffer. Outside of a
 - * preformatted context a run of white space is remembered as a single
 - * pending space, which is written out just before the next character of
 - * text. A '\r' or "\r\n" is stored as '\n'.
 - * <p>
 - * The loop ends at the end of input, or when the current thread is
 - * found to be interrupted.
 - *
 - * @throws IOException if reading the input fails
 - */
 - void parseContent() throws IOException {
 -     Thread curThread = Thread.currentThread();
 -     for (;;) {
 -         if (curThread.isInterrupted()) {
 -             curThread.interrupt(); // resignal the interrupt
 -             break;
 -         }
 -         int c = ch;
 -         currentBlockStartPos = currentPosition;
 -         switch (c) {
 -         case '<':
 -             parseTag();
 -             lastBlockStartPos = currentPosition;
 -             continue;
 -         case '/':
 -             ch = readCh();
 -             if ((stack != null) && stack.net) {
 -                 // null end tag.
 -                 endTag(false);
 -                 continue;
 -             }
 -             // otherwise the '/' is ordinary text
 -             break;
 -         case -1:
 -             return;
 -         case '&':
 -             if (textpos == 0) {
 -                 if (!legalElementContext(dtd.pcdata)) {
 -                     error("unexpected.pcdata");
 -                 }
 -                 if (last.breaksFlow()) {
 -                     space = false;
 -                 }
 -             }
 -             char data[] = parseEntityReference();
 -             // room for the expansion plus one pending space
 -             if (textpos + data.length + 1 > text.length) {
 -                 char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
 -                 System.arraycopy(text, 0, newtext, 0, text.length);
 -                 text = newtext;
 -             }
 -             if (space) {
 -                 space = false;
 -                 text[textpos++] = ' ';
 -             }
 -             System.arraycopy(data, 0, text, textpos, data.length);
 -             textpos += data.length;
 -             continue;
 -         case '\n':
 -             ln++;
 -             lfCount++;
 -             ch = readCh();
 -             if ((stack != null) && stack.pre) {
 -                 // preformatted: keep the newline as text
 -                 break;
 -             }
 -             space = true;
 -             if (textpos == 0) {
 -                 lastBlockStartPos = currentPosition;
 -             }
 -             continue;
 -         case '\r':
 -             ln++;
 -             // a carriage return is stored as a newline
 -             c = '\n';
 -             if ((ch = readCh()) == '\n') {
 -                 ch = readCh();
 -                 crlfCount++;
 -             } else {
 -                 crCount++;
 -             }
 -             if ((stack != null) && stack.pre) {
 -                 break;
 -             }
 -             if (textpos == 0) {
 -                 lastBlockStartPos = currentPosition;
 -             }
 -             space = true;
 -             continue;
 -         case '\t':
 -         case ' ':
 -             ch = readCh();
 -             if ((stack != null) && stack.pre) {
 -                 break;
 -             }
 -             space = true;
 -             if (textpos == 0) {
 -                 lastBlockStartPos = currentPosition;
 -             }
 -             continue;
 -         default:
 -             if (textpos == 0) {
 -                 if (!legalElementContext(dtd.pcdata)) {
 -                     error("unexpected.pcdata");
 -                 }
 -                 if (last.breaksFlow()) {
 -                     space = false;
 -                 }
 -             }
 -             ch = readCh();
 -             break;
 -         }
 -         // Enlarge the buffer if needed: at most a pending space and one
 -         // character are stored below. Doubling (as the entity branch
 -         // above already does) keeps the copying cost of a long text run
 -         // linear instead of quadratic.
 -         if (textpos + 2 > text.length) {
 -             char newtext[] = new char[Math.max(textpos + 2, text.length * 2)];
 -             System.arraycopy(text, 0, newtext, 0, text.length);
 -             text = newtext;
 -         }
 -         // output pending space
 -         if (space) {
 -             if (textpos == 0) {
 -                 lastBlockStartPos--;
 -             }
 -             text[textpos++] = ' ';
 -             space = false;
 -         }
 -         text[textpos++] = (char)c;
 -     }
 - }
 - /**
 - * Returns the line terminator seen most often while parsing: one of
 - * "\r", "\n" or "\r\n". "\n" is returned whenever it ties with the
 - * most frequent of the other two.
 - */
 - String getEndOfLineString() {
 -     if (crCount > crlfCount) {
 -         // "\r\n" is out of the running; compare "\r" with "\n"
 -         return (crCount > lfCount) ? "\r" : "\n";
 -     }
 -     // "\r" is out of the running; compare "\n" with "\r\n"
 -     return (lfCount >= crlfCount) ? "\n" : "\r\n";
 - }
 - /**
 - * Parse an HTML stream, given a DTD.
 - * <p>
 - * The reader is always closed before this method returns. When parsing
 - * ends, normally or not, handleEndTag is called for every element still
 - * on the tag stack and the text and string buffers are released.
 - * <p>
 - * An IOException or ThreadDeath is reported through error and then
 - * rethrown. Any other Exception is reported and its stack trace
 - * printed, but it is not rethrown.
 - *
 - * @param in the character stream to parse
 - * @throws IOException if reading or closing the stream fails
 - */
 - public synchronized void parse(Reader in) throws IOException {
 -     this.in = in;
 -     this.ln = 1;
 -     // Forget any characters still buffered from an earlier stream. If a
 -     // previous parse stopped early (interrupt or exception) they would
 -     // otherwise be read ahead of the new stream's content.
 -     pos = 0;
 -     len = 0;
 -     seenHtml = false;
 -     seenHead = false;
 -     seenBody = false;
 -     crCount = lfCount = crlfCount = 0;
 -     try {
 -         try {
 -             ch = readCh();
 -             text = new char[1024];
 -             str = new char[128];
 -             parseContent();
 -             // NOTE: interruption may have occurred. Control flows out
 -             // of here normally.
 -             while (stack != null) {
 -                 endTag(true);
 -             }
 -         } finally {
 -             in.close();
 -         }
 -     } catch (IOException e) {
 -         errorContext();
 -         error("ioexception");
 -         throw e;
 -     } catch (Exception e) {
 -         errorContext();
 -         error("exception", e.getClass().getName(), e.getMessage());
 -         e.printStackTrace();
 -     } catch (ThreadDeath e) {
 -         errorContext();
 -         error("terminated");
 -         e.printStackTrace();
 -         throw e;
 -     } finally {
 -         // close whatever is still open, then release the buffers
 -         for (; stack != null ; stack = stack.next) {
 -             handleEndTag(stack.tag);
 -         }
 -         text = null;
 -         str = null;
 -     }
 - }
 - /*
 - * Input cache. Reading through this buffer is much faster than calling
 - * down to a synchronized method of BufferedReader for each byte.
 - * Measurements done 5/30/97 show that a bigger buffer brings nothing:
 - * growing it to 8192 had no measurable impact for a program discarding
 - * one character at a time (reading from an http URL to a local machine).
 - *
 - * buf holds the characters of the most recent read, pos is the index of
 - * the next character to hand out and len is the count returned by that
 - * read.
 - */
 - private char[] buf = new char[256];
 - private int pos;
 - private int len;
 - /*
 - * Tracks the position relative to the beginning of the document;
 - * incremented for every character handed out by readCh.
 - */
 - private int currentPosition;
 - private final int readCh() throws IOException {
 - if (pos >= len) {
 - // This loop allows us to ignore interrupts if the flag
 - // says so
 - for (;;) {
 - try {
 - len = in.read(buf);
 - break;
 - } catch (InterruptedIOException ex) {
 - throw ex;
 - }
 - }
 - if (len <= 0) {
 - return -1; // eof
 - }
 - pos = 0;
 - }
 - ++currentPosition;
 - return buf[pos++];
 - }
 - /**
 - * Returns the current position, counted in characters read since the
 - * beginning of the document.
 - */
 - protected int getCurrentPos() {
 -     return this.currentPosition;
 - }
 - }