- /*
- * @(#)CSSParser.java 1.7 03/01/23
- *
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
- */
- package javax.swing.text.html;
-
- import java.io.*;
-
/**
 * A CSS parser. This works by way of a delegate that implements the
 * CSSParserCallback interface. The delegate is notified of the following
 * events:
 * <ul>
 * <li>Import statement: <code>handleImport</code>
 * <li>Selectors <code>handleSelector</code>. This is invoked for each
 *     string. For example if the Reader contained p, bar , a {}, the delegate
 *     would be notified 4 times, for 'p,' 'bar' ',' and 'a'.
 * <li>When a rule starts, <code>startRule</code>
 * <li>Properties in the rule via the <code>handleProperty</code>. This
 *     is invoked once per property/value key, eg font size: foo;, would
 *     cause the delegate to be notified once with a value of 'font size'.
 * <li>Values in the rule via the <code>handleValue</code>, this is notified
 *     for the total value.
 * <li>When a rule ends, <code>endRule</code>
 * </ul>
 * This will parse much more than CSS 1, and loosely implements the
 * recommendation for <i>Forward-compatible parsing</i> in section
 * 7.1 of the CSS spec found at:
 * <a href="http://www.w3.org/TR/REC-CSS1">http://www.w3.org/TR/REC-CSS1</a>.
 * If an error results in parsing, a RuntimeException will be thrown.
 * <p>
 * This will preserve case, with two exceptions: property names are mapped
 * to lower case before being handed to the delegate, and the
 * <code>@import</code> keyword is recognized regardless of ASCII case.
 * If the callback wishes to treat other portions case insensitively
 * (such as selectors), it should use toLowerCase, or something similar.
 * <p>
 * An instance keeps per-parse state in fields, so a single instance must
 * not be used for two parses at the same time.
 *
 * @author Scott Violet
 * @version 1.7 01/23/03
 */
class CSSParser {
    // Parsing something like the following:
    // (@rule | ruleset | block)*
    //
    // @rule       (block | identifier)*; (block with {} ends @rule)
    // block       matching [] () {} (that is, [()] is a block, [(){}{[]}]
    //             is a block, ()[] is two blocks)
    // identifier  "*" | '*' | anything but a [](){} and whitespace
    //
    // ruleset     selector declblock
    // selector    (identifier | (block, except block '{}') )*
    // declblock   declaration* block*
    // declaration (identifier* stopping when identifier ends with :)
    //             (identifier* stopping when identifier ends with ;)
    //
    // comments /* */ can appear any where, and are stripped.


    // identifier - letters, digits, dashes and escaped characters
    // block starts with { ends with matching }, () [] and {} always occur
    // in matching pairs, '' and "" also occur in pairs (the original note
    // about an exception for " was left unfinished).


    // Indicates the type of token being parsed.
    private static final int IDENTIFIER = 1;
    private static final int BRACKET_OPEN = 2;
    private static final int BRACKET_CLOSE = 3;
    private static final int BRACE_OPEN = 4;
    private static final int BRACE_CLOSE = 5;
    private static final int PAREN_OPEN = 6;
    private static final int PAREN_CLOSE = 7;
    private static final int END = -1;

    /**
     * Maps a bracket/brace/paren token constant (used as the index) to the
     * character it stands for; indices with no character hold 0.
     */
    private static final char[] charMapping = { 0, 0, '[', ']', '{', '}', '(',
                                                ')', 0 };

    /** The at-keyword, in lower case, that triggers handleImport. */
    private static final String IMPORT_KEYWORD = "@import";


    /** Set to true if one character has been read ahead. */
    private boolean didPushChar;
    /** The read ahead character. */
    private int pushedChar;
    /** Temporary place to hold identifiers. */
    private StringBuffer unitBuffer;
    /** Used to indicate blocks. */
    private int[] unitStack;
    /** Number of valid blocks. */
    private int stackCount;
    /** Holds the incoming CSS rules. */
    private Reader reader;
    /** Set to true when the first non @ rule is encountered. */
    private boolean encounteredRuleSet;
    /** Notified of state. */
    private CSSParserCallback callback;
    /** nextToken() inserts the string here. */
    private char[] tokenBuffer;
    /** Current number of valid chars in tokenBuffer. */
    private int tokenBufferLength;
    /** Set to true if any whitespace is read. */
    private boolean readWS;


    /** The delegate interface. */
    static interface CSSParserCallback {
        /**
         * Called when an @import is encountered before any rule set has
         * been seen; the argument is the text following the keyword.
         */
        void handleImport(String importString);
        // There is currently no way to distinguish between '"foo,"' and
        // 'foo,'. But this generally isn't valid CSS. If it becomes
        // a problem, handleSelector will have to be told if the string is
        // quoted.
        /** Called once for each whitespace separated selector string. */
        void handleSelector(String selector);
        /** Called after the selectors, when the opening { has been read. */
        void startRule();
        // Property names are mapped to lower case before being passed to
        // the delegate.
        /** Called with the lower cased property name of a declaration. */
        void handleProperty(String property);
        /** Called with the complete value of a declaration. */
        void handleValue(String value);
        /** Called when the declaration block of a rule has been parsed. */
        void endRule();
    }

    CSSParser() {
        unitStack = new int[2];
        tokenBuffer = new char[80];
        unitBuffer = new StringBuffer();
    }

    /**
     * Parses the CSS supplied by <code>reader</code>, notifying
     * <code>callback</code> of what is found.
     *
     * @param reader   source of the CSS text
     * @param callback delegate notified of imports, selectors, properties
     *                 and values
     * @param inRule   if true the input is treated as the inside of a
     *                 declaration block (only handleProperty/handleValue
     *                 are invoked), otherwise as a complete style sheet
     * @throws IOException if reading from <code>reader</code> fails
     * @throws RuntimeException if the input is malformed (unclosed block,
     *         string or comment, unexpected closing bracket)
     */
    void parse(Reader reader, CSSParserCallback callback,
               boolean inRule) throws IOException {
        this.callback = callback;
        this.reader = reader;
        stackCount = tokenBufferLength = 0;
        encounteredRuleSet = false;
        // A previous parse may have been aborted (by an exception) while a
        // look ahead character was pending; it belongs to the old stream
        // and must not be seen by this parse.
        didPushChar = false;
        try {
            if (inRule) {
                parseDeclarationBlock();
            }
            else {
                while (getNextStatement()) {
                    // Each call consumes one statement.
                }
            }
        } finally {
            // Clear the fields (not the parameters) so that the parser
            // does not hold on to the reader and delegate once done.
            this.callback = null;
            this.reader = null;
        }
    }

    /**
     * Gets the next statement, returning false if the end is reached. A
     * statement is either an @rule, or a ruleset.
     */
    private boolean getNextStatement() throws IOException {
        unitBuffer.setLength(0);

        int token = nextToken((char)0);

        switch (token) {
        case IDENTIFIER:
            if (tokenBufferLength > 0) {
                if (tokenBuffer[0] == '@') {
                    parseAtRule();
                }
                else {
                    encounteredRuleSet = true;
                    parseRuleSet();
                }
            }
            return true;

        case BRACKET_OPEN:
        case BRACE_OPEN:
        case PAREN_OPEN:
            // A stray top level block, skip over it.
            parseTillClosed(token);
            return true;

        case BRACKET_CLOSE:
        case BRACE_CLOSE:
        case PAREN_CLOSE:
            // Shouldn't happen...
            throw new RuntimeException("Unexpected top level block close");

        case END:
            return false;
        }
        return true;
    }

    /**
     * Returns true if the current token is the <code>@import</code>
     * keyword. CSS keywords are case insensitive, so the comparison
     * ignores ASCII case; only A-Z are folded so that no non ASCII
     * character can be mistaken for a keyword letter.
     */
    private boolean isImportKeyword() {
        int length = IMPORT_KEYWORD.length();

        if (tokenBufferLength != length) {
            return false;
        }
        for (int counter = 0; counter < length; counter++) {
            char c = tokenBuffer[counter];

            if (c >= 'A' && c <= 'Z') {
                c = (char)(c - 'A' + 'a');
            }
            if (c != IMPORT_KEYWORD.charAt(counter)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Parses an @ rule, stopping at a matching brace pair, or ;. If the
     * rule is an @import and no rule set has been encountered yet, the
     * delegate's <code>handleImport</code> is invoked with the text that
     * followed the keyword.
     */
    private void parseAtRule() throws IOException {
        // The keyword is still in tokenBuffer, check it before fetching
        // more tokens overwrites it.
        boolean done = false;
        boolean isImport = isImportKeyword();

        unitBuffer.setLength(0);
        while (!done) {
            int nextToken = nextToken(';');

            switch (nextToken) {
            case IDENTIFIER:
                if (tokenBufferLength > 0 &&
                    tokenBuffer[tokenBufferLength - 1] == ';') {
                    // Drop the terminating ;, it is not part of the rule.
                    --tokenBufferLength;
                    done = true;
                }
                if (tokenBufferLength > 0) {
                    if (unitBuffer.length() > 0 && readWS) {
                        unitBuffer.append(' ');
                    }
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACE_OPEN:
                if (unitBuffer.length() > 0 && readWS) {
                    unitBuffer.append(' ');
                }
                unitBuffer.append(charMapping[nextToken]);
                parseTillClosed(nextToken);
                done = true;
                // Skip a trailing ';', not really to spec.
                {
                    int nextChar = readWS();
                    if (nextChar != -1 && nextChar != ';') {
                        pushChar(nextChar);
                    }
                }
                break;

            case BRACKET_OPEN: case PAREN_OPEN:
                unitBuffer.append(charMapping[nextToken]);
                parseTillClosed(nextToken);
                break;

            case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE:
                throw new RuntimeException("Unexpected close in @ rule");

            case END:
                done = true;
                break;
            }
        }
        if (isImport && !encounteredRuleSet) {
            callback.handleImport(unitBuffer.toString());
        }
    }

    /**
     * Parses the next rule set, which is a selector followed by a
     * declaration block.
     */
    private void parseRuleSet() throws IOException {
        if (parseSelectors()) {
            callback.startRule();
            parseDeclarationBlock();
            callback.endRule();
        }
    }

    /**
     * Parses a set of selectors, returning false if the end of the stream
     * is reached. Returns true once the { opening the declaration block
     * has been consumed.
     */
    private boolean parseSelectors() throws IOException {
        // Parse the selectors
        int nextToken;

        // The first selector was already read by getNextStatement.
        if (tokenBufferLength > 0) {
            callback.handleSelector(new String(tokenBuffer, 0,
                                               tokenBufferLength));
        }

        unitBuffer.setLength(0);
        for (;;) {
            while ((nextToken = nextToken((char)0)) == IDENTIFIER) {
                if (tokenBufferLength > 0) {
                    callback.handleSelector(new String(tokenBuffer, 0,
                                                       tokenBufferLength));
                }
            }
            switch (nextToken) {
            case BRACE_OPEN:
                return true;

            case BRACKET_OPEN: case PAREN_OPEN:
                parseTillClosed(nextToken);
                // Not too sure about this, how we handle this isn't very
                // well spec'd.
                unitBuffer.setLength(0);
                break;

            case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE:
                throw new RuntimeException("Unexpected block close in selector");

            case END:
                // Prematurely hit end.
                return false;
            }
        }
    }

    /**
     * Parses a declaration block. Which is a number of declarations
     * followed by a })]. Returns when a } or the end of the stream is hit.
     */
    private void parseDeclarationBlock() throws IOException {
        for (;;) {
            int token = parseDeclaration();
            switch (token) {
            case END: case BRACE_CLOSE:
                return;

            case BRACKET_CLOSE: case PAREN_CLOSE:
                // Bail
                throw new RuntimeException("Unexpected close in declaration block");

            case IDENTIFIER:
                break;
            }
        }
    }

    /**
     * Parses a single declaration, which is an identifier a : and another
     * identifier. This returns the last token seen.
     */
    // identifier+: identifier* ;|}
    private int parseDeclaration() throws IOException {
        int token;

        if ((token = parseIdentifiers(':', false)) != IDENTIFIER) {
            // No property name was found, let the caller deal with it.
            return token;
        }
        // Make the property name to lowercase
        for (int counter = unitBuffer.length() - 1; counter >= 0; counter--) {
            unitBuffer.setCharAt(counter, Character.toLowerCase
                                 (unitBuffer.charAt(counter)));
        }
        callback.handleProperty(unitBuffer.toString());

        // The value is reported even if it was ended by } or the end of
        // the stream rather than a ;.
        token = parseIdentifiers(';', true);
        callback.handleValue(unitBuffer.toString());
        return token;
    }

    /**
     * Parses identifiers until <code>extraChar</code> is encountered,
     * returning the ending token, which will be IDENTIFIER if extraChar
     * is found. The identifiers read are left in unitBuffer, separated by
     * a single space where whitespace was read between them.
     *
     * @param extraChar   character that terminates the run of identifiers
     * @param wantsBlocks if true nested blocks are included in unitBuffer,
     *                    otherwise they are parsed and discarded
     */
    private int parseIdentifiers(char extraChar,
                                 boolean wantsBlocks) throws IOException {
        int nextToken;
        int ubl;

        unitBuffer.setLength(0);
        for (;;) {
            nextToken = nextToken(extraChar);

            switch (nextToken) {
            case IDENTIFIER:
                if (tokenBufferLength > 0) {
                    if (tokenBuffer[tokenBufferLength - 1] == extraChar) {
                        // Strip extraChar, keep what preceded it.
                        if (--tokenBufferLength > 0) {
                            if (readWS && unitBuffer.length() > 0) {
                                unitBuffer.append(' ');
                            }
                            unitBuffer.append(tokenBuffer, 0,
                                              tokenBufferLength);
                        }
                        return IDENTIFIER;
                    }
                    if (readWS && unitBuffer.length() > 0) {
                        unitBuffer.append(' ');
                    }
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACKET_OPEN:
            case BRACE_OPEN:
            case PAREN_OPEN:
                // Remember the length so that the block can be dropped
                // again if the caller is not interested in it.
                ubl = unitBuffer.length();
                if (wantsBlocks) {
                    unitBuffer.append(charMapping[nextToken]);
                }
                parseTillClosed(nextToken);
                if (!wantsBlocks) {
                    unitBuffer.setLength(ubl);
                }
                break;

            case BRACE_CLOSE:
                // No need to throw for these two, we return token and
                // caller can do whatever.
            case BRACKET_CLOSE:
            case PAREN_CLOSE:
            case END:
                // Hit the end
                return nextToken;
            }
        }
    }

    /**
     * Parses till a matching block close is encountered. This is only
     * appropriate to be called at the top level (no nesting). Everything
     * read, including the closing character, is appended to unitBuffer.
     *
     * @param openToken the open token that has just been read
     * @throws RuntimeException if the stream ends before the block is
     *         closed, or a close does not match the innermost open
     */
    private void parseTillClosed(int openToken) throws IOException {
        int nextToken;
        boolean done = false;

        startBlock(openToken);
        while (!done) {
            nextToken = nextToken((char)0);
            switch (nextToken) {
            case IDENTIFIER:
                if (unitBuffer.length() > 0 && readWS) {
                    unitBuffer.append(' ');
                }
                if (tokenBufferLength > 0) {
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN:
                if (unitBuffer.length() > 0 && readWS) {
                    unitBuffer.append(' ');
                }
                unitBuffer.append(charMapping[nextToken]);
                startBlock(nextToken);
                break;

            case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE:
                if (unitBuffer.length() > 0 && readWS) {
                    unitBuffer.append(' ');
                }
                unitBuffer.append(charMapping[nextToken]);
                endBlock(nextToken);
                if (!inBlock()) {
                    done = true;
                }
                break;

            case END:
                // Prematurely hit end.
                throw new RuntimeException("Unclosed block");
            }
        }
    }

    /**
     * Fetches the next token. For IDENTIFIER the text is left in
     * tokenBuffer/tokenBufferLength; quoted strings are returned as
     * identifiers with the quotes removed. The readWS field tells whether
     * whitespace preceded the token.
     *
     * @param idChar extra character that ends an unquoted identifier (it
     *               is kept as the last character of the identifier)
     */
    private int nextToken(char idChar) throws IOException {
        readWS = false;

        int nextChar = readWS();

        switch (nextChar) {
        case '\'':
            readTill('\'');
            // readTill includes the closing quote, drop it.
            if (tokenBufferLength > 0) {
                tokenBufferLength--;
            }
            return IDENTIFIER;
        case '"':
            readTill('"');
            if (tokenBufferLength > 0) {
                tokenBufferLength--;
            }
            return IDENTIFIER;
        case '[':
            return BRACKET_OPEN;
        case ']':
            return BRACKET_CLOSE;
        case '{':
            return BRACE_OPEN;
        case '}':
            return BRACE_CLOSE;
        case '(':
            return PAREN_OPEN;
        case ')':
            return PAREN_CLOSE;
        case -1:
            return END;
        default:
            // First character of an identifier, let getIdentifier see it.
            pushChar(nextChar);
            getIdentifier(idChar);
            return IDENTIFIER;
        }
    }

    /**
     * Gets an identifier, returning true if the length of the string is
     * greater than 0, stopping when <code>stopChar</code>, whitespace, a
     * quote, one of {}()[], the start of a comment or the end of the
     * stream is hit. <code>stopChar</code> itself is included in the
     * identifier; a comment is skipped and recorded as whitespace.
     */
    // NOTE: this could be combined with readTill, as they contain somewhat
    // similar functionality.
    private boolean getIdentifier(char stopChar) throws IOException {
        boolean lastWasEscape = false;
        boolean done = false;
        int escapeCount = 0;
        int escapeChar = 0;
        int nextChar;
        int intStopChar = (int)stopChar;
        // 1 for '\', 2 for valid escape char [0-9a-fA-F], 3 for
        // stop character (white space, quotes, ()[]{}), 4 for '/'
        // (potential comment start), 0 otherwise
        short type;
        int escapeOffset = 0;

        tokenBufferLength = 0;
        while (!done) {
            nextChar = readChar();
            switch (nextChar) {
            case '\\':
                type = 1;
                break;

            case '0': case '1': case '2': case '3': case '4': case '5':
            case '6': case '7': case '8': case '9':
                type = 2;
                escapeOffset = nextChar - '0';
                break;

            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                type = 2;
                escapeOffset = nextChar - 'a' + 10;
                break;

            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                type = 2;
                escapeOffset = nextChar - 'A' + 10;
                break;

            case '\'': case '"': case '[': case ']': case '{': case '}':
            case '(': case ')':
            case ' ': case '\n': case '\t': case '\r':
                type = 3;
                break;

            case '/':
                type = 4;
                break;

            case -1:
                // Reached the end
                done = true;
                type = 0;
                break;

            default:
                type = 0;
                break;
            }
            if (lastWasEscape) {
                if (type == 2) {
                    // Continue with escape.
                    escapeChar = escapeChar * 16 + escapeOffset;
                    // At most 4 hex digits make up an escape.
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char)escapeChar);
                    }
                }
                else {
                    // no longer escaped
                    lastWasEscape = false;
                    if (escapeCount > 0) {
                        append((char)escapeChar);
                        // Make this simpler, reprocess the character.
                        pushChar(nextChar);
                    }
                    else if (!done) {
                        // '\' followed by a non hex char, take the
                        // character literally.
                        append((char)nextChar);
                    }
                }
            }
            else if (!done) {
                if (type == 1) {
                    lastWasEscape = true;
                    escapeChar = escapeCount = 0;
                }
                else if (type == 3) {
                    // Leave the stop character for the next token.
                    done = true;
                    pushChar(nextChar);
                }
                else if (type == 4) {
                    // Potential comment
                    nextChar = readChar();
                    if (nextChar == '*') {
                        done = true;
                        readComment();
                        readWS = true;
                    }
                    else {
                        append('/');
                        if (nextChar == -1) {
                            done = true;
                        }
                        else {
                            pushChar(nextChar);
                        }
                    }
                }
                else {
                    append((char)nextChar);
                    if (nextChar == intStopChar) {
                        done = true;
                    }
                }
            }
        }
        return (tokenBufferLength > 0);
    }

    /**
     * Reads till a <code>stopChar</code> is encountered, escaping characters
     * as necessary. The result, including the terminating
     * <code>stopChar</code>, is left in tokenBuffer.
     *
     * @throws RuntimeException if the end of the stream is reached first
     */
    private void readTill(char stopChar) throws IOException {
        boolean lastWasEscape = false;
        int escapeCount = 0;
        int escapeChar = 0;
        int nextChar;
        boolean done = false;
        int intStopChar = (int)stopChar;
        // 1 for '\', 2 for valid escape char [0-9a-fA-F], 0 otherwise
        short type;
        int escapeOffset = 0;

        tokenBufferLength = 0;
        while (!done) {
            nextChar = readChar();
            switch (nextChar) {
            case '\\':
                type = 1;
                break;

            case '0': case '1': case '2': case '3': case '4': case '5':
            case '6': case '7': case '8': case '9':
                type = 2;
                escapeOffset = nextChar - '0';
                break;

            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                type = 2;
                escapeOffset = nextChar - 'a' + 10;
                break;

            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                type = 2;
                escapeOffset = nextChar - 'A' + 10;
                break;

            case -1:
                // Prematurely reached the end!
                throw new RuntimeException("Unclosed " + stopChar);

            default:
                type = 0;
                break;
            }
            if (lastWasEscape) {
                if (type == 2) {
                    // Continue with escape.
                    escapeChar = escapeChar * 16 + escapeOffset;
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char)escapeChar);
                    }
                }
                else {
                    // no longer escaped
                    if (escapeCount > 0) {
                        append((char)escapeChar);
                        if (type == 1) {
                            // Another escape follows immediately.
                            lastWasEscape = true;
                            escapeChar = escapeCount = 0;
                        }
                        else {
                            if (nextChar == intStopChar) {
                                done = true;
                            }
                            append((char)nextChar);
                            lastWasEscape = false;
                        }
                    }
                    else {
                        // '\' followed by a non hex char, take the
                        // character literally (this includes stopChar).
                        append((char)nextChar);
                        lastWasEscape = false;
                    }
                }
            }
            else if (type == 1) {
                lastWasEscape = true;
                escapeChar = escapeCount = 0;
            }
            else {
                if (nextChar == intStopChar) {
                    done = true;
                }
                append((char)nextChar);
            }
        }
    }

    /**
     * Appends <code>character</code> to tokenBuffer, doubling the size of
     * the buffer if it is full.
     */
    private void append(char character) {
        if (tokenBufferLength == tokenBuffer.length) {
            char[] newBuffer = new char[tokenBuffer.length * 2];
            System.arraycopy(tokenBuffer, 0, newBuffer, 0, tokenBuffer.length);
            tokenBuffer = newBuffer;
        }
        tokenBuffer[tokenBufferLength++] = character;
    }

    /**
     * Parses a comment block. The opening of the comment has already been
     * read; this consumes everything through the closing star slash.
     *
     * @throws RuntimeException if the stream ends inside the comment
     */
    private void readComment() throws IOException {
        int nextChar;

        for (;;) {
            nextChar = readChar();
            switch (nextChar) {
            case -1:
                throw new RuntimeException("Unclosed comment");
            case '*':
                nextChar = readChar();
                if (nextChar == '/') {
                    return;
                }
                else if (nextChar == -1) {
                    throw new RuntimeException("Unclosed comment");
                }
                else {
                    // Might itself be a '*', look at it again.
                    pushChar(nextChar);
                }
                break;
            default:
                break;
            }
        }
    }

    /**
     * Called when a block start is encountered ({[. Pushes
     * <code>startToken</code> onto the block stack, growing it as needed.
     */
    private void startBlock(int startToken) {
        if (stackCount == unitStack.length) {
            int[] newUS = new int[stackCount * 2];

            System.arraycopy(unitStack, 0, newUS, 0, stackCount);
            unitStack = newUS;
        }
        unitStack[stackCount++] = startToken;
    }

    /**
     * Called when an end block is encountered )]}. Pops the innermost
     * block.
     *
     * @throws RuntimeException if <code>endToken</code> does not close the
     *         innermost open block
     */
    private void endBlock(int endToken) {
        int startToken;

        switch (endToken) {
        case BRACKET_CLOSE:
            startToken = BRACKET_OPEN;
            break;
        case BRACE_CLOSE:
            startToken = BRACE_OPEN;
            break;
        case PAREN_CLOSE:
            startToken = PAREN_OPEN;
            break;
        default:
            // Will never happen.
            startToken = -1;
            break;
        }
        if (stackCount > 0 && unitStack[stackCount - 1] == startToken) {
            stackCount--;
        }
        else {
            // Invalid state, should do something.
            throw new RuntimeException("Unmatched block");
        }
    }

    /**
     * @return true if currently in a block.
     */
    private boolean inBlock() {
        return (stackCount > 0);
    }

    /**
     * Skips any white space, returning the character after the white space.
     * The readWS field is set to true if any whitespace was skipped.
     */
    private int readWS() throws IOException {
        int nextChar;
        while ((nextChar = readChar()) != -1 &&
               Character.isWhitespace((char)nextChar)) {
            readWS = true;
        }
        return nextChar;
    }

    /**
     * Reads a character from the stream, or the pushed back character if
     * there is one. Returns -1 at the end of the stream.
     */
    private int readChar() throws IOException {
        if (didPushChar) {
            didPushChar = false;
            return pushedChar;
        }
        return reader.read();
    }

    /**
     * Supports one character look ahead, this will throw if called twice
     * in a row.
     */
    private void pushChar(int tempChar) {
        if (didPushChar) {
            // Should never happen.
            throw new RuntimeException("Can not handle look ahead of more than one character");
        }
        didPushChar = true;
        pushedChar = tempChar;
    }
}