// Rfc2253Parser

package javax.naming.ldap;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import javax.naming.InvalidNameException;

/*
 * Rfc2253Parser implements a recursive descent parser for a single DN
 * (or a single RDN) given in string form.
 *
 * The parser walks the character array of the name from left to right,
 * keeping its position in the mutable field "cur".  Every syntax error
 * is reported as an InvalidNameException whose message contains the
 * complete input string.
 */
final class Rfc2253Parser {

    private final String name;   // DN being parsed
    private final char[] chars;  // characters in LDAP name being parsed
    private final int len;       // length of "chars"
    private int cur = 0;         // index of first unconsumed char in "chars"

    /*
     * Given an LDAP DN in string form, returns a parser for it.
     * "name" must be non-null; it is dereferenced immediately.
     */
    Rfc2253Parser(String name) {
        this.name = name;
        len = name.length();
        chars = name.toCharArray();
    }

    /*
     * Parses the DN, returning a List of its RDNs (each element is an
     * Rdn; raw collection types are used because this file does not
     * use generics).
     *
     * RDNs are separated by ',' or ';'.  The returned list is in the
     * reverse of the textual order: the last RDN in the string is at
     * index 0 and the first RDN in the string is at the highest index.
     * An empty input string yields an empty list.
     *
     * Parsing always restarts from the beginning of the name, so this
     * method may be called more than once on the same parser.
     *
     * Throws InvalidNameException if an RDN is malformed or if an RDN
     * is followed by anything other than a ',' or ';' separator.
     */
    List parseDn() throws InvalidNameException {
        cur = 0;

        ArrayList rdns =
            new ArrayList(len / 3 + 10);  // leave room for growth

        if (len == 0) {
            return rdns;
        }

        // Collect the RDNs in textual order and reverse once at the end.
        // Inserting each new RDN at index 0 would shift every element
        // already stored, making the loop quadratic in the RDN count.
        rdns.add(doParse(new Rdn()));
        while (cur < len) {
            if (chars[cur] == ',' || chars[cur] == ';') {
                ++cur;      // consume the separator
                rdns.add(doParse(new Rdn()));
            } else {
                throw new InvalidNameException("Invalid name: " + name);
            }
        }
        Collections.reverse(rdns);
        return rdns;
    }

    /*
     * Parses the DN, if it is known to contain a single RDN.
     * The result is collected into a freshly created Rdn.
     */
    Rdn parseRdn() throws InvalidNameException {
        return parseRdn(new Rdn());
    }

    /*
     * Parses the DN, if it is known to contain a single RDN, adding the
     * parsed type/value pairs to the supplied "rdn" and returning it.
     *
     * Parsing continues from the current position; "cur" is not reset.
     * Throws InvalidNameException if any input remains unconsumed after
     * the RDN (for example a ',' or ';' introducing a second RDN).
     */
    Rdn parseRdn(Rdn rdn) throws InvalidNameException {
        rdn = doParse(rdn);
        if (cur < len) {
            throw new InvalidNameException("Invalid RDN: " + name);
        }
        return rdn;
    }

    /*
     * Parses the next RDN into "rdn" and returns it.  Leading and
     * trailing whitespace is consumed.  An RDN is one or more
     * "type = value" pairs joined by '+'; each value is passed through
     * Rdn.unescapeValue() before being stored, and the Rdn is sorted
     * once all pairs have been added.
     *
     * Throws InvalidNameException if unconsumed input remains but no
     * well-formed "type = value" pair starts there.
     *
     * NOTE(review): when no unconsumed input remains on entry (empty
     * string, or directly after a trailing ',' / ';'), and likewise
     * directly after a trailing '+', the loop is not (re-)entered and
     * "rdn" is returned without a further pair being required.  Confirm
     * that callers rely on this leniency before tightening it.
     */
    private Rdn doParse(Rdn rdn) throws InvalidNameException {

        while (cur < len) {
            consumeWhitespace();
            String attrType = parseAttrType();
            consumeWhitespace();
            if (cur >= len || chars[cur] != '=') {
                throw new InvalidNameException("Invalid name: " + name);
            }
            ++cur;      // consume '='
            consumeWhitespace();
            String value = parseAttrValue();
            consumeWhitespace();

            rdn.put(attrType, Rdn.unescapeValue(value));
            if (cur >= len || chars[cur] != '+') {
                break;  // no further pair belongs to this RDN
            }
            ++cur;      // consume '+'
        }
        rdn.sort();
        return rdn;
    }

    /*
     * Returns the attribute type that begins at the next unconsumed
     * char.  No leading whitespace is expected.
     * This routine is more generous than RFC 2253.  It accepts
     * attribute types composed of any nonempty combination of Unicode
     * letters, Unicode digits, '.', '-', and internal space characters.
     *
     * Throws InvalidNameException if the attribute type is empty.
     */
    private String parseAttrType() throws InvalidNameException {

        final int beg = cur;
        while (cur < len) {
            char c = chars[cur];
            if (Character.isLetterOrDigit(c) ||
                    c == '.' ||
                    c == '-' ||
                    c == ' ') {
                ++cur;
            } else {
                break;
            }
        }
        // Back out any trailing spaces: only internal spaces belong to
        // the type; the caller consumes the whitespace before '='.
        while ((cur > beg) && (chars[cur - 1] == ' ')) {
            --cur;
        }

        if (beg == cur) {
            throw new InvalidNameException("Invalid name: " + name);
        }
        return new String(chars, beg, cur - beg);
    }

    /*
     * Returns the attribute value that begins at the next unconsumed
     * char.  No leading whitespace is expected.
     *
     * The first character selects the form: '#' starts a binary value,
     * '"' starts a quoted value, and anything else (including the end
     * of input) is handled as a plain string value.  The value is
     * returned as it appears in the input, still escaped.
     */
    private String parseAttrValue() throws InvalidNameException {

        if (cur < len && chars[cur] == '#') {
            return parseBinaryAttrValue();
        } else if (cur < len && chars[cur] == '"') {
            return parseQuotedAttrValue();
        } else {
            return parseStringAttrValue();
        }
    }

    /*
     * Returns the binary attribute value starting at the current '#'.
     * The result includes the leading '#' and every letter or digit
     * that directly follows it.  The characters are not checked for
     * being hexadecimal here.
     */
    private String parseBinaryAttrValue() throws InvalidNameException {
        final int beg = cur;
        ++cur;      // consume '#'
        while ((cur < len) &&
                Character.isLetterOrDigit(chars[cur])) {
            ++cur;
        }
        return new String(chars, beg, cur - beg);
    }

    /*
     * Returns the quoted attribute value starting at the current '"'.
     * The result includes both the opening and the closing quote.  A
     * backslash escapes the character after it, so an escaped quote
     * does not end the value.
     *
     * Throws InvalidNameException if there is no closing quote.
     */
    private String parseQuotedAttrValue() throws InvalidNameException {

        final int beg = cur;
        ++cur;      // consume '"'

        while ((cur < len) && chars[cur] != '"') {
            if (chars[cur] == '\\') {
                ++cur;  // consume backslash, then what follows
            }
            ++cur;
        }
        if (cur >= len) {   // no closing quote
            throw new InvalidNameException("Invalid name: " + name);
        }
        ++cur;      // consume closing quote

        return new String(chars, beg, cur - beg);
    }

    /*
     * Returns the plain (unquoted, non-binary) attribute value that
     * starts at the current position and runs up to, but not including,
     * the next unescaped ',', ';' or '+' or the end of input.  Unescaped
     * trailing whitespace is left out of the returned string, although
     * it is still consumed from the input.
     *
     * Throws InvalidNameException if the value ends in a backslash that
     * has nothing after it.
     */
    private String parseStringAttrValue() throws InvalidNameException {

        final int beg = cur;
        int esc = -1;   // index of the most recently escaped character

        while ((cur < len) && !atTerminator()) {
            if (chars[cur] == '\\') {
                ++cur;  // consume backslash, then what follows
                esc = cur;
            }
            ++cur;
        }
        if (cur > len) {    // 'twas backslash followed by nothing
            throw new InvalidNameException("Invalid name: " + name);
        }

        // Trim off (unescaped) trailing whitespace.  Trimming stops at
        // the first non-whitespace character or at a whitespace
        // character that was itself escaped.
        int end;
        for (end = cur; end > beg; end--) {
            if (!isWhitespace(chars[end - 1]) || (esc == end - 1)) {
                break;
            }
        }
        return new String(chars, beg, end - beg);
    }

    /*
     * Advances past any whitespace at the current position.
     */
    private void consumeWhitespace() {
        while ((cur < len) && isWhitespace(chars[cur])) {
            ++cur;
        }
    }

    /*
     * Returns true if next unconsumed character is one that terminates
     * a string attribute value (',', ';' or '+').  Returns false at the
     * end of input.
     */
    private boolean atTerminator() {
        return (cur < len &&
                (chars[cur] == ',' ||
                    chars[cur] == ';' ||
                    chars[cur] == '+'));
    }

    /*
     * Best guess as to what RFC 2253 means by "whitespace": only the
     * space character and carriage return are treated as whitespace.
     */
    private static boolean isWhitespace(char c) {
        return (c == ' ' || c == '\r');
    }
}