1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2004 The Apache Software Foundation.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl;
  58. import java.io.IOException;
  59. import com.sun.org.apache.xerces.internal.util.SymbolTable;
  60. import com.sun.org.apache.xerces.internal.util.XML11Char;
  61. import com.sun.org.apache.xerces.internal.util.XMLChar;
  62. import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  63. import com.sun.org.apache.xerces.internal.xni.XMLString;
  64. import com.sun.org.apache.xerces.internal.xni.XNIException;
  65. /**
  66. * This class is responsible for scanning the declarations found
  67. * in the internal and external subsets of a DTD in an XML document.
  68. * The scanner acts as the sources for the DTD information which is
  69. * communicated to the DTD handlers.
  70. * <p>
  71. * This component requires the following features and properties from the
  72. * component manager that uses it:
  73. * <ul>
  74. * <li>http://xml.org/sax/features/validation</li>
  75. * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
  76. * <li>http://apache.org/xml/properties/internal/symbol-table</li>
  77. * <li>http://apache.org/xml/properties/internal/error-reporter</li>
  78. * <li>http://apache.org/xml/properties/internal/entity-manager</li>
  79. * </ul>
  80. *
  81. * @author Arnaud Le Hors, IBM
  82. * @author Andy Clark, IBM
  83. * @author Glenn Marcy, IBM
  84. * @author Eric Ye, IBM
  85. *
  86. * @version $Id: XML11DTDScannerImpl.java,v 1.10 2004/01/26 19:37:35 mrglavas Exp $
  87. */
  88. public class XML11DTDScannerImpl
  89. extends XMLDTDScannerImpl {
  90. /** Array of 3 strings. */
  91. private String[] fStrings = new String[3];
  92. /** String. */
  93. private XMLString fString = new XMLString();
  94. /** String buffer. */
  95. private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  96. /** String buffer. */
  97. private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  98. private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
  99. //
  100. // Constructors
  101. //
  102. /** Default constructor. */
  103. public XML11DTDScannerImpl() {super();} // <init>()
  104. /** Constructor for he use of non-XMLComponentManagers. */
  105. public XML11DTDScannerImpl(SymbolTable symbolTable,
  106. XMLErrorReporter errorReporter, XMLEntityManager entityManager) {
  107. super(symbolTable, errorReporter, entityManager);
  108. }
  109. //
  110. // XMLDTDScanner methods
  111. //
  112. //
  113. // XMLScanner methods
  114. //
  115. // NOTE: this is a carbon copy of the code in XML11DocumentScannerImpl;
  116. // we need to override these methods in both places. Ah for
  117. // multiple inheritance...
  118. // This needs to be refactored!!! - NG
  119. /**
  120. * Scans public ID literal.
  121. *
  122. * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  123. * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
  124. *
  125. * The returned string is normalized according to the following rule,
  126. * from http://www.w3.org/TR/REC-xml#dt-pubid:
  127. *
  128. * Before a match is attempted, all strings of white space in the public
  129. * identifier must be normalized to single space characters (#x20), and
  130. * leading and trailing white space must be removed.
  131. *
  132. * @param literal The string to fill in with the public ID literal.
  133. * @return True on success.
  134. *
  135. * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
  136. * the time of calling is lost.
  137. */
  138. protected boolean scanPubidLiteral(XMLString literal)
  139. throws IOException, XNIException
  140. {
  141. int quote = fEntityScanner.scanChar();
  142. if (quote != '\'' && quote != '"') {
  143. reportFatalError("QuoteRequiredInPublicID", null);
  144. return false;
  145. }
  146. fStringBuffer.clear();
  147. // skip leading whitespace
  148. boolean skipSpace = true;
  149. boolean dataok = true;
  150. while (true) {
  151. int c = fEntityScanner.scanChar();
  152. // REVISIT: it could really only be \n or 0x20; all else is normalized, no? - neilg
  153. if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
  154. if (!skipSpace) {
  155. // take the first whitespace as a space and skip the others
  156. fStringBuffer.append(' ');
  157. skipSpace = true;
  158. }
  159. }
  160. else if (c == quote) {
  161. if (skipSpace) {
  162. // if we finished on a space let's trim it
  163. fStringBuffer.length--;
  164. }
  165. literal.setValues(fStringBuffer);
  166. break;
  167. }
  168. else if (XMLChar.isPubid(c)) {
  169. fStringBuffer.append((char)c);
  170. skipSpace = false;
  171. }
  172. else if (c == -1) {
  173. reportFatalError("PublicIDUnterminated", null);
  174. return false;
  175. }
  176. else {
  177. dataok = false;
  178. reportFatalError("InvalidCharInPublicID",
  179. new Object[]{Integer.toHexString(c)});
  180. }
  181. }
  182. return dataok;
  183. }
  184. /**
  185. * Normalize whitespace in an XMLString converting all whitespace
  186. * characters to space characters.
  187. */
  188. protected void normalizeWhitespace(XMLString value) {
  189. int end = value.offset + value.length;
  190. for (int i = value.offset; i < end; i++) {
  191. int c = value.ch[i];
  192. if (XMLChar.isSpace(c)) {
  193. value.ch[i] = ' ';
  194. }
  195. }
  196. }
  197. // returns true if the given character is not
  198. // valid with respect to the version of
  199. // XML understood by this scanner.
  200. protected boolean isInvalid(int value) {
  201. return (!XML11Char.isXML11Valid(value));
  202. } // isInvalid(int): boolean
  203. // returns true if the given character is not
  204. // valid or may not be used outside a character reference
  205. // with respect to the version of XML understood by this scanner.
  206. protected boolean isInvalidLiteral(int value) {
  207. return (!XML11Char.isXML11ValidLiteral(value));
  208. } // isInvalidLiteral(int): boolean
  209. // returns true if the given character is
  210. // a valid nameChar with respect to the version of
  211. // XML understood by this scanner.
  212. protected boolean isValidNameChar(int value) {
  213. return (XML11Char.isXML11Name(value));
  214. } // isValidNameChar(int): boolean
  215. // returns true if the given character is
  216. // a valid nameStartChar with respect to the version of
  217. // XML understood by this scanner.
  218. protected boolean isValidNameStartChar(int value) {
  219. return (XML11Char.isXML11NameStart(value));
  220. } // isValidNameStartChar(int): boolean
  221. // returns true if the given character is
  222. // a valid NCName character with respect to the version of
  223. // XML understood by this scanner.
  224. protected boolean isValidNCName(int value) {
  225. return (XML11Char.isXML11NCName(value));
  226. } // isValidNCName(int): boolean
  227. // returns true if the given character is
  228. // a valid high surrogate for a nameStartChar
  229. // with respect to the version of XML understood
  230. // by this scanner.
  231. protected boolean isValidNameStartHighSurrogate(int value) {
  232. return XML11Char.isXML11NameHighSurrogate(value);
  233. } // isValidNameStartHighSurrogate(int): boolean
  234. // note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1
  235. // documents may invoke 1.0 entities; thus either version decl (or none!)
  236. // is allowed to appear in this context
  237. protected boolean versionSupported(String version) {
  238. return version.equals("1.1") || version.equals ("1.0");
  239. } // versionSupported(String): boolean
  240. // returns the error message key for unsupported
  241. // versions of XML with respect to the version of
  242. // XML understood by this scanner.
  243. protected String getVersionNotSupportedKey () {
  244. return "VersionNotSupported11";
  245. } // getVersionNotSupportedKey: String
  246. } // class XML11DTDScannerImpl