1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2004 The Apache Software Foundation.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl;
  58. import java.io.CharConversionException;
  59. import java.io.EOFException;
  60. import java.io.IOException;
  61. import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException;
  62. import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  63. import com.sun.org.apache.xerces.internal.util.AugmentationsImpl;
  64. import com.sun.org.apache.xerces.internal.util.XMLAttributesImpl;
  65. import com.sun.org.apache.xerces.internal.util.XMLChar;
  66. import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  67. import com.sun.org.apache.xerces.internal.util.XMLSymbols;
  68. import com.sun.org.apache.xerces.internal.xni.Augmentations;
  69. import com.sun.org.apache.xerces.internal.xni.QName;
  70. import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
  71. import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler;
  72. import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
  73. import com.sun.org.apache.xerces.internal.xni.XMLString;
  74. import com.sun.org.apache.xerces.internal.xni.XNIException;
  75. import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
  76. import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  77. import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  78. import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner;
  79. import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
  80. import com.sun.org.apache.xerces.internal.util.SecurityManager;
  81. /**
  82. * This class is responsible for scanning the structure and content
  83. * of document fragments. The scanner acts as the source for the
  84. * document information which is communicated to the document handler.
  85. * <p>
  86. * This component requires the following features and properties from the
  87. * component manager that uses it:
  88. * <ul>
  89. * <li>http://xml.org/sax/features/validation</li>
  90. * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
  91. * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
  92. * <li>http://apache.org/xml/properties/internal/symbol-table</li>
  93. * <li>http://apache.org/xml/properties/internal/error-reporter</li>
  94. * <li>http://apache.org/xml/properties/internal/entity-manager</li>
  95. * </ul>
  96. *
  97. * @author Glenn Marcy, IBM
  98. * @author Andy Clark, IBM
  99. * @author Arnaud Le Hors, IBM
  100. * @author Eric Ye, IBM
  101. *
  102. * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.52 2004/04/08 22:24:52 mrglavas Exp $
  103. */
  104. public class XMLDocumentFragmentScannerImpl
  105. extends XMLScanner
  106. implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
  107. //
  108. // Constants
  109. //
  110. protected SecurityManager fSecurityManager = null;
  111. // scanner states
  112. /** Scanner state: start of markup. */
  113. protected static final int SCANNER_STATE_START_OF_MARKUP = 1;
  114. /** Scanner state: comment. */
  115. protected static final int SCANNER_STATE_COMMENT = 2;
  116. /** Scanner state: processing instruction. */
  117. protected static final int SCANNER_STATE_PI = 3;
  118. /** Scanner state: DOCTYPE. */
  119. protected static final int SCANNER_STATE_DOCTYPE = 4;
  120. /** Scanner state: root element. */
  121. protected static final int SCANNER_STATE_ROOT_ELEMENT = 6;
  122. /** Scanner state: content. */
  123. protected static final int SCANNER_STATE_CONTENT = 7;
  124. /** Scanner state: reference. */
  125. protected static final int SCANNER_STATE_REFERENCE = 8;
  126. /** Scanner state: end of input. */
  127. protected static final int SCANNER_STATE_END_OF_INPUT = 13;
  128. /** Scanner state: terminated. */
  129. protected static final int SCANNER_STATE_TERMINATED = 14;
  130. /** Scanner state: CDATA section. */
  131. protected static final int SCANNER_STATE_CDATA = 15;
  132. /** Scanner state: Text declaration. */
  133. protected static final int SCANNER_STATE_TEXT_DECL = 16;
  134. // feature identifiers
  135. /** Feature identifier: namespaces. */
  136. protected static final String NAMESPACES =
  137. Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
/** Feature identifier: notify built-in references. */
  139. protected static final String NOTIFY_BUILTIN_REFS =
  140. Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
  141. // property identifiers
  142. /** Property identifier: entity resolver. */
  143. protected static final String ENTITY_RESOLVER =
  144. Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
  145. // recognized features and properties
  146. /** Recognized features. */
  147. private static final String[] RECOGNIZED_FEATURES = {
  148. NAMESPACES,
  149. VALIDATION,
  150. NOTIFY_BUILTIN_REFS,
  151. NOTIFY_CHAR_REFS,
  152. };
  153. /** Feature defaults. */
  154. private static final Boolean[] FEATURE_DEFAULTS = {
  155. null,
  156. null,
  157. Boolean.FALSE,
  158. Boolean.FALSE,
  159. };
  160. /** Recognized properties. */
  161. private static final String[] RECOGNIZED_PROPERTIES = {
  162. SYMBOL_TABLE,
  163. ERROR_REPORTER,
  164. ENTITY_MANAGER,
  165. ENTITY_RESOLVER,
  166. };
  167. /** Property defaults. */
  168. private static final Object[] PROPERTY_DEFAULTS = {
  169. null,
  170. null,
  171. null,
  172. null,
  173. };
  174. // debugging
  175. /** Debug scanner state. */
  176. private static final boolean DEBUG_SCANNER_STATE = false;
  177. /** Debug dispatcher. */
  178. private static final boolean DEBUG_DISPATCHER = false;
  179. /** Debug content dispatcher scanning. */
  180. protected static final boolean DEBUG_CONTENT_SCANNING = false;
  181. //
  182. // Data
  183. //
  184. // protected data
  185. /** Document handler. */
  186. protected XMLDocumentHandler fDocumentHandler;
  187. /** Entity stack. */
  188. protected int[] fEntityStack = new int[4];
  189. /** Markup depth. */
  190. protected int fMarkupDepth;
  191. /** Scanner state. */
  192. protected int fScannerState;
  193. /** SubScanner state: inside scanContent method. */
  194. protected boolean fInScanContent = false;
  195. /** has external dtd */
  196. protected boolean fHasExternalDTD;
  197. /** Standalone. */
  198. protected boolean fStandalone;
  199. //variable to restrict attribute limit
  200. protected int fElementAttributeLimit;
  201. /** External subset resolver. **/
  202. protected ExternalSubsetResolver fExternalSubsetResolver;
  203. // element information
  204. /** Current element. */
  205. protected QName fCurrentElement;
  206. /** Element stack. */
  207. protected ElementStack fElementStack = new ElementStack();
  208. // other info
  209. /** Document system identifier.
  210. * REVISIT: So what's this used for? - NG
  211. * protected String fDocumentSystemId;
  212. ******/
  213. // features
  214. /** Notify built-in references. */
  215. protected boolean fNotifyBuiltInRefs = false;
  216. // dispatchers
  217. /** Active dispatcher. */
  218. protected Dispatcher fDispatcher;
  219. /** Content dispatcher. */
  220. protected Dispatcher fContentDispatcher = createContentDispatcher();
  221. // temporary variables
  222. /** Element QName. */
  223. protected QName fElementQName = new QName();
  224. /** Attribute QName. */
  225. protected QName fAttributeQName = new QName();
  226. /** Element attributes. */
  227. protected XMLAttributesImpl fAttributes = new XMLAttributesImpl();
  228. /** String. */
  229. protected XMLString fTempString = new XMLString();
  230. /** String. */
  231. protected XMLString fTempString2 = new XMLString();
  232. /** Array of 3 strings. */
  233. private String[] fStrings = new String[3];
  234. /** String buffer. */
  235. private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  236. /** String buffer. */
  237. private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  238. /** Another QName. */
  239. private QName fQName = new QName();
  240. /** Single character array. */
  241. private final char[] fSingleChar = new char[1];
  242. /** External entity. */
  243. private XMLEntityManager.ExternalEntity fExternalEntity = new XMLEntityManager.ExternalEntity();
  244. /**
  245. * Saw spaces after element name or between attributes.
  246. *
  247. * This is reserved for the case where scanning of a start element spans
  248. * several methods, as is the case when scanning the start of a root element
  249. * where a DTD external subset may be read after scanning the element name.
  250. */
  251. private boolean fSawSpace;
  252. //
  253. // Constructors
  254. //
  255. /** Default constructor. */
  256. public XMLDocumentFragmentScannerImpl() {} // <init>()
  257. //
  258. // XMLDocumentScanner methods
  259. //
  260. /**
  261. * Sets the input source.
  262. *
  263. * @param inputSource The input source.
  264. *
  265. * @throws IOException Thrown on i/o error.
  266. */
  267. public void setInputSource(XMLInputSource inputSource) throws IOException {
  268. fEntityManager.setEntityHandler(this);
  269. fEntityManager.startEntity("$fragment$", inputSource, false, true);
  270. //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
  271. } // setInputSource(XMLInputSource)
  272. /**
  273. * Scans a document.
  274. *
  275. * @param complete True if the scanner should scan the document
  276. * completely, pushing all events to the registered
* document handler. A value of false indicates that
* the scanner should only scan the next portion
  279. * of the document and return. A scanner instance is
  280. * permitted to completely scan a document if it does
  281. * not support this "pull" scanning model.
  282. *
  283. * @return True if there is more to scan, false otherwise.
  284. */
  285. public boolean scanDocument(boolean complete)
  286. throws IOException, XNIException {
  287. // reset entity scanner
  288. fEntityScanner = fEntityManager.getEntityScanner();
  289. // keep dispatching "events"
  290. fEntityManager.setEntityHandler(this);
  291. do {
  292. if (!fDispatcher.dispatch(complete)) {
  293. return false;
  294. }
  295. } while (complete);
  296. // return success
  297. return true;
  298. } // scanDocument(boolean):boolean
  299. //
  300. // XMLComponent methods
  301. //
  302. /**
  303. * Resets the component. The component can query the component manager
  304. * about any features and properties that affect the operation of the
  305. * component.
  306. *
  307. * @param componentManager The component manager.
  308. *
* @throws XMLConfigurationException Thrown by component on initialization
* error. For example, if a feature or property is
* required for the operation of the component, the
* component manager may throw this exception to
* signal that the feature or property is not
* recognized or not supported.
  315. */
  316. public void reset(XMLComponentManager componentManager)
  317. throws XMLConfigurationException {
  318. super.reset(componentManager);
  319. // other settings
  320. //fDocumentSystemId = null;
  321. // sax features
  322. fAttributes.setNamespaces(fNamespaces);
  323. // initialize vars
  324. fMarkupDepth = 0;
  325. fCurrentElement = null;
  326. fElementStack.clear();
  327. fHasExternalDTD = false;
  328. fStandalone = false;
  329. fInScanContent = false;
  330. // setup dispatcher
  331. try {
  332. fSecurityManager = (SecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER);
  333. } catch (XMLConfigurationException e) {
  334. fSecurityManager = null;
  335. }
  336. fElementAttributeLimit = (fSecurityManager != null)?fSecurityManager.getElementAttrLimit():0;
  337. setScannerState(SCANNER_STATE_CONTENT);
  338. setDispatcher(fContentDispatcher);
  339. if (fParserSettings) {
  340. // parser settings have changed. reset them.
  341. // xerces features
  342. try {
  343. fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS);
  344. } catch (XMLConfigurationException e) {
  345. fNotifyBuiltInRefs = false;
  346. }
  347. // xerces properties
  348. try {
  349. Object resolver = componentManager.getProperty(ENTITY_RESOLVER);
  350. fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ?
  351. (ExternalSubsetResolver) resolver : null;
  352. }
  353. catch (XMLConfigurationException e) {
  354. fExternalSubsetResolver = null;
  355. }
  356. }
  357. } // reset(XMLComponentManager)
  358. /**
  359. * Returns a list of feature identifiers that are recognized by
  360. * this component. This method may return null if no features
  361. * are recognized by this component.
  362. */
  363. public String[] getRecognizedFeatures() {
  364. return (String[])(RECOGNIZED_FEATURES.clone());
  365. } // getRecognizedFeatures():String[]
  366. /**
  367. * Sets the state of a feature. This method is called by the component
  368. * manager any time after reset when a feature changes state.
  369. * <p>
  370. * <strong>Note:</strong> Components should silently ignore features
  371. * that do not affect the operation of the component.
  372. *
  373. * @param featureId The feature identifier.
  374. * @param state The state of the feature.
  375. *
* @throws XMLConfigurationException The component should not throw
* this exception.
  380. */
  381. public void setFeature(String featureId, boolean state)
  382. throws XMLConfigurationException {
  383. super.setFeature(featureId, state);
  384. // Xerces properties
  385. if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
  386. final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
  387. if (suffixLength == Constants.NOTIFY_BUILTIN_REFS_FEATURE.length() &&
  388. featureId.endsWith(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
  389. fNotifyBuiltInRefs = state;
  390. }
  391. }
  392. } // setFeature(String,boolean)
  393. /**
  394. * Returns a list of property identifiers that are recognized by
  395. * this component. This method may return null if no properties
  396. * are recognized by this component.
  397. */
  398. public String[] getRecognizedProperties() {
  399. return (String[])(RECOGNIZED_PROPERTIES.clone());
  400. } // getRecognizedProperties():String[]
  401. /**
  402. * Sets the value of a property. This method is called by the component
  403. * manager any time after reset when a property changes value.
  404. * <p>
  405. * <strong>Note:</strong> Components should silently ignore properties
  406. * that do not affect the operation of the component.
  407. *
  408. * @param propertyId The property identifier.
  409. * @param value The value of the property.
  410. *
* @throws XMLConfigurationException The component should not throw
* this exception.
  415. */
  416. public void setProperty(String propertyId, Object value)
  417. throws XMLConfigurationException {
  418. super.setProperty(propertyId, value);
  419. // Xerces properties
  420. if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
  421. final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
  422. if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
  423. propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
  424. fEntityManager = (XMLEntityManager)value;
  425. return;
  426. }
  427. if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
  428. propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
  429. fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ?
  430. (ExternalSubsetResolver) value : null;
  431. return;
  432. }
  433. }
  434. } // setProperty(String,Object)
  435. /**
  436. * Returns the default state for a feature, or null if this
  437. * component does not want to report a default value for this
  438. * feature.
  439. *
  440. * @param featureId The feature identifier.
  441. *
  442. * @since Xerces 2.2.0
  443. */
  444. public Boolean getFeatureDefault(String featureId) {
  445. for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
  446. if (RECOGNIZED_FEATURES[i].equals(featureId)) {
  447. return FEATURE_DEFAULTS[i];
  448. }
  449. }
  450. return null;
  451. } // getFeatureDefault(String):Boolean
  452. /**
  453. * Returns the default state for a property, or null if this
  454. * component does not want to report a default value for this
  455. * property.
  456. *
  457. * @param propertyId The property identifier.
  458. *
  459. * @since Xerces 2.2.0
  460. */
  461. public Object getPropertyDefault(String propertyId) {
  462. for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
  463. if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
  464. return PROPERTY_DEFAULTS[i];
  465. }
  466. }
  467. return null;
  468. } // getPropertyDefault(String):Object
  469. //
  470. // XMLDocumentSource methods
  471. //
  472. /**
  473. * setDocumentHandler
  474. *
  475. * @param documentHandler
  476. */
  477. public void setDocumentHandler(XMLDocumentHandler documentHandler) {
  478. fDocumentHandler = documentHandler;
  479. } // setDocumentHandler(XMLDocumentHandler)
  480. /** Returns the document handler */
  481. public XMLDocumentHandler getDocumentHandler(){
  482. return fDocumentHandler;
  483. }
  484. //
  485. // XMLEntityHandler methods
  486. //
  487. /**
* This method notifies of the start of an entity. The DTD has the
* pseudo-name of "[dtd]"; parameter entity names start with '%'; and
* general entities are just specified by their name.
  491. *
  492. * @param name The name of the entity.
  493. * @param identifier The resource identifier.
  494. * @param encoding The auto-detected IANA encoding name of the entity
  495. * stream. This value will be null in those situations
  496. * where the entity encoding is not auto-detected (e.g.
  497. * internal entities or a document entity that is
  498. * parsed from a java.io.Reader).
  499. * @param augs Additional information that may include infoset augmentations
  500. *
  501. * @throws XNIException Thrown by handler to signal an error.
  502. */
  503. public void startEntity(String name,
  504. XMLResourceIdentifier identifier,
  505. String encoding, Augmentations augs) throws XNIException {
  506. // keep track of this entity before fEntityDepth is increased
  507. if (fEntityDepth == fEntityStack.length) {
  508. int[] entityarray = new int[fEntityStack.length * 2];
  509. System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length);
  510. fEntityStack = entityarray;
  511. }
  512. fEntityStack[fEntityDepth] = fMarkupDepth;
  513. super.startEntity(name, identifier, encoding, augs);
  514. // WFC: entity declared in external subset in standalone doc
  515. if(fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) {
  516. reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
  517. new Object[]{name});
  518. }
  519. // call handler
  520. if (fDocumentHandler != null && !fScanningAttribute) {
  521. if (!name.equals("[xml]")) {
  522. fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
  523. }
  524. }
  525. } // startEntity(String,XMLResourceIdentifier,String)
  526. /**
* This method notifies the end of an entity. The DTD has the pseudo-name
* of "[dtd]"; parameter entity names start with '%'; and general entities
* are just specified by their name.
  530. *
  531. * @param name The name of the entity.
  532. * @param augs Additional information that may include infoset augmentations
  533. *
  534. * @throws XNIException Thrown by handler to signal an error.
  535. */
  536. public void endEntity(String name, Augmentations augs) throws XNIException {
  537. // flush possible pending output buffer - see scanContent
  538. if (fInScanContent && fStringBuffer.length != 0
  539. && fDocumentHandler != null) {
  540. fDocumentHandler.characters(fStringBuffer, null);
  541. fStringBuffer.length = 0; // make sure we know it's been flushed
  542. }
  543. super.endEntity(name, augs);
  544. // make sure markup is properly balanced
  545. if (fMarkupDepth != fEntityStack[fEntityDepth]) {
  546. reportFatalError("MarkupEntityMismatch", null);
  547. }
  548. // call handler
  549. if (fDocumentHandler != null && !fScanningAttribute) {
  550. if (!name.equals("[xml]")) {
  551. fDocumentHandler.endGeneralEntity(name, augs);
  552. }
  553. }
  554. } // endEntity(String)
  555. //
  556. // Protected methods
  557. //
  558. // dispatcher factory methods
  559. /** Creates a content dispatcher. */
  560. protected Dispatcher createContentDispatcher() {
  561. return new FragmentContentDispatcher();
  562. } // createContentDispatcher():Dispatcher
  563. // scanning methods
  564. /**
  565. * Scans an XML or text declaration.
  566. * <p>
  567. * <pre>
  568. * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  569. * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  570. * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  571. * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  572. * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  573. * | ('"' ('yes' | 'no') '"'))
  574. *
  575. * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  576. * </pre>
  577. *
  578. * @param scanningTextDecl True if a text declaration is to
  579. * be scanned instead of an XML
  580. * declaration.
  581. */
  582. protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
  583. throws IOException, XNIException {
  584. // scan decl
  585. super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
  586. fMarkupDepth--;
  587. // pseudo-attribute values
  588. String version = fStrings[0];
  589. String encoding = fStrings[1];
  590. String standalone = fStrings[2];
  591. // set standalone
  592. fStandalone = standalone != null && standalone.equals("yes");
  593. fEntityManager.setStandalone(fStandalone);
  594. // call handler
  595. if (fDocumentHandler != null) {
  596. if (scanningTextDecl) {
  597. fDocumentHandler.textDecl(version, encoding, null);
  598. }
  599. else {
  600. fDocumentHandler.xmlDecl(version, encoding, standalone, null);
  601. }
  602. }
  603. // set encoding on reader
  604. if (encoding != null && !fEntityScanner.fCurrentEntity.isDeclaredEncoding()) {
  605. fEntityScanner.setEncoding(encoding);
  606. }
  607. } // scanXMLDeclOrTextDecl(boolean)
  608. /**
* Scans the data of a processing instruction. This is needed to handle the situation
  610. * where a document starts with a processing instruction whose
  611. * target name <em>starts with</em> "xml". (e.g. xmlfoo)
  612. *
  613. * @param target The PI target
  614. * @param data The string to fill in with the data
  615. */
  616. protected void scanPIData(String target, XMLString data)
  617. throws IOException, XNIException {
  618. super.scanPIData(target, data);
  619. fMarkupDepth--;
  620. // call handler
  621. if (fDocumentHandler != null) {
  622. fDocumentHandler.processingInstruction(target, data, null);
  623. }
  624. } // scanPIData(String)
  625. /**
  626. * Scans a comment.
  627. * <p>
  628. * <pre>
  629. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  630. * </pre>
  631. * <p>
  632. * <strong>Note:</strong> Called after scanning past '<!--'
  633. */
  634. protected void scanComment() throws IOException, XNIException {
  635. scanComment(fStringBuffer);
  636. fMarkupDepth--;
  637. // call handler
  638. if (fDocumentHandler != null) {
  639. fDocumentHandler.comment(fStringBuffer, null);
  640. }
  641. } // scanComment()
  642. /**
  643. * Scans a start element. This method will handle the binding of
  644. * namespace information and notifying the handler of the start
  645. * of the element.
  646. * <p>
  647. * <pre>
  648. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  649. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  650. * </pre>
  651. * <p>
  652. * <strong>Note:</strong> This method assumes that the leading
  653. * '<' character has been consumed.
  654. * <p>
  655. * <strong>Note:</strong> This method uses the fElementQName and
  656. * fAttributes variables. The contents of these variables will be
  657. * destroyed. The caller should copy important information out of
  658. * these variables before calling this method.
  659. *
* @return True if element is empty (i.e. it matches
* production [44]).
  662. */
  663. protected boolean scanStartElement()
  664. throws IOException, XNIException {
  665. if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()");
  666. // name
  667. if (fNamespaces) {
  668. fEntityScanner.scanQName(fElementQName);
  669. }
  670. else {
  671. String name = fEntityScanner.scanName();
  672. fElementQName.setValues(null, name, name, null);
  673. }
  674. String rawname = fElementQName.rawname;
  675. // push element stack
  676. fCurrentElement = fElementStack.pushElement(fElementQName);
  677. // attributes
  678. boolean empty = false;
  679. fAttributes.removeAllAttributes();
  680. do {
  681. // spaces
  682. boolean sawSpace = fEntityScanner.skipSpaces();
  683. // end tag?
  684. int c = fEntityScanner.peekChar();
  685. if (c == '>') {
  686. fEntityScanner.scanChar();
  687. break;
  688. }
  689. else if (c == '/') {
  690. fEntityScanner.scanChar();
  691. if (!fEntityScanner.skipChar('>')) {
  692. reportFatalError("ElementUnterminated",
  693. new Object[]{rawname});
  694. }
  695. empty = true;
  696. break;
  697. }
  698. else if (!isValidNameStartChar(c) || !sawSpace) {
  699. // Second chance. Check if this character is a high
  700. // surrogate of a valid name start character.
  701. if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
  702. reportFatalError("ElementUnterminated",
  703. new Object[] { rawname });
  704. }
  705. }
  706. // attributes
  707. scanAttribute(fAttributes);
  708. if (fAttributes.getLength() > fElementAttributeLimit){
  709. fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  710. "ElementAttributeLimit",
  711. new Object[]{rawname, new Integer(fAttributes.getLength()) },
  712. XMLErrorReporter.SEVERITY_FATAL_ERROR );
  713. }
  714. } while (true);
  715. // call handler
  716. if (fDocumentHandler != null) {
  717. if (empty) {
  718. //decrease the markup depth..
  719. fMarkupDepth--;
  720. // check that this element was opened in the same entity
  721. if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  722. reportFatalError("ElementEntityMismatch",
  723. new Object[]{fCurrentElement.rawname});
  724. }
  725. fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
  726. //pop the element off the stack..
  727. fElementStack.popElement(fElementQName);
  728. }
  729. else {
  730. fDocumentHandler.startElement(fElementQName, fAttributes, null);
  731. }
  732. }
  733. if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty);
  734. return empty;
  735. } // scanStartElement():boolean
  736. /**
  737. * Scans the name of an element in a start or empty tag.
  738. *
  739. * @see #scanStartElement()
  740. */
  741. protected void scanStartElementName ()
  742. throws IOException, XNIException {
  743. // name
  744. if (fNamespaces) {
  745. fEntityScanner.scanQName(fElementQName);
  746. }
  747. else {
  748. String name = fEntityScanner.scanName();
  749. fElementQName.setValues(null, name, name, null);
  750. }
  751. // Must skip spaces here because the DTD scanner
  752. // would consume them at the end of the external subset.
  753. fSawSpace = fEntityScanner.skipSpaces();
  754. } // scanStartElementName()
  755. /**
  756. * Scans the remainder of a start or empty tag after the element name.
  757. *
  758. * @see #scanStartElement
  759. * @return True if element is empty.
  760. */
  761. protected boolean scanStartElementAfterName()
  762. throws IOException, XNIException {
  763. String rawname = fElementQName.rawname;
  764. // push element stack
  765. fCurrentElement = fElementStack.pushElement(fElementQName);
  766. // attributes
  767. boolean empty = false;
  768. fAttributes.removeAllAttributes();
  769. do {
  770. // end tag?
  771. int c = fEntityScanner.peekChar();
  772. if (c == '>') {
  773. fEntityScanner.scanChar();
  774. break;
  775. }
  776. else if (c == '/') {
  777. fEntityScanner.scanChar();
  778. if (!fEntityScanner.skipChar('>')) {
  779. reportFatalError("ElementUnterminated",
  780. new Object[]{rawname});
  781. }
  782. empty = true;
  783. break;
  784. }
  785. else if (!isValidNameStartChar(c) || !fSawSpace) {
  786. // Second chance. Check if this character is a high
  787. // surrogate of a valid name start character.
  788. if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
  789. reportFatalError("ElementUnterminated",
  790. new Object[] { rawname });
  791. }
  792. }
  793. // attributes
  794. scanAttribute(fAttributes);
  795. // spaces
  796. fSawSpace = fEntityScanner.skipSpaces();
  797. } while (true);
  798. // call handler
  799. if (fDocumentHandler != null) {
  800. if (empty) {
  801. //decrease the markup depth..
  802. fMarkupDepth--;
  803. // check that this element was opened in the same entity
  804. if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  805. reportFatalError("ElementEntityMismatch",
  806. new Object[]{fCurrentElement.rawname});
  807. }
  808. fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
  809. //pop the element off the stack..
  810. fElementStack.popElement(fElementQName);
  811. }
  812. else {
  813. fDocumentHandler.startElement(fElementQName, fAttributes, null);
  814. }
  815. }
  816. if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty);
  817. return empty;
  818. } // scanStartElementAfterName()
  819. /**
  820. * Scans an attribute.
  821. * <p>
  822. * <pre>
  823. * [41] Attribute ::= Name Eq AttValue
  824. * </pre>
  825. * <p>
  826. * <strong>Note:</strong> This method assumes that the next
  827. * character on the stream is the first character of the attribute
  828. * name.
  829. * <p>
  830. * <strong>Note:</strong> This method uses the fAttributeQName and
  831. * fQName variables. The contents of these variables will be
  832. * destroyed.
  833. *
  834. * @param attributes The attributes list for the scanned attribute.
  835. */
  836. protected void scanAttribute(XMLAttributes attributes)
  837. throws IOException, XNIException {
  838. if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()");
  839. // name
  840. if (fNamespaces) {
  841. fEntityScanner.scanQName(fAttributeQName);
  842. }
  843. else {
  844. String name = fEntityScanner.scanName();
  845. fAttributeQName.setValues(null, name, name, null);
  846. }
  847. // equals
  848. fEntityScanner.skipSpaces();
  849. if (!fEntityScanner.skipChar('=')) {
  850. reportFatalError("EqRequiredInAttribute",
  851. new Object[]{fCurrentElement.rawname,fAttributeQName.rawname});
  852. }
  853. fEntityScanner.skipSpaces();
  854. // content
  855. int oldLen = attributes.getLength();
  856. int attrIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
  857. // WFC: Unique Att Spec
  858. if (oldLen == attributes.getLength()) {
  859. reportFatalError("AttributeNotUnique",
  860. new Object[]{fCurrentElement.rawname,
  861. fAttributeQName.rawname});
  862. }
  863. //REVISIT: one more case needs to be included: external PE and standalone is no
  864. boolean isVC = fHasExternalDTD && !fStandalone;
  865. scanAttributeValue(fTempString, fTempString2,
  866. fAttributeQName.rawname, isVC,
  867. fCurrentElement.rawname);
  868. attributes.setValue(attrIndex, fTempString.toString());
  869. attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  870. attributes.setSpecified(attrIndex, true);
  871. if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
  872. } // scanAttribute(XMLAttributes)
  873. /**
  874. * Scans element content.
  875. *
  876. * @return Returns the next character on the stream.
  877. */
  878. protected int scanContent() throws IOException, XNIException {
  879. XMLString content = fTempString;
  880. int c = fEntityScanner.scanContent(content);
  881. if (c == '\r') {
  882. // happens when there is the character reference
  883. fEntityScanner.scanChar();
  884. fStringBuffer.clear();
  885. fStringBuffer.append(fTempString);
  886. fStringBuffer.append((char)c);
  887. content = fStringBuffer;
  888. c = -1;
  889. }
  890. if (fDocumentHandler != null && content.length > 0) {
  891. fDocumentHandler.characters(content, null);
  892. }
  893. if (c == ']' && fTempString.length == 0) {
  894. fStringBuffer.clear();
  895. fStringBuffer.append((char)fEntityScanner.scanChar());
  896. // remember where we are in case we get an endEntity before we
  897. // could flush the buffer out - this happens when we're parsing an
  898. // entity which ends with a ]
  899. fInScanContent = true;
  900. //
  901. // We work on a single character basis to handle cases such as:
  902. // ']]]>' which we might otherwise miss.
  903. //
  904. if (fEntityScanner.skipChar(']')) {
  905. fStringBuffer.append(']');
  906. while (fEntityScanner.skipChar(']')) {
  907. fStringBuffer.append(']');
  908. }
  909. if (fEntityScanner.skipChar('>')) {
  910. reportFatalError("CDEndInContent", null);
  911. }
  912. }
  913. if (fDocumentHandler != null && fStringBuffer.length != 0) {
  914. fDocumentHandler.characters(fStringBuffer, null);
  915. }
  916. fInScanContent = false;
  917. c = -1;
  918. }
  919. return c;
  920. } // scanContent():int
  921. /**
  922. * Scans a CDATA section.
  923. * <p>
  924. * <strong>Note:</strong> This method uses the fTempString and
  925. * fStringBuffer variables.
  926. *
  927. * @param complete True if the CDATA section is to be scanned
  928. * completely.
  929. *
  930. * @return True if CDATA is completely scanned.
  931. */
  932. protected boolean scanCDATASection(boolean complete)
  933. throws IOException, XNIException {
  934. // call handler
  935. if (fDocumentHandler != null) {
  936. fDocumentHandler.startCDATA(null);
  937. }
  938. while (true) {
  939. fStringBuffer.clear();
  940. if (!fEntityScanner.scanData("]]", fStringBuffer)) {
  941. if (fDocumentHandler != null && fStringBuffer.length > 0) {
  942. fDocumentHandler.characters(fStringBuffer, null);
  943. }
  944. int brackets = 0;
  945. while (fEntityScanner.skipChar(']')) {
  946. brackets++;
  947. }
  948. if (fDocumentHandler != null && brackets > 0) {
  949. fStringBuffer.clear();
  950. if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
  951. // Handle large sequences of ']'
  952. int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE;
  953. int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE;
  954. for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
  955. fStringBuffer.append(']');
  956. }
  957. for (int i = 0; i < chunks; i++) {
  958. fDocumentHandler.characters(fStringBuffer, null);
  959. }
  960. if (remainder != 0) {
  961. fStringBuffer.length = remainder;
  962. fDocumentHandler.characters(fStringBuffer, null);
  963. }
  964. }
  965. else {
  966. for (int i = 0; i < brackets; i++) {
  967. fStringBuffer.append(']');
  968. }
  969. fDocumentHandler.characters(fStringBuffer, null);
  970. }
  971. }
  972. if (fEntityScanner.skipChar('>')) {
  973. break;
  974. }
  975. if (fDocumentHandler != null) {
  976. fStringBuffer.clear();
  977. fStringBuffer.append("]]");
  978. fDocumentHandler.characters(fStringBuffer, null);
  979. }
  980. }
  981. else {
  982. if (fDocumentHandler != null) {
  983. fDocumentHandler.characters(fStringBuffer, null);
  984. }
  985. int c = fEntityScanner.peekChar();
  986. if (c != -1 && isInvalidLiteral(c)) {
  987. if (XMLChar.isHighSurrogate(c)) {
  988. fStringBuffer.clear();
  989. scanSurrogates(fStringBuffer);
  990. if (fDocumentHandler != null) {
  991. fDocumentHandler.characters(fStringBuffer, null);
  992. }
  993. }
  994. else {
  995. reportFatalError("InvalidCharInCDSect",
  996. new Object[]{Integer.toString(c,16)});
  997. fEntityScanner.scanChar();
  998. }
  999. }
  1000. }
  1001. }
  1002. fMarkupDepth--;
  1003. // call handler
  1004. if (fDocumentHandler != null) {
  1005. fDocumentHandler.endCDATA(null);
  1006. }
  1007. return true;
  1008. } // scanCDATASection(boolean):boolean
  1009. /**
  1010. * Scans an end element.
  1011. * <p>
  1012. * <pre>
  1013. * [42] ETag ::= '</' Name S? '>'
  1014. * </pre>
  1015. * <p>
  1016. * <strong>Note:</strong> This method uses the fElementQName variable.
  1017. * The contents of this variable will be destroyed. The caller should
  1018. * copy the needed information out of this variable before calling
  1019. * this method.
  1020. *
  1021. * @return The element depth.
  1022. */
  1023. protected int scanEndElement() throws IOException, XNIException {
  1024. if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()");
  1025. fElementStack.popElement(fElementQName) ;
  1026. // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
  1027. //In scanners most of the time is consumed on checks done for XML characters, we can
  1028. // optimize on it and avoid the checks done for endElement,
  1029. //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
  1030. // this should work both for namespace processing true or false...
  1031. //REVISIT: if the string is not the same as expected.. we need to do better error handling..
  1032. //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
  1033. if (!fEntityScanner.skipString(fElementQName.rawname)) {
  1034. reportFatalError("ETagRequired", new Object[]{fElementQName.rawname});
  1035. }
  1036. // end
  1037. fEntityScanner.skipSpaces();
  1038. if (!fEntityScanner.skipChar('>')) {
  1039. reportFatalError("ETagUnterminated",
  1040. new Object[]{fElementQName.rawname});
  1041. }
  1042. fMarkupDepth--;
  1043. //we have increased the depth for two markup "<" characters
  1044. fMarkupDepth--;
  1045. // check that this element was opened in the same entity
  1046. if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  1047. reportFatalError("ElementEntityMismatch",
  1048. new Object[]{fCurrentElement.rawname});
  1049. }
  1050. // call handler
  1051. if (fDocumentHandler != null ) {
  1052. fDocumentHandler.endElement(fElementQName, null);
  1053. }
  1054. return fMarkupDepth;
  1055. } // scanEndElement():int
  1056. /**
  1057. * Scans a character reference.
  1058. * <p>
  1059. * <pre>
  1060. * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
  1061. * </pre>
  1062. */
  1063. protected void scanCharReference()
  1064. throws IOException, XNIException {
  1065. fStringBuffer2.clear();
  1066. int ch = scanCharReferenceValue(fStringBuffer2, null);
  1067. fMarkupDepth--;
  1068. if (ch != -1) {
  1069. // call handler
  1070. if (fDocumentHandler != null) {
  1071. if (fNotifyCharRefs) {
  1072. fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
  1073. }
  1074. Augmentations augs = null;
  1075. if (fValidation && ch <= 0x20) {
  1076. augs = new AugmentationsImpl();
  1077. augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
  1078. }
  1079. fDocumentHandler.characters(fStringBuffer2, augs);
  1080. if (fNotifyCharRefs) {
  1081. fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
  1082. }
  1083. }
  1084. }
  1085. } // scanCharReference()
  1086. /**
  1087. * Scans an entity reference.
  1088. *
  1089. * @throws IOException Thrown if i/o error occurs.
  1090. * @throws XNIException Thrown if handler throws exception upon
  1091. * notification.
  1092. */
  1093. protected void scanEntityReference() throws IOException, XNIException {
  1094. // name
  1095. String name = fEntityScanner.scanName();
  1096. if (name == null) {
  1097. reportFatalError("NameRequiredInReference", null);
  1098. return;
  1099. }
  1100. // end
  1101. if (!fEntityScanner.skipChar(';')) {
  1102. reportFatalError("SemicolonRequiredInReference", new Object []{name});
  1103. }
  1104. fMarkupDepth--;
  1105. // handle built-in entities
  1106. if (name == fAmpSymbol) {
  1107. handleCharacter('&', fAmpSymbol);
  1108. }
  1109. else if (name == fLtSymbol) {
  1110. handleCharacter('<', fLtSymbol);
  1111. }
  1112. else if (name == fGtSymbol) {
  1113. handleCharacter('>', fGtSymbol);
  1114. }
  1115. else if (name == fQuotSymbol) {
  1116. handleCharacter('"', fQuotSymbol);
  1117. }
  1118. else if (name == fAposSymbol) {
  1119. handleCharacter('\'', fAposSymbol);
  1120. }
  1121. // start general entity
  1122. else if (fEntityManager.isUnparsedEntity(name)) {
  1123. reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
  1124. }
  1125. else {
  1126. if (!fEntityManager.isDeclaredEntity(name)) {
  1127. //REVISIT: one more case needs to be included: external PE and standalone is no
  1128. if ( fHasExternalDTD && !fStandalone) {
  1129. if (fValidation)
  1130. fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
  1131. new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
  1132. }
  1133. else
  1134. reportFatalError("EntityNotDeclared", new Object[]{name});
  1135. }
  1136. fEntityManager.startEntity(name, false);
  1137. }
  1138. } // scanEntityReference()
  1139. // utility methods
  1140. /**
  1141. * Calls document handler with a single character resulting from
  1142. * built-in entity resolution.
  1143. *
  1144. * @param c
  1145. * @param entity built-in name
  1146. */
  1147. private void handleCharacter(char c, String entity) throws XNIException {
  1148. if (fDocumentHandler != null) {
  1149. if (fNotifyBuiltInRefs) {
  1150. fDocumentHandler.startGeneralEntity(entity, null, null, null);
  1151. }
  1152. fSingleChar[0] = c;
  1153. fTempString.setValues(fSingleChar, 0, 1);
  1154. fDocumentHandler.characters(fTempString, null);
  1155. if (fNotifyBuiltInRefs) {
  1156. fDocumentHandler.endGeneralEntity(entity, null);
  1157. }
  1158. }
  1159. } // handleCharacter(char)
  1160. /**
  1161. * Handles the end element. This method will make sure that
  1162. * the end element name matches the current element and notify
  1163. * the handler about the end of the element and the end of any
  1164. * relevent prefix mappings.
  1165. * <p>
  1166. * <strong>Note:</strong> This method uses the fQName variable.
  1167. * The contents of this variable will be destroyed.
  1168. *
  1169. * @param element The element.
  1170. *
  1171. * @return The element depth.
  1172. *
  1173. * @throws XNIException Thrown if the handler throws a SAX exception
  1174. * upon notification.
  1175. *
  1176. */
  1177. // REVISIT: need to remove this method. It's not called anymore, because
  1178. // the handling is done when the end tag is scanned. - SG
  1179. protected int handleEndElement(QName element, boolean isEmpty)
  1180. throws XNIException {
  1181. fMarkupDepth--;
  1182. // check that this element was opened in the same entity
  1183. if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  1184. reportFatalError("ElementEntityMismatch",
  1185. new Object[]{fCurrentElement.rawname});
  1186. }
  1187. // make sure the elements match
  1188. QName startElement = fQName;
  1189. fElementStack.popElement(startElement);
  1190. if (element.rawname != startElement.rawname) {
  1191. reportFatalError("ETagRequired",
  1192. new Object[]{startElement.rawname});
  1193. }
  1194. // bind namespaces
  1195. if (fNamespaces) {
  1196. element.uri = startElement.uri;
  1197. }
  1198. // call handler
  1199. if (fDocumentHandler != null && !isEmpty) {
  1200. fDocumentHandler.endElement(element, null);
  1201. }
  1202. return fMarkupDepth;
  1203. } // callEndElement(QName,boolean):int
  1204. // helper methods
  1205. /**
  1206. * Sets the scanner state.
  1207. *
  1208. * @param state The new scanner state.
  1209. */
  1210. protected final void setScannerState(int state) {
  1211. fScannerState = state;
  1212. if (DEBUG_SCANNER_STATE) {
  1213. System.out.print("### setScannerState: ");
  1214. System.out.print(getScannerStateName(state));
  1215. System.out.println();
  1216. }
  1217. } // setScannerState(int)
  1218. /**
  1219. * Sets the dispatcher.
  1220. *
  1221. * @param dispatcher The new dispatcher.
  1222. */
  1223. protected final void setDispatcher(Dispatcher dispatcher) {
  1224. fDispatcher = dispatcher;
  1225. if (DEBUG_DISPATCHER) {
  1226. System.out.print("%%% setDispatcher: ");
  1227. System.out.print(getDispatcherName(dispatcher));
  1228. System.out.println();
  1229. }
  1230. }
  1231. //
  1232. // Private methods
  1233. //
  1234. /** Returns the scanner state name. */
  1235. protected String getScannerStateName(int state) {
  1236. switch (state) {
  1237. case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE";
  1238. case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT";
  1239. case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP";
  1240. case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT";
  1241. case SCANNER_STATE_PI: return "SCANNER_STATE_PI";
  1242. case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT";
  1243. case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE";
  1244. case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT";
  1245. case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED";
  1246. case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA";
  1247. case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL";
  1248. }
  1249. return "??? ("+state+')';
  1250. } // getScannerStateName(int):String
  1251. /** Returns the dispatcher name. */
  1252. public String getDispatcherName(Dispatcher dispatcher) {
  1253. if (DEBUG_DISPATCHER) {
  1254. if (dispatcher != null) {
  1255. String name = dispatcher.getClass().getName();
  1256. int index = name.lastIndexOf('.');
  1257. if (index != -1) {
  1258. name = name.substring(index + 1);
  1259. index = name.lastIndexOf('$');
  1260. if (index != -1) {
  1261. name = name.substring(index + 1);
  1262. }
  1263. }
  1264. return name;
  1265. }
  1266. }
  1267. return "null";
  1268. } // getDispatcherName():String
  1269. //
  1270. // Classes
  1271. //
  1272. /**
  1273. * Element stack. This stack operates without synchronization, error
  1274. * checking, and it re-uses objects instead of throwing popped items
  1275. * away.
  1276. *
  1277. * @author Andy Clark, IBM
  1278. */
  1279. protected static class ElementStack {
  1280. //
  1281. // Data
  1282. //
  1283. /** The stack data. */
  1284. protected QName[] fElements;
  1285. /** The size of the stack. */
  1286. protected int fSize;
  1287. //
  1288. // Constructors
  1289. //
  1290. /** Default constructor. */
  1291. public ElementStack() {
  1292. fElements = new QName[10];
  1293. for (int i = 0; i < fElements.length; i++) {
  1294. fElements[i] = new QName();
  1295. }
  1296. } // <init>()
  1297. //
  1298. // Public methods
  1299. //
  1300. /**
  1301. * Pushes an element on the stack.
  1302. * <p>
  1303. * <strong>Note:</strong> The QName values are copied into the
  1304. * stack. In other words, the caller does <em>not</em> orphan
  1305. * the element to the stack. Also, the QName object returned
  1306. * is <em>not</em> orphaned to the caller. It should be
  1307. * considered read-only.
  1308. *
  1309. * @param element The element to push onto the stack.
  1310. *
  1311. * @return Returns the actual QName object that stores the
  1312. */
  1313. public QName pushElement(QName element) {
  1314. if (fSize == fElements.length) {
  1315. QName[] array = new QName[fElements.length * 2];
  1316. System.arraycopy(fElements, 0, array, 0, fSize);
  1317. fElements = array;
  1318. for (int i = fSize; i < fElements.length; i++) {
  1319. fElements[i] = new QName();
  1320. }
  1321. }
  1322. fElements[fSize].setValues(element);
  1323. return fElements[fSize++];
  1324. } // pushElement(QName):QName
  1325. /**
  1326. * Pops an element off of the stack by setting the values of
  1327. * the specified QName.
  1328. * <p>
  1329. * <strong>Note:</strong> The object returned is <em>not</em>
  1330. * orphaned to the caller. Therefore, the caller should consider
  1331. * the object to be read-only.
  1332. */
  1333. public void popElement(QName element) {
  1334. element.setValues(fElements[--fSize]);
  1335. } // popElement(QName)
  1336. /** Clears the stack without throwing away existing QName objects. */
  1337. public void clear() {
  1338. fSize = 0;
  1339. } // clear()
  1340. } // class ElementStack
  1341. /**
  1342. * This interface defines an XML "event" dispatching model. Classes
  1343. * that implement this interface are responsible for scanning parts
  1344. * of the XML document and dispatching callbacks.
  1345. *
  1346. * @author Glenn Marcy, IBM
  1347. */
  1348. protected interface Dispatcher {
  1349. //
  1350. // Dispatcher methods
  1351. //
  1352. /**
  1353. * Dispatch an XML "event".
  1354. *
  1355. * @param complete True if this dispatcher is intended to scan
  1356. * and dispatch as much as possible.
  1357. *
  1358. * @return True if there is more to dispatch either from this
  1359. * or a another dispatcher.
  1360. *
  1361. * @throws IOException Thrown on i/o error.
  1362. * @throws XNIException Thrown on parse error.
  1363. */
  1364. public boolean dispatch(boolean complete)
  1365. throws IOException, XNIException;
  1366. } // interface Dispatcher
  1367. /**
  1368. * Dispatcher to handle content scanning.
  1369. *
  1370. * @author Andy Clark, IBM
  1371. * @author Eric Ye, IBM
  1372. */
  1373. protected class FragmentContentDispatcher
  1374. implements Dispatcher {
  1375. //
  1376. // Dispatcher methods
  1377. //
  1378. /**
  1379. * Dispatch an XML "event".
  1380. *
  1381. * @param complete True if this dispatcher is intended to scan
  1382. * and dispatch as much as possible.
  1383. *
  1384. * @return True if there is more to dispatch either from this
  1385. * or a another dispatcher.
  1386. *
  1387. * @throws IOException Thrown on i/o error.
  1388. * @throws XNIException Thrown on parse error.
  1389. */
  1390. public boolean dispatch(boolean complete)
  1391. throws IOException, XNIException {
  1392. try {
  1393. boolean again;
  1394. do {
  1395. again = false;
  1396. switch (fScannerState) {
  1397. case SCANNER_STATE_CONTENT: {
  1398. if (fEntityScanner.skipChar('<')) {
  1399. setScannerState(SCANNER_STATE_START_OF_MARKUP);
  1400. again = true;
  1401. }
  1402. else if (fEntityScanner.skipChar('&')) {
  1403. setScannerState(SCANNER_STATE_REFERENCE);
  1404. again = true;
  1405. }
  1406. else {
  1407. do {
  1408. int c = scanContent();
  1409. if (c == '<') {
  1410. fEntityScanner.scanChar();
  1411. setScannerState(SCANNER_STATE_START_OF_MARKUP);
  1412. break;
  1413. }
  1414. else if (c == '&') {
  1415. fEntityScanner.scanChar();
  1416. setScannerState(SCANNER_STATE_REFERENCE);
  1417. break;
  1418. }
  1419. else if (c != -1 && isInvalidLiteral(c)) {
  1420. if (XMLChar.isHighSurrogate(c)) {
  1421. // special case: surrogates
  1422. fStringBuffer.clear();
  1423. if (scanSurrogates(fStringBuffer)) {
  1424. // call handler
  1425. if (fDocumentHandler != null) {
  1426. fDocumentHandler.characters(fStringBuffer, null);
  1427. }
  1428. }
  1429. }
  1430. else {
  1431. reportFatalError("InvalidCharInContent",
  1432. new Object[] {
  1433. Integer.toString(c, 16)});
  1434. fEntityScanner.scanChar();
  1435. }
  1436. }
  1437. } while (complete);
  1438. }
  1439. break;
  1440. }
  1441. case SCANNER_STATE_START_OF_MARKUP: {
  1442. fMarkupDepth++;
  1443. if (fEntityScanner.skipChar('/')) {
  1444. if (scanEndElement() == 0) {
  1445. if (elementDepthIsZeroHook()) {
  1446. return true;
  1447. }
  1448. }
  1449. setScannerState(SCANNER_STATE_CONTENT);
  1450. }
  1451. else if (isValidNameStartChar(fEntityScanner.peekChar())) {
  1452. scanStartElement();
  1453. setScannerState(SCANNER_STATE_CONTENT);
  1454. }
  1455. else if (fEntityScanner.skipChar('!')) {
  1456. if (fEntityScanner.skipChar('-')) {
  1457. if (!fEntityScanner.skipChar('-')) {
  1458. reportFatalError("InvalidCommentStart",
  1459. null);
  1460. }
  1461. setScannerState(SCANNER_STATE_COMMENT);
  1462. again = true;
  1463. }
  1464. else if (fEntityScanner.skipString("[CDATA[")) {
  1465. setScannerState(SCANNER_STATE_CDATA);
  1466. again = true;
  1467. }
  1468. else if (!scanForDoctypeHook()) {
  1469. reportFatalError("MarkupNotRecognizedInContent",
  1470. null);
  1471. }
  1472. }
  1473. else if (fEntityScanner.skipChar('?')) {
  1474. setScannerState(SCANNER_STATE_PI);
  1475. again = true;
  1476. }
  1477. else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
  1478. scanStartElement();
  1479. setScannerState(SCANNER_STATE_CONTENT);
  1480. }
  1481. else {
  1482. reportFatalError("MarkupNotRecognizedInContent",
  1483. null);
  1484. setScannerState(SCANNER_STATE_CONTENT);
  1485. }
  1486. break;
  1487. }
  1488. case SCANNER_STATE_COMMENT: {
  1489. scanComment();
  1490. setScannerState(SCANNER_STATE_CONTENT);
  1491. break;
  1492. }
  1493. case SCANNER_STATE_PI: {
  1494. scanPI();
  1495. setScannerState(SCANNER_STATE_CONTENT);
  1496. break;
  1497. }
  1498. case SCANNER_STATE_CDATA: {
  1499. scanCDATASection(complete);
  1500. setScannerState(SCANNER_STATE_CONTENT);
  1501. break;
  1502. }
  1503. case SCANNER_STATE_REFERENCE: {
  1504. fMarkupDepth++;
  1505. // NOTE: We need to set the state beforehand
  1506. // because the XMLEntityHandler#startEntity
  1507. // callback could set the state to
  1508. // SCANNER_STATE_TEXT_DECL and we don't want
  1509. // to override that scanner state.
  1510. setScannerState(SCANNER_STATE_CONTENT);
  1511. if (fEntityScanner.skipChar('#')) {
  1512. scanCharReference();
  1513. }
  1514. else {
  1515. scanEntityReference();
  1516. }
  1517. break;
  1518. }
  1519. case SCANNER_STATE_TEXT_DECL: {
  1520. // scan text decl
  1521. if (fEntityScanner.skipString("<?xml")) {
  1522. fMarkupDepth++;
  1523. // NOTE: special case where entity starts with a PI
  1524. // whose name starts with "xml" (e.g. "xmlfoo")
  1525. if (isValidNameChar(fEntityScanner.peekChar())) {
  1526. fStringBuffer.clear();
  1527. fStringBuffer.append("xml");
  1528. if (fNamespaces) {
  1529. while (isValidNCName(fEntityScanner.peekChar())) {
  1530. fStringBuffer.append((char)fEntityScanner.scanChar());
  1531. }
  1532. }
  1533. else {
  1534. while (isValidNameChar(fEntityScanner.peekChar())) {
  1535. fStringBuffer.append((char)fEntityScanner.scanChar());
  1536. }
  1537. }
  1538. String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
  1539. scanPIData(target, fTempString);
  1540. }
  1541. // standard text declaration
  1542. else {
  1543. scanXMLDeclOrTextDecl(true);
  1544. }
  1545. }
  1546. // now that we've straightened out the readers, we can read in chunks:
  1547. fEntityManager.fCurrentEntity.mayReadChunks = true;
  1548. setScannerState(SCANNER_STATE_CONTENT);
  1549. break;
  1550. }
  1551. case SCANNER_STATE_ROOT_ELEMENT: {
  1552. if (scanRootElementHook()) {
  1553. return true;
  1554. }
  1555. setScannerState(SCANNER_STATE_CONTENT);
  1556. break;
  1557. }
  1558. case SCANNER_STATE_DOCTYPE: {
  1559. reportFatalError("DoctypeIllegalInContent",
  1560. null);
  1561. setScannerState(SCANNER_STATE_CONTENT);
  1562. }
  1563. }
  1564. } while (complete || again);
  1565. }
  1566. // encoding errors
  1567. catch (MalformedByteSequenceException e) {
  1568. fErrorReporter.reportError(e.getDomain(), e.getKey(),
  1569. e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
  1570. return false;
  1571. }
  1572. catch (CharConversionException e) {
  1573. reportFatalError("CharConversionFailure", null);
  1574. return false;
  1575. }
  1576. // premature end of file
  1577. catch (EOFException e) {
  1578. endOfFileHook(e);
  1579. return false;
  1580. }
  1581. return true;
  1582. } // dispatch(boolean):boolean
  1583. //
  1584. // Protected methods
  1585. //
  1586. // hooks
  1587. // NOTE: These hook methods are added so that the full document
  1588. // scanner can share the majority of code with this class.
  1589. /**
  1590. * Scan for DOCTYPE hook. This method is a hook for subclasses
  1591. * to add code to handle scanning for a the "DOCTYPE" string
  1592. * after the string "<!" has been scanned.
  1593. *
  1594. * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
  1595. * was not scanned.
  1596. */
  1597. protected boolean scanForDoctypeHook()
  1598. throws IOException, XNIException {
  1599. return false;
  1600. } // scanForDoctypeHook():boolean
  1601. /**
  1602. * Element depth iz zero. This methos is a hook for subclasses
  1603. * to add code to handle when the element depth hits zero. When
  1604. * scanning a document fragment, an element depth of zero is
  1605. * normal. However, when scanning a full XML document, the
  1606. * scanner must handle the trailing miscellanous section of
  1607. * the document after the end of the document's root element.
  1608. *
  1609. * @return True if the caller should stop and return true which
  1610. * allows the scanner to switch to a new scanning
  1611. * dispatcher. A return value of false indicates that
  1612. * the content dispatcher should continue as normal.
  1613. */
  1614. protected boolean elementDepthIsZeroHook()
  1615. throws IOException, XNIException {
  1616. return false;
  1617. } // elementDepthIsZeroHook():boolean
  1618. /**
  1619. * Scan for root element hook. This method is a hook for
  1620. * subclasses to add code that handles scanning for the root
  1621. * element. When scanning a document fragment, there is no
  1622. * "root" element. However, when scanning a full XML document,
  1623. * the scanner must handle the root element specially.
  1624. *
  1625. * @return True if the caller should stop and return true which
  1626. * allows the scanner to switch to a new scanning
  1627. * dispatcher. A return value of false indicates that
  1628. * the content dispatcher should continue as normal.
  1629. */
  1630. protected boolean scanRootElementHook()
  1631. throws IOException, XNIException {
  1632. return false;
  1633. } // scanRootElementHook():boolean
  1634. /**
  1635. * End of file hook. This method is a hook for subclasses to
  1636. * add code that handles the end of file. The end of file in
  1637. * a document fragment is OK if the markup depth is zero.
  1638. * However, when scanning a full XML document, an end of file
  1639. * is always premature.
  1640. */
  1641. protected void endOfFileHook(EOFException e)
  1642. throws IOException, XNIException {
  1643. // NOTE: An end of file is only only an error if we were
  1644. // in the middle of scanning some markup. -Ac
  1645. if (fMarkupDepth != 0) {
  1646. reportFatalError("PrematureEOF", null);
  1647. }
  1648. } // endOfFileHook()
  1649. } // class FragmentContentDispatcher
  1650. } // class XMLDocumentFragmentScannerImpl