1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2004 The Apache Software Foundation.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl;
  58. import java.io.IOException;
  59. import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  60. import com.sun.org.apache.xerces.internal.util.SymbolTable;
  61. import com.sun.org.apache.xerces.internal.util.XMLChar;
  62. import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl;
  63. import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  64. import com.sun.org.apache.xerces.internal.xni.Augmentations;
  65. import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
  66. import com.sun.org.apache.xerces.internal.xni.XMLString;
  67. import com.sun.org.apache.xerces.internal.xni.XNIException;
  68. import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
  69. import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  70. import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  71. /**
  72. * This class is responsible for holding scanning methods common to
  73. * scanning the XML document structure and content as well as the DTD
  74. * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
  75. * from this base class.
  76. *
  77. * <p>
  78. * This component requires the following features and properties from the
  79. * component manager that uses it:
  80. * <ul>
  81. * <li>http://xml.org/sax/features/validation</li>
  82. * <li>http://xml.org/sax/features/namespaces</li>
  83. * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
  84. * <li>http://apache.org/xml/properties/internal/symbol-table</li>
  85. * <li>http://apache.org/xml/properties/internal/error-reporter</li>
  86. * <li>http://apache.org/xml/properties/internal/entity-manager</li>
  87. * </ul>
  88. *
  89. * @author Andy Clark, IBM
  90. * @author Arnaud Le Hors, IBM
  91. * @author Eric Ye, IBM
  92. *
  93. * @version $Id: XMLScanner.java,v 1.48 2004/04/25 05:05:50 mrglavas Exp $
  94. */
  95. public abstract class XMLScanner
  96. implements XMLComponent {
  97. //
  98. // Constants
  99. //
  100. // feature identifiers
  101. /** Feature identifier: validation. */
  102. protected static final String VALIDATION =
  103. Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
  104. /** Feature identifier: namespaces. */
  105. protected static final String NAMESPACES =
  106. Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
  107. /** Feature identifier: notify character references. */
  108. protected static final String NOTIFY_CHAR_REFS =
  109. Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
  110. protected static final String PARSER_SETTINGS =
  111. Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
  112. // property identifiers
  113. /** Property identifier: symbol table. */
  114. protected static final String SYMBOL_TABLE =
  115. Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
  116. /** Property identifier: error reporter. */
  117. protected static final String ERROR_REPORTER =
  118. Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
  119. /** Property identifier: entity manager. */
  120. protected static final String ENTITY_MANAGER =
  121. Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
  122. // debugging
  123. /** Debug attribute normalization. */
  124. protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
  125. //
  126. // Data
  127. //
  128. // features
  129. /**
  130. * Validation. This feature identifier is:
  131. * http://xml.org/sax/features/validation
  132. */
  133. protected boolean fValidation = false;
  134. /** Namespaces. */
  135. protected boolean fNamespaces;
  136. /** Character references notification. */
  137. protected boolean fNotifyCharRefs = false;
  138. /** Internal parser-settings feature */
  139. protected boolean fParserSettings = true;
  140. // properties
  141. /** Symbol table. */
  142. protected SymbolTable fSymbolTable;
  143. /** Error reporter. */
  144. protected XMLErrorReporter fErrorReporter;
  145. /** Entity manager. */
  146. protected XMLEntityManager fEntityManager;
  147. // protected data
  148. /** Entity scanner. */
  149. protected XMLEntityScanner fEntityScanner;
  150. /** Entity depth. */
  151. protected int fEntityDepth;
  152. /** Literal value of the last character refence scanned. */
  153. protected String fCharRefLiteral = null;
  154. /** Scanning attribute. */
  155. protected boolean fScanningAttribute;
  156. /** Report entity boundary. */
  157. protected boolean fReportEntity;
  158. // symbols
  159. /** Symbol: "version". */
  160. protected final static String fVersionSymbol = "version".intern();
  161. /** Symbol: "encoding". */
  162. protected final static String fEncodingSymbol = "encoding".intern();
  163. /** Symbol: "standalone". */
  164. protected final static String fStandaloneSymbol = "standalone".intern();
  165. /** Symbol: "amp". */
  166. protected final static String fAmpSymbol = "amp".intern();
  167. /** Symbol: "lt". */
  168. protected final static String fLtSymbol = "lt".intern();
  169. /** Symbol: "gt". */
  170. protected final static String fGtSymbol = "gt".intern();
  171. /** Symbol: "quot". */
  172. protected final static String fQuotSymbol = "quot".intern();
  173. /** Symbol: "apos". */
  174. protected final static String fAposSymbol = "apos".intern();
  175. // temporary variables
  176. // NOTE: These objects are private to help prevent accidental modification
  177. // of values by a subclass. If there were protected *and* the sub-
  178. // modified the values, it would be difficult to track down the real
  179. // cause of the bug. By making these private, we avoid this
  180. // possibility.
  181. /** String. */
  182. private XMLString fString = new XMLString();
  183. /** String buffer. */
  184. private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  185. /** String buffer. */
  186. private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  187. /** String buffer. */
  188. private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
  189. // temporary location for Resource identification information.
  190. protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
  191. //
  192. // XMLComponent methods
  193. //
  194. /**
  195. *
  196. *
  197. * @param componentManager The component manager.
  198. *
  199. * @throws SAXException Throws exception if required features and
  200. * properties cannot be found.
  201. */
  202. public void reset(XMLComponentManager componentManager)
  203. throws XMLConfigurationException {
  204. try {
  205. fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
  206. } catch (XMLConfigurationException e) {
  207. fParserSettings = true;
  208. }
  209. if (!fParserSettings) {
  210. // parser settings have not been changed
  211. init();
  212. return;
  213. }
  214. // Xerces properties
  215. fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
  216. fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
  217. fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
  218. // sax features
  219. try {
  220. fValidation = componentManager.getFeature(VALIDATION);
  221. }
  222. catch (XMLConfigurationException e) {
  223. fValidation = false;
  224. }
  225. try {
  226. fNamespaces = componentManager.getFeature(NAMESPACES);
  227. }
  228. catch (XMLConfigurationException e) {
  229. fNamespaces = true;
  230. }
  231. try {
  232. fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
  233. }
  234. catch (XMLConfigurationException e) {
  235. fNotifyCharRefs = false;
  236. }
  237. init();
  238. } // reset(XMLComponentManager)
  239. /**
  240. * Sets the value of a property during parsing.
  241. *
  242. * @param propertyId
  243. * @param value
  244. */
  245. public void setProperty(String propertyId, Object value)
  246. throws XMLConfigurationException {
  247. // Xerces properties
  248. if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
  249. final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
  250. if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
  251. propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
  252. fSymbolTable = (SymbolTable)value;
  253. }
  254. else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
  255. propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
  256. fErrorReporter = (XMLErrorReporter)value;
  257. }
  258. else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
  259. propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
  260. fEntityManager = (XMLEntityManager)value;
  261. }
  262. }
  263. } // setProperty(String,Object)
  264. /*
  265. * Sets the feature of the scanner.
  266. */
  267. public void setFeature(String featureId, boolean value)
  268. throws XMLConfigurationException {
  269. if (VALIDATION.equals(featureId)) {
  270. fValidation = value;
  271. } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
  272. fNotifyCharRefs = value;
  273. }
  274. }
  275. /*
  276. * Gets the state of the feature of the scanner.
  277. */
  278. public boolean getFeature(String featureId)
  279. throws XMLConfigurationException {
  280. if (VALIDATION.equals(featureId)) {
  281. return fValidation;
  282. } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
  283. return fNotifyCharRefs;
  284. }
  285. throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
  286. }
  287. //
  288. // Protected methods
  289. //
  290. // anybody calling this had better have set Symtoltable!
  291. protected void reset() {
  292. init();
  293. // DTD preparsing defaults:
  294. fValidation = true;
  295. fNotifyCharRefs = false;
  296. }
  297. // common scanning methods
  298. /**
  299. * Scans an XML or text declaration.
  300. * <p>
  301. * <pre>
  302. * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  303. * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  304. * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  305. * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  306. * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  307. * | ('"' ('yes' | 'no') '"'))
  308. *
  309. * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  310. * </pre>
  311. *
  312. * @param scanningTextDecl True if a text declaration is to
  313. * be scanned instead of an XML
  314. * declaration.
  315. * @param pseudoAttributeValues An array of size 3 to return the version,
  316. * encoding and standalone pseudo attribute values
  317. * (in that order).
  318. *
  319. * <strong>Note:</strong> This method uses fString, anything in it
  320. * at the time of calling is lost.
  321. */
  322. protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  323. String[] pseudoAttributeValues)
  324. throws IOException, XNIException {
  325. // pseudo-attribute values
  326. String version = null;
  327. String encoding = null;
  328. String standalone = null;
  329. // scan pseudo-attributes
  330. final int STATE_VERSION = 0;
  331. final int STATE_ENCODING = 1;
  332. final int STATE_STANDALONE = 2;
  333. final int STATE_DONE = 3;
  334. int state = STATE_VERSION;
  335. boolean dataFoundForTarget = false;
  336. boolean sawSpace = fEntityScanner.skipDeclSpaces();
  337. // since pseudoattributes are *not* attributes,
  338. // their quotes don't need to be preserved in external parameter entities.
  339. // the XMLEntityScanner#scanLiteral method will continue to
  340. // emit -1 in such cases when it finds a quote; this is
  341. // fine for other methods that parse scanned entities,
  342. // but not for the scanning of pseudoattributes. So,
  343. // temporarily, we must mark the current entity as not being "literal"
  344. XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
  345. boolean currLiteral = currEnt.literal;
  346. currEnt.literal = false;
  347. while (fEntityScanner.peekChar() != '?') {
  348. dataFoundForTarget = true;
  349. String name = scanPseudoAttribute(scanningTextDecl, fString);
  350. switch (state) {
  351. case STATE_VERSION: {
  352. if (name == fVersionSymbol) {
  353. if (!sawSpace) {
  354. reportFatalError(scanningTextDecl
  355. ? "SpaceRequiredBeforeVersionInTextDecl"
  356. : "SpaceRequiredBeforeVersionInXMLDecl",
  357. null);
  358. }
  359. version = fString.toString();
  360. state = STATE_ENCODING;
  361. if (!versionSupported(version)) {
  362. reportFatalError(getVersionNotSupportedKey(),
  363. new Object[]{version});
  364. }
  365. }
  366. else if (name == fEncodingSymbol) {
  367. if (!scanningTextDecl) {
  368. reportFatalError("VersionInfoRequired", null);
  369. }
  370. if (!sawSpace) {
  371. reportFatalError(scanningTextDecl
  372. ? "SpaceRequiredBeforeEncodingInTextDecl"
  373. : "SpaceRequiredBeforeEncodingInXMLDecl",
  374. null);
  375. }
  376. encoding = fString.toString();
  377. state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  378. }
  379. else {
  380. if (scanningTextDecl) {
  381. reportFatalError("EncodingDeclRequired", null);
  382. }
  383. else {
  384. reportFatalError("VersionInfoRequired", null);
  385. }
  386. }
  387. break;
  388. }
  389. case STATE_ENCODING: {
  390. if (name == fEncodingSymbol) {
  391. if (!sawSpace) {
  392. reportFatalError(scanningTextDecl
  393. ? "SpaceRequiredBeforeEncodingInTextDecl"
  394. : "SpaceRequiredBeforeEncodingInXMLDecl",
  395. null);
  396. }
  397. encoding = fString.toString();
  398. state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  399. // TODO: check encoding name; set encoding on
  400. // entity scanner
  401. }
  402. else if (!scanningTextDecl && name == fStandaloneSymbol) {
  403. if (!sawSpace) {
  404. reportFatalError("SpaceRequiredBeforeStandalone",
  405. null);
  406. }
  407. standalone = fString.toString();
  408. state = STATE_DONE;
  409. if (!standalone.equals("yes") && !standalone.equals("no")) {
  410. reportFatalError("SDDeclInvalid", new Object[] {standalone});
  411. }
  412. }
  413. else {
  414. reportFatalError("EncodingDeclRequired", null);
  415. }
  416. break;
  417. }
  418. case STATE_STANDALONE: {
  419. if (name == fStandaloneSymbol) {
  420. if (!sawSpace) {
  421. reportFatalError("SpaceRequiredBeforeStandalone",
  422. null);
  423. }
  424. standalone = fString.toString();
  425. state = STATE_DONE;
  426. if (!standalone.equals("yes") && !standalone.equals("no")) {
  427. reportFatalError("SDDeclInvalid", new Object[] {standalone});
  428. }
  429. }
  430. else {
  431. reportFatalError("EncodingDeclRequired", null);
  432. }
  433. break;
  434. }
  435. default: {
  436. reportFatalError("NoMorePseudoAttributes", null);
  437. }
  438. }
  439. sawSpace = fEntityScanner.skipDeclSpaces();
  440. }
  441. // restore original literal value
  442. if(currLiteral)
  443. currEnt.literal = true;
  444. // REVISIT: should we remove this error reporting?
  445. if (scanningTextDecl && state != STATE_DONE) {
  446. reportFatalError("MorePseudoAttributes", null);
  447. }
  448. // If there is no data in the xml or text decl then we fail to report error
  449. // for version or encoding info above.
  450. if (scanningTextDecl) {
  451. if (!dataFoundForTarget && encoding == null) {
  452. reportFatalError("EncodingDeclRequired", null);
  453. }
  454. }
  455. else {
  456. if (!dataFoundForTarget && version == null) {
  457. reportFatalError("VersionInfoRequired", null);
  458. }
  459. }
  460. // end
  461. if (!fEntityScanner.skipChar('?')) {
  462. reportFatalError("XMLDeclUnterminated", null);
  463. }
  464. if (!fEntityScanner.skipChar('>')) {
  465. reportFatalError("XMLDeclUnterminated", null);
  466. }
  467. // fill in return array
  468. pseudoAttributeValues[0] = version;
  469. pseudoAttributeValues[1] = encoding;
  470. pseudoAttributeValues[2] = standalone;
  471. } // scanXMLDeclOrTextDecl(boolean)
  472. /**
  473. * Scans a pseudo attribute.
  474. *
  475. * @param scanningTextDecl True if scanning this pseudo-attribute for a
  476. * TextDecl; false if scanning XMLDecl. This
  477. * flag is needed to report the correct type of
  478. * error.
  479. * @param value The string to fill in with the attribute
  480. * value.
  481. *
  482. * @return The name of the attribute
  483. *
  484. * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
  485. * at the time of calling is lost.
  486. */
  487. public String scanPseudoAttribute(boolean scanningTextDecl,
  488. XMLString value)
  489. throws IOException, XNIException {
  490. // REVISIT: This method is used for generic scanning of
  491. // pseudo attributes, but since there are only three such
  492. // attributes: version, encoding, and standalone there are
  493. // for performant ways of scanning them. Every decl must
  494. // have a version, and in TextDecls this version must
  495. // be followed by an encoding declaration. Also the
  496. // methods we invoke on the scanners allow non-ASCII
  497. // characters to be parsed in the decls, but since
  498. // we don't even know what the actual encoding of the
  499. // document is until we scan the encoding declaration
  500. // you cannot reliably read any characters outside
  501. // of the ASCII range here. -- mrglavas
  502. String name = fEntityScanner.scanName();
  503. XMLEntityManager.print(fEntityManager.getCurrentEntity());
  504. if (name == null) {
  505. reportFatalError("PseudoAttrNameExpected", null);
  506. }
  507. fEntityScanner.skipDeclSpaces();
  508. if (!fEntityScanner.skipChar('=')) {
  509. reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
  510. : "EqRequiredInXMLDecl", new Object[]{name});
  511. }
  512. fEntityScanner.skipDeclSpaces();
  513. int quote = fEntityScanner.peekChar();
  514. if (quote != '\'' && quote != '"') {
  515. reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
  516. : "QuoteRequiredInXMLDecl" , new Object[]{name});
  517. }
  518. fEntityScanner.scanChar();
  519. int c = fEntityScanner.scanLiteral(quote, value);
  520. if (c != quote) {
  521. fStringBuffer2.clear();
  522. do {
  523. fStringBuffer2.append(value);
  524. if (c != -1) {
  525. if (c == '&' || c == '%' || c == '<' || c == ']') {
  526. fStringBuffer2.append((char)fEntityScanner.scanChar());
  527. }
  528. // REVISIT: Even if you could reliably read non-ASCII chars
  529. // why bother scanning for surrogates here? Only ASCII chars
  530. // match the productions in XMLDecls and TextDecls. -- mrglavas
  531. else if (XMLChar.isHighSurrogate(c)) {
  532. scanSurrogates(fStringBuffer2);
  533. }
  534. else if (isInvalidLiteral(c)) {
  535. String key = scanningTextDecl
  536. ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
  537. reportFatalError(key,
  538. new Object[] {Integer.toString(c, 16)});
  539. fEntityScanner.scanChar();
  540. }
  541. }
  542. c = fEntityScanner.scanLiteral(quote, value);
  543. } while (c != quote);
  544. fStringBuffer2.append(value);
  545. value.setValues(fStringBuffer2);
  546. }
  547. if (!fEntityScanner.skipChar(quote)) {
  548. reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
  549. : "CloseQuoteMissingInXMLDecl",
  550. new Object[]{name});
  551. }
  552. // return
  553. return name;
  554. } // scanPseudoAttribute(XMLString):String
  555. /**
  556. * Scans a processing instruction.
  557. * <p>
  558. * <pre>
  559. * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  560. * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  561. * </pre>
  562. * <strong>Note:</strong> This method uses fString, anything in it
  563. * at the time of calling is lost.
  564. */
  565. protected void scanPI() throws IOException, XNIException {
  566. // target
  567. fReportEntity = false;
  568. String target = null;
  569. if(fNamespaces) {
  570. target = fEntityScanner.scanNCName();
  571. } else {
  572. target = fEntityScanner.scanName();
  573. }
  574. if (target == null) {
  575. reportFatalError("PITargetRequired", null);
  576. }
  577. // scan data
  578. scanPIData(target, fString);
  579. fReportEntity = true;
  580. } // scanPI()
  581. /**
  582. * Scans a processing data. This is needed to handle the situation
  583. * where a document starts with a processing instruction whose
  584. * target name <em>starts with</em> "xml". (e.g. xmlfoo)
  585. *
  586. * <strong>Note:</strong> This method uses fStringBuffer, anything in it
  587. * at the time of calling is lost.
  588. *
  589. * @param target The PI target
  590. * @param data The string to fill in with the data
  591. */
  592. protected void scanPIData(String target, XMLString data)
  593. throws IOException, XNIException {
  594. // check target
  595. if (target.length() == 3) {
  596. char c0 = Character.toLowerCase(target.charAt(0));
  597. char c1 = Character.toLowerCase(target.charAt(1));
  598. char c2 = Character.toLowerCase(target.charAt(2));
  599. if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
  600. reportFatalError("ReservedPITarget", null);
  601. }
  602. }
  603. // spaces
  604. if (!fEntityScanner.skipSpaces()) {
  605. if (fEntityScanner.skipString("?>")) {
  606. // we found the end, there is no data
  607. data.clear();
  608. return;
  609. }
  610. else {
  611. if(fNamespaces && fEntityScanner.peekChar() == ':') {
  612. fEntityScanner.scanChar();
  613. XMLStringBuffer colonName = new XMLStringBuffer(target);
  614. colonName.append(":");
  615. String str = fEntityScanner.scanName();
  616. if (str != null)
  617. colonName.append(str);
  618. reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
  619. fEntityScanner.skipSpaces();
  620. } else {
  621. // if there is data there should be some space
  622. reportFatalError("SpaceRequiredInPI", null);
  623. }
  624. }
  625. }
  626. fStringBuffer.clear();
  627. // data
  628. if (fEntityScanner.scanData("?>", fStringBuffer)) {
  629. do {
  630. int c = fEntityScanner.peekChar();
  631. if (c != -1) {
  632. if (XMLChar.isHighSurrogate(c)) {
  633. scanSurrogates(fStringBuffer);
  634. }
  635. else if (isInvalidLiteral(c)) {
  636. reportFatalError("InvalidCharInPI",
  637. new Object[]{Integer.toHexString(c)});
  638. fEntityScanner.scanChar();
  639. }
  640. }
  641. } while (fEntityScanner.scanData("?>", fStringBuffer));
  642. }
  643. data.setValues(fStringBuffer);
  644. } // scanPIData(String,XMLString)
  645. /**
  646. * Scans a comment.
  647. * <p>
  648. * <pre>
  649. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  650. * </pre>
  651. * <p>
  652. * <strong>Note:</strong> Called after scanning past '<!--'
  653. * <strong>Note:</strong> This method uses fString, anything in it
  654. * at the time of calling is lost.
  655. *
  656. * @param text The buffer to fill in with the text.
  657. */
  658. protected void scanComment(XMLStringBuffer text)
  659. throws IOException, XNIException {
  660. // text
  661. // REVISIT: handle invalid character, eof
  662. text.clear();
  663. while (fEntityScanner.scanData("--", text)) {
  664. int c = fEntityScanner.peekChar();
  665. if (c != -1) {
  666. if (XMLChar.isHighSurrogate(c)) {
  667. scanSurrogates(text);
  668. }
  669. else if (isInvalidLiteral(c)) {
  670. reportFatalError("InvalidCharInComment",
  671. new Object[] { Integer.toHexString(c) });
  672. fEntityScanner.scanChar();
  673. }
  674. }
  675. }
  676. if (!fEntityScanner.skipChar('>')) {
  677. reportFatalError("DashDashInComment", null);
  678. }
  679. } // scanComment()
  680. /**
  681. * Scans an attribute value and normalizes whitespace converting all
  682. * whitespace characters to space characters.
  683. *
  684. * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
  685. *
  686. * @param value The XMLString to fill in with the value.
  687. * @param nonNormalizedValue The XMLString to fill in with the
  688. * non-normalized value.
  689. * @param atName The name of the attribute being parsed (for error msgs).
  690. * @param checkEntities true if undeclared entities should be reported as VC violation,
  691. * false if undeclared entities should be reported as WFC violation.
  692. * @param eleName The name of element to which this attribute belongs.
  693. *
  694. * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
  695. * at the time of calling is lost.
  696. **/
  697. protected void scanAttributeValue(XMLString value,
  698. XMLString nonNormalizedValue,
  699. String atName,
  700. boolean checkEntities,String eleName)
  701. throws IOException, XNIException
  702. {
  703. // quote
  704. int quote = fEntityScanner.peekChar();
  705. if (quote != '\'' && quote != '"') {
  706. reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
  707. }
  708. fEntityScanner.scanChar();
  709. int entityDepth = fEntityDepth;
  710. int c = fEntityScanner.scanLiteral(quote, value);
  711. if (DEBUG_ATTR_NORMALIZATION) {
  712. System.out.println("** scanLiteral -> \""
  713. + value.toString() + "\"");
  714. }
  715. fStringBuffer2.clear();
  716. fStringBuffer2.append(value);
  717. normalizeWhitespace(value);
  718. if (DEBUG_ATTR_NORMALIZATION) {
  719. System.out.println("** normalizeWhitespace -> \""
  720. + value.toString() + "\"");
  721. }
  722. if (c != quote) {
  723. fScanningAttribute = true;
  724. fStringBuffer.clear();
  725. do {
  726. fStringBuffer.append(value);
  727. if (DEBUG_ATTR_NORMALIZATION) {
  728. System.out.println("** value2: \""
  729. + fStringBuffer.toString() + "\"");
  730. }
  731. if (c == '&') {
  732. fEntityScanner.skipChar('&');
  733. if (entityDepth == fEntityDepth) {
  734. fStringBuffer2.append('&');
  735. }
  736. if (fEntityScanner.skipChar('#')) {
  737. if (entityDepth == fEntityDepth) {
  738. fStringBuffer2.append('#');
  739. }
  740. int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
  741. if (ch != -1) {
  742. if (DEBUG_ATTR_NORMALIZATION) {
  743. System.out.println("** value3: \""
  744. + fStringBuffer.toString()
  745. + "\"");
  746. }
  747. }
  748. }
  749. else {
  750. String entityName = fEntityScanner.scanName();
  751. if (entityName == null) {
  752. reportFatalError("NameRequiredInReference", null);
  753. }
  754. else if (entityDepth == fEntityDepth) {
  755. fStringBuffer2.append(entityName);
  756. }
  757. if (!fEntityScanner.skipChar(';')) {
  758. reportFatalError("SemicolonRequiredInReference",
  759. new Object []{entityName});
  760. }
  761. else if (entityDepth == fEntityDepth) {
  762. fStringBuffer2.append(';');
  763. }
  764. if (entityName == fAmpSymbol) {
  765. fStringBuffer.append('&');
  766. if (DEBUG_ATTR_NORMALIZATION) {
  767. System.out.println("** value5: \""
  768. + fStringBuffer.toString()
  769. + "\"");
  770. }
  771. }
  772. else if (entityName == fAposSymbol) {
  773. fStringBuffer.append('\'');
  774. if (DEBUG_ATTR_NORMALIZATION) {
  775. System.out.println("** value7: \""
  776. + fStringBuffer.toString()
  777. + "\"");
  778. }
  779. }
  780. else if (entityName == fLtSymbol) {
  781. fStringBuffer.append('<');
  782. if (DEBUG_ATTR_NORMALIZATION) {
  783. System.out.println("** value9: \""
  784. + fStringBuffer.toString()
  785. + "\"");
  786. }
  787. }
  788. else if (entityName == fGtSymbol) {
  789. fStringBuffer.append('>');
  790. if (DEBUG_ATTR_NORMALIZATION) {
  791. System.out.println("** valueB: \""
  792. + fStringBuffer.toString()
  793. + "\"");
  794. }
  795. }
  796. else if (entityName == fQuotSymbol) {
  797. fStringBuffer.append('"');
  798. if (DEBUG_ATTR_NORMALIZATION) {
  799. System.out.println("** valueD: \""
  800. + fStringBuffer.toString()
  801. + "\"");
  802. }
  803. }
  804. else {
  805. if (fEntityManager.isExternalEntity(entityName)) {
  806. reportFatalError("ReferenceToExternalEntity",
  807. new Object[] { entityName });
  808. }
  809. else {
  810. if (!fEntityManager.isDeclaredEntity(entityName)) {
  811. //WFC & VC: Entity Declared
  812. if (checkEntities) {
  813. if (fValidation) {
  814. fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  815. "EntityNotDeclared",
  816. new Object[]{entityName},
  817. XMLErrorReporter.SEVERITY_ERROR);
  818. }
  819. }
  820. else {
  821. reportFatalError("EntityNotDeclared",
  822. new Object[]{entityName});
  823. }
  824. }
  825. fEntityManager.startEntity(entityName, true);
  826. }
  827. }
  828. }
  829. }
  830. else if (c == '<') {
  831. reportFatalError("LessthanInAttValue",
  832. new Object[] { eleName, atName });
  833. fEntityScanner.scanChar();
  834. if (entityDepth == fEntityDepth) {
  835. fStringBuffer2.append((char)c);
  836. }
  837. }
  838. else if (c == '%' || c == ']') {
  839. fEntityScanner.scanChar();
  840. fStringBuffer.append((char)c);
  841. if (entityDepth == fEntityDepth) {
  842. fStringBuffer2.append((char)c);
  843. }
  844. if (DEBUG_ATTR_NORMALIZATION) {
  845. System.out.println("** valueF: \""
  846. + fStringBuffer.toString() + "\"");
  847. }
  848. }
  849. else if (c == '\n' || c == '\r') {
  850. fEntityScanner.scanChar();
  851. fStringBuffer.append(' ');
  852. if (entityDepth == fEntityDepth) {
  853. fStringBuffer2.append('\n');
  854. }
  855. }
  856. else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  857. fStringBuffer3.clear();
  858. if (scanSurrogates(fStringBuffer3)) {
  859. fStringBuffer.append(fStringBuffer3);
  860. if (entityDepth == fEntityDepth) {
  861. fStringBuffer2.append(fStringBuffer3);
  862. }
  863. if (DEBUG_ATTR_NORMALIZATION) {
  864. System.out.println("** valueI: \""
  865. + fStringBuffer.toString()
  866. + "\"");
  867. }
  868. }
  869. }
  870. else if (c != -1 && isInvalidLiteral(c)) {
  871. reportFatalError("InvalidCharInAttValue",
  872. new Object[] {eleName, atName, Integer.toString(c, 16)});
  873. fEntityScanner.scanChar();
  874. if (entityDepth == fEntityDepth) {
  875. fStringBuffer2.append((char)c);
  876. }
  877. }
  878. c = fEntityScanner.scanLiteral(quote, value);
  879. if (entityDepth == fEntityDepth) {
  880. fStringBuffer2.append(value);
  881. }
  882. normalizeWhitespace(value);
  883. } while (c != quote || entityDepth != fEntityDepth);
  884. fStringBuffer.append(value);
  885. if (DEBUG_ATTR_NORMALIZATION) {
  886. System.out.println("** valueN: \""
  887. + fStringBuffer.toString() + "\"");
  888. }
  889. value.setValues(fStringBuffer);
  890. fScanningAttribute = false;
  891. }
  892. nonNormalizedValue.setValues(fStringBuffer2);
  893. // quote
  894. int cquote = fEntityScanner.scanChar();
  895. if (cquote != quote) {
  896. reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
  897. }
  898. } // scanAttributeValue()
  899. /**
  900. * Scans External ID and return the public and system IDs.
  901. *
  902. * @param identifiers An array of size 2 to return the system id,
  903. * and public id (in that order).
  904. * @param optionalSystemId Specifies whether the system id is optional.
  905. *
  906. * <strong>Note:</strong> This method uses fString and fStringBuffer,
  907. * anything in them at the time of calling is lost.
  908. */
  909. protected void scanExternalID(String[] identifiers,
  910. boolean optionalSystemId)
  911. throws IOException, XNIException {
  912. String systemId = null;
  913. String publicId = null;
  914. if (fEntityScanner.skipString("PUBLIC")) {
  915. if (!fEntityScanner.skipSpaces()) {
  916. reportFatalError("SpaceRequiredAfterPUBLIC", null);
  917. }
  918. scanPubidLiteral(fString);
  919. publicId = fString.toString();
  920. if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
  921. reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
  922. }
  923. }
  924. if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
  925. if (publicId == null && !fEntityScanner.skipSpaces()) {
  926. reportFatalError("SpaceRequiredAfterSYSTEM", null);
  927. }
  928. int quote = fEntityScanner.peekChar();
  929. if (quote != '\'' && quote != '"') {
  930. if (publicId != null && optionalSystemId) {
  931. // looks like we don't have any system id
  932. // simply return the public id
  933. identifiers[0] = null;
  934. identifiers[1] = publicId;
  935. return;
  936. }
  937. reportFatalError("QuoteRequiredInSystemID", null);
  938. }
  939. fEntityScanner.scanChar();
  940. XMLString ident = fString;
  941. if (fEntityScanner.scanLiteral(quote, ident) != quote) {
  942. fStringBuffer.clear();
  943. do {
  944. fStringBuffer.append(ident);
  945. int c = fEntityScanner.peekChar();
  946. if (XMLChar.isMarkup(c) || c == ']') {
  947. fStringBuffer.append((char)fEntityScanner.scanChar());
  948. }
  949. } while (fEntityScanner.scanLiteral(quote, ident) != quote);
  950. fStringBuffer.append(ident);
  951. ident = fStringBuffer;
  952. }
  953. systemId = ident.toString();
  954. if (!fEntityScanner.skipChar(quote)) {
  955. reportFatalError("SystemIDUnterminated", null);
  956. }
  957. }
  958. // store result in array
  959. identifiers[0] = systemId;
  960. identifiers[1] = publicId;
  961. }
  962. /**
  963. * Scans public ID literal.
  964. *
  965. * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  966. * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
  967. *
  968. * The returned string is normalized according to the following rule,
  969. * from http://www.w3.org/TR/REC-xml#dt-pubid:
  970. *
  971. * Before a match is attempted, all strings of white space in the public
  972. * identifier must be normalized to single space characters (#x20), and
  973. * leading and trailing white space must be removed.
  974. *
  975. * @param literal The string to fill in with the public ID literal.
  976. * @return True on success.
  977. *
  978. * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
  979. * the time of calling is lost.
  980. */
  981. protected boolean scanPubidLiteral(XMLString literal)
  982. throws IOException, XNIException
  983. {
  984. int quote = fEntityScanner.scanChar();
  985. if (quote != '\'' && quote != '"') {
  986. reportFatalError("QuoteRequiredInPublicID", null);
  987. return false;
  988. }
  989. fStringBuffer.clear();
  990. // skip leading whitespace
  991. boolean skipSpace = true;
  992. boolean dataok = true;
  993. while (true) {
  994. int c = fEntityScanner.scanChar();
  995. if (c == ' ' || c == '\n' || c == '\r') {
  996. if (!skipSpace) {
  997. // take the first whitespace as a space and skip the others
  998. fStringBuffer.append(' ');
  999. skipSpace = true;
  1000. }
  1001. }
  1002. else if (c == quote) {
  1003. if (skipSpace) {
  1004. // if we finished on a space let's trim it
  1005. fStringBuffer.length--;
  1006. }
  1007. literal.setValues(fStringBuffer);
  1008. break;
  1009. }
  1010. else if (XMLChar.isPubid(c)) {
  1011. fStringBuffer.append((char)c);
  1012. skipSpace = false;
  1013. }
  1014. else if (c == -1) {
  1015. reportFatalError("PublicIDUnterminated", null);
  1016. return false;
  1017. }
  1018. else {
  1019. dataok = false;
  1020. reportFatalError("InvalidCharInPublicID",
  1021. new Object[]{Integer.toHexString(c)});
  1022. }
  1023. }
  1024. return dataok;
  1025. }
  1026. /**
  1027. * Normalize whitespace in an XMLString converting all whitespace
  1028. * characters to space characters.
  1029. */
  1030. protected void normalizeWhitespace(XMLString value) {
  1031. int end = value.offset + value.length;
  1032. for (int i = value.offset; i < end; i++) {
  1033. int c = value.ch[i];
  1034. // Performance: For XML 1.0 documents take advantage of
  1035. // the fact that the only legal characters below 0x20
  1036. // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
  1037. // already determined the well-formedness of these
  1038. // characters it is sufficient (and safe) to check
  1039. // against 0x20. -- mrglavas
  1040. if (c < 0x20) {
  1041. value.ch[i] = ' ';
  1042. }
  1043. }
  1044. }
  1045. //
  1046. // XMLEntityHandler methods
  1047. //
  1048. /**
  1049. * This method notifies of the start of an entity. The document entity
  1050. * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
  1051. * parameter entity names start with '%'; and general entities are just
  1052. * specified by their name.
  1053. *
  1054. * @param name The name of the entity.
  1055. * @param identifier The resource identifier.
  1056. * @param encoding The auto-detected IANA encoding name of the entity
  1057. * stream. This value will be null in those situations
  1058. * where the entity encoding is not auto-detected (e.g.
  1059. * internal entities or a document entity that is
  1060. * parsed from a java.io.Reader).
  1061. * @param augs Additional information that may include infoset augmentations
  1062. *
  1063. * @throws XNIException Thrown by handler to signal an error.
  1064. */
  1065. public void startEntity(String name,
  1066. XMLResourceIdentifier identifier,
  1067. String encoding, Augmentations augs) throws XNIException {
  1068. // keep track of the entity depth
  1069. fEntityDepth++;
  1070. // must reset entity scanner
  1071. fEntityScanner = fEntityManager.getEntityScanner();
  1072. } // startEntity(String,XMLResourceIdentifier,String)
  1073. /**
  1074. * This method notifies the end of an entity. The document entity has
  1075. * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
  1076. * parameter entity names start with '%'; and general entities are just
  1077. * specified by their name.
  1078. *
  1079. * @param name The name of the entity.
  1080. * @param augs Additional information that may include infoset augmentations
  1081. *
  1082. * @throws XNIException Thrown by handler to signal an error.
  1083. */
  1084. public void endEntity(String name, Augmentations augs) throws XNIException {
  1085. // keep track of the entity depth
  1086. fEntityDepth--;
  1087. } // endEntity(String)
  1088. /**
  1089. * Scans a character reference and append the corresponding chars to the
  1090. * specified buffer.
  1091. *
  1092. * <p>
  1093. * <pre>
  1094. * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
  1095. * </pre>
  1096. *
  1097. * <strong>Note:</strong> This method uses fStringBuffer, anything in it
  1098. * at the time of calling is lost.
  1099. *
  1100. * @param buf the character buffer to append chars to
  1101. * @param buf2 the character buffer to append non-normalized chars to
  1102. *
  1103. * @return the character value or (-1) on conversion failure
  1104. */
  1105. protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
  1106. throws IOException, XNIException {
  1107. // scan hexadecimal value
  1108. boolean hex = false;
  1109. if (fEntityScanner.skipChar('x')) {
  1110. if (buf2 != null) { buf2.append('x'); }
  1111. hex = true;
  1112. fStringBuffer3.clear();
  1113. boolean digit = true;
  1114. int c = fEntityScanner.peekChar();
  1115. digit = (c >= '0' && c <= '9') ||
  1116. (c >= 'a' && c <= 'f') ||
  1117. (c >= 'A' && c <= 'F');
  1118. if (digit) {
  1119. if (buf2 != null) { buf2.append((char)c); }
  1120. fEntityScanner.scanChar();
  1121. fStringBuffer3.append((char)c);
  1122. do {
  1123. c = fEntityScanner.peekChar();
  1124. digit = (c >= '0' && c <= '9') ||
  1125. (c >= 'a' && c <= 'f') ||
  1126. (c >= 'A' && c <= 'F');
  1127. if (digit) {
  1128. if (buf2 != null) { buf2.append((char)c); }
  1129. fEntityScanner.scanChar();
  1130. fStringBuffer3.append((char)c);
  1131. }
  1132. } while (digit);
  1133. }
  1134. else {
  1135. reportFatalError("HexdigitRequiredInCharRef", null);
  1136. }
  1137. }
  1138. // scan decimal value
  1139. else {
  1140. fStringBuffer3.clear();
  1141. boolean digit = true;
  1142. int c = fEntityScanner.peekChar();
  1143. digit = c >= '0' && c <= '9';
  1144. if (digit) {
  1145. if (buf2 != null) { buf2.append((char)c); }
  1146. fEntityScanner.scanChar();
  1147. fStringBuffer3.append((char)c);
  1148. do {
  1149. c = fEntityScanner.peekChar();
  1150. digit = c >= '0' && c <= '9';
  1151. if (digit) {
  1152. if (buf2 != null) { buf2.append((char)c); }
  1153. fEntityScanner.scanChar();
  1154. fStringBuffer3.append((char)c);
  1155. }
  1156. } while (digit);
  1157. }
  1158. else {
  1159. reportFatalError("DigitRequiredInCharRef", null);
  1160. }
  1161. }
  1162. // end
  1163. if (!fEntityScanner.skipChar(';')) {
  1164. reportFatalError("SemicolonRequiredInCharRef", null);
  1165. }
  1166. if (buf2 != null) { buf2.append(';'); }
  1167. // convert string to number
  1168. int value = -1;
  1169. try {
  1170. value = Integer.parseInt(fStringBuffer3.toString(),
  1171. hex ? 16 : 10);
  1172. // character reference must be a valid XML character
  1173. if (isInvalid(value)) {
  1174. StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
  1175. if (hex) errorBuf.append('x');
  1176. errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
  1177. reportFatalError("InvalidCharRef",
  1178. new Object[]{errorBuf.toString()});
  1179. }
  1180. }
  1181. catch (NumberFormatException e) {
  1182. // Conversion failed, let -1 value drop through.
  1183. // If we end up here, the character reference was invalid.
  1184. StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
  1185. if (hex) errorBuf.append('x');
  1186. errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
  1187. reportFatalError("InvalidCharRef",
  1188. new Object[]{errorBuf.toString()});
  1189. }
  1190. // append corresponding chars to the given buffer
  1191. if (!XMLChar.isSupplemental(value)) {
  1192. buf.append((char) value);
  1193. }
  1194. else {
  1195. // character is supplemental, split it into surrogate chars
  1196. buf.append(XMLChar.highSurrogate(value));
  1197. buf.append(XMLChar.lowSurrogate(value));
  1198. }
  1199. // char refs notification code
  1200. if (fNotifyCharRefs && value != -1) {
  1201. String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
  1202. if (!fScanningAttribute) {
  1203. fCharRefLiteral = literal;
  1204. }
  1205. }
  1206. return value;
  1207. }
  1208. // returns true if the given character is not
  1209. // valid with respect to the version of
  1210. // XML understood by this scanner.
  1211. protected boolean isInvalid(int value) {
  1212. return (XMLChar.isInvalid(value));
  1213. } // isInvalid(int): boolean
  1214. // returns true if the given character is not
  1215. // valid or may not be used outside a character reference
  1216. // with respect to the version of XML understood by this scanner.
  1217. protected boolean isInvalidLiteral(int value) {
  1218. return (XMLChar.isInvalid(value));
  1219. } // isInvalidLiteral(int): boolean
  1220. // returns true if the given character is
  1221. // a valid nameChar with respect to the version of
  1222. // XML understood by this scanner.
  1223. protected boolean isValidNameChar(int value) {
  1224. return (XMLChar.isName(value));
  1225. } // isValidNameChar(int): boolean
  1226. // returns true if the given character is
  1227. // a valid nameStartChar with respect to the version of
  1228. // XML understood by this scanner.
  1229. protected boolean isValidNameStartChar(int value) {
  1230. return (XMLChar.isNameStart(value));
  1231. } // isValidNameStartChar(int): boolean
  1232. // returns true if the given character is
  1233. // a valid NCName character with respect to the version of
  1234. // XML understood by this scanner.
  1235. protected boolean isValidNCName(int value) {
  1236. return (XMLChar.isNCName(value));
  1237. } // isValidNCName(int): boolean
  1238. // returns true if the given character is
  1239. // a valid high surrogate for a nameStartChar
  1240. // with respect to the version of XML understood
  1241. // by this scanner.
  1242. protected boolean isValidNameStartHighSurrogate(int value) {
  1243. return false;
  1244. } // isValidNameStartHighSurrogate(int): boolean
  1245. protected boolean versionSupported(String version ) {
  1246. return version.equals("1.0");
  1247. } // version Supported
  1248. // returns the error message key for unsupported
  1249. // versions of XML with respect to the version of
  1250. // XML understood by this scanner.
  1251. protected String getVersionNotSupportedKey () {
  1252. return "VersionNotSupported";
  1253. } // getVersionNotSupportedKey: String
  1254. /**
  1255. * Scans surrogates and append them to the specified buffer.
  1256. * <p>
  1257. * <strong>Note:</strong> This assumes the current char has already been
  1258. * identified as a high surrogate.
  1259. *
  1260. * @param buf The StringBuffer to append the read surrogates to.
  1261. * @return True if it succeeded.
  1262. */
  1263. protected boolean scanSurrogates(XMLStringBuffer buf)
  1264. throws IOException, XNIException {
  1265. int high = fEntityScanner.scanChar();
  1266. int low = fEntityScanner.peekChar();
  1267. if (!XMLChar.isLowSurrogate(low)) {
  1268. reportFatalError("InvalidCharInContent",
  1269. new Object[] {Integer.toString(high, 16)});
  1270. return false;
  1271. }
  1272. fEntityScanner.scanChar();
  1273. // convert surrogates to supplemental character
  1274. int c = XMLChar.supplemental((char)high, (char)low);
  1275. // supplemental character must be a valid XML character
  1276. if (isInvalid(c)) {
  1277. reportFatalError("InvalidCharInContent",
  1278. new Object[]{Integer.toString(c, 16)});
  1279. return false;
  1280. }
  1281. // fill in the buffer
  1282. buf.append((char)high);
  1283. buf.append((char)low);
  1284. return true;
  1285. } // scanSurrogates():boolean
  1286. /**
  1287. * Convenience function used in all XML scanners.
  1288. */
  1289. protected void reportFatalError(String msgId, Object[] args)
  1290. throws XNIException {
  1291. fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  1292. msgId, args,
  1293. XMLErrorReporter.SEVERITY_FATAL_ERROR);
  1294. }
  1295. // private methods
  1296. private void init() {
  1297. fEntityScanner = null;
  1298. // initialize vars
  1299. fEntityDepth = 0;
  1300. fReportEntity = true;
  1301. fResourceIdentifier.clear();
  1302. }
  1303. } // class XMLScanner