1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2003 The Apache Software Foundation.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl;
  58. import java.io.EOFException;
  59. import java.io.IOException;
  60. import java.util.Locale;
  61. import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  62. import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  63. import com.sun.org.apache.xerces.internal.util.SymbolTable;
  64. import com.sun.org.apache.xerces.internal.util.XMLChar;
  65. import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  66. import com.sun.org.apache.xerces.internal.xni.QName;
  67. import com.sun.org.apache.xerces.internal.xni.XMLLocator;
  68. import com.sun.org.apache.xerces.internal.xni.XMLString;
  69. /**
  70. * Implements the entity scanner methods.
  71. *
  72. * @author Andy Clark, IBM
  73. * @author Neil Graham, IBM
  74. * @version $Id: XMLEntityScanner.java,v 1.19 2003/11/13 18:45:59 mrglavas Exp $
  75. */
  76. public class XMLEntityScanner implements XMLLocator {
  77. // constants
  // Compile-time switch guarding the "$$$ ..." trace output in setEncoding.
  78. private static final boolean DEBUG_ENCODINGS = false;
  // Compile-time switch guarding the buffer-state trace output printed on
  // entry to and exit from the peek/scan methods.
  79. private static final boolean DEBUG_BUFFER = false;
  80. //
  81. // Data
  82. //
  // NOTE(review): not referenced in the portion of the class visible here;
  // presumably the owning entity manager - confirm against the rest of the file.
  83. private XMLEntityManager fEntityManager = null;
  // Entity currently being scanned. The scan methods read and update its
  // character buffer (ch, position, count) and its lineNumber/columnNumber.
  84. protected XMLEntityManager.ScannedEntity fCurrentEntity = null;
  // Table through which every scanned name/token is turned into a symbol
  // (see the addSymbol calls in the scanXXX methods).
  85. protected SymbolTable fSymbolTable = null;
  // Initialised from XMLEntityManager.DEFAULT_BUFFER_SIZE.
  // NOTE(review): not used in the portion of the class visible here.
  86. protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
  87. /**
  88. * Error reporter. This property identifier is:
  89. * http://apache.org/xml/properties/internal/error-reporter
  * <p>
  * Used by scanQName to report the fatal "IllegalQName" error.
  90. */
  91. protected XMLErrorReporter fErrorReporter;
  92. //
  93. // Constructors
  94. //
  95. /** Default constructor. The body is empty, so every field keeps the value given by its initializer. */
  96. public XMLEntityScanner() {
  97. } // <init>()
  98. //
  99. // XMLEntityScanner methods
  100. //
  101. /**
  102. * Returns the base system identifier of the currently scanned
  103. * entity, or null if none is available.
  104. */
  105. public String getBaseSystemId() {
  106. return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
  107. } // getBaseSystemId():String
  108. /**
  109. * Sets the encoding of the scanner. This method is used by the
  110. * scanners if the XMLDecl or TextDecl line contains an encoding
  111. * pseudo-attribute.
  112. * <p>
  113. * <strong>Note:</strong> The underlying character reader on the
  114. * current entity will be changed to accomodate the new encoding.
  115. * However, the new encoding is ignored if the current reader was
  116. * not constructed from an input stream (e.g. an external entity
  117. * that is resolved directly to the appropriate java.io.Reader
  118. * object).
  119. *
  120. * @param encoding The IANA encoding name of the new encoding.
  121. *
  122. * @throws IOException Thrown if the new encoding is not supported.
  123. *
  124. * @see com.sun.org.apache.xerces.internal.util.EncodingMap
  125. */
  126. public void setEncoding(String encoding) throws IOException {
  127. if (DEBUG_ENCODINGS) {
  128. System.out.println("$$$ setEncoding: "+encoding);
  129. }
  130. if (fCurrentEntity.stream != null) {
  131. // if the encoding is the same, don't change the reader and
  132. // re-use the original reader used by the OneCharReader
  133. // NOTE: Besides saving an object, this overcomes deficiencies
  134. // in the UTF-16 reader supplied with the standard Java
  135. // distribution (up to and including 1.3). The UTF-16
  136. // decoder buffers 8K blocks even when only asked to read
  137. // a single char! -Ac
  138. if (fCurrentEntity.encoding == null ||
  139. !fCurrentEntity.encoding.equals(encoding)) {
  140. // UTF-16 is a bit of a special case. If the encoding is UTF-16,
  141. // and we know the endian-ness, we shouldn't change readers.
  142. // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
  143. // the endian-ness from the encoding we presently have.
  144. if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
  145. String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
  146. if(ENCODING.equals("UTF-16")) return;
  147. if(ENCODING.equals("ISO-10646-UCS-4")) {
  148. if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  149. fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
  150. } else {
  151. fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
  152. }
  153. return;
  154. }
  155. if(ENCODING.equals("ISO-10646-UCS-2")) {
  156. if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  157. fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
  158. } else {
  159. fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
  160. }
  161. return;
  162. }
  163. }
  164. // wrap a new reader around the input stream, changing
  165. // the encoding
  166. if (DEBUG_ENCODINGS) {
  167. System.out.println("$$$ creating new reader from stream: "+
  168. fCurrentEntity.stream);
  169. }
  170. //fCurrentEntity.stream.reset();
  171. fCurrentEntity.setReader(fCurrentEntity.stream, encoding, null);
  172. fCurrentEntity.encoding = encoding;
  173. } else {
  174. if (DEBUG_ENCODINGS)
  175. System.out.println("$$$ reusing old reader on stream");
  176. }
  177. }
  178. } // setEncoding(String)
  179. /** Returns true if the current entity being scanned is external. */
  180. public boolean isExternal() {
  181. return fCurrentEntity.isExternal();
  182. } // isExternal():boolean
  183. /**
  184. * Returns the next character on the input.
  185. * <p>
  186. * <strong>Note:</strong> The character is <em>not</em> consumed.
  187. *
  188. * @throws IOException Thrown if i/o error occurs.
  189. * @throws EOFException Thrown on end of file.
  190. */
  191. public int peekChar() throws IOException {
  192. if (DEBUG_BUFFER) {
  193. System.out.print("(peekChar: ");
  194. XMLEntityManager.print(fCurrentEntity);
  195. System.out.println();
  196. }
  197. // load more characters, if needed
  198. if (fCurrentEntity.position == fCurrentEntity.count) {
  199. load(0, true);
  200. }
  201. // peek at character
  202. int c = fCurrentEntity.ch[fCurrentEntity.position];
  203. // return peeked character
  204. if (DEBUG_BUFFER) {
  205. System.out.print(")peekChar: ");
  206. XMLEntityManager.print(fCurrentEntity);
  207. if (fCurrentEntity.isExternal()) {
  208. System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
  209. }
  210. else {
  211. System.out.println(" -> '"+(char)c+"'");
  212. }
  213. }
  214. if (fCurrentEntity.isExternal()) {
  215. return c != '\r' ? c : '\n';
  216. }
  217. else {
  218. return c;
  219. }
  220. } // peekChar():int
  221. /**
  222. * Returns the next character on the input.
  223. * <p>
  224. * <strong>Note:</strong> The character is consumed.
  225. *
  226. * @throws IOException Thrown if i/o error occurs.
  227. * @throws EOFException Thrown on end of file.
  228. */
  229. public int scanChar() throws IOException {
  230. if (DEBUG_BUFFER) {
  231. System.out.print("(scanChar: ");
  232. XMLEntityManager.print(fCurrentEntity);
  233. System.out.println();
  234. }
  235. // load more characters, if needed
  236. if (fCurrentEntity.position == fCurrentEntity.count) {
  237. load(0, true);
  238. }
  239. // scan character
  240. int c = fCurrentEntity.ch[fCurrentEntity.position++];
  241. boolean external = false;
  242. if (c == '\n' ||
  243. (c == '\r' && (external = fCurrentEntity.isExternal()))) {
  244. fCurrentEntity.lineNumber++;
  245. fCurrentEntity.columnNumber = 1;
  246. if (fCurrentEntity.position == fCurrentEntity.count) {
  247. fCurrentEntity.ch[0] = (char)c;
  248. load(1, false);
  249. }
  250. if (c == '\r' && external) {
  251. if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
  252. fCurrentEntity.position--;
  253. }
  254. c = '\n';
  255. }
  256. }
  257. // return character that was scanned
  258. if (DEBUG_BUFFER) {
  259. System.out.print(")scanChar: ");
  260. XMLEntityManager.print(fCurrentEntity);
  261. System.out.println(" -> '"+(char)c+"'");
  262. }
  263. fCurrentEntity.columnNumber++;
  264. return c;
  265. } // scanChar():int
  266. /**
  267. * Returns a string matching the NMTOKEN production appearing immediately
  268. * on the input as a symbol, or null if NMTOKEN Name string is present.
  269. * <p>
  270. * <strong>Note:</strong> The NMTOKEN characters are consumed.
  271. * <p>
  272. * <strong>Note:</strong> The string returned must be a symbol. The
  273. * SymbolTable can be used for this purpose.
  274. *
  275. * @throws IOException Thrown if i/o error occurs.
  276. * @throws EOFException Thrown on end of file.
  277. *
  278. * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  279. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  280. */
  281. public String scanNmtoken() throws IOException {
  282. if (DEBUG_BUFFER) {
  283. System.out.print("(scanNmtoken: ");
  284. XMLEntityManager.print(fCurrentEntity);
  285. System.out.println();
  286. }
  287. // load more characters, if needed
  288. if (fCurrentEntity.position == fCurrentEntity.count) {
  289. load(0, true);
  290. }
  291. // scan nmtoken
  292. int offset = fCurrentEntity.position;
  293. while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  294. if (++fCurrentEntity.position == fCurrentEntity.count) {
  295. int length = fCurrentEntity.position - offset;
  296. if (length == fCurrentEntity.ch.length) {
  297. // bad luck we have to resize our buffer
  298. char[] tmp = new char[fCurrentEntity.ch.length << 1];
  299. System.arraycopy(fCurrentEntity.ch, offset,
  300. tmp, 0, length);
  301. fCurrentEntity.ch = tmp;
  302. }
  303. else {
  304. System.arraycopy(fCurrentEntity.ch, offset,
  305. fCurrentEntity.ch, 0, length);
  306. }
  307. offset = 0;
  308. if (load(length, false)) {
  309. break;
  310. }
  311. }
  312. }
  313. int length = fCurrentEntity.position - offset;
  314. fCurrentEntity.columnNumber += length;
  315. // return nmtoken
  316. String symbol = null;
  317. if (length > 0) {
  318. symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  319. }
  320. if (DEBUG_BUFFER) {
  321. System.out.print(")scanNmtoken: ");
  322. XMLEntityManager.print(fCurrentEntity);
  323. System.out.println(" -> "+String.valueOf(symbol));
  324. }
  325. return symbol;
  326. } // scanNmtoken():String
  327. /**
  328. * Returns a string matching the Name production appearing immediately
  329. * on the input as a symbol, or null if no Name string is present.
  330. * <p>
  331. * <strong>Note:</strong> The Name characters are consumed.
  332. * <p>
  333. * <strong>Note:</strong> The string returned must be a symbol. The
  334. * SymbolTable can be used for this purpose.
  335. *
  336. * @throws IOException Thrown if i/o error occurs.
  337. * @throws EOFException Thrown on end of file.
  338. *
  339. * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  340. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  341. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
  342. */
  343. public String scanName() throws IOException {
  344. if (DEBUG_BUFFER) {
  345. System.out.print("(scanName: ");
  346. XMLEntityManager.print(fCurrentEntity);
  347. System.out.println();
  348. }
  349. // load more characters, if needed
  350. if (fCurrentEntity.position == fCurrentEntity.count) {
  351. load(0, true);
  352. }
  353. // scan name
  354. int offset = fCurrentEntity.position;
  355. if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  356. if (++fCurrentEntity.position == fCurrentEntity.count) {
  357. fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  358. offset = 0;
  359. if (load(1, false)) {
  360. fCurrentEntity.columnNumber++;
  361. String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  362. if (DEBUG_BUFFER) {
  363. System.out.print(")scanName: ");
  364. XMLEntityManager.print(fCurrentEntity);
  365. System.out.println(" -> "+String.valueOf(symbol));
  366. }
  367. return symbol;
  368. }
  369. }
  370. while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  371. if (++fCurrentEntity.position == fCurrentEntity.count) {
  372. int length = fCurrentEntity.position - offset;
  373. if (length == fCurrentEntity.ch.length) {
  374. // bad luck we have to resize our buffer
  375. char[] tmp = new char[fCurrentEntity.ch.length << 1];
  376. System.arraycopy(fCurrentEntity.ch, offset,
  377. tmp, 0, length);
  378. fCurrentEntity.ch = tmp;
  379. }
  380. else {
  381. System.arraycopy(fCurrentEntity.ch, offset,
  382. fCurrentEntity.ch, 0, length);
  383. }
  384. offset = 0;
  385. if (load(length, false)) {
  386. break;
  387. }
  388. }
  389. }
  390. }
  391. int length = fCurrentEntity.position - offset;
  392. fCurrentEntity.columnNumber += length;
  393. // return name
  394. String symbol = null;
  395. if (length > 0) {
  396. symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  397. }
  398. if (DEBUG_BUFFER) {
  399. System.out.print(")scanName: ");
  400. XMLEntityManager.print(fCurrentEntity);
  401. System.out.println(" -> "+String.valueOf(symbol));
  402. }
  403. return symbol;
  404. } // scanName():String
  405. /**
  406. * Returns a string matching the NCName production appearing immediately
  407. * on the input as a symbol, or null if no NCName string is present.
  408. * <p>
  409. * <strong>Note:</strong> The NCName characters are consumed.
  410. * <p>
  411. * <strong>Note:</strong> The string returned must be a symbol. The
  412. * SymbolTable can be used for this purpose.
  413. *
  414. * @throws IOException Thrown if i/o error occurs.
  415. * @throws EOFException Thrown on end of file.
  416. *
  417. * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  418. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNCName
  419. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNCNameStart
  420. */
  421. public String scanNCName() throws IOException {
  422. if (DEBUG_BUFFER) {
  423. System.out.print("(scanNCName: ");
  424. XMLEntityManager.print(fCurrentEntity);
  425. System.out.println();
  426. }
  427. // load more characters, if needed
  428. if (fCurrentEntity.position == fCurrentEntity.count) {
  429. load(0, true);
  430. }
  431. // scan name
  432. int offset = fCurrentEntity.position;
  433. if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
  434. if (++fCurrentEntity.position == fCurrentEntity.count) {
  435. fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  436. offset = 0;
  437. if (load(1, false)) {
  438. fCurrentEntity.columnNumber++;
  439. String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  440. if (DEBUG_BUFFER) {
  441. System.out.print(")scanNCName: ");
  442. XMLEntityManager.print(fCurrentEntity);
  443. System.out.println(" -> "+String.valueOf(symbol));
  444. }
  445. return symbol;
  446. }
  447. }
  448. while (XMLChar.isNCName(fCurrentEntity.ch[fCurrentEntity.position])) {
  449. if (++fCurrentEntity.position == fCurrentEntity.count) {
  450. int length = fCurrentEntity.position - offset;
  451. if (length == fCurrentEntity.ch.length) {
  452. // bad luck we have to resize our buffer
  453. char[] tmp = new char[fCurrentEntity.ch.length << 1];
  454. System.arraycopy(fCurrentEntity.ch, offset,
  455. tmp, 0, length);
  456. fCurrentEntity.ch = tmp;
  457. }
  458. else {
  459. System.arraycopy(fCurrentEntity.ch, offset,
  460. fCurrentEntity.ch, 0, length);
  461. }
  462. offset = 0;
  463. if (load(length, false)) {
  464. break;
  465. }
  466. }
  467. }
  468. }
  469. int length = fCurrentEntity.position - offset;
  470. fCurrentEntity.columnNumber += length;
  471. // return name
  472. String symbol = null;
  473. if (length > 0) {
  474. symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  475. }
  476. if (DEBUG_BUFFER) {
  477. System.out.print(")scanNCName: ");
  478. XMLEntityManager.print(fCurrentEntity);
  479. System.out.println(" -> "+String.valueOf(symbol));
  480. }
  481. return symbol;
  482. } // scanNCName():String
  483. /**
  484. * Scans a qualified name from the input, setting the fields of the
  485. * QName structure appropriately.
  486. * <p>
  487. * <strong>Note:</strong> The qualified name characters are consumed.
  488. * <p>
  489. * <strong>Note:</strong> The strings used to set the values of the
  490. * QName structure must be symbols. The SymbolTable can be used for
  491. * this purpose.
  492. *
  493. * @param qname The qualified name structure to fill.
  494. *
  495. * @return Returns true if a qualified name appeared immediately on
  496. * the input and was scanned, false otherwise.
  497. *
  498. * @throws IOException Thrown if i/o error occurs.
  499. * @throws EOFException Thrown on end of file.
  500. *
  501. * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  502. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  503. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
  504. */
  505. public boolean scanQName(QName qname) throws IOException {
  506. if (DEBUG_BUFFER) {
  507. System.out.print("(scanQName, "+qname+": ");
  508. XMLEntityManager.print(fCurrentEntity);
  509. System.out.println();
  510. }
  511. // load more characters, if needed
  512. if (fCurrentEntity.position == fCurrentEntity.count) {
  513. load(0, true);
  514. }
  515. // scan qualified name
  516. int offset = fCurrentEntity.position;
  517. if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
  518. if (++fCurrentEntity.position == fCurrentEntity.count) {
  519. fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  520. offset = 0;
  521. if (load(1, false)) {
  522. fCurrentEntity.columnNumber++;
  523. String name =
  524. fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  525. qname.setValues(null, name, name, null);
  526. if (DEBUG_BUFFER) {
  527. System.out.print(")scanQName, "+qname+": ");
  528. XMLEntityManager.print(fCurrentEntity);
  529. System.out.println(" -> true");
  530. }
  531. return true;
  532. }
  533. }
  534. int index = -1;
  535. while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  536. char c = fCurrentEntity.ch[fCurrentEntity.position];
  537. if (c == ':') {
  538. if (index != -1) {
  539. break;
  540. }
  541. index = fCurrentEntity.position;
  542. }
  543. if (++fCurrentEntity.position == fCurrentEntity.count) {
  544. int length = fCurrentEntity.position - offset;
  545. if (length == fCurrentEntity.ch.length) {
  546. // bad luck we have to resize our buffer
  547. char[] tmp = new char[fCurrentEntity.ch.length << 1];
  548. System.arraycopy(fCurrentEntity.ch, offset,
  549. tmp, 0, length);
  550. fCurrentEntity.ch = tmp;
  551. }
  552. else {
  553. System.arraycopy(fCurrentEntity.ch, offset,
  554. fCurrentEntity.ch, 0, length);
  555. }
  556. if (index != -1) {
  557. index = index - offset;
  558. }
  559. offset = 0;
  560. if (load(length, false)) {
  561. break;
  562. }
  563. }
  564. }
  565. int length = fCurrentEntity.position - offset;
  566. fCurrentEntity.columnNumber += length;
  567. if (length > 0) {
  568. String prefix = null;
  569. String localpart = null;
  570. String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
  571. offset, length);
  572. if (index != -1) {
  573. int prefixLength = index - offset;
  574. prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
  575. offset, prefixLength);
  576. int len = length - prefixLength - 1;
  577. int startLocal = index +1;
  578. if (!XMLChar.isNCNameStart(fCurrentEntity.ch[startLocal])){
  579. fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  580. "IllegalQName",
  581. null,
  582. XMLErrorReporter.SEVERITY_FATAL_ERROR);
  583. }
  584. localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
  585. startLocal, len);
  586. }
  587. else {
  588. localpart = rawname;
  589. }
  590. qname.setValues(prefix, localpart, rawname, null);
  591. if (DEBUG_BUFFER) {
  592. System.out.print(")scanQName, "+qname+": ");
  593. XMLEntityManager.print(fCurrentEntity);
  594. System.out.println(" -> true");
  595. }
  596. return true;
  597. }
  598. }
  599. // no qualified name found
  600. if (DEBUG_BUFFER) {
  601. System.out.print(")scanQName, "+qname+": ");
  602. XMLEntityManager.print(fCurrentEntity);
  603. System.out.println(" -> false");
  604. }
  605. return false;
  606. } // scanQName(QName):boolean
  607. /**
  608. * Scans a range of parsed character data, setting the fields of the
  609. * XMLString structure, appropriately.
  610. * <p>
  611. * <strong>Note:</strong> The characters are consumed.
  612. * <p>
  613. * <strong>Note:</strong> This method does not guarantee to return
  614. * the longest run of parsed character data. This method may return
  615. * before markup due to reaching the end of the input buffer or any
  616. * other reason.
  617. * <p>
  618. * <strong>Note:</strong> The fields contained in the XMLString
  619. * structure are not guaranteed to remain valid upon subsequent calls
  620. * to the entity scanner. Therefore, the caller is responsible for
  621. * immediately using the returned character data or making a copy of
  622. * the character data.
  * <p>
  * <strong>Note:</strong> A run of line breaks at the start of the
  * scanned range is normalized in place: the buffer positions that are
  * reported for it are overwritten with '\n' before the range is handed
  * to the XMLString structure.
  623. *
  624. * @param content The content structure to fill.
  625. *
  626. * @return Returns the next character on the input, if known. This
  627. * value may be -1 but this does <em>not</em> designate
  628. * end of file.
  629. *
  630. * @throws IOException Thrown if i/o error occurs.
  631. * @throws EOFException Thrown on end of file.
  632. */
  633. public int scanContent(XMLString content) throws IOException {
  634. if (DEBUG_BUFFER) {
  635. System.out.print("(scanContent: ");
  636. XMLEntityManager.print(fCurrentEntity);
  637. System.out.println();
  638. }
  639. // load more characters, if needed
  640. if (fCurrentEntity.position == fCurrentEntity.count) {
  641. load(0, true);
  642. }
  // Exactly one unread character is left: copy it to index 0, call
  // load(1, false) and restart scanning at index 0.
  // NOTE(review): load is defined outside this view; presumably it fills
  // the buffer starting at the given offset - confirm.
  643. else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  644. fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  645. load(1, false);
  646. fCurrentEntity.position = 0;
  647. }
  648. // normalize newlines
  649. int offset = fCurrentEntity.position;
  650. int c = fCurrentEntity.ch[offset];
  // newlines counts the line-break characters consumed below; it is used
  // as the load offset when the buffer runs out and is subtracted from the
  // column advance at the end of the method.
  651. int newlines = 0;
  652. boolean external = fCurrentEntity.isExternal();
  // '\r' only counts as a line break when the entity is external
  653. if (c == '\n' || (c == '\r' && external)) {
  654. if (DEBUG_BUFFER) {
  655. System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
  656. XMLEntityManager.print(fCurrentEntity);
  657. System.out.println();
  658. }
  // Consume consecutive line breaks; every one bumps lineNumber and resets
  // columnNumber to 1. The loop stops at the first other character or when
  // only one buffered character is left.
  659. do {
  660. c = fCurrentEntity.ch[fCurrentEntity.position++];
  661. if (c == '\r' && external) {
  662. newlines++;
  663. fCurrentEntity.lineNumber++;
  664. fCurrentEntity.columnNumber = 1;
  // buffer exhausted: restart the range at index 0, reserve the first
  // `newlines` slots for the breaks seen so far and load behind them
  665. if (fCurrentEntity.position == fCurrentEntity.count) {
  666. offset = 0;
  667. fCurrentEntity.position = newlines;
  668. if (load(newlines, false)) {
  669. break;
  670. }
  671. }
  // CR directly followed by LF: skip the LF and move offset forward as
  // well, so the pair contributes a single position to the reported range
  672. if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  673. fCurrentEntity.position++;
  674. offset++;
  675. }
  676. /*** NEWLINE NORMALIZATION ***/
  // NOTE(review): for a CR that is not followed by LF, newlines is
  // incremented a second time here - confirm this is intended.
  677. else {
  678. newlines++;
  679. }
  680. }
  681. else if (c == '\n') {
  682. newlines++;
  683. fCurrentEntity.lineNumber++;
  684. fCurrentEntity.columnNumber = 1;
  // same buffer-exhausted handling as in the CR branch
  685. if (fCurrentEntity.position == fCurrentEntity.count) {
  686. offset = 0;
  687. fCurrentEntity.position = newlines;
  688. if (load(newlines, false)) {
  689. break;
  690. }
  691. }
  692. }
  // not a line break: un-read the character and leave the loop
  693. else {
  694. fCurrentEntity.position--;
  695. break;
  696. }
  697. } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  // overwrite every position of the consumed run with '\n'
  698. for (int i = offset; i < fCurrentEntity.position; i++) {
  699. fCurrentEntity.ch[i] = '\n';
  700. }
  701. int length = fCurrentEntity.position - offset;
  // Stopped with exactly one buffered character left: report just the
  // line breaks and return -1 (next character not known).
  702. if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  703. content.setValues(fCurrentEntity.ch, offset, length);
  704. if (DEBUG_BUFFER) {
  705. System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  706. XMLEntityManager.print(fCurrentEntity);
  707. System.out.println();
  708. }
  709. return -1;
  710. }
  711. if (DEBUG_BUFFER) {
  712. System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  713. XMLEntityManager.print(fCurrentEntity);
  714. System.out.println();
  715. }
  716. }
  717. // inner loop, scanning for content
  // advances until the buffer ends or XMLChar.isContent rejects a
  // character; the rejected character is left unread
  718. while (fCurrentEntity.position < fCurrentEntity.count) {
  719. c = fCurrentEntity.ch[fCurrentEntity.position++];
  720. if (!XMLChar.isContent(c)) {
  721. fCurrentEntity.position--;
  722. break;
  723. }
  724. }
  725. int length = fCurrentEntity.position - offset;
  // the line breaks already reset columnNumber, so only the remaining
  // characters advance the column
  726. fCurrentEntity.columnNumber += length - newlines;
  727. content.setValues(fCurrentEntity.ch, offset, length);
  728. // return next character
  729. if (fCurrentEntity.position != fCurrentEntity.count) {
  730. c = fCurrentEntity.ch[fCurrentEntity.position];
  731. // REVISIT: Does this need to be updated to fix the
  732. // #x0D ^#x0A newline normalization problem? -Ac
  733. if (c == '\r' && external) {
  734. c = '\n';
  735. }
  736. }
  // buffer exhausted: the next character is not known
  737. else {
  738. c = -1;
  739. }
  740. if (DEBUG_BUFFER) {
  741. System.out.print(")scanContent: ");
  742. XMLEntityManager.print(fCurrentEntity);
  743. System.out.println(" -> '"+(char)c+"'");
  744. }
  745. return c;
  746. } // scanContent(XMLString):int
  747. /**
  748. * Scans a range of attribute value data, setting the fields of the
  749. * XMLString structure, appropriately.
  750. * <p>
  751. * <strong>Note:</strong> The characters are consumed.
  752. * <p>
  753. * <strong>Note:</strong> This method does not guarantee to return
  754. * the longest run of attribute value data. This method may return
  755. * before the quote character due to reaching the end of the input
  756. * buffer or any other reason.
  757. * <p>
  758. * <strong>Note:</strong> The fields contained in the XMLString
  759. * structure are not guaranteed to remain valid upon subsequent calls
  760. * to the entity scanner. Therefore, the caller is responsible for
  761. * immediately using the returned character data or making a copy of
  762. * the character data.
  * <p>
  * <strong>Note:</strong> A run of line breaks at the start of the
  * scanned range is normalized in place: the buffer positions that are
  * reported for it are overwritten with '\n' before the range is handed
  * to the XMLString structure.
  763. *
  764. * @param quote The quote character that signifies the end of the
  765. * attribute value data.
  766. * @param content The content structure to fill.
  767. *
  768. * @return Returns the next character on the input, if known. This
  769. * value may be -1 but this does <em>not</em> designate
  770. * end of file.
  771. *
  772. * @throws IOException Thrown if i/o error occurs.
  773. * @throws EOFException Thrown on end of file.
  774. */
  775. public int scanLiteral(int quote, XMLString content)
  776. throws IOException {
  777. if (DEBUG_BUFFER) {
  778. System.out.print("(scanLiteral, '"+(char)quote+"': ");
  779. XMLEntityManager.print(fCurrentEntity);
  780. System.out.println();
  781. }
  782. // load more characters, if needed
  783. if (fCurrentEntity.position == fCurrentEntity.count) {
  784. load(0, true);
  785. }
  // Exactly one unread character is left: copy it to index 0, call
  // load(1, false) and restart scanning at index 0.
  // NOTE(review): load is defined outside this view; presumably it fills
  // the buffer starting at the given offset - confirm.
  786. else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  787. fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  788. load(1, false);
  789. fCurrentEntity.position = 0;
  790. }
  791. // normalize newlines
  792. int offset = fCurrentEntity.position;
  793. int c = fCurrentEntity.ch[offset];
  // newlines counts the line-break characters consumed below; it is used
  // as the load offset when the buffer runs out and is subtracted from the
  // column advance at the end of the method.
  794. int newlines = 0;
  795. boolean external = fCurrentEntity.isExternal();
  // '\r' only counts as a line break when the entity is external
  796. if (c == '\n' || (c == '\r' && external)) {
  797. if (DEBUG_BUFFER) {
  798. System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
  799. XMLEntityManager.print(fCurrentEntity);
  800. System.out.println();
  801. }
  // Consume consecutive line breaks; every one bumps lineNumber and resets
  // columnNumber to 1. The loop stops at the first other character or when
  // only one buffered character is left.
  802. do {
  803. c = fCurrentEntity.ch[fCurrentEntity.position++];
  804. if (c == '\r' && external) {
  805. newlines++;
  806. fCurrentEntity.lineNumber++;
  807. fCurrentEntity.columnNumber = 1;
  // buffer exhausted: restart the range at index 0, reserve the first
  // `newlines` slots for the breaks seen so far and load behind them
  808. if (fCurrentEntity.position == fCurrentEntity.count) {
  809. offset = 0;
  810. fCurrentEntity.position = newlines;
  811. if (load(newlines, false)) {
  812. break;
  813. }
  814. }
  // CR directly followed by LF: skip the LF and move offset forward as
  // well, so the pair contributes a single position to the reported range
  815. if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  816. fCurrentEntity.position++;
  817. offset++;
  818. }
  819. /*** NEWLINE NORMALIZATION ***/
  // NOTE(review): for a CR that is not followed by LF, newlines is
  // incremented a second time here - confirm this is intended.
  820. else {
  821. newlines++;
  822. }
  823. /***/
  824. }
  825. else if (c == '\n') {
  826. newlines++;
  827. fCurrentEntity.lineNumber++;
  828. fCurrentEntity.columnNumber = 1;
  // same buffer-exhausted handling as in the CR branch
  829. if (fCurrentEntity.position == fCurrentEntity.count) {
  830. offset = 0;
  831. fCurrentEntity.position = newlines;
  832. if (load(newlines, false)) {
  833. break;
  834. }
  835. }
  836. }
  // not a line break: un-read the character and leave the loop
  837. else {
  838. fCurrentEntity.position--;
  839. break;
  840. }
  841. } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  // overwrite every position of the consumed run with '\n'
  842. for (int i = offset; i < fCurrentEntity.position; i++) {
  843. fCurrentEntity.ch[i] = '\n';
  844. }
  845. int length = fCurrentEntity.position - offset;
  // Stopped with exactly one buffered character left: report just the
  // line breaks and return -1 (next character not known).
  846. if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  847. content.setValues(fCurrentEntity.ch, offset, length);
  848. if (DEBUG_BUFFER) {
  849. System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  850. XMLEntityManager.print(fCurrentEntity);
  851. System.out.println();
  852. }
  853. return -1;
  854. }
  855. if (DEBUG_BUFFER) {
  856. System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  857. XMLEntityManager.print(fCurrentEntity);
  858. System.out.println();
  859. }
  860. }
  861. // scan literal value
  // Stops (leaving the character unread) at '%', at any character that
  // XMLChar.isContent rejects, and at the quote character - the quote only
  // ends the run when the entity's literal flag is false or the entity is
  // external.
  862. while (fCurrentEntity.position < fCurrentEntity.count) {
  863. c = fCurrentEntity.ch[fCurrentEntity.position++];
  864. if ((c == quote &&
  865. (!fCurrentEntity.literal || external))
  866. || c == '%' || !XMLChar.isContent(c)) {
  867. fCurrentEntity.position--;
  868. break;
  869. }
  870. }
  871. int length = fCurrentEntity.position - offset;
  // the line breaks already reset columnNumber, so only the remaining
  // characters advance the column
  872. fCurrentEntity.columnNumber += length - newlines;
  873. content.setValues(fCurrentEntity.ch, offset, length);
  874. // return next character
  875. if (fCurrentEntity.position != fCurrentEntity.count) {
  876. c = fCurrentEntity.ch[fCurrentEntity.position];
  877. // NOTE: We don't want to accidentally signal the
  878. // end of the literal if we're expanding an
  879. // entity appearing in the literal. -Ac
  880. if (c == quote && fCurrentEntity.literal) {
  881. c = -1;
  882. }
  883. }
  // buffer exhausted: the next character is not known
  884. else {
  885. c = -1;
  886. }
  887. if (DEBUG_BUFFER) {
  888. System.out.print(")scanLiteral, '"+(char)quote+"': ");
  889. XMLEntityManager.print(fCurrentEntity);
  890. System.out.println(" -> '"+(char)c+"'");
  891. }
  892. return c;
  893. } // scanLiteral(int,XMLString):int
/**
 * Scans a range of character data up to the specified delimiter,
 * appending the scanned characters to the supplied buffer.
 * <p>
 * <strong>Note:</strong> The characters are consumed.
 * <p>
 * <strong>Note:</strong> This assumes that the internal buffer is
 * at least the same size, or bigger, than the length of the delimiter
 * and that the delimiter contains at least one character.
 * <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of character data. This method may return before
 * the delimiter due to reaching the end of the input buffer, hitting
 * a newline or an invalid character, or any other reason.
 * <p>
 * <strong>Note:</strong> Leading newlines are normalized to
 * <code>'\n'</code> in place inside the entity's character buffer
 * before they are appended.
 *
 * @param delimiter The string that signifies the end of the character
 *                  data to be scanned. Must not be empty.
 * @param buffer    The string buffer the scanned characters are
 *                  appended to. The delimiter itself is never appended.
 *
 * @return Returns true if the delimiter has not been found yet (the
 *         caller should keep scanning); false once the delimiter has
 *         been found and consumed, or when the entity ended before
 *         more than <code>delimiter.length()</code> characters could
 *         be buffered.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {
    // REVISIT: This method does not need to use a string buffer.
    //          The change would avoid the array copies and increase
    //          performance. -Ac
    //
    //          Currently, this method is called for scanning CDATA
    //          sections, comments, and processing instruction data.
    //          So if this code is updated to NOT buffer, the scanning
    //          code for comments and processing instructions will
    //          need to be updated to do its own buffering. The code
    //          for CDATA sections is safe as-is. -Ac
    boolean found = false;
    int delimLen = delimiter.length();
    char charAt0 = delimiter.charAt(0);
    // cached once: '\r' is only treated as a newline in external entities
    boolean external = fCurrentEntity.isExternal();
    if (DEBUG_BUFFER) {
        System.out.print("(scanData: ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }
    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    // Ensure more than delimLen unread characters are buffered: move the
    // unread tail to the front of the buffer and load behind it, until
    // enough is available or load() reports the end of the entity.
    boolean bNextEntity = false;
    while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen)
        && (!bNextEntity))
    {
        System.arraycopy(fCurrentEntity.ch,
                         fCurrentEntity.position,
                         fCurrentEntity.ch,
                         0,
                         fCurrentEntity.count - fCurrentEntity.position);
        bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
        // the unread data now starts at the front of the buffer
        fCurrentEntity.position = 0;
    }
    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
        // something must be wrong with the input: e.g., file ends an unterminated comment
        // Hand everything that is left to the caller and give up.
        int length = fCurrentEntity.count - fCurrentEntity.position;
        buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
        // position was reset to 0 by the loop above, so count == length here
        fCurrentEntity.columnNumber += fCurrentEntity.count;
        fCurrentEntity.position = fCurrentEntity.count;
        // changeEntity == true: lets the entity manager end this entity
        load(0,true);
        return false;
    }
    // normalize newlines
    // offset marks the start of the range that will be appended;
    // newlines is subtracted from the column advance further below.
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    int newlines = 0;
    if (c == '\n' || (c == '\r' && external)) {
        if (DEBUG_BUFFER) {
            System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && external) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // buffer exhausted: restart the range at index 0 and
                    // load new data behind the first `newlines` slots
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    // "\r\n" pair: consume the '\n' and shrink the range
                    // by one so the pair yields a single character
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    // NOTE(review): a lone '\r' increments newlines a
                    // second time here -- confirm this is intended.
                    newlines++;
                }
            }
            else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    // NOTE(review): count is only reset in this branch,
                    // not in the '\r' branch above -- confirm.
                    fCurrentEntity.count = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
            }
            else {
                // not a newline: un-read the character and stop
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // overwrite the scanned range with normalized '\n' characters
        for (int i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
        }
        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // only one unread character is left: return just the newlines
            // and let the next call continue scanning
            buffer.append(fCurrentEntity.ch, offset, length);
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                XMLEntityManager.print(fCurrentEntity);
                System.out.println();
            }
            return true;
        }
        if (DEBUG_BUFFER) {
            System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
    }
    // iterate over buffer looking for delimiter
    OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == charAt0) {
            // looks like we just hit the delimiter
            int delimOffset = fCurrentEntity.position - 1;
            for (int i = 1; i < delimLen; i++) {
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // buffer ends inside a possible delimiter: rewind to
                    // its first character so it is re-examined next call
                    fCurrentEntity.position -= i;
                    break OUTER;
                }
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (delimiter.charAt(i) != c) {
                    // mismatch: un-read this character and resume scanning
                    fCurrentEntity.position--;
                    break;
                }
            }
            // every delimiter character matched and was consumed
            if (fCurrentEntity.position == delimOffset + delimLen) {
                found = true;
                break;
            }
        }
        else if (c == '\n' || (external && c == '\r')) {
            // stop in front of the newline; it is left unread here
            fCurrentEntity.position--;
            break;
        }
        else if (XMLChar.isInvalid(c)) {
            // leave the invalid character unread and return what was
            // scanned so far
            fCurrentEntity.position--;
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            buffer.append(fCurrentEntity.ch, offset, length);
            return true;
        }
    }
    int length = fCurrentEntity.position - offset;
    fCurrentEntity.columnNumber += length - newlines;
    if (found) {
        // the delimiter was consumed but must not be part of the data
        length -= delimLen;
    }
    buffer.append (fCurrentEntity.ch, offset, length);
    // return true if the delimiter has not been found yet
    if (DEBUG_BUFFER) {
        System.out.print(")scanData: ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> " + !found);
    }
    return !found;
} // scanData(String,XMLStringBuffer):boolean
  1087. /**
  1088. * Skips a character appearing immediately on the input.
  1089. * <p>
  1090. * <strong>Note:</strong> The character is consumed only if it matches
  1091. * the specified character.
  1092. *
  1093. * @param c The character to skip.
  1094. *
  1095. * @return Returns true if the character was skipped.
  1096. *
  1097. * @throws IOException Thrown if i/o error occurs.
  1098. * @throws EOFException Thrown on end of file.
  1099. */
  1100. public boolean skipChar(int c) throws IOException {
  1101. if (DEBUG_BUFFER) {
  1102. System.out.print("(skipChar, '"+(char)c+"': ");
  1103. XMLEntityManager.print(fCurrentEntity);
  1104. System.out.println();
  1105. }
  1106. // load more characters, if needed
  1107. if (fCurrentEntity.position == fCurrentEntity.count) {
  1108. load(0, true);
  1109. }
  1110. // skip character
  1111. int cc = fCurrentEntity.ch[fCurrentEntity.position];
  1112. if (cc == c) {
  1113. fCurrentEntity.position++;
  1114. if (c == '\n') {
  1115. fCurrentEntity.lineNumber++;
  1116. fCurrentEntity.columnNumber = 1;
  1117. }
  1118. else {
  1119. fCurrentEntity.columnNumber++;
  1120. }
  1121. if (DEBUG_BUFFER) {
  1122. System.out.print(")skipChar, '"+(char)c+"': ");
  1123. XMLEntityManager.print(fCurrentEntity);
  1124. System.out.println(" -> true");
  1125. }
  1126. return true;
  1127. }
  1128. else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
  1129. // handle newlines
  1130. if (fCurrentEntity.position == fCurrentEntity.count) {
  1131. fCurrentEntity.ch[0] = (char)cc;
  1132. load(1, false);
  1133. }
  1134. fCurrentEntity.position++;
  1135. if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  1136. fCurrentEntity.position++;
  1137. }
  1138. fCurrentEntity.lineNumber++;
  1139. fCurrentEntity.columnNumber = 1;
  1140. if (DEBUG_BUFFER) {
  1141. System.out.print(")skipChar, '"+(char)c+"': ");
  1142. XMLEntityManager.print(fCurrentEntity);
  1143. System.out.println(" -> true");
  1144. }
  1145. return true;
  1146. }
  1147. // character was not skipped
  1148. if (DEBUG_BUFFER) {
  1149. System.out.print(")skipChar, '"+(char)c+"': ");
  1150. XMLEntityManager.print(fCurrentEntity);
  1151. System.out.println(" -> false");
  1152. }
  1153. return false;
  1154. } // skipChar(int):boolean
  1155. /**
  1156. * Skips space characters appearing immediately on the input.
  1157. * <p>
  1158. * <strong>Note:</strong> The characters are consumed only if they are
  1159. * space characters.
  1160. *
  1161. * @return Returns true if at least one space character was skipped.
  1162. *
  1163. * @throws IOException Thrown if i/o error occurs.
  1164. * @throws EOFException Thrown on end of file.
  1165. *
  1166. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
  1167. */
  1168. public boolean skipSpaces() throws IOException {
  1169. if (DEBUG_BUFFER) {
  1170. System.out.print("(skipSpaces: ");
  1171. XMLEntityManager.print(fCurrentEntity);
  1172. System.out.println();
  1173. }
  1174. // load more characters, if needed
  1175. if (fCurrentEntity.position == fCurrentEntity.count) {
  1176. load(0, true);
  1177. }
  1178. // skip spaces
  1179. int c = fCurrentEntity.ch[fCurrentEntity.position];
  1180. if (XMLChar.isSpace(c)) {
  1181. boolean external = fCurrentEntity.isExternal();
  1182. do {
  1183. boolean entityChanged = false;
  1184. // handle newlines
  1185. if (c == '\n' || (external && c == '\r')) {
  1186. fCurrentEntity.lineNumber++;
  1187. fCurrentEntity.columnNumber = 1;
  1188. if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  1189. fCurrentEntity.ch[0] = (char)c;
  1190. entityChanged = load(1, true);
  1191. if (!entityChanged)
  1192. // the load change the position to be 1,
  1193. // need to restore it when entity not changed
  1194. fCurrentEntity.position = 0;
  1195. }
  1196. if (c == '\r' && external) {
  1197. // REVISIT: Does this need to be updated to fix the
  1198. // #x0D ^#x0A newline normalization problem? -Ac
  1199. if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  1200. fCurrentEntity.position--;
  1201. }
  1202. }
  1203. /*** NEWLINE NORMALIZATION ***
  1204. else {
  1205. if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  1206. && external) {
  1207. fCurrentEntity.position++;
  1208. }
  1209. }
  1210. /***/
  1211. }
  1212. else {
  1213. fCurrentEntity.columnNumber++;
  1214. }
  1215. // load more characters, if needed
  1216. if (!entityChanged)
  1217. fCurrentEntity.position++;
  1218. if (fCurrentEntity.position == fCurrentEntity.count) {
  1219. load(0, true);
  1220. }
  1221. } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
  1222. if (DEBUG_BUFFER) {
  1223. System.out.print(")skipSpaces: ");
  1224. XMLEntityManager.print(fCurrentEntity);
  1225. System.out.println(" -> true");
  1226. }
  1227. return true;
  1228. }
  1229. // no spaces were found
  1230. if (DEBUG_BUFFER) {
  1231. System.out.print(")skipSpaces: ");
  1232. XMLEntityManager.print(fCurrentEntity);
  1233. System.out.println(" -> false");
  1234. }
  1235. return false;
  1236. } // skipSpaces():boolean
  1237. /**
  1238. * Skips space characters appearing immediately on the input that would
  1239. * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
  1240. * normalization is performed. This is useful when scanning structures
  1241. * such as the XMLDecl and TextDecl that can only contain US-ASCII
  1242. * characters.
  1243. * <p>
  1244. * <strong>Note:</strong> The characters are consumed only if they would
  1245. * match non-terminal S before end of line normalization is performed.
  1246. *
  1247. * @return Returns true if at least one space character was skipped.
  1248. *
  1249. * @throws IOException Thrown if i/o error occurs.
  1250. * @throws EOFException Thrown on end of file.
  1251. *
  1252. * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
  1253. */
  1254. public boolean skipDeclSpaces() throws IOException {
  1255. if (DEBUG_BUFFER) {
  1256. System.out.print("(skipDeclSpaces: ");
  1257. XMLEntityManager.print(fCurrentEntity);
  1258. System.out.println();
  1259. }
  1260. // load more characters, if needed
  1261. if (fCurrentEntity.position == fCurrentEntity.count) {
  1262. load(0, true);
  1263. }
  1264. // skip spaces
  1265. int c = fCurrentEntity.ch[fCurrentEntity.position];
  1266. if (XMLChar.isSpace(c)) {
  1267. boolean external = fCurrentEntity.isExternal();
  1268. do {
  1269. boolean entityChanged = false;
  1270. // handle newlines
  1271. if (c == '\n' || (external && c == '\r')) {
  1272. fCurrentEntity.lineNumber++;
  1273. fCurrentEntity.columnNumber = 1;
  1274. if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  1275. fCurrentEntity.ch[0] = (char)c;
  1276. entityChanged = load(1, true);
  1277. if (!entityChanged)
  1278. // the load change the position to be 1,
  1279. // need to restore it when entity not changed
  1280. fCurrentEntity.position = 0;
  1281. }
  1282. if (c == '\r' && external) {
  1283. // REVISIT: Does this need to be updated to fix the
  1284. // #x0D ^#x0A newline normalization problem? -Ac
  1285. if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  1286. fCurrentEntity.position--;
  1287. }
  1288. }
  1289. /*** NEWLINE NORMALIZATION ***
  1290. else {
  1291. if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  1292. && external) {
  1293. fCurrentEntity.position++;
  1294. }
  1295. }
  1296. /***/
  1297. }
  1298. else {
  1299. fCurrentEntity.columnNumber++;
  1300. }
  1301. // load more characters, if needed
  1302. if (!entityChanged)
  1303. fCurrentEntity.position++;
  1304. if (fCurrentEntity.position == fCurrentEntity.count) {
  1305. load(0, true);
  1306. }
  1307. } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
  1308. if (DEBUG_BUFFER) {
  1309. System.out.print(")skipDeclSpaces: ");
  1310. XMLEntityManager.print(fCurrentEntity);
  1311. System.out.println(" -> true");
  1312. }
  1313. return true;
  1314. }
  1315. // no spaces were found
  1316. if (DEBUG_BUFFER) {
  1317. System.out.print(")skipDeclSpaces: ");
  1318. XMLEntityManager.print(fCurrentEntity);
  1319. System.out.println(" -> false");
  1320. }
  1321. return false;
  1322. } // skipDeclSpaces():boolean
  1323. /**
  1324. * Skips the specified string appearing immediately on the input.
  1325. * <p>
  1326. * <strong>Note:</strong> The characters are consumed only if they are
  1327. * space characters.
  1328. *
  1329. * @param s The string to skip.
  1330. *
  1331. * @return Returns true if the string was skipped.
  1332. *
  1333. * @throws IOException Thrown if i/o error occurs.
  1334. * @throws EOFException Thrown on end of file.
  1335. */
  1336. public boolean skipString(String s) throws IOException {
  1337. if (DEBUG_BUFFER) {
  1338. System.out.print("(skipString, \""+s+"\": ");
  1339. XMLEntityManager.print(fCurrentEntity);
  1340. System.out.println();
  1341. }
  1342. // load more characters, if needed
  1343. if (fCurrentEntity.position == fCurrentEntity.count) {
  1344. load(0, true);
  1345. }
  1346. // skip string
  1347. final int length = s.length();
  1348. for (int i = 0; i < length; i++) {
  1349. char c = fCurrentEntity.ch[fCurrentEntity.position++];
  1350. if (c != s.charAt(i)) {
  1351. fCurrentEntity.position -= i + 1;
  1352. if (DEBUG_BUFFER) {
  1353. System.out.print(")skipString, \""+s+"\": ");
  1354. XMLEntityManager.print(fCurrentEntity);
  1355. System.out.println(" -> false");
  1356. }
  1357. return false;
  1358. }
  1359. if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
  1360. System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
  1361. // REVISIT: Can a string to be skipped cross an
  1362. // entity boundary? -Ac
  1363. if (load(i + 1, false)) {
  1364. fCurrentEntity.position -= i + 1;
  1365. if (DEBUG_BUFFER) {
  1366. System.out.print(")skipString, \""+s+"\": ");
  1367. XMLEntityManager.print(fCurrentEntity);
  1368. System.out.println(" -> false");
  1369. }
  1370. return false;
  1371. }
  1372. }
  1373. }
  1374. if (DEBUG_BUFFER) {
  1375. System.out.print(")skipString, \""+s+"\": ");
  1376. XMLEntityManager.print(fCurrentEntity);
  1377. System.out.println(" -> true");
  1378. }
  1379. fCurrentEntity.columnNumber += length;
  1380. return true;
  1381. } // skipString(String):boolean
  1382. //
  1383. // Locator methods
  1384. //
  1385. /**
  1386. * Return the public identifier for the current document event.
  1387. * <p>
  1388. * The return value is the public identifier of the document
  1389. * entity or of the external parsed entity in which the markup
  1390. * triggering the event appears.
  1391. *
  1392. * @return A string containing the public identifier, or
  1393. * null if none is available.
  1394. */
  1395. public String getPublicId() {
  1396. return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
  1397. } // getPublicId():String
  1398. /**
  1399. * Return the expanded system identifier for the current document event.
  1400. * <p>
  1401. * The return value is the expanded system identifier of the document
  1402. * entity or of the external parsed entity in which the markup
  1403. * triggering the event appears.
  1404. * <p>
  1405. * If the system identifier is a URL, the parser must resolve it
  1406. * fully before passing it to the application.
  1407. *
  1408. * @return A string containing the expanded system identifier, or null
  1409. * if none is available.
  1410. */
  1411. public String getExpandedSystemId() {
  1412. if (fCurrentEntity != null) {
  1413. if (fCurrentEntity.entityLocation != null &&
  1414. fCurrentEntity.entityLocation.getExpandedSystemId() != null ) {
  1415. return fCurrentEntity.entityLocation.getExpandedSystemId();
  1416. }
  1417. else {
  1418. // get the current entity to return something appropriate:
  1419. return fCurrentEntity.getExpandedSystemId();
  1420. }
  1421. }
  1422. return null;
  1423. } // getExpandedSystemId():String
  1424. /**
  1425. * Return the literal system identifier for the current document event.
  1426. * <p>
  1427. * The return value is the literal system identifier of the document
  1428. * entity or of the external parsed entity in which the markup
  1429. * triggering the event appears.
  1430. * <p>
  1431. * @return A string containing the literal system identifier, or null
  1432. * if none is available.
  1433. */
  1434. public String getLiteralSystemId() {
  1435. if (fCurrentEntity != null) {
  1436. if (fCurrentEntity.entityLocation != null &&
  1437. fCurrentEntity.entityLocation.getLiteralSystemId() != null ) {
  1438. return fCurrentEntity.entityLocation.getLiteralSystemId();
  1439. }
  1440. else {
  1441. // get the current entity to do it:
  1442. return fCurrentEntity.getLiteralSystemId();
  1443. }
  1444. }
  1445. return null;
  1446. } // getLiteralSystemId():String
  1447. /**
  1448. * Return the line number where the current document event ends.
  1449. * <p>
  1450. * <strong>Warning:</strong> The return value from the method
  1451. * is intended only as an approximation for the sake of error
  1452. * reporting; it is not intended to provide sufficient information
  1453. * to edit the character content of the original XML document.
  1454. * <p>
  1455. * The return value is an approximation of the line number
  1456. * in the document entity or external parsed entity where the
  1457. * markup triggering the event appears.
  1458. * <p>
  1459. * If possible, the SAX driver should provide the line position
  1460. * of the first character after the text associated with the document
  1461. * event. The first line in the document is line 1.
  1462. *
  1463. * @return The line number, or -1 if none is available.
  1464. */
  1465. public int getLineNumber() {
  1466. if (fCurrentEntity != null) {
  1467. if (fCurrentEntity.isExternal()) {
  1468. return fCurrentEntity.lineNumber;
  1469. }
  1470. else {
  1471. // ask the current entity to return something appropriate:
  1472. return fCurrentEntity.getLineNumber();
  1473. }
  1474. }
  1475. return -1;
  1476. } // getLineNumber():int
  1477. /**
  1478. * Return the column number where the current document event ends.
  1479. * <p>
  1480. * <strong>Warning:</strong> The return value from the method
  1481. * is intended only as an approximation for the sake of error
  1482. * reporting; it is not intended to provide sufficient information
  1483. * to edit the character content of the original XML document.
  1484. * <p>
  1485. * The return value is an approximation of the column number
  1486. * in the document entity or external parsed entity where the
  1487. * markup triggering the event appears.
  1488. * <p>
  1489. * If possible, the SAX driver should provide the line position
  1490. * of the first character after the text associated with the document
  1491. * event.
  1492. * <p>
  1493. * If possible, the SAX driver should provide the line position
  1494. * of the first character after the text associated with the document
  1495. * event. The first column in each line is column 1.
  1496. *
  1497. * @return The column number, or -1 if none is available.
  1498. */
  1499. public int getColumnNumber() {
  1500. if (fCurrentEntity != null) {
  1501. if (fCurrentEntity.isExternal()) {
  1502. return fCurrentEntity.columnNumber;
  1503. }
  1504. else {
  1505. // ask current entity to find appropriate column number
  1506. return fCurrentEntity.getColumnNumber();
  1507. }
  1508. }
  1509. return -1;
  1510. } // getColumnNumber():int
  1511. /** Returns the encoding of the current entity.
  1512. * Note that, for a given entity, this value can only be
  1513. * considered final once the encoding declaration has been read (or once it
  1514. * has been determined that there is no such declaration) since, no encoding
  1515. * having been specified on the XMLInputSource, the parser
  1516. * will make an initial "guess" which could be in error.
  1517. */
  1518. public String getEncoding() {
  1519. if (fCurrentEntity != null) {
  1520. if (fCurrentEntity.isExternal()) {
  1521. return fCurrentEntity.encoding;
  1522. }
  1523. else {
  1524. // ask current entity to find appropriate column number
  1525. return fCurrentEntity.getEncoding();
  1526. }
  1527. }
  1528. return null;
  1529. } // getEncoding():String
/**
 * Does nothing: the supplied column number is ignored and no state of
 * this scanner is changed.
 *
 * @param col Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
 */
public void setColumnNumber(int col) {
    // no-op
}
/**
 * Does nothing: the supplied line number is ignored and no state of
 * this scanner is changed.
 *
 * @param line Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
 */
public void setLineNumber(int line) {
    // no-op
}
/**
 * Does nothing: the supplied base system identifier is ignored and no
 * state of this scanner is changed.
 *
 * @param systemId Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
 */
public void setBaseSystemId(String systemId) {
    // no-op
}
/**
 * Does nothing: the supplied expanded system identifier is ignored and
 * no state of this scanner is changed.
 *
 * @param systemId Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
 */
public void setExpandedSystemId(String systemId) {
    // no-op
}
/**
 * Does nothing: the supplied literal system identifier is ignored and
 * no state of this scanner is changed.
 *
 * @param systemId Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
 */
public void setLiteralSystemId(String systemId) {
    // no-op
}
/**
 * Does nothing: the supplied public identifier is ignored and no state
 * of this scanner is changed.
 *
 * @param publicId Ignored.
 *
 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
 */
public void setPublicId(String publicId) {
    // no-op
}
  1566. // allow entity manager to tell us what the current entityis:
  1567. public void setCurrentEntity(XMLEntityManager.ScannedEntity ent) {
  1568. fCurrentEntity = ent;
  1569. }
  1570. // set buffer size:
  1571. public void setBufferSize(int size) {
  1572. // REVISIT: Buffer size passed to entity scanner
  1573. // was not being kept in synch with the actual size
  1574. // of the buffers in each scanned entity. If any
  1575. // of the buffers were actually resized, it was possible
  1576. // that the parser would throw an ArrayIndexOutOfBoundsException
  1577. // for documents which contained names which are longer than
  1578. // the current buffer size. Conceivably the buffer size passed
  1579. // to entity scanner could be used to determine a minimum size
  1580. // for resizing, if doubling its size is smaller than this
  1581. // minimum. -- mrglavas
  1582. fBufferSize = size;
  1583. }
  1584. // reset what little state we have...
  1585. public void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
  1586. XMLErrorReporter reporter) {
  1587. fCurrentEntity = null;
  1588. fSymbolTable = symbolTable;
  1589. fEntityManager = entityManager;
  1590. fErrorReporter = reporter;
  1591. }
  1592. //
  1593. // Private methods
  1594. //
  1595. /**
  1596. * Loads a chunk of text.
  1597. *
  1598. * @param offset The offset into the character buffer to
  1599. * read the next batch of characters.
  1600. * @param changeEntity True if the load should change entities
  1601. * at the end of the entity, otherwise leave
  1602. * the current entity in place and the entity
  1603. * boundary will be signaled by the return
  1604. * value.
  1605. *
  1606. * @returns Returns true if the entity changed as a result of this
  1607. * load operation.
  1608. */
  1609. final boolean load(int offset, boolean changeEntity)
  1610. throws IOException {
  1611. if (DEBUG_BUFFER) {
  1612. System.out.print("(load, "+offset+": ");
  1613. XMLEntityManager.print(fCurrentEntity);
  1614. System.out.println();
  1615. }
  1616. // read characters
  1617. int length = fCurrentEntity.mayReadChunks?
  1618. (fCurrentEntity.ch.length - offset):
  1619. (XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE);
  1620. if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length);
  1621. int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
  1622. if (DEBUG_BUFFER) System.out.println(" length actually read: "+count);
  1623. // reset count and position
  1624. boolean entityChanged = false;
  1625. if (count != -1) {
  1626. if (count != 0) {
  1627. fCurrentEntity.count = count + offset;
  1628. fCurrentEntity.position = offset;
  1629. }
  1630. }
  1631. // end of this entity
  1632. else {
  1633. fCurrentEntity.count = offset;
  1634. fCurrentEntity.position = offset;
  1635. entityChanged = true;
  1636. if (changeEntity) {
  1637. fEntityManager.endEntity();
  1638. if (fCurrentEntity == null) {
  1639. throw new EOFException();
  1640. }
  1641. // handle the trailing edges
  1642. if (fCurrentEntity.position == fCurrentEntity.count) {
  1643. load(0, true);
  1644. }
  1645. }
  1646. }
  1647. if (DEBUG_BUFFER) {
  1648. System.out.print(")load, "+offset+": ");
  1649. XMLEntityManager.print(fCurrentEntity);
  1650. System.out.println();
  1651. }
  1652. return entityChanged;
  1653. } // load(int, boolean):boolean
  1654. } // class XMLEntityScanner