1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.html.internal.dom;
  58. import org.w3c.dom.Element;
  59. import org.w3c.dom.Node;
  60. import org.w3c.dom.html.HTMLAnchorElement;
  61. import org.w3c.dom.html.HTMLAppletElement;
  62. import org.w3c.dom.html.HTMLAreaElement;
  63. import org.w3c.dom.html.HTMLCollection;
  64. import org.w3c.dom.html.HTMLElement;
  65. import org.w3c.dom.html.HTMLFormElement;
  66. import org.w3c.dom.html.HTMLImageElement;
  67. import org.w3c.dom.html.HTMLObjectElement;
  68. import org.w3c.dom.html.HTMLOptionElement;
  69. import org.w3c.dom.html.HTMLTableCellElement;
  70. import org.w3c.dom.html.HTMLTableRowElement;
  71. import org.w3c.dom.html.HTMLTableSectionElement;
  72. /**
  73. * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
  74. * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to
  75. * look for is identified in the constructor by code. This collection is not
  76. * optimized for traversing large trees.
  77. * <p>
  78. * The collection has to meet two requirements: it has to be live, and it has
  79. * to traverse depth first and always return results in that order. As such,
  80. * using an object container (such as {@link java.util.Vector}) is expensive on
  81. * insert/remove operations. Instead, the collection has been implemented using
  82. * three traversing functions. As a result, operations on large documents will
  83. * result in traversal of the entire document tree and consume a considerable
  84. * amount of time.
  85. * <p>
  86. * Note that synchronization on the traversed document cannot be achieved.
  87. * The document itself cannot be locked, and locking each traversed node is
  88. * likely to lead to a dead lock condition. Therefore, there is a chance of the
  89. * document being changed as results are fetched; in all likelihood, the results
  90. * might be out dated, but not erroneous.
  91. *
  92. *
  93. * @version $Revision: 1.7 $ $Date: 2003/05/08 20:13:09 $
  94. * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
  95. * @see org.w3c.dom.html.HTMLCollection
  96. */
  97. class HTMLCollectionImpl
  98. implements HTMLCollection
  99. {
  100. /**
  101. * Request collection of all anchors in document: <A> elements that
  102. * have a <code>name</code> attribute.
  103. */
  104. static final short ANCHOR = 1;
  105. /**
  106. * Request collection of all forms in document: <FORM> elements.
  107. */
  108. static final short FORM = 2;
  109. /**
  110. * Request collection of all images in document: <IMAGE> elements.
  111. */
  112. static final short IMAGE = 3;
  113. /**
  114. * Request collection of all Applets in document: <APPLET> and
  115. * <OBJECT> elements (<OBJECT> must contain an Applet).
  116. */
  117. static final short APPLET = 4;
  118. /**
  119. * Request collection of all links in document: <A> and <AREA>
  120. * elements (must have a <code>href</code> attribute).
  121. */
  122. static final short LINK = 5;
  123. /**
  124. * Request collection of all options in selection: <OPTION> elments in
  125. * <SELECT> or <OPTGROUP>.
  126. */
  127. static final short OPTION = 6;
  128. /**
  129. * Request collection of all rows in table: <TR> elements in table or
  130. * table section.
  131. */
  132. static final short ROW = 7;
  133. /**
  134. * Request collection of all form elements: <INPUT>, <BUTTON>,
  135. * <SELECT>, <TEXT> and <TEXTAREA> elements inside form
  136. * <FORM>.
  137. */
  138. static final short ELEMENT = 8;
  139. /**
  140. * Request collection of all areas in map: <AREA> element in <MAP>
  141. * (non recursive).
  142. */
  143. static final short AREA = -1;
  144. /**
  145. * Request collection of all table bodies in table: <TBODY> element in
  146. * table <TABLE> (non recursive).
  147. */
  148. static final short TBODY = -2;
  149. /**
  150. * Request collection of all cells in row: <TD> elements in <TR>
  151. * (non recursive).
  152. */
  153. static final short CELL = -3;
  154. /**
  155. * Indicates what this collection is looking for. Holds one of the enumerated
  156. * values and used by {@link #collectionMatch}. Set by the constructor and
  157. * determine the collection's use for its life time.
  158. */
  159. private short _lookingFor;
  160. /**
  161. * This is the top level element underneath which the collection exists.
  162. */
  163. private Element _topLevel;
  164. /**
  165. * Construct a new collection that retrieves element of the specific type
  166. * (<code>lookingFor</code>) from the specific document portion
  167. * (<code>topLevel</code>).
  168. *
  169. * @param topLevel The element underneath which the collection exists
  170. * @param lookingFor Code indicating what elements to look for
  171. */
  172. HTMLCollectionImpl( HTMLElement topLevel, short lookingFor )
  173. {
  174. if ( topLevel == null )
  175. throw new NullPointerException( "HTM011 Argument 'topLevel' is null." );
  176. _topLevel = topLevel;
  177. _lookingFor = lookingFor;
  178. }
  179. /**
  180. * Returns the length of the collection. This method might traverse the
  181. * entire document tree.
  182. *
  183. * @return Length of the collection
  184. */
  185. public final int getLength()
  186. {
  187. // Call recursive function on top-level element.
  188. return getLength( _topLevel );
  189. }
  190. /**
  191. * Retrieves the indexed node from the collection. Nodes are numbered in
  192. * tree order - depth-first traversal order. This method might traverse
  193. * the entire document tree.
  194. *
  195. * @param index The index of the node to return
  196. * @return The specified node or null if no such node found
  197. */
  198. public final Node item( int index )
  199. {
  200. if ( index < 0 )
  201. throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." );
  202. // Call recursive function on top-level element.
  203. return item( _topLevel, new CollectionIndex( index ) );
  204. }
  205. /**
  206. * Retrieves the named node from the collection. The name is matched case
  207. * sensitive against the <TT>id</TT> attribute of each element in the
  208. * collection, returning the first match. The tree is traversed in
  209. * depth-first order. This method might traverse the entire document tree.
  210. *
  211. * @param name The name of the node to return
  212. * @return The specified node or null if no such node found
  213. */
  214. public final Node namedItem( String name )
  215. {
  216. if ( name == null )
  217. throw new NullPointerException( "HTM013 Argument 'name' is null." );
  218. // Call recursive function on top-level element.
  219. return namedItem( _topLevel, name );
  220. }
  221. /**
  222. * Recursive function returns the number of elements of a particular type
  223. * that exist under the top level element. This is a recursive function
  224. * and the top level element is passed along.
  225. *
  226. * @param topLevel Top level element from which to scan
  227. * @return Number of elements
  228. */
  229. private int getLength( Element topLevel )
  230. {
  231. int length;
  232. Node node;
  233. synchronized ( topLevel )
  234. {
  235. // Always count from zero and traverse all the childs of the
  236. // current element in the order they appear.
  237. length = 0;
  238. node = topLevel.getFirstChild();
  239. while ( node != null )
  240. {
  241. // If a particular node is an element (could be HTML or XML),
  242. // do two things: if it's the one we're looking for, count
  243. // another matched element; at any rate, traverse it's
  244. // children as well.
  245. if ( node instanceof Element )
  246. {
  247. if ( collectionMatch( (Element) node, null ) )
  248. ++ length;
  249. else if ( recurse() )
  250. length += getLength( (Element) node );
  251. }
  252. node = node.getNextSibling();
  253. }
  254. }
  255. return length;
  256. }
  257. /**
  258. * Recursive function returns the numbered element of a particular type
  259. * that exist under the top level element. This is a recursive function
  260. * and the top level element is passed along.
  261. * <p>
  262. * Note that this function must call itself with an index and get back both
  263. * the element (if one was found) and the new index which is decremeneted
  264. * for any like element found. Since integers are only passed by value,
  265. * this function makes use of a separate class ({@link CollectionIndex})
  266. * to hold that index.
  267. *
  268. * @param topLevel Top level element from which to scan
  269. * @param index The index of the item to retreive
  270. * @return Number of elements
  271. * @see CollectionIndex
  272. */
  273. private Node item( Element topLevel, CollectionIndex index )
  274. {
  275. Node node;
  276. Node result;
  277. synchronized ( topLevel )
  278. {
  279. // Traverse all the childs of the current element in the order
  280. // they appear. Count from the index backwards until you reach
  281. // matching element with an index of zero. Return that element.
  282. node = topLevel.getFirstChild();
  283. while ( node != null )
  284. {
  285. // If a particular node is an element (could be HTML or XML),
  286. // do two things: if it's the one we're looking for, decrease
  287. // the index and if zero, return this node; at any rate,
  288. // traverse it's children as well.
  289. if ( node instanceof Element )
  290. {
  291. if ( collectionMatch( (Element) node, null ) )
  292. {
  293. if ( index.isZero() )
  294. return node;
  295. index.decrement();
  296. } else if ( recurse() )
  297. {
  298. result = item( (Element) node, index );
  299. if ( result != null )
  300. return result;
  301. }
  302. }
  303. node = node.getNextSibling();
  304. }
  305. }
  306. return null;
  307. }
  308. /**
  309. * Recursive function returns an element of a particular type with the
  310. * specified name (<TT>id</TT> attribute).
  311. *
  312. * @param topLevel Top level element from which to scan
  313. * @param name The named element to look for
  314. * @return The first named element found
  315. */
  316. private Node namedItem( Element topLevel, String name )
  317. {
  318. Node node;
  319. Node result;
  320. synchronized ( topLevel )
  321. {
  322. // Traverse all the childs of the current element in the order
  323. // they appear.
  324. node = topLevel.getFirstChild();
  325. while ( node != null )
  326. {
  327. // If a particular node is an element (could be HTML or XML),
  328. // do two things: if it's the one we're looking for, and the
  329. // name (id attribute) attribute is the one we're looking for,
  330. // return this element; otherwise, traverse it's children.
  331. if ( node instanceof Element )
  332. {
  333. if ( collectionMatch( (Element) node, name ) )
  334. return node;
  335. else if ( recurse() )
  336. {
  337. result = namedItem( (Element) node, name );
  338. if ( result != null )
  339. return result;
  340. }
  341. }
  342. node = node.getNextSibling();
  343. }
  344. return node;
  345. }
  346. }
  347. /**
  348. * Returns true if scanning methods should iterate through the collection.
  349. * When looking for elements in the document, recursing is needed to traverse
  350. * the full document tree. When looking inside a specific element (e.g. for a
  351. * cell inside a row), recursing can lead to erroneous results.
  352. *
  353. * @return True if methods should recurse to traverse entire tree
  354. */
  355. protected boolean recurse()
  356. {
  357. return _lookingFor > 0;
  358. }
  359. /**
  360. * Determines if current element matches based on what we're looking for.
  361. * The element is passed along with an optional identifier name. If the
  362. * element is the one we're looking for, return true. If the name is also
  363. * specified, the name must match the <code>id</code> attribute
  364. * (match <code>name</code> first for anchors).
  365. *
  366. * @param elem The current element
  367. * @param name The identifier name or null
  368. * @return The element matches what we're looking for
  369. */
  370. protected boolean collectionMatch( Element elem, String name )
  371. {
  372. boolean match;
  373. synchronized ( elem )
  374. {
  375. // Begin with no matching. Depending on what we're looking for,
  376. // attempt to match based on the element type. This is the quickest
  377. // way to match involving only a cast. Do the expensive string
  378. // comparison later on.
  379. match = false;
  380. switch ( _lookingFor )
  381. {
  382. case ANCHOR:
  383. // Anchor is an <A> element with a 'name' attribute. Otherwise, it's
  384. // just a link.
  385. match = ( elem instanceof HTMLAnchorElement ) &&
  386. elem.getAttribute( "name" ).length() > 0;
  387. break;
  388. case FORM:
  389. // Any <FORM> element.
  390. match = ( elem instanceof HTMLFormElement );
  391. break;
  392. case IMAGE:
  393. // Any <IMG> element. <OBJECT> elements with images are not returned.
  394. match = ( elem instanceof HTMLImageElement );
  395. break;
  396. case APPLET:
  397. // Any <APPLET> element, and any <OBJECT> element which represents an
  398. // Applet. This is determined by 'codetype' attribute being
  399. // 'application/java' or 'classid' attribute starting with 'java:'.
  400. match = ( elem instanceof HTMLAppletElement ) ||
  401. ( elem instanceof HTMLObjectElement &&
  402. ( "application/java".equals( elem.getAttribute( "codetype" ) ) ||
  403. elem.getAttribute( "classid" ).startsWith( "java:" ) ) );
  404. break;
  405. case ELEMENT:
  406. // All form elements implement HTMLFormControl for easy identification.
  407. match = ( elem instanceof HTMLFormControl );
  408. break;
  409. case LINK:
  410. // Any <A> element, and any <AREA> elements with an 'href' attribute.
  411. match = ( ( elem instanceof HTMLAnchorElement ||
  412. elem instanceof HTMLAreaElement ) &&
  413. elem.getAttribute( "href" ).length() > 0 );
  414. break;
  415. case AREA:
  416. // Any <AREA> element.
  417. match = ( elem instanceof HTMLAreaElement );
  418. break;
  419. case OPTION:
  420. // Any <OPTION> element.
  421. match = ( elem instanceof HTMLOptionElement );
  422. break;
  423. case ROW:
  424. // Any <TR> element.
  425. match = ( elem instanceof HTMLTableRowElement );
  426. break;
  427. case TBODY:
  428. // Any <TBODY> element (one of three table section types).
  429. match = ( elem instanceof HTMLTableSectionElement &&
  430. elem.getTagName().equals( "tbody" ) );
  431. break;
  432. case CELL:
  433. // Any <TD> element.
  434. match = ( elem instanceof HTMLTableCellElement );
  435. break;
  436. }
  437. // If element type was matched and a name was specified, must also match
  438. // the name against either the 'id' or the 'name' attribute. The 'name'
  439. // attribute is relevant only for <A> elements for backward compatibility.
  440. if ( match && name != null )
  441. {
  442. // If an anchor and 'name' attribute matches, return true. Otherwise,
  443. // try 'id' attribute.
  444. if ( elem instanceof HTMLAnchorElement &&
  445. name.equals( elem.getAttribute( "name" ) ) )
  446. return true;
  447. match = name.equals( elem.getAttribute( "id" ) );
  448. }
  449. }
  450. return match;
  451. }
  452. }
  453. /**
  454. * {@link CollectionImpl#item} must traverse down the tree and decrement the
  455. * index until it matches an element who's index is zero. Since integers are
  456. * passed by value, this class servers to pass the index into each recursion
  457. * by reference. It encompasses all the operations that need be performed on
  458. * the index, although direct access is possible.
  459. *
  460. * @see CollectionImpl#item
  461. */
  462. class CollectionIndex
  463. {
  464. /**
  465. * Returns the current index.
  466. *
  467. * @return Current index
  468. */
  469. int getIndex()
  470. {
  471. return _index;
  472. }
  473. /**
  474. * Decrements the index by one.
  475. */
  476. void decrement()
  477. {
  478. -- _index;
  479. }
  480. /**
  481. * Returns true if index is zero (or negative).
  482. *
  483. * @return True if index is zero
  484. */
  485. boolean isZero()
  486. {
  487. return _index <= 0;
  488. }
  489. /**
  490. * Constructs a new index with the specified initial value. The index will
  491. * then be decremeneted until it reaches zero.
  492. *
  493. * @param index The initial value
  494. */
  495. CollectionIndex( int index )
  496. {
  497. _index = index;
  498. }
  499. /**
  500. * Holds the actual value that is passed by reference using this class.
  501. */
  502. private int _index;
  503. }