1. /* $Id: ExtendedBaseRules.java,v 1.15 2004/05/10 06:30:06 skitching Exp $
  2. *
  3. * Copyright 2001-2004 The Apache Software Foundation.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.commons.digester;
  18. import java.util.ArrayList;
  19. import java.util.Collections;
  20. import java.util.Comparator;
  21. import java.util.HashMap;
  22. import java.util.Iterator;
  23. import java.util.List;
  24. import java.util.Map;
  25. /**
  26. * <p>Extension of {@link RulesBase} for complex schema.</p>
  27. *
  28. * <p>This is an extension of the basic pattern matching scheme
  29. * intended to improve support for mapping complex xml-schema.
  30. * It is intended to be a minimal extension of the standard rules
  31. * big enough to support complex schema but without the full generality
  32. * offered by more exotic matching pattern rules.</p>
  33. *
  34. * <h4>When should you use this rather than the original?</h4>
  35. *
  36. * <p>These rules are complex and slower but offer more functionality.
  37. * The <code>RulesBase</code> matching set allows interaction between patterns.
  38. * This allows sophisticated matching schema to be created
  39. * but it also means that it can be hard to create and debug mappings
  40. * for complex schema.
  41. * This extension introduces <em>universal</em> versions of these patterns
  42. * that always act independently.</p>
  43. *
  44. * <p>Another three kinds of matching pattern are also introduced.
  45. * The parent matchs allow common method to be easily called for children.
  46. * The wildcard match allows rules to be specified for all elements.</p>
  47. *
  48. * <h4>The additional matching patterns:</h4>
  49. *
  50. * <ul>
  51. * <li><em>Parent Match </em> - Will match child elements of a particular
  52. * kind of parent. This is useful if a parent has a particular method
  53. * to call.
  54. * <ul>
  55. * <li><code>"a/b/c/?"</code> matches any child whose parent matches
  56. * <code>"a/b/c"</code>. Exact parent rules take precedence over
  57. * standard wildcard tail endings.</li>
  58. * <li><code>"*/a/b/c/?"</code> matches any child whose parent matches
  59. * "*/a/b/c"</code>. The longest matching still applies to parent
  60. * matches but the length excludes the '?', which effectively means
  61. * that standard wildcard matches with the same level of depth are
  62. * chosen in preference.</li>
  63. * </ul></li>
  64. * <li><em>Ancester Match</em> - Will match elements who parentage includes
  65. * a particular sequence of elements.
  66. * <ul>
  67. * <li><code>"a/b/*"</code> matches any element whose parentage path starts with
  68. * 'a' then 'b'. Exact parent and parent match rules take precedence. The longest
  69. * ancester match will take precedence.</li>
  70. * <li><code>"*/a/b/*"</code> matches any elements whose parentage path contains
  71. * an element 'a' followed by an element 'b'. The longest matching still applies
  72. * but the length excludes the '*' at the end.</li>
  73. * </ul>
  74. * </li>
  75. * <li><em>Universal Wildcard Match</em> - Any pattern prefixed with '!'
  76. * bypasses the longest matching rule. Even if there is an exact match
  77. * or a longer wildcard match, patterns prefixed by '!' will still be
  78. * tested to see if they match. This can be used for example to specify
  79. * universal construction rules.
  80. * <ul>
  81. * <li>Pattern <code>"!*/a/b"</code> matches whenever an 'b' element
  82. * is inside an 'a'.</li>
  83. * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
  84. * matching <code>"a/b"</code>.</li>
  85. * <li>Pattern <code>"!*/a/b/?"</code> matches any child of a parent
  86. * matching <code>"!*/a/b"</code></li>
  87. * <li>Pattern <code>"!a/b/*"</code> matches any element whose parentage path starts with
  88. * "a" then "b".</li>
  89. * <li>Pattern <code>"!*/a/b/*"</code> matches any elements whose parentage path contains
  90. * 'a/b'</li>
  91. * </ul></li>
  92. * <li><em>Wild Match</em>
  93. * <ul>
  94. * <li>Pattern <code>"*"</code> matches every pattern that isn't matched
  95. * by any other basic rule.</li>
  96. * <li>Pattern <code>"!*"</code> matches every pattern.</li>
  97. * </ul></li>
  98. * </ul>
  99. *
  100. * <h4>Using The Extended Rules</h4>
  101. *
  102. * <p>The most important thing to remember
  103. * when using the extended rules is that universal
  104. * and non-universal patterns are completely independent.
  105. * Universal patterns are never effected by the addition of new patterns
  106. * or the removal of existing ones.
  107. * Non-universal patterns are never effected
  108. * by the addition of new <em>universal</em> patterns
  109. * or the removal of existing <em>universal</em> patterns.
  110. * As in the basic matching rules, non-universal (basic) patterns
  111. * <strong>can</strong> be effected
  112. * by the addition of new <em>non-universal</em> patterns
  113. * or the removal of existing <em>non-universal</em> patterns.
  114. * <p> This means that you can use universal patterns
  115. * to build up the simple parts of your structure
  116. * - for example defining universal creation and property setting rules.
  117. * More sophisticated and complex mapping will require non-universal patterns
  118. * and this might mean that some of the universal rules will need to be
  119. * replaced by a series of
  120. * special cases using non-universal rules.
  121. * But by using universal rules as your backbone,
  122. * these additions should not break your existing rules.</p>
  123. */
  124. public class ExtendedBaseRules extends RulesBase {
  125. // ----------------------------------------------------- Instance Variables
  126. /**
  127. * Counts the entry number for the rules.
  128. */
  129. private int counter = 0;
  130. /**
  131. * The decision algorithm used (unfortunately) doesn't preserve the entry
  132. * order.
  133. * This map is used by a comparator which orders the list of matches
  134. * before it's returned.
  135. * This map stores the entry number keyed by the rule.
  136. */
  137. private Map order = new HashMap();
  138. // --------------------------------------------------------- Public Methods
  139. /**
  140. * Register a new Rule instance matching the specified pattern.
  141. *
  142. * @param pattern Nesting pattern to be matched for this Rule
  143. * @param rule Rule instance to be registered
  144. */
  145. public void add(String pattern, Rule rule) {
  146. super.add(pattern, rule);
  147. counter++;
  148. order.put(rule, new Integer(counter));
  149. }
  150. /**
  151. * Return a List of all registered Rule instances that match the specified
  152. * nesting pattern, or a zero-length List if there are no matches. If more
  153. * than one Rule instance matches, they <strong>must</strong> be returned
  154. * in the order originally registered through the <code>add()</code>
  155. * method.
  156. *
  157. * @param pattern Nesting pattern to be matched
  158. */
  159. public List match(String namespace, String pattern) {
  160. // calculate the pattern of the parent
  161. // (if the element has one)
  162. String parentPattern = "";
  163. int lastIndex = pattern.lastIndexOf('/');
  164. boolean hasParent = true;
  165. if (lastIndex == -1) {
  166. // element has no parent
  167. hasParent = false;
  168. } else {
  169. // calculate the pattern of the parent
  170. parentPattern = pattern.substring(0, lastIndex);
  171. }
  172. // we keep the list of universal matches separate
  173. List universalList = new ArrayList(counter);
  174. // Universal all wildards ('!*')
  175. // These are always matched so always add them
  176. List tempList = (List) this.cache.get("!*");
  177. if (tempList != null) {
  178. universalList.addAll(tempList);
  179. }
  180. // Universal exact parent match
  181. // need to get this now since only wildcards are considered later
  182. tempList = (List) this.cache.get("!" + parentPattern + "/?");
  183. if (tempList != null) {
  184. universalList.addAll(tempList);
  185. }
  186. // base behaviour means that if we certain matches, we don't continue
  187. // but we just have a single combined loop and so we have to set
  188. // a variable
  189. boolean ignoreBasicMatches = false;
  190. // see if we have an exact basic pattern match
  191. List rulesList = (List) this.cache.get(pattern);
  192. if (rulesList != null) {
  193. // we have a match!
  194. // so ignore all basic matches from now on
  195. ignoreBasicMatches = true;
  196. } else {
  197. // see if we have an exact child match
  198. if (hasParent) {
  199. // matching children takes preference
  200. rulesList = (List) this.cache.get(parentPattern + "/?");
  201. if (rulesList != null) {
  202. // we have a match!
  203. // so ignore all basic matches from now on
  204. ignoreBasicMatches = true;
  205. } else {
  206. // we don't have a match yet - so try exact ancester
  207. //
  208. rulesList = findExactAncesterMatch(pattern);
  209. if (rulesList != null) {
  210. // we have a match!
  211. // so ignore all basic matches from now on
  212. ignoreBasicMatches = true;
  213. }
  214. }
  215. }
  216. }
  217. // OK - we're ready for the big loop!
  218. // Unlike the basic rules case,
  219. // we have to go through for all those universal rules in all cases.
  220. // Find the longest key, ie more discriminant
  221. String longKey = "";
  222. int longKeyLength = 0;
  223. Iterator keys = this.cache.keySet().iterator();
  224. while (keys.hasNext()) {
  225. String key = (String) keys.next();
  226. // find out if it's a univeral pattern
  227. // set a flag
  228. boolean isUniversal = key.startsWith("!");
  229. if (isUniversal) {
  230. // and find the underlying key
  231. key = key.substring(1, key.length());
  232. }
  233. // don't need to check exact matches
  234. boolean wildcardMatchStart = key.startsWith("*/");
  235. boolean wildcardMatchEnd = key.endsWith("/*");
  236. if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
  237. boolean parentMatched = false;
  238. boolean basicMatched = false;
  239. boolean ancesterMatched = false;
  240. boolean parentMatchEnd = key.endsWith("/?");
  241. if (parentMatchEnd) {
  242. // try for a parent match
  243. parentMatched = parentMatch(key, pattern, parentPattern);
  244. } else if (wildcardMatchEnd) {
  245. // check for ancester match
  246. if (wildcardMatchStart) {
  247. String patternBody = key.substring(2, key.length() - 2);
  248. if (pattern.endsWith(patternBody)) {
  249. ancesterMatched = true;
  250. } else {
  251. ancesterMatched = (pattern.indexOf(patternBody + "/") > -1);
  252. }
  253. } else {
  254. String bodyPattern = key.substring(0, key.length() - 2);
  255. if (pattern.startsWith(bodyPattern))
  256. {
  257. if (pattern.length() == bodyPattern.length()) {
  258. // exact match
  259. ancesterMatched = true;
  260. } else {
  261. ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' );
  262. }
  263. } else {
  264. ancesterMatched = false;
  265. }
  266. }
  267. } else {
  268. // try for a base match
  269. basicMatched = basicMatch(key, pattern);
  270. }
  271. if (parentMatched || basicMatched || ancesterMatched) {
  272. if (isUniversal) {
  273. // universal rules go straight in
  274. // (no longest matching rule)
  275. tempList = (List) this.cache.get("!" + key);
  276. if (tempList != null) {
  277. universalList.addAll(tempList);
  278. }
  279. } else {
  280. if (!ignoreBasicMatches) {
  281. // ensure that all parent matches are SHORTER
  282. // than rules with same level of matching.
  283. //
  284. // the calculations below don't work for universal
  285. // matching, but we don't care because in that case
  286. // this if-stmt is not entered.
  287. int keyLength = key.length();
  288. if (wildcardMatchStart) {
  289. --keyLength;
  290. }
  291. if (wildcardMatchEnd) {
  292. --keyLength;
  293. } else if (parentMatchEnd) {
  294. --keyLength;
  295. }
  296. if (keyLength > longKeyLength) {
  297. rulesList = (List) this.cache.get(key);
  298. longKey = key;
  299. longKeyLength = keyLength;
  300. }
  301. }
  302. }
  303. }
  304. }
  305. }
  306. // '*' works in practice as a default matching
  307. // (this is because anything is a deeper match!)
  308. if (rulesList == null) {
  309. rulesList = (List) this.cache.get("*");
  310. }
  311. // if we've matched a basic pattern, then add to the universal list
  312. if (rulesList != null) {
  313. universalList.addAll(rulesList);
  314. }
  315. // don't filter if namespace is null
  316. if (namespace != null) {
  317. // remove invalid namespaces
  318. Iterator it = universalList.iterator();
  319. while (it.hasNext()) {
  320. Rule rule = (Rule) it.next();
  321. String ns_uri = rule.getNamespaceURI();
  322. if (ns_uri != null && !ns_uri.equals(namespace)) {
  323. it.remove();
  324. }
  325. }
  326. }
  327. // need to make sure that the collection is sort in the order
  328. // of addition. We use a custom comparator for this
  329. Collections.sort(
  330. universalList,
  331. new Comparator() {
  332. public int compare(Object o1, Object o2) throws ClassCastException {
  333. // Get the entry order from the map
  334. Integer i1 = (Integer) order.get(o1);
  335. Integer i2 = (Integer) order.get(o2);
  336. // and use that to perform the comparison
  337. if (i1 == null) {
  338. if (i2 == null) {
  339. return 0;
  340. } else {
  341. return -1;
  342. }
  343. } else if (i2 == null) {
  344. return 1;
  345. }
  346. return (i1.intValue() - i2.intValue());
  347. }
  348. });
  349. return universalList;
  350. }
  351. /**
  352. * Matching parent.
  353. */
  354. private boolean parentMatch(String key, String pattern, String parentPattern) {
  355. return parentPattern.endsWith(key.substring(1, key.length() - 2));
  356. }
  357. /**
  358. * Standard match.
  359. * Matches the end of the pattern to the key.
  360. */
  361. private boolean basicMatch(String key, String pattern) {
  362. return (pattern.equals(key.substring(2)) ||
  363. pattern.endsWith(key.substring(1)));
  364. }
  365. /**
  366. * Finds an exact ancester match for given pattern
  367. */
  368. private List findExactAncesterMatch(String parentPattern) {
  369. List matchingRules = null;
  370. int lastIndex = parentPattern.length();
  371. while (lastIndex-- > 0) {
  372. lastIndex = parentPattern.lastIndexOf('/', lastIndex);
  373. if (lastIndex > 0) {
  374. matchingRules = (List) this.cache.get(parentPattern.substring(0, lastIndex) + "/*");
  375. if (matchingRules != null) {
  376. return matchingRules;
  377. }
  378. }
  379. }
  380. return null;
  381. }
  382. }