1. /* ====================================================================
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution, if
  20. * any, must include the following acknowledgement:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgement may appear in the software itself,
  24. * if and wherever such third-party acknowledgements normally appear.
  25. *
  26. * 4. The names "The Jakarta Project", "Commons", and "Apache Software
  27. * Foundation" must not be used to endorse or promote products derived
  28. * from this software without prior written permission. For written
  29. * permission, please contact apache@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache"
  32. * nor may "Apache" appear in their names without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation. For more
  51. * information on the Apache Software Foundation, please see
  52. * <http://www.apache.org/>.
  53. */
  54. package org.apache.commons.lang;
  55. import java.io.Serializable;
  56. import java.util.HashMap;
  57. import java.util.HashSet;
  58. import java.util.Iterator;
  59. import java.util.Map;
  60. import java.util.Set;
  61. /**
  62. * <p>A set of characters.</p>
  63. *
  64. * <p>Instances are immutable, but instances of subclasses may not be.</p>
  65. *
  66. * @author Henri Yandell
  67. * @author Stephen Colebourne
  68. * @author Phil Steitz
  69. * @author Pete Gieser
  70. * @author Gary Gregory
  71. * @since 1.0
  72. * @version $Id: CharSet.java,v 1.20 2003/08/22 17:25:33 ggregory Exp $
  73. */
  74. public class CharSet implements Serializable {
  75. /** Serialization lock, Lang version 2.0. */
  76. private static final long serialVersionUID = 5947847346149275958L;
  77. /**
  78. * A CharSet defining no characters.
  79. * @since 2.0
  80. */
  81. public static final CharSet EMPTY = new CharSet((String) null);
  82. /**
  83. * A CharSet defining ASCII alphabetic characters "a-zA-Z".
  84. * @since 2.0
  85. */
  86. public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
  87. /**
  88. * A CharSet defining ASCII alphabetic characters "a-z".
  89. * @since 2.0
  90. */
  91. public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
  92. /**
  93. * A CharSet defining ASCII alphabetic characters "A-Z".
  94. * @since 2.0
  95. */
  96. public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
  97. /**
  98. * A CharSet defining ASCII alphabetic characters "0-9".
  99. * @since 2.0
  100. */
  101. public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
  102. /**
  103. * A Map of the common cases used in the factory.
  104. * Subclasses can add more common patterns if desired.
  105. * @since 2.0
  106. */
  107. protected static final Map COMMON = new HashMap();
  108. static {
  109. COMMON.put(null, EMPTY);
  110. COMMON.put("", EMPTY);
  111. COMMON.put("a-zA-Z", ASCII_ALPHA);
  112. COMMON.put("A-Za-z", ASCII_ALPHA);
  113. COMMON.put("a-z", ASCII_ALPHA_LOWER);
  114. COMMON.put("A-Z", ASCII_ALPHA_UPPER);
  115. COMMON.put("0-9", ASCII_NUMERIC);
  116. }
  117. /** The set of CharRange objects. */
  118. private Set set = new HashSet();
  119. //-----------------------------------------------------------------------
  120. /**
  121. * <p>Factory method to create a new CharSet using a special syntax.</p>
  122. *
  123. * <ul>
  124. * <li><code>null</code> or empty string ("")
  125. * - set containing no characters</li>
  126. * <li>Single character, such as "a"
  127. * - set containing just that character</li>
  128. * <li>Multi character, such as "a-e"
  129. * - set containing characters from one character to the other</li>
  130. * <li>Negated, such as "^a" or "^a-e"
  131. * - set containing all characters except those defined</li>
  132. * <li>Combinations, such as "abe-g"
  133. * - set containing all the characters from the individual sets</li>
  134. * </ul>
  135. *
  136. * <p>The matching order is:</p>
  137. * <ol>
  138. * <li>Negated multi character range, such as "^a-e"
  139. * <li>Ordinary multi character range, such as "a-e"
  140. * <li>Negated single character, such as "^a"
  141. * <li>Ordinary single character, such as "a"
  142. * </ol>
  143. * <p>Matching works left to right. Once a match is found the
  144. * search starts again from the next character.</p>
  145. *
  146. * <p>If the same range is defined twice using the same syntax, only
  147. * one range will be kept.
  148. * Thus, "a-ca-c" creates only one range of "a-c".</p>
  149. *
  150. * <p>If the start and end of a range are in the wrong order,
  151. * they are reversed. Thus "a-e" is the same as "e-a".
  152. * As a result, "a-ee-a" would create only one range,
  153. * as the "a-e" and "e-a" are the same.</p>
  154. *
  155. * <p>The set of characters represented is the union of the specified ranges.</p>
  156. *
  157. * <p>All CharSet objects returned by this method will be immutable.</p>
  158. *
  159. * @param setStr the String describing the set, may be null
  160. * @return a CharSet instance
  161. * @since 2.0
  162. */
  163. public static CharSet getInstance(String setStr) {
  164. Object set = COMMON.get(setStr);
  165. if (set != null) {
  166. return (CharSet) set;
  167. }
  168. return new CharSet(setStr);
  169. }
  170. //-----------------------------------------------------------------------
  171. /**
  172. * <p>Constructs a new CharSet using the set syntax.</p>
  173. *
  174. * @param setStr the String describing the set, may be null
  175. * @since 2.0
  176. */
  177. protected CharSet(String setStr) {
  178. super();
  179. add(setStr);
  180. }
  181. /**
  182. * <p>Constructs a new CharSet using the set syntax.
  183. * Each string is merged in with the set.</p>
  184. *
  185. * @param set Strings to merge into the initial set
  186. * @throws NullPointerException if set is <code>null</code>
  187. */
  188. protected CharSet(String[] set) {
  189. super();
  190. int sz = set.length;
  191. for (int i = 0; i < sz; i++) {
  192. add(set[i]);
  193. }
  194. }
  195. //-----------------------------------------------------------------------
  196. /**
  197. * <p>Add a set definition string to the <code>CharSet</code>.</p>
  198. *
  199. * @param str set definition string
  200. */
  201. protected void add(String str) {
  202. if (str == null) {
  203. return;
  204. }
  205. int len = str.length();
  206. int pos = 0;
  207. while (pos < len) {
  208. int remainder = (len - pos);
  209. if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
  210. // negated range
  211. set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
  212. pos += 4;
  213. } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
  214. // range
  215. set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
  216. pos += 3;
  217. } else if (remainder >= 2 && str.charAt(pos) == '^') {
  218. // negated char
  219. set.add(new CharRange(str.charAt(pos + 1), true));
  220. pos += 2;
  221. } else {
  222. // char
  223. set.add(new CharRange(str.charAt(pos)));
  224. pos += 1;
  225. }
  226. }
  227. }
  228. //-----------------------------------------------------------------------
  229. /**
  230. * <p>Gets the internal set as an array of CharRange objects.</p>
  231. *
  232. * @return an array of immutable CharRange objects
  233. * @since 2.0
  234. */
  235. public CharRange[] getCharRanges() {
  236. return (CharRange[]) set.toArray(new CharRange[set.size()]);
  237. }
  238. //-----------------------------------------------------------------------
  239. /**
  240. * <p>Does the <code>CharSet</code> contain the specified
  241. * character <code>ch</code>.</p>
  242. *
  243. * @param ch the character to check for
  244. * @return <code>true</code> if the set contains the characters
  245. */
  246. public boolean contains(char ch) {
  247. for (Iterator it = set.iterator(); it.hasNext();) {
  248. CharRange range = (CharRange) it.next();
  249. if (range.contains(ch)) {
  250. return true;
  251. }
  252. }
  253. return false;
  254. }
  255. // Basics
  256. //-----------------------------------------------------------------------
  257. /**
  258. * <p>Compares two CharSet objects, returning true if they represent
  259. * exactly the same set of characters defined in the same way.</p>
  260. *
  261. * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
  262. * equal according to this method.</p>
  263. *
  264. * @param obj the object to compare to
  265. * @return true if equal
  266. * @since 2.0
  267. */
  268. public boolean equals(Object obj) {
  269. if (obj == this) {
  270. return true;
  271. }
  272. if (obj instanceof CharSet == false) {
  273. return false;
  274. }
  275. CharSet other = (CharSet) obj;
  276. return (set.equals(other.set));
  277. }
  278. /**
  279. * <p>Gets a hashCode compatable with the equals method.</p>
  280. *
  281. * @return a suitable hashCode
  282. * @since 2.0
  283. */
  284. public int hashCode() {
  285. return 89 + set.hashCode();
  286. }
  287. /**
  288. * <p>Gets a string representation of the set.</p>
  289. *
  290. * @return string representation of the set
  291. */
  292. public String toString() {
  293. return set.toString();
  294. }
  295. }