1. /* ====================================================================
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution, if
  20. * any, must include the following acknowledgement:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgement may appear in the software itself,
  24. * if and wherever such third-party acknowledgements normally appear.
  25. *
  26. * 4. The names "The Jakarta Project", "Commons", and "Apache Software
  27. * Foundation" must not be used to endorse or promote products derived
  28. * from this software without prior written permission. For written
  29. * permission, please contact apache@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache"
  32. * nor may "Apache" appear in their names without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation. For more
  51. * information on the Apache Software Foundation, please see
  52. * <http://www.apache.org/>.
  53. */
  54. package org.apache.commons.lang;
  55. /**
  56. * <p>Operations on <code>CharSet</code>s.</p>
  57. *
  58. * <p>This class handles <code>null</code> input gracefully.
  59. * An exception will not be thrown for a <code>null</code> input.
  60. * Each method documents its behaviour in more detail.</p>
  61. *
  62. * @see CharSet
  63. * @author <a href="bayard@generationjava.com">Henri Yandell</a>
  64. * @author Stephen Colebourne
  65. * @author Phil Steitz
  66. * @author Gary Gregory
  67. * @since 1.0
  68. * @version $Id: CharSetUtils.java,v 1.26 2003/08/22 17:25:33 ggregory Exp $
  69. */
  70. public class CharSetUtils {
  71. /**
  72. * <p>CharSetUtils instances should NOT be constructed in standard programming.
  73. * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
  74. *
  75. * <p>This constructor is public to permit tools that require a JavaBean instance
  76. * to operate.</p>
  77. */
  78. public CharSetUtils() {
  79. }
  80. // Factory
  81. //-----------------------------------------------------------------------
  82. /**
  83. * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
  84. * set logic to be performed.</p>
  85. * <p>The syntax is:</p>
  86. * <ul>
  87. * <li>"aeio" which implies 'a','e',..</li>
  88. * <li>"^e" implies not e.</li>
  89. * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
  90. * </ul>
  91. *
  92. * <pre>
  93. * CharSetUtils.evaluateSet(null) = null
  94. * CharSetUtils.evaluateSet([]) = CharSet matching nothing
  95. * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
  96. * </pre>
  97. *
  98. * @param set the set, may be null
  99. * @return a CharSet instance, <code>null</code> if null input
  100. * @deprecated Use {@link CharSet#getInstance(String)}.
  101. * Method will be removed in Commons Lang 3.0.
  102. */
  103. public static CharSet evaluateSet(String[] set) {
  104. if (set == null) {
  105. return null;
  106. }
  107. return new CharSet(set);
  108. }
  109. // Squeeze
  110. //-----------------------------------------------------------------------
  111. /**
  112. * <p>Squeezes any repititions of a character that is mentioned in the
  113. * supplied set.</p>
  114. *
  115. * <pre>
  116. * CharSetUtils.squeeze(null, *) = null
  117. * CharSetUtils.squeeze("", *) = ""
  118. * CharSetUtils.squeeze(*, null) = *
  119. * CharSetUtils.squeeze(*, "") = *
  120. * CharSetUtils.squeeze("hello", "k-p") = "helo"
  121. * CharSetUtils.squeeze("hello", "a-e") = "hello"
  122. * </pre>
  123. *
  124. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  125. * @param str the string to squeeze, may be null
  126. * @param set the character set to use for manipulation, may be null
  127. * @return modified String, <code>null</code> if null string input
  128. */
  129. public static String squeeze(String str, String set) {
  130. if (str == null || str.length() == 0 || set == null || set.length() == 0) {
  131. return str;
  132. }
  133. String[] strs = new String[1];
  134. strs[0] = set;
  135. return squeeze(str, strs);
  136. }
  137. /**
  138. * <p>Squeezes any repititions of a character that is mentioned in the
  139. * supplied set.</p>
  140. *
  141. * <p>An example is:</p>
  142. * <ul>
  143. * <li>squeeze("hello", {"el"}) => "helo"</li>
  144. * </ul>
  145. *
  146. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  147. * @param str the string to squeeze, may be null
  148. * @param set the character set to use for manipulation, may be null
  149. * @return modified String, <code>null</code> if null string input
  150. */
  151. public static String squeeze(String str, String[] set) {
  152. if (str == null || str.length() == 0 || set == null || set.length == 0) {
  153. return str;
  154. }
  155. CharSet chars = evaluateSet(set);
  156. StringBuffer buffer = new StringBuffer(str.length());
  157. char[] chrs = str.toCharArray();
  158. int sz = chrs.length;
  159. char lastChar = ' ';
  160. char ch = ' ';
  161. for (int i = 0; i < sz; i++) {
  162. ch = chrs[i];
  163. if (chars.contains(ch)) {
  164. if ((ch == lastChar) && (i != 0)) {
  165. continue;
  166. }
  167. }
  168. buffer.append(ch);
  169. lastChar = ch;
  170. }
  171. return buffer.toString();
  172. }
  173. // Count
  174. //-----------------------------------------------------------------------
  175. /**
  176. * <p>Takes an argument in set-syntax, see evaluateSet,
  177. * and returns the number of characters present in the specified string.</p>
  178. *
  179. * <pre>
  180. * CharSetUtils.count(null, *) = 0
  181. * CharSetUtils.count("", *) = 0
  182. * CharSetUtils.count(*, null) = 0
  183. * CharSetUtils.count(*, "") = 0
  184. * CharSetUtils.count("hello", "k-p") = 3
  185. * CharSetUtils.count("hello", "a-e") = 1
  186. * </pre>
  187. *
  188. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  189. * @param str String to count characters in, may be null
  190. * @param set String set of characters to count, may be null
  191. * @return character count, zero if null string input
  192. */
  193. public static int count(String str, String set) {
  194. if (str == null || str.length() == 0 || set == null || set.length() == 0) {
  195. return 0;
  196. }
  197. String[] strs = new String[1];
  198. strs[0] = set;
  199. return count(str, strs);
  200. }
  201. /**
  202. * <p>Takes an argument in set-syntax, see evaluateSet,
  203. * and returns the number of characters present in the specified string.</p>
  204. *
  205. * <p>An example would be:</p>
  206. * <ul>
  207. * <li>count("hello", {"c-f", "o"}) returns 2.</li>
  208. * </ul>
  209. *
  210. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  211. * @param str String to count characters in, may be null
  212. * @param set String[] set of characters to count, may be null
  213. * @return character count, zero if null string input
  214. */
  215. public static int count(String str, String[] set) {
  216. if (str == null || str.length() == 0 || set == null || set.length == 0) {
  217. return 0;
  218. }
  219. CharSet chars = evaluateSet(set);
  220. int count = 0;
  221. char[] chrs = str.toCharArray();
  222. int sz = chrs.length;
  223. for(int i=0; i<sz; i++) {
  224. if(chars.contains(chrs[i])) {
  225. count++;
  226. }
  227. }
  228. return count;
  229. }
  230. // Keep
  231. //-----------------------------------------------------------------------
  232. /**
  233. * <p>Takes an argument in set-syntax, see evaluateSet,
  234. * and keeps any of characters present in the specified string.</p>
  235. *
  236. * <pre>
  237. * CharSetUtils.keep(null, *) = null
  238. * CharSetUtils.keep("", *) = ""
  239. * CharSetUtils.keep(*, null) = ""
  240. * CharSetUtils.keep(*, "") = ""
  241. * CharSetUtils.keep("hello", "hl") = "hll"
  242. * CharSetUtils.keep("hello", "le") = "ell"
  243. * </pre>
  244. *
  245. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  246. * @param str String to keep characters from, may be null
  247. * @param set String set of characters to keep, may be null
  248. * @return modified String, <code>null</code> if null string input
  249. * @since 2.0
  250. */
  251. public static String keep(String str, String set) {
  252. if (str == null) {
  253. return null;
  254. }
  255. if (str.length() == 0 || set == null || set.length() == 0) {
  256. return "";
  257. }
  258. String[] strs = new String[1];
  259. strs[0] = set;
  260. return keep(str, strs);
  261. }
  262. /**
  263. * <p>Takes an argument in set-syntax, see evaluateSet,
  264. * and keeps any of characters present in the specified string.</p>
  265. *
  266. * <p>An example would be:</p>
  267. * <ul>
  268. * <li>keep("hello", {"c-f", "o"})
  269. * returns "hll"</li>
  270. * </ul>
  271. *
  272. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  273. * @param str String to keep characters from, may be null
  274. * @param set String[] set of characters to keep, may be null
  275. * @return modified String, <code>null</code> if null string input
  276. * @since 2.0
  277. */
  278. public static String keep(String str, String[] set) {
  279. if (str == null) {
  280. return null;
  281. }
  282. if (str.length() == 0 || set == null || set.length == 0) {
  283. return "";
  284. }
  285. return modify(str, set, true);
  286. }
  287. // Delete
  288. //-----------------------------------------------------------------------
  289. /**
  290. * <p>Takes an argument in set-syntax, see evaluateSet,
  291. * and deletes any of characters present in the specified string.</p>
  292. *
  293. * <pre>
  294. * CharSetUtils.delete(null, *) = null
  295. * CharSetUtils.delete("", *) = ""
  296. * CharSetUtils.delete(*, null) = *
  297. * CharSetUtils.delete(*, "") = *
  298. * CharSetUtils.delete("hello", "hl") = "hll"
  299. * CharSetUtils.delete("hello", "le") = "ell"
  300. * </pre>
  301. *
  302. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  303. * @param str String to delete characters from, may be null
  304. * @param set String set of characters to delete, may be null
  305. * @return modified String, <code>null</code> if null string input
  306. */
  307. public static String delete(String str, String set) {
  308. if (str == null || str.length() == 0 || set == null || set.length() == 0) {
  309. return str;
  310. }
  311. String[] strs = new String[1];
  312. strs[0] = set;
  313. return delete(str, strs);
  314. }
  315. /**
  316. * <p>Takes an argument in set-syntax, see evaluateSet,
  317. * and deletes any of characters present in the specified string.</p>
  318. *
  319. * <p>An example would be:</p>
  320. * <ul>
  321. * <li>delete("hello", {"c-f", "o"}) returns
  322. * "hll"</li>
  323. * </ul>
  324. *
  325. * @see #evaluateSet(java.lang.String[]) for set-syntax.
  326. * @param str String to delete characters from, may be null
  327. * @param set String[] set of characters to delete, may be null
  328. * @return modified String, <code>null</code> if null string input
  329. */
  330. public static String delete(String str, String[] set) {
  331. if (str == null || str.length() == 0 || set == null || set.length == 0) {
  332. return str;
  333. }
  334. return modify(str, set, false);
  335. }
  336. //-----------------------------------------------------------------------
  337. // Implementation of delete and keep
  338. private static String modify(String str, String[] set, boolean expect) {
  339. CharSet chars = evaluateSet(set);
  340. StringBuffer buffer = new StringBuffer(str.length());
  341. char[] chrs = str.toCharArray();
  342. int sz = chrs.length;
  343. for(int i=0; i<sz; i++) {
  344. if(chars.contains(chrs[i]) == expect) {
  345. buffer.append(chrs[i]);
  346. }
  347. }
  348. return buffer.toString();
  349. }
  350. // Translate
  351. //-----------------------------------------------------------------------
  352. /**
  353. * <p>Translate characters in a String.
  354. * This is a multi character search and replace routine.</p>
  355. *
  356. * <p>An example is:</p>
  357. * <ul>
  358. * <li>translate("hello", "ho", "jy")
  359. * => jelly</li>
  360. * </ul>
  361. *
  362. * <p>If the length of characters to search for is greater than the
  363. * length of characters to replace, then the last character is
  364. * used.</p>
  365. *
  366. * <pre>
  367. * CharSetUtils.translate(null, *, *) = null
  368. * CharSetUtils.translate("", *, *) = ""
  369. * </pre>
  370. *
  371. * @param str String to replace characters in, may be null
  372. * @param searchChars a set of characters to search for, must not be null
  373. * @param replaceChars a set of characters to replace, must not be null or empty ("")
  374. * @return translated String, <code>null</code> if null string input
  375. * @throws NullPointerException if <code>with</code> or <code>repl</code>
  376. * is <code>null</code>
  377. * @throws ArrayIndexOutOfBoundsException if <code>with</code> is empty ("")
  378. * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
  379. * Method will be removed in Commons Lang 3.0.
  380. */
  381. public static String translate(String str, String searchChars, String replaceChars) {
  382. if (str == null || str.length() == 0) {
  383. return str;
  384. }
  385. StringBuffer buffer = new StringBuffer(str.length());
  386. char[] chrs = str.toCharArray();
  387. char[] withChrs = replaceChars.toCharArray();
  388. int sz = chrs.length;
  389. int withMax = replaceChars.length() - 1;
  390. for(int i=0; i<sz; i++) {
  391. int idx = searchChars.indexOf(chrs[i]);
  392. if(idx != -1) {
  393. if(idx > withMax) {
  394. idx = withMax;
  395. }
  396. buffer.append(withChrs[idx]);
  397. } else {
  398. buffer.append(chrs[i]);
  399. }
  400. }
  401. return buffer.toString();
  402. }
  403. }