1. /*
  2. * @(#)StringTokenizer.java 1.34 04/05/05
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.util;
  8. import java.lang.*;
  9. /**
  10. * The string tokenizer class allows an application to break a
  11. * string into tokens. The tokenization method is much simpler than
  12. * the one used by the <code>StreamTokenizer</code> class. The
  13. * <code>StringTokenizer</code> methods do not distinguish among
  14. * identifiers, numbers, and quoted strings, nor do they recognize
  15. * and skip comments.
  16. * <p>
  17. * The set of delimiters (the characters that separate tokens) may
  18. * be specified either at creation time or on a per-token basis.
  19. * <p>
  20. * An instance of <code>StringTokenizer</code> behaves in one of two
  21. * ways, depending on whether it was created with the
  22. * <code>returnDelims</code> flag having the value <code>true</code>
  23. * or <code>false</code>:
  24. * <ul>
  25. * <li>If the flag is <code>false</code>, delimiter characters serve to
  26. * separate tokens. A token is a maximal sequence of consecutive
  27. * characters that are not delimiters.
  28. * <li>If the flag is <code>true</code>, delimiter characters are themselves
  29. * considered to be tokens. A token is thus either one delimiter
  30. * character, or a maximal sequence of consecutive characters that are
  31. * not delimiters.
  32. * </ul><p>
  33. * A <tt>StringTokenizer</tt> object internally maintains a current
  34. * position within the string to be tokenized. Some operations advance this
  35. * current position past the characters processed.<p>
  36. * A token is returned by taking a substring of the string that was used to
  37. * create the <tt>StringTokenizer</tt> object.
  38. * <p>
  39. * The following is one example of the use of the tokenizer. The code:
  40. * <blockquote><pre>
  41. * StringTokenizer st = new StringTokenizer("this is a test");
  42. * while (st.hasMoreTokens()) {
  43. * System.out.println(st.nextToken());
  44. * }
  45. * </pre></blockquote>
  46. * <p>
  47. * prints the following output:
  48. * <blockquote><pre>
  49. * this
  50. * is
  51. * a
  52. * test
  53. * </pre></blockquote>
  54. *
  55. * <p>
  56. * <tt>StringTokenizer</tt> is a legacy class that is retained for
  57. * compatibility reasons although its use is discouraged in new code. It is
  58. * recommended that anyone seeking this functionality use the <tt>split</tt>
  59. * method of <tt>String</tt> or the java.util.regex package instead.
  60. * <p>
  61. * The following example illustrates how the <tt>String.split</tt>
  62. * method can be used to break up a string into its basic tokens:
  63. * <blockquote><pre>
  64. * String[] result = "this is a test".split("\\s");
  65. * for (int x=0; x<result.length; x++)
  66. * System.out.println(result[x]);
  67. * </pre></blockquote>
  68. * <p>
  69. * prints the following output:
  70. * <blockquote><pre>
  71. * this
  72. * is
  73. * a
  74. * test
  75. * </pre></blockquote>
  76. *
  77. * @author unascribed
  78. * @version 1.34, 05/05/04
  79. * @see java.io.StreamTokenizer
  80. * @since JDK1.0
  81. */
  82. public
  83. class StringTokenizer implements Enumeration<Object> {
  84. private int currentPosition;
  85. private int newPosition;
  86. private int maxPosition;
  87. private String str;
  88. private String delimiters;
  89. private boolean retDelims;
  90. private boolean delimsChanged;
  91. /**
  92. * maxDelimCodePoint stores the value of the delimiter character with the
  93. * highest value. It is used to optimize the detection of delimiter
  94. * characters.
  95. *
  96. * It is unlikely to provide any optimization benefit in the
  97. * hasSurrogates case because most string characters will be
  98. * smaller than the limit, but we keep it so that the two code
  99. * paths remain similar.
  100. */
  101. private int maxDelimCodePoint;
  102. /**
  103. * If delimiters include any surrogates (including surrogate
  104. * pairs), hasSurrogates is true and the tokenizer uses the
  105. * different code path. This is because String.indexOf(int)
  106. * doesn't handle unpaired surrogates as a single character.
  107. */
  108. private boolean hasSurrogates = false;
  109. /**
  110. * When hasSurrogates is true, delimiters are converted to code
  111. * points and isDelimiter(int) is used to determine if the given
  112. * codepoint is a delimiter.
  113. */
  114. private int[] delimiterCodePoints;
  115. /**
  116. * Set maxDelimCodePoint to the highest char in the delimiter set.
  117. */
  118. private void setMaxDelimCodePoint() {
  119. if (delimiters == null) {
  120. maxDelimCodePoint = 0;
  121. return;
  122. }
  123. int m = 0;
  124. int c;
  125. int count = 0;
  126. for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
  127. c = delimiters.charAt(i);
  128. if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
  129. c = delimiters.codePointAt(i);
  130. hasSurrogates = true;
  131. }
  132. if (m < c)
  133. m = c;
  134. count++;
  135. }
  136. maxDelimCodePoint = m;
  137. if (hasSurrogates) {
  138. delimiterCodePoints = new int[count];
  139. for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
  140. c = delimiters.codePointAt(j);
  141. delimiterCodePoints[i] = c;
  142. }
  143. }
  144. }
  145. /**
  146. * Constructs a string tokenizer for the specified string. All
  147. * characters in the <code>delim</code> argument are the delimiters
  148. * for separating tokens.
  149. * <p>
  150. * If the <code>returnDelims</code> flag is <code>true</code>, then
  151. * the delimiter characters are also returned as tokens. Each
  152. * delimiter is returned as a string of length one. If the flag is
  153. * <code>false</code>, the delimiter characters are skipped and only
  154. * serve as separators between tokens.
  155. * <p>
  156. * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
  157. * not throw an exception. However, trying to invoke other methods on the
  158. * resulting <tt>StringTokenizer</tt> may result in a
  159. * <tt>NullPointerException</tt>.
  160. *
  161. * @param str a string to be parsed.
  162. * @param delim the delimiters.
  163. * @param returnDelims flag indicating whether to return the delimiters
  164. * as tokens.
  165. * @exception NullPointerException if str is <CODE>null</CODE>
  166. */
  167. public StringTokenizer(String str, String delim, boolean returnDelims) {
  168. currentPosition = 0;
  169. newPosition = -1;
  170. delimsChanged = false;
  171. this.str = str;
  172. maxPosition = str.length();
  173. delimiters = delim;
  174. retDelims = returnDelims;
  175. setMaxDelimCodePoint();
  176. }
  177. /**
  178. * Constructs a string tokenizer for the specified string. The
  179. * characters in the <code>delim</code> argument are the delimiters
  180. * for separating tokens. Delimiter characters themselves will not
  181. * be treated as tokens.
  182. * <p>
  183. * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
  184. * not throw an exception. However, trying to invoke other methods on the
  185. * resulting <tt>StringTokenizer</tt> may result in a
  186. * <tt>NullPointerException</tt>.
  187. *
  188. * @param str a string to be parsed.
  189. * @param delim the delimiters.
  190. * @exception NullPointerException if str is <CODE>null</CODE>
  191. */
  192. public StringTokenizer(String str, String delim) {
  193. this(str, delim, false);
  194. }
  195. /**
  196. * Constructs a string tokenizer for the specified string. The
  197. * tokenizer uses the default delimiter set, which is
  198. * <code>" \t\n\r\f"</code>: the space character,
  199. * the tab character, the newline character, the carriage-return character,
  200. * and the form-feed character. Delimiter characters themselves will
  201. * not be treated as tokens.
  202. *
  203. * @param str a string to be parsed.
  204. * @exception NullPointerException if str is <CODE>null</CODE>
  205. */
  206. public StringTokenizer(String str) {
  207. this(str, " \t\n\r\f", false);
  208. }
  209. /**
  210. * Skips delimiters starting from the specified position. If retDelims
  211. * is false, returns the index of the first non-delimiter character at or
  212. * after startPos. If retDelims is true, startPos is returned.
  213. */
  214. private int skipDelimiters(int startPos) {
  215. if (delimiters == null)
  216. throw new NullPointerException();
  217. int position = startPos;
  218. while (!retDelims && position < maxPosition) {
  219. if (!hasSurrogates) {
  220. char c = str.charAt(position);
  221. if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0))
  222. break;
  223. position++;
  224. } else {
  225. int c = str.codePointAt(position);
  226. if ((c > maxDelimCodePoint) || !isDelimiter(c)) {
  227. break;
  228. }
  229. position += Character.charCount(c);
  230. }
  231. }
  232. return position;
  233. }
  234. /**
  235. * Skips ahead from startPos and returns the index of the next delimiter
  236. * character encountered, or maxPosition if no such delimiter is found.
  237. */
  238. private int scanToken(int startPos) {
  239. int position = startPos;
  240. while (position < maxPosition) {
  241. if (!hasSurrogates) {
  242. char c = str.charAt(position);
  243. if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
  244. break;
  245. position++;
  246. } else {
  247. int c = str.codePointAt(position);
  248. if ((c <= maxDelimCodePoint) && isDelimiter(c))
  249. break;
  250. position += Character.charCount(c);
  251. }
  252. }
  253. if (retDelims && (startPos == position)) {
  254. if (!hasSurrogates) {
  255. char c = str.charAt(position);
  256. if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
  257. position++;
  258. } else {
  259. int c = str.codePointAt(position);
  260. if ((c <= maxDelimCodePoint) && isDelimiter(c))
  261. position += Character.charCount(c);
  262. }
  263. }
  264. return position;
  265. }
  266. private boolean isDelimiter(int codePoint) {
  267. for (int i = 0; i < delimiterCodePoints.length; i++) {
  268. if (delimiterCodePoints[i] == codePoint) {
  269. return true;
  270. }
  271. }
  272. return false;
  273. }
  274. /**
  275. * Tests if there are more tokens available from this tokenizer's string.
  276. * If this method returns <tt>true</tt>, then a subsequent call to
  277. * <tt>nextToken</tt> with no argument will successfully return a token.
  278. *
  279. * @return <code>true</code> if and only if there is at least one token
  280. * in the string after the current position; <code>false</code>
  281. * otherwise.
  282. */
  283. public boolean hasMoreTokens() {
  284. /*
  285. * Temporarily store this position and use it in the following
  286. * nextToken() method only if the delimiters haven't been changed in
  287. * that nextToken() invocation.
  288. */
  289. newPosition = skipDelimiters(currentPosition);
  290. return (newPosition < maxPosition);
  291. }
  292. /**
  293. * Returns the next token from this string tokenizer.
  294. *
  295. * @return the next token from this string tokenizer.
  296. * @exception NoSuchElementException if there are no more tokens in this
  297. * tokenizer's string.
  298. */
  299. public String nextToken() {
  300. /*
  301. * If next position already computed in hasMoreElements() and
  302. * delimiters have changed between the computation and this invocation,
  303. * then use the computed value.
  304. */
  305. currentPosition = (newPosition >= 0 && !delimsChanged) ?
  306. newPosition : skipDelimiters(currentPosition);
  307. /* Reset these anyway */
  308. delimsChanged = false;
  309. newPosition = -1;
  310. if (currentPosition >= maxPosition)
  311. throw new NoSuchElementException();
  312. int start = currentPosition;
  313. currentPosition = scanToken(currentPosition);
  314. return str.substring(start, currentPosition);
  315. }
  316. /**
  317. * Returns the next token in this string tokenizer's string. First,
  318. * the set of characters considered to be delimiters by this
  319. * <tt>StringTokenizer</tt> object is changed to be the characters in
  320. * the string <tt>delim</tt>. Then the next token in the string
  321. * after the current position is returned. The current position is
  322. * advanced beyond the recognized token. The new delimiter set
  323. * remains the default after this call.
  324. *
  325. * @param delim the new delimiters.
  326. * @return the next token, after switching to the new delimiter set.
  327. * @exception NoSuchElementException if there are no more tokens in this
  328. * tokenizer's string.
  329. * @exception NullPointerException if delim is <CODE>null</CODE>
  330. */
  331. public String nextToken(String delim) {
  332. delimiters = delim;
  333. /* delimiter string specified, so set the appropriate flag. */
  334. delimsChanged = true;
  335. setMaxDelimCodePoint();
  336. return nextToken();
  337. }
  338. /**
  339. * Returns the same value as the <code>hasMoreTokens</code>
  340. * method. It exists so that this class can implement the
  341. * <code>Enumeration</code> interface.
  342. *
  343. * @return <code>true</code> if there are more tokens;
  344. * <code>false</code> otherwise.
  345. * @see java.util.Enumeration
  346. * @see java.util.StringTokenizer#hasMoreTokens()
  347. */
  348. public boolean hasMoreElements() {
  349. return hasMoreTokens();
  350. }
  351. /**
  352. * Returns the same value as the <code>nextToken</code> method,
  353. * except that its declared return value is <code>Object</code> rather than
  354. * <code>String</code>. It exists so that this class can implement the
  355. * <code>Enumeration</code> interface.
  356. *
  357. * @return the next token in the string.
  358. * @exception NoSuchElementException if there are no more tokens in this
  359. * tokenizer's string.
  360. * @see java.util.Enumeration
  361. * @see java.util.StringTokenizer#nextToken()
  362. */
  363. public Object nextElement() {
  364. return nextToken();
  365. }
  366. /**
  367. * Calculates the number of times that this tokenizer's
  368. * <code>nextToken</code> method can be called before it generates an
  369. * exception. The current position is not advanced.
  370. *
  371. * @return the number of tokens remaining in the string using the current
  372. * delimiter set.
  373. * @see java.util.StringTokenizer#nextToken()
  374. */
  375. public int countTokens() {
  376. int count = 0;
  377. int currpos = currentPosition;
  378. while (currpos < maxPosition) {
  379. currpos = skipDelimiters(currpos);
  380. if (currpos >= maxPosition)
  381. break;
  382. currpos = scanToken(currpos);
  383. count++;
  384. }
  385. return count;
  386. }
  387. }