1. /*
  2. * @(#)SimpleTextBoundary.java 1.24 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)SimpleTextBoundary.java 1.24 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc.
  14. * All Rights Reserved.
  15. *
  16. * The original version of this source code and documentation
  17. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  18. * subsidiary of IBM. These materials are provided under terms
  19. * of a License Agreement between Taligent and Sun. This technology
  20. * is protected by multiple US and International patents.
  21. *
  22. * This notice and attribution to Taligent may not be removed.
  23. * Taligent is a registered trademark of Taligent, Inc.
  24. *
  25. * Permission to use, copy, modify, and distribute this software
  26. * and its documentation for NON-COMMERCIAL purposes and without
  27. * fee is hereby granted provided that this copyright notice
  28. * appears in all copies. Please refer to the file "copyright.html"
  29. * for further important copyright and licensing information.
  30. *
  31. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  32. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  33. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  34. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  35. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  36. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  37. *
  38. */
  39. package java.text;
  40. import java.io.IOException;
  41. /**
  42. * SimpleTextBoundary is an implementation of the BreakIterator
  43. * protocol. SimpleTextBoundary uses a state machine to compute breaks.
  44. * There are currently several subclasses of SimpleTextBoundary that
  45. * compute breaks for sentences, words, lines, and characters. They are
  46. * accessible through static functions of SimpleTextBoundary.
  47. *
  48. * @see BreakIterator
  49. */
  50. final class SimpleTextBoundary extends BreakIterator
  51. {
  52. private transient int pos;
  53. private transient CharacterIterator text;
  54. private TextBoundaryData data;
  55. // internally, the not-a-Unicode value is used as a sentinel value meaning
  56. // "the end of the string" for the purposes of looking up an appropriate
  57. // state transition when you've run off the end of the string
  58. private static final char END_OF_STRING = '\uffff';
  59. /**
  60. * Create a SimpleTextBoundary using the specified tables. Currently,
  61. * the table format is private.
  62. * @param data data used for boundary determination
  63. */
  64. protected SimpleTextBoundary(TextBoundaryData data)
  65. {
  66. this.data = data;
  67. text = new StringCharacterIterator("");
  68. pos = text.getBeginIndex();
  69. }
  70. /**
  71. * Compares the equality of two SimpleTextBoundary objects.
  72. * @param obj the SimpleTextBoundary object to be compared with.
  73. * @return true if the given obj is the same as this
  74. * SimpleTextBoundary object; false otherwise.
  75. */
  76. public boolean equals(Object obj)
  77. {
  78. if (this == obj)
  79. return true;
  80. if (!(obj instanceof SimpleTextBoundary))
  81. return false;
  82. SimpleTextBoundary that = (SimpleTextBoundary) obj;
  83. // The data classes are final and sharable. Only the
  84. // class type needs to be compared.
  85. if (this.data.getClass() != that.data.getClass())
  86. return false;
  87. if (this.hashCode() != that.hashCode())
  88. return false;
  89. if (pos != that.pos)
  90. return false;
  91. if (!text.equals(that.text))
  92. return false;
  93. return true;
  94. }
  95. /**
  96. * Compute a hashcode for this enumeration
  97. * @return A hash code
  98. */
  99. public int hashCode()
  100. {
  101. return getClass().hashCode() ^ text.hashCode();
  102. }
  103. /**
  104. * Overrides Cloneable
  105. */
  106. public Object clone()
  107. {
  108. try {
  109. SimpleTextBoundary other = (SimpleTextBoundary) super.clone();
  110. other.text = (CharacterIterator) text.clone();
  111. // The data classes are final and sharable.
  112. // They don't need to be cloned.
  113. return other;
  114. } catch (InternalError e) {
  115. throw new InternalError();
  116. }
  117. }
  118. /**
  119. * Get the text being scanned by the enumeration
  120. * @return the text being scanned by the enumeration
  121. */
  122. public CharacterIterator getText()
  123. {
  124. return text;
  125. }
  126. /**
  127. * Set a new text string for enumeration. The position of the
  128. * enumerator is reset to first().
  129. * @param newText new text to scan.
  130. */
  131. public void setText(String newText)
  132. {
  133. text = new StringCharacterIterator(newText);
  134. pos = text.getBeginIndex();
  135. }
  136. /**
  137. * Set a new text to scan. The position is reset to first().
  138. * @param newText new text to scan.
  139. */
  140. public void setText(CharacterIterator newText)
  141. {
  142. text = newText;
  143. pos = text.getBeginIndex();
  144. }
  145. /**
  146. * Return the first boundary. The iterator's current position is set
  147. * to the first boundary.
  148. */
  149. public int first()
  150. {
  151. pos = text.getBeginIndex();
  152. return pos;
  153. }
  154. /**
  155. * Return the last boundary. The iterator's current position is set
  156. * to the last boundary.
  157. */
  158. public int last()
  159. {
  160. pos = text.getEndIndex();
  161. return pos;
  162. }
  163. /**
  164. * Return the nth boundary from the current boundary
  165. * @param index which boundary to return. A value of 0
  166. * does nothing.
  167. * @return the nth boundary from the current position.
  168. */
  169. public int next(int increment)
  170. {
  171. int result = current();
  172. if (increment < 0) {
  173. for (int i = increment; (i < 0) && (result != DONE); ++i) {
  174. result = previous();
  175. }
  176. }
  177. else {
  178. for(int i = increment; (i > 0) && (result != DONE); --i) {
  179. result = next();
  180. }
  181. }
  182. return result;
  183. }
  184. /**
  185. * Return the boundary preceding the last boundary
  186. */
  187. public int previous()
  188. {
  189. if (pos > text.getBeginIndex()) {
  190. int startBoundary = pos;
  191. pos = previousSafePosition(pos-1);
  192. int prev = pos;
  193. int next = next();
  194. while (next < startBoundary && next != DONE) {
  195. prev = next;
  196. next = next();
  197. }
  198. pos = prev;
  199. return pos;
  200. }
  201. else {
  202. return DONE;
  203. }
  204. }
  205. /**
  206. * Return the next text boundary
  207. * @return the character offset of the text boundary or DONE if all
  208. * boundaries have been returned.
  209. */
  210. public int next()
  211. {
  212. int result = pos;
  213. if (pos < text.getEndIndex()) {
  214. pos = nextPosition(pos);
  215. result = pos;
  216. }
  217. else {
  218. result = DONE;
  219. }
  220. return result;
  221. }
  222. /**
  223. * Return true if the specified position is a boundary position.
  224. * @param offset the offset to check.
  225. * @return True if "offset" is a boundary position.
  226. */
  227. public boolean isBoundary(int offset) {
  228. int begin = text.getBeginIndex();
  229. if (offset < begin || offset >= text.getEndIndex())
  230. throw new IllegalArgumentException(
  231. "isBoundary offset out of bounds");
  232. if (offset == begin)
  233. return true;
  234. else
  235. return following(offset - 1) == offset;
  236. }
  237. /**
  238. * Return the first boundary after the specified offset
  239. * @param offset the offset to start
  240. * @return int the first boundary after offset
  241. */
  242. public int following(int offset)
  243. {
  244. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  245. throw new IllegalArgumentException(
  246. "nextBoundaryAt offset out of bounds");
  247. pos = previousSafePosition(offset);
  248. int result;
  249. do {
  250. result = next();
  251. } while (result <= offset && result != DONE);
  252. return result;
  253. }
  254. /**
  255. * Return the last boundary preceding the specified offset
  256. * @param offset the offset to start
  257. * @return the last boundary before offset
  258. */
  259. public int preceding(int offset)
  260. {
  261. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  262. throw new IllegalArgumentException("preceding() offset out of bounds");
  263. if (offset == text.getBeginIndex())
  264. return BreakIterator.DONE;
  265. pos = previousSafePosition(offset);
  266. int curr = pos;
  267. int last;
  268. do {
  269. last = curr;
  270. curr = next();
  271. } while (curr < offset && curr != BreakIterator.DONE);
  272. pos = last;
  273. return last;
  274. }
  275. /**
  276. * Return the boundary last returned by previous or next
  277. * @return int the boundary last returned by previous or next
  278. */
  279. public int current()
  280. {
  281. return pos;
  282. }
  283. //.................................................
  284. //utility functions. These functions don't change the current position.
  285. private int previousSafePosition(int offset)
  286. {
  287. int result = text.getBeginIndex();
  288. int state = data.backward().initialState();
  289. if (offset == result)
  290. ++offset;
  291. for (char c = text.setIndex(offset - 1);
  292. c != CharacterIterator.DONE && !data.backward().isEndState(state);
  293. c = text.previous()) {
  294. state = data.backward().get(state, mappedChar(c));
  295. if (data.backward().isMarkState(state)) {
  296. result = text.getIndex();
  297. }
  298. }
  299. return result;
  300. }
  301. private int nextPosition(int offset)
  302. {
  303. int getEndIndex = text.getEndIndex();
  304. int state = data.forward().initialState();
  305. for (char c = text.setIndex(offset);
  306. c != CharacterIterator.DONE && !data.forward().isEndState(state);
  307. c = text.next()) {
  308. state = data.forward().get(state, mappedChar(c));
  309. if (data.forward().isMarkState(state)) {
  310. getEndIndex = text.getIndex();
  311. }
  312. }
  313. if (data.forward().isEndState(state))
  314. return getEndIndex;
  315. else {
  316. state = data.forward().get(state, mappedChar(END_OF_STRING));
  317. if (data.forward().isMarkState(state))
  318. return text.getEndIndex();
  319. else
  320. return getEndIndex;
  321. }
  322. }
  323. protected int mappedChar(char c)
  324. {
  325. return data.map().mappedChar(c);
  326. }
  327. }