1. /*
  2. * @(#)SimpleTextBoundary.java 1.26 03/01/23
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation
  12. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  13. * subsidiary of IBM. These materials are provided under terms
  14. * of a License Agreement between Taligent and Sun. This technology
  15. * is protected by multiple US and International patents.
  16. *
  17. * This notice and attribution to Taligent may not be removed.
  18. * Taligent is a registered trademark of Taligent, Inc.
  19. *
  20. */
  21. package java.text;
  22. import java.io.IOException;
  23. /**
  24. * SimpleTextBoundary is an implementation of the BreakIterator
  25. * protocol. SimpleTextBoundary uses a state machine to compute breaks.
  26. * There are currently several subclasses of SimpleTextBoundary that
  27. * compute breaks for sentences, words, lines, and characters. They are
  28. * accessable through static functions of SimpleTextBoundary.
  29. *
  30. * @see BreakIterator
  31. */
  32. final class SimpleTextBoundary extends BreakIterator
  33. {
  34. private transient int pos;
  35. private transient CharacterIterator text;
  36. private TextBoundaryData data;
  37. // internally, the not-a-Unicode value is used as a sentinel value meaning
  38. // "the end of the string" for the purposes of looking up an appropriate
  39. // state transition when you've run off the end of the string
  40. private static final char END_OF_STRING = '\uffff';
  41. /**
  42. * Create a SimpleTextBoundary using the specified tables. Currently,
  43. * the table format is private.
  44. * @param data data used for boundary determination
  45. */
  46. protected SimpleTextBoundary(TextBoundaryData data)
  47. {
  48. this.data = data;
  49. text = new StringCharacterIterator("");
  50. pos = text.getBeginIndex();
  51. }
  52. /**
  53. * Compares the equality of two SimpleTextBoundary objects.
  54. * @param obj the SimpleTextBoundary object to be compared with.
  55. * @return true if the given obj is the same as this
  56. * SimpleTextBoundary object; false otherwise.
  57. */
  58. public boolean equals(Object obj)
  59. {
  60. if (this == obj)
  61. return true;
  62. if (!(obj instanceof SimpleTextBoundary))
  63. return false;
  64. SimpleTextBoundary that = (SimpleTextBoundary) obj;
  65. // The data classes are final and sharable. Only the
  66. // class type needs to be compared.
  67. if (this.data.getClass() != that.data.getClass())
  68. return false;
  69. if (this.hashCode() != that.hashCode())
  70. return false;
  71. if (pos != that.pos)
  72. return false;
  73. if (!text.equals(that.text))
  74. return false;
  75. return true;
  76. }
  77. /**
  78. * Compute a hashcode for this enumeration
  79. * @return A hash code
  80. */
  81. public int hashCode()
  82. {
  83. return getClass().hashCode() ^ text.hashCode();
  84. }
  85. /**
  86. * Overrides Cloneable
  87. */
  88. public Object clone()
  89. {
  90. try {
  91. SimpleTextBoundary other = (SimpleTextBoundary) super.clone();
  92. other.text = (CharacterIterator) text.clone();
  93. // The data classes are final and sharable.
  94. // They don't need to be cloned.
  95. return other;
  96. } catch (InternalError e) {
  97. throw new InternalError();
  98. }
  99. }
  100. /**
  101. * Get the text being scanned by the enumeration
  102. * @return the text being scanned by the enumeration
  103. */
  104. public CharacterIterator getText()
  105. {
  106. return text;
  107. }
  108. /**
  109. * Set a new text string for enumeration. The position of the
  110. * enumerator is reset to first().
  111. * @param newText new text to scan.
  112. */
  113. public void setText(String newText)
  114. {
  115. text = new StringCharacterIterator(newText);
  116. pos = text.getBeginIndex();
  117. }
  118. /**
  119. * Set a new text to scan. The position is reset to first().
  120. * @param newText new text to scan.
  121. */
  122. public void setText(CharacterIterator newText)
  123. {
  124. text = newText;
  125. pos = text.getBeginIndex();
  126. }
  127. /**
  128. * Return the first boundary. The iterator's current position is set
  129. * to the first boundary.
  130. */
  131. public int first()
  132. {
  133. pos = text.getBeginIndex();
  134. return pos;
  135. }
  136. /**
  137. * Return the last boundary. The iterator's current position is set
  138. * to the last boundary.
  139. */
  140. public int last()
  141. {
  142. pos = text.getEndIndex();
  143. return pos;
  144. }
  145. /**
  146. * Return the nth boundary from the current boundary
  147. * @param index which boundary to return. A value of 0
  148. * does nothing.
  149. * @return the nth boundary from the current position.
  150. */
  151. public int next(int increment)
  152. {
  153. int result = current();
  154. if (increment < 0) {
  155. for (int i = increment; (i < 0) && (result != DONE); ++i) {
  156. result = previous();
  157. }
  158. }
  159. else {
  160. for(int i = increment; (i > 0) && (result != DONE); --i) {
  161. result = next();
  162. }
  163. }
  164. return result;
  165. }
  166. /**
  167. * Return the boundary preceding the last boundary
  168. */
  169. public int previous()
  170. {
  171. if (pos > text.getBeginIndex()) {
  172. int startBoundary = pos;
  173. pos = previousSafePosition(pos-1);
  174. int prev = pos;
  175. int next = next();
  176. while (next < startBoundary && next != DONE) {
  177. prev = next;
  178. next = next();
  179. }
  180. pos = prev;
  181. return pos;
  182. }
  183. else {
  184. return DONE;
  185. }
  186. }
  187. /**
  188. * Return the next text boundary
  189. * @return the character offset of the text boundary or DONE if all
  190. * boundaries have been returned.
  191. */
  192. public int next()
  193. {
  194. int result = pos;
  195. if (pos < text.getEndIndex()) {
  196. pos = nextPosition(pos);
  197. result = pos;
  198. }
  199. else {
  200. result = DONE;
  201. }
  202. return result;
  203. }
  204. /**
  205. * Return true if the specified position is a boundary position.
  206. * @param offset the offset to check.
  207. * @return True if "offset" is a boundary position.
  208. */
  209. public boolean isBoundary(int offset) {
  210. int begin = text.getBeginIndex();
  211. if (offset < begin || offset >= text.getEndIndex())
  212. throw new IllegalArgumentException(
  213. "isBoundary offset out of bounds");
  214. if (offset == begin)
  215. return true;
  216. else
  217. return following(offset - 1) == offset;
  218. }
  219. /**
  220. * Return the first boundary after the specified offset
  221. * @param offset the offset to start
  222. * @return int the first boundary after offset
  223. */
  224. public int following(int offset)
  225. {
  226. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  227. throw new IllegalArgumentException(
  228. "nextBoundaryAt offset out of bounds");
  229. pos = previousSafePosition(offset);
  230. int result;
  231. do {
  232. result = next();
  233. } while (result <= offset && result != DONE);
  234. return result;
  235. }
  236. /**
  237. * Return the last boundary preceding the specified offset
  238. * @param offset the offset to start
  239. * @return the last boundary before offset
  240. */
  241. public int preceding(int offset)
  242. {
  243. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  244. throw new IllegalArgumentException("preceding() offset out of bounds");
  245. if (offset == text.getBeginIndex())
  246. return BreakIterator.DONE;
  247. pos = previousSafePosition(offset);
  248. int curr = pos;
  249. int last;
  250. do {
  251. last = curr;
  252. curr = next();
  253. } while (curr < offset && curr != BreakIterator.DONE);
  254. pos = last;
  255. return last;
  256. }
  257. /**
  258. * Return the boundary last returned by previous or next
  259. * @return int the boundary last returned by previous or next
  260. */
  261. public int current()
  262. {
  263. return pos;
  264. }
  265. //.................................................
  266. //utility functions. These functions don't change the current position.
  267. private int previousSafePosition(int offset)
  268. {
  269. int result = text.getBeginIndex();
  270. int state = data.backward().initialState();
  271. if (offset == result)
  272. ++offset;
  273. for (char c = text.setIndex(offset - 1);
  274. c != CharacterIterator.DONE && !data.backward().isEndState(state);
  275. c = text.previous()) {
  276. state = data.backward().get(state, mappedChar(c));
  277. if (data.backward().isMarkState(state)) {
  278. result = text.getIndex();
  279. }
  280. }
  281. return result;
  282. }
  283. private int nextPosition(int offset)
  284. {
  285. int getEndIndex = text.getEndIndex();
  286. int state = data.forward().initialState();
  287. for (char c = text.setIndex(offset);
  288. c != CharacterIterator.DONE && !data.forward().isEndState(state);
  289. c = text.next()) {
  290. state = data.forward().get(state, mappedChar(c));
  291. if (data.forward().isMarkState(state)) {
  292. getEndIndex = text.getIndex();
  293. }
  294. }
  295. if (data.forward().isEndState(state))
  296. return getEndIndex;
  297. else {
  298. state = data.forward().get(state, mappedChar(END_OF_STRING));
  299. if (data.forward().isMarkState(state))
  300. return text.getEndIndex();
  301. else
  302. return getEndIndex;
  303. }
  304. }
  305. protected int mappedChar(char c)
  306. {
  307. return data.map().mappedChar(c);
  308. }
  309. }