1. /*
  2. * @(#)SimpleTextBoundary.java 1.24 00/01/19
  3. *
  4. * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation
  15. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  16. * subsidiary of IBM. These materials are provided under terms
  17. * of a License Agreement between Taligent and Sun. This technology
  18. * is protected by multiple US and International patents.
  19. *
  20. * This notice and attribution to Taligent may not be removed.
  21. * Taligent is a registered trademark of Taligent, Inc.
  22. *
  23. */
  24. package java.text;
  25. import java.io.IOException;
  26. /**
  27. * SimpleTextBoundary is an implementation of the BreakIterator
  28. * protocol. SimpleTextBoundary uses a state machine to compute breaks.
  29. * There are currently several subclasses of SimpleTextBoundary that
  30. * compute breaks for sentences, words, lines, and characters. They are
  31. * accessable through static functions of SimpleTextBoundary.
  32. *
  33. * @see BreakIterator
  34. */
  35. final class SimpleTextBoundary extends BreakIterator
  36. {
  37. private transient int pos;
  38. private transient CharacterIterator text;
  39. private TextBoundaryData data;
  40. // internally, the not-a-Unicode value is used as a sentinel value meaning
  41. // "the end of the string" for the purposes of looking up an appropriate
  42. // state transition when you've run off the end of the string
  43. private static final char END_OF_STRING = '\uffff';
  44. /**
  45. * Create a SimpleTextBoundary using the specified tables. Currently,
  46. * the table format is private.
  47. * @param data data used for boundary determination
  48. */
  49. protected SimpleTextBoundary(TextBoundaryData data)
  50. {
  51. this.data = data;
  52. text = new StringCharacterIterator("");
  53. pos = text.getBeginIndex();
  54. }
  55. /**
  56. * Compares the equality of two SimpleTextBoundary objects.
  57. * @param obj the SimpleTextBoundary object to be compared with.
  58. * @return true if the given obj is the same as this
  59. * SimpleTextBoundary object; false otherwise.
  60. */
  61. public boolean equals(Object obj)
  62. {
  63. if (this == obj)
  64. return true;
  65. if (!(obj instanceof SimpleTextBoundary))
  66. return false;
  67. SimpleTextBoundary that = (SimpleTextBoundary) obj;
  68. // The data classes are final and sharable. Only the
  69. // class type needs to be compared.
  70. if (this.data.getClass() != that.data.getClass())
  71. return false;
  72. if (this.hashCode() != that.hashCode())
  73. return false;
  74. if (pos != that.pos)
  75. return false;
  76. if (!text.equals(that.text))
  77. return false;
  78. return true;
  79. }
  80. /**
  81. * Compute a hashcode for this enumeration
  82. * @return A hash code
  83. */
  84. public int hashCode()
  85. {
  86. return getClass().hashCode() ^ text.hashCode();
  87. }
  88. /**
  89. * Overrides Cloneable
  90. */
  91. public Object clone()
  92. {
  93. try {
  94. SimpleTextBoundary other = (SimpleTextBoundary) super.clone();
  95. other.text = (CharacterIterator) text.clone();
  96. // The data classes are final and sharable.
  97. // They don't need to be cloned.
  98. return other;
  99. } catch (InternalError e) {
  100. throw new InternalError();
  101. }
  102. }
  103. /**
  104. * Get the text being scanned by the enumeration
  105. * @return the text being scanned by the enumeration
  106. */
  107. public CharacterIterator getText()
  108. {
  109. return text;
  110. }
  111. /**
  112. * Set a new text string for enumeration. The position of the
  113. * enumerator is reset to first().
  114. * @param newText new text to scan.
  115. */
  116. public void setText(String newText)
  117. {
  118. text = new StringCharacterIterator(newText);
  119. pos = text.getBeginIndex();
  120. }
  121. /**
  122. * Set a new text to scan. The position is reset to first().
  123. * @param newText new text to scan.
  124. */
  125. public void setText(CharacterIterator newText)
  126. {
  127. text = newText;
  128. pos = text.getBeginIndex();
  129. }
  130. /**
  131. * Return the first boundary. The iterator's current position is set
  132. * to the first boundary.
  133. */
  134. public int first()
  135. {
  136. pos = text.getBeginIndex();
  137. return pos;
  138. }
  139. /**
  140. * Return the last boundary. The iterator's current position is set
  141. * to the last boundary.
  142. */
  143. public int last()
  144. {
  145. pos = text.getEndIndex();
  146. return pos;
  147. }
  148. /**
  149. * Return the nth boundary from the current boundary
  150. * @param index which boundary to return. A value of 0
  151. * does nothing.
  152. * @return the nth boundary from the current position.
  153. */
  154. public int next(int increment)
  155. {
  156. int result = current();
  157. if (increment < 0) {
  158. for (int i = increment; (i < 0) && (result != DONE); ++i) {
  159. result = previous();
  160. }
  161. }
  162. else {
  163. for(int i = increment; (i > 0) && (result != DONE); --i) {
  164. result = next();
  165. }
  166. }
  167. return result;
  168. }
  169. /**
  170. * Return the boundary preceding the last boundary
  171. */
  172. public int previous()
  173. {
  174. if (pos > text.getBeginIndex()) {
  175. int startBoundary = pos;
  176. pos = previousSafePosition(pos-1);
  177. int prev = pos;
  178. int next = next();
  179. while (next < startBoundary && next != DONE) {
  180. prev = next;
  181. next = next();
  182. }
  183. pos = prev;
  184. return pos;
  185. }
  186. else {
  187. return DONE;
  188. }
  189. }
  190. /**
  191. * Return the next text boundary
  192. * @return the character offset of the text boundary or DONE if all
  193. * boundaries have been returned.
  194. */
  195. public int next()
  196. {
  197. int result = pos;
  198. if (pos < text.getEndIndex()) {
  199. pos = nextPosition(pos);
  200. result = pos;
  201. }
  202. else {
  203. result = DONE;
  204. }
  205. return result;
  206. }
  207. /**
  208. * Return true if the specified position is a boundary position.
  209. * @param offset the offset to check.
  210. * @return True if "offset" is a boundary position.
  211. */
  212. public boolean isBoundary(int offset) {
  213. int begin = text.getBeginIndex();
  214. if (offset < begin || offset >= text.getEndIndex())
  215. throw new IllegalArgumentException(
  216. "isBoundary offset out of bounds");
  217. if (offset == begin)
  218. return true;
  219. else
  220. return following(offset - 1) == offset;
  221. }
  222. /**
  223. * Return the first boundary after the specified offset
  224. * @param offset the offset to start
  225. * @return int the first boundary after offset
  226. */
  227. public int following(int offset)
  228. {
  229. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  230. throw new IllegalArgumentException(
  231. "nextBoundaryAt offset out of bounds");
  232. pos = previousSafePosition(offset);
  233. int result;
  234. do {
  235. result = next();
  236. } while (result <= offset && result != DONE);
  237. return result;
  238. }
  239. /**
  240. * Return the last boundary preceding the specified offset
  241. * @param offset the offset to start
  242. * @return the last boundary before offset
  243. */
  244. public int preceding(int offset)
  245. {
  246. if (offset < text.getBeginIndex() || offset >= text.getEndIndex())
  247. throw new IllegalArgumentException("preceding() offset out of bounds");
  248. if (offset == text.getBeginIndex())
  249. return BreakIterator.DONE;
  250. pos = previousSafePosition(offset);
  251. int curr = pos;
  252. int last;
  253. do {
  254. last = curr;
  255. curr = next();
  256. } while (curr < offset && curr != BreakIterator.DONE);
  257. pos = last;
  258. return last;
  259. }
  260. /**
  261. * Return the boundary last returned by previous or next
  262. * @return int the boundary last returned by previous or next
  263. */
  264. public int current()
  265. {
  266. return pos;
  267. }
  268. //.................................................
  269. //utility functions. These functions don't change the current position.
  270. private int previousSafePosition(int offset)
  271. {
  272. int result = text.getBeginIndex();
  273. int state = data.backward().initialState();
  274. if (offset == result)
  275. ++offset;
  276. for (char c = text.setIndex(offset - 1);
  277. c != CharacterIterator.DONE && !data.backward().isEndState(state);
  278. c = text.previous()) {
  279. state = data.backward().get(state, mappedChar(c));
  280. if (data.backward().isMarkState(state)) {
  281. result = text.getIndex();
  282. }
  283. }
  284. return result;
  285. }
  286. private int nextPosition(int offset)
  287. {
  288. int getEndIndex = text.getEndIndex();
  289. int state = data.forward().initialState();
  290. for (char c = text.setIndex(offset);
  291. c != CharacterIterator.DONE && !data.forward().isEndState(state);
  292. c = text.next()) {
  293. state = data.forward().get(state, mappedChar(c));
  294. if (data.forward().isMarkState(state)) {
  295. getEndIndex = text.getIndex();
  296. }
  297. }
  298. if (data.forward().isEndState(state))
  299. return getEndIndex;
  300. else {
  301. state = data.forward().get(state, mappedChar(END_OF_STRING));
  302. if (data.forward().isMarkState(state))
  303. return text.getEndIndex();
  304. else
  305. return getEndIndex;
  306. }
  307. }
  308. protected int mappedChar(char c)
  309. {
  310. return data.map().mappedChar(c);
  311. }
  312. }