/*
 * @(#)TextBoundaryData.java 1.14 03/01/23
 *
 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
 *
 * The original version of this source code and documentation
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
 * subsidiary of IBM. These materials are provided under terms
 * of a License Agreement between Taligent and Sun. This technology
 * is protected by multiple US and International patents.
 *
 * This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */
  21. package java.text;
  22. /**
  23. * This class wraps up the data tables needed for SimpleTextBoundary.
  24. * It is subclassed for each type of text boundary.
  25. */
  26. abstract class TextBoundaryData
  27. {
  28. private WordBreakTable forwardStateTable = null;
  29. private WordBreakTable backwardStateTable = null;
  30. private UnicodeClassMapping mappingTable = null;
  31. protected TextBoundaryData(WordBreakTable fwd, WordBreakTable bwd, UnicodeClassMapping map) {
  32. forwardStateTable = fwd;
  33. backwardStateTable = bwd;
  34. mappingTable = map;
  35. }
  36. public WordBreakTable forward() {
  37. return forwardStateTable;
  38. }
  39. public WordBreakTable backward() {
  40. return backwardStateTable;
  41. }
  42. public UnicodeClassMapping map() {
  43. return mappingTable;
  44. }
  45. // useful Unicode constants
  46. protected static final char ASCII_END_OF_TEXT
  47. = '\u0003';
  48. protected static final char ASCII_HORIZONTAL_TABULATION
  49. = '\u0009';
  50. protected static final char ASCII_LINEFEED
  51. = (char)0x000A;
  52. protected static final char ASCII_VERTICAL_TABULATION
  53. = '\u000B';
  54. protected static final char ASCII_FORM_FEED
  55. = '\u000C';
  56. protected static final char ASCII_CARRIAGE_RETURN
  57. = (char)0x000D;
  58. protected static final char ASCII_SPACE
  59. = '\u0020';
  60. protected static final char ASCII_EXCLAMATION_MARK
  61. = '\u0021';
  62. protected static final char ASCII_QUOTATION_MARK
  63. = '\u0022';
  64. protected static final char ASCII_NUMBER_SIGN
  65. = '\u0023';
  66. protected static final char ASCII_DOLLAR_SIGN
  67. = '\u0024';
  68. protected static final char ASCII_PERCENT
  69. = '\u0025';
  70. protected static final char ASCII_AMPERSAND
  71. = '\u0026';
  72. protected static final char ASCII_APOSTROPHE
  73. = (char)0x0027;
  74. protected static final char ASCII_COMMA
  75. = '\u002C';
  76. protected static final char ASCII_FULL_STOP
  77. = '\u002E';
  78. protected static final char ASCII_COLON
  79. = '\u003A';
  80. protected static final char ASCII_SEMICOLON
  81. = '\u003B';
  82. protected static final char ASCII_QUESTION_MARK
  83. = '\u003F';
  84. protected static final char ASCII_NONBREAKING_SPACE
  85. = '\u00A0';
  86. protected static final char ASCII_CENT_SIGN
  87. = '\u00A2';
  88. protected static final char ASCII_POUND_SIGN
  89. = '\u00a3';
  90. protected static final char ASCII_YEN_SIGN
  91. = '\u00a5';
  92. protected static final char LATIN1_SOFTHYPHEN
  93. = '\u00AD';
  94. protected static final char LATIN1_DEGREE_SIGN
  95. = '\u00B0';
  96. protected static final char ARABIC_PERCENT_SIGN
  97. = '\u066A';
  98. protected static final char ARABIC_DECIMAL_SEPARATOR
  99. = '\u066B';
  100. protected static final char HANGUL_CHOSEONG_LOW
  101. = '\u1100';
  102. protected static final char HANGUL_CHOSEONG_HIGH
  103. = '\u115f';
  104. protected static final char HANGUL_JUNGSEONG_LOW
  105. = '\u1160';
  106. protected static final char HANGUL_JUNGSEONG_HIGH
  107. = '\u11A7';
  108. protected static final char HANGUL_JONGSEONG_LOW
  109. = '\u11A8';
  110. protected static final char HANGUL_JONGSEONG_HIGH
  111. = '\u11FF';
  112. protected static final char FIGURE_SPACE
  113. = '\u2007';
  114. protected static final char NONBREAKING_HYPHEN
  115. = '\u2011';
  116. protected static final char PUNCTUATION_HYPHENATION_POINT
  117. = '\u2027';
  118. protected static final char PUNCTUATION_LINE_SEPARATOR
  119. = '\u2028';
  120. protected static final char PUNCTUATION_PARAGRAPH_SEPARATOR
  121. = '\u2029';
  122. protected static final char PER_MILLE_SIGN
  123. = '\u2030';
  124. protected static final char PER_TEN_THOUSAND_SIGN
  125. = '\u2031';
  126. protected static final char PRIME
  127. = '\u2032';
  128. protected static final char DOUBLE_PRIME
  129. = '\u2033';
  130. protected static final char TRIPLE_PRIME
  131. = '\u2034';
  132. protected static final char DEGREE_CELSIUS
  133. = '\u2103';
  134. protected static final char DEGREE_FAHRENHEIT
  135. = '\u2109';
  136. protected static final char PUNCTUATION_IDEOGRAPHIC_COMMA
  137. = '\u3001';
  138. protected static final char PUNCTUATION_IDEOGRAPHIC_FULL_STOP
  139. = '\u3002';
  140. protected static final char IDEOGRAPHIC_ITERATION_MARK
  141. = '\u3005';
  142. protected static final char HIRAGANA_LETTER_SMALL_A
  143. = '\u3041';
  144. protected static final char HIRAGANA_LETTER_A
  145. = '\u3042';
  146. protected static final char HIRAGANA_LETTER_SMALL_I
  147. = '\u3043';
  148. protected static final char HIRAGANA_LETTER_I
  149. = '\u3044';
  150. protected static final char HIRAGANA_LETTER_SMALL_U
  151. = '\u3045';
  152. protected static final char HIRAGANA_LETTER_U
  153. = '\u3046';
  154. protected static final char HIRAGANA_LETTER_SMALL_E
  155. = '\u3047';
  156. protected static final char HIRAGANA_LETTER_E
  157. = '\u3048';
  158. protected static final char HIRAGANA_LETTER_SMALL_O
  159. = '\u3049';
  160. protected static final char HIRAGANA_LETTER_O
  161. = '\u304A';
  162. protected static final char HIRAGANA_LETTER_DI
  163. = '\u3062';
  164. protected static final char HIRAGANA_LETTER_SMALL_TU
  165. = '\u3063';
  166. protected static final char HIRAGANA_LETTER_TU
  167. = '\u3064';
  168. protected static final char HIRAGANA_LETTER_MO
  169. = '\u3082';
  170. protected static final char HIRAGANA_LETTER_SMALL_YA
  171. = '\u3083';
  172. protected static final char HIRAGANA_LETTER_YA
  173. = '\u3084';
  174. protected static final char HIRAGANA_LETTER_SMALL_YU
  175. = '\u3085';
  176. protected static final char HIRAGANA_LETTER_YU
  177. = '\u3086';
  178. protected static final char HIRAGANA_LETTER_SMALL_YO
  179. = '\u3087';
  180. protected static final char HIRAGANA_LETTER_YO
  181. = '\u3088';
  182. protected static final char HIRAGANA_LETTER_RO
  183. = '\u308D';
  184. protected static final char HIRAGANA_LETTER_SMALL_WA
  185. = '\u308E';
  186. protected static final char HIRAGANA_LETTER_WA
  187. = '\u308F';
  188. protected static final char HIRAGANA_LETTER_VU
  189. = '\u3094';
  190. protected static final char COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK
  191. = '\u3099';
  192. protected static final char HIRAGANA_SEMIVOICED_SOUND_MARK
  193. = '\u309C';
  194. protected static final char HIRAGANA_ITERATION_MARK
  195. = '\u309D';
  196. protected static final char HIRAGANA_VOICED_ITERATION_MARK
  197. = '\u309E';
  198. protected static final char KATAKANA_LETTER_SMALL_A
  199. = '\u30A1';
  200. protected static final char KATAKANA_LETTER_A
  201. = '\u30A2';
  202. protected static final char KATAKANA_LETTER_SMALL_I
  203. = '\u30A3';
  204. protected static final char KATAKANA_LETTER_I
  205. = '\u30A4';
  206. protected static final char KATAKANA_LETTER_SMALL_U
  207. = '\u30A5';
  208. protected static final char KATAKANA_LETTER_U
  209. = '\u30A6';
  210. protected static final char KATAKANA_LETTER_SMALL_E
  211. = '\u30A7';
  212. protected static final char KATAKANA_LETTER_E
  213. = '\u30A8';
  214. protected static final char KATAKANA_LETTER_SMALL_O
  215. = '\u30A9';
  216. protected static final char KATAKANA_LETTER_O
  217. = '\u30AA';
  218. protected static final char KATAKANA_LETTER_DI
  219. = '\u30C2';
  220. protected static final char KATAKANA_LETTER_SMALL_TU
  221. = '\u30C3';
  222. protected static final char KATAKANA_LETTER_TU
  223. = '\u30C4';
  224. protected static final char KATAKANA_LETTER_MO
  225. = '\u30E2';
  226. protected static final char KATAKANA_LETTER_SMALL_YA
  227. = '\u30E3';
  228. protected static final char KATAKANA_LETTER_YA
  229. = '\u30E4';
  230. protected static final char KATAKANA_LETTER_SMALL_YU
  231. = '\u30E5';
  232. protected static final char KATAKANA_LETTER_YU
  233. = '\u30E6';
  234. protected static final char KATAKANA_LETTER_SMALL_YO
  235. = '\u30E7';
  236. protected static final char KATAKANA_LETTER_YO
  237. = '\u30E8';
  238. protected static final char KATAKANA_LETTER_RO
  239. = '\u30ED';
  240. protected static final char KATAKANA_LETTER_SMALL_WA
  241. = '\u30EE';
  242. protected static final char KATAKANA_LETTER_WA
  243. = '\u30EF';
  244. protected static final char KATAKANA_LETTER_VU
  245. = '\u30F4';
  246. protected static final char KATAKANA_LETTER_SMALL_KA
  247. = '\u30F5';
  248. protected static final char KATAKANA_LETTER_SMALL_KE
  249. = '\u30F6';
  250. protected static final char KATAKANA_LETTER_VA
  251. = '\u30F7';
  252. protected static final char KATAKANA_LETTER_VO
  253. = '\u30FA';
  254. protected static final char KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
  255. = '\u30FC';
  256. protected static final char KATAKANA_ITERATION_MARK
  257. = '\u30FD';
  258. protected static final char KATAKANA_VOICED_ITERATION_MARK
  259. = '\u30FE';
  260. protected static final char UNICODE_LOW_BOUND_HAN
  261. = '\u4E00';
  262. protected static final char UNICODE_HIGH_BOUND_HAN
  263. = '\u9FA5';
  264. protected static final char HANGUL_SYL_LOW
  265. = '\uAC00';
  266. protected static final char HANGUL_SYL_HIGH
  267. = '\uD7A3';
  268. protected static final char CJK_COMPATIBILITY_F900
  269. = '\uF900';
  270. protected static final char CJK_COMPATIBILITY_FA2D
  271. = '\uFA2D';
  272. protected static final char UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE
  273. = '\uFEFF';
  274. protected static final char FULLWIDTH_EXCLAMATION_MARK
  275. = '\uFF01';
  276. protected static final char FULLWIDTH_COMMA
  277. = '\uFF0C';
  278. protected static final char FULLWIDTH_FULL_STOP
  279. = '\uFF0E';
  280. protected static final char FULLWIDTH_QUESTION_MARK
  281. = '\uFF1F';
  282. // SimpleTextBoundary has an internal convention that the not-a-Unicode value
  283. // $FFFF is used to signify the end of the string when looking up a proper state
  284. // transition for the end of the string
  285. protected static final char END_OF_STRING
  286. = '\uFFFF';
  287. }