1. /*
  2. * @(#)TextBoundaryData.java 1.12 00/01/19
  3. *
  4. * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation
  15. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  16. * subsidiary of IBM. These materials are provided under terms
  17. * of a License Agreement between Taligent and Sun. This technology
  18. * is protected by multiple US and International patents.
  19. *
  20. * This notice and attribution to Taligent may not be removed.
  21. * Taligent is a registered trademark of Taligent, Inc.
  22. *
  23. */
  24. package java.text;
  25. /**
  26. * This class wraps up the data tables needed for SimpleTextBoundary.
  27. * It is subclassed for each type of text boundary.
  28. */
  29. abstract class TextBoundaryData
  30. {
  31. private WordBreakTable forwardStateTable = null;
  32. private WordBreakTable backwardStateTable = null;
  33. private UnicodeClassMapping mappingTable = null;
  34. protected TextBoundaryData(WordBreakTable fwd, WordBreakTable bwd, UnicodeClassMapping map) {
  35. forwardStateTable = fwd;
  36. backwardStateTable = bwd;
  37. mappingTable = map;
  38. }
  39. public WordBreakTable forward() {
  40. return forwardStateTable;
  41. }
  42. public WordBreakTable backward() {
  43. return backwardStateTable;
  44. }
  45. public UnicodeClassMapping map() {
  46. return mappingTable;
  47. }
  48. // useful Unicode constants
  49. protected static final char ASCII_END_OF_TEXT
  50. = '\u0003';
  51. protected static final char ASCII_HORIZONTAL_TABULATION
  52. = '\u0009';
  53. protected static final char ASCII_LINEFEED
  54. = (char)0x000A;
  55. protected static final char ASCII_VERTICAL_TABULATION
  56. = '\u000B';
  57. protected static final char ASCII_FORM_FEED
  58. = '\u000C';
  59. protected static final char ASCII_CARRIAGE_RETURN
  60. = (char)0x000D;
  61. protected static final char ASCII_SPACE
  62. = '\u0020';
  63. protected static final char ASCII_EXCLAMATION_MARK
  64. = '\u0021';
  65. protected static final char ASCII_QUOTATION_MARK
  66. = '\u0022';
  67. protected static final char ASCII_NUMBER_SIGN
  68. = '\u0023';
  69. protected static final char ASCII_DOLLAR_SIGN
  70. = '\u0024';
  71. protected static final char ASCII_PERCENT
  72. = '\u0025';
  73. protected static final char ASCII_AMPERSAND
  74. = '\u0026';
  75. protected static final char ASCII_APOSTROPHE
  76. = (char)0x0027;
  77. protected static final char ASCII_COMMA
  78. = '\u002C';
  79. protected static final char ASCII_FULL_STOP
  80. = '\u002E';
  81. protected static final char ASCII_COLON
  82. = '\u003A';
  83. protected static final char ASCII_SEMICOLON
  84. = '\u003B';
  85. protected static final char ASCII_QUESTION_MARK
  86. = '\u003F';
  87. protected static final char ASCII_NONBREAKING_SPACE
  88. = '\u00A0';
  89. protected static final char ASCII_CENT_SIGN
  90. = '\u00A2';
  91. protected static final char ASCII_POUND_SIGN
  92. = '\u00a3';
  93. protected static final char ASCII_YEN_SIGN
  94. = '\u00a5';
  95. protected static final char LATIN1_SOFTHYPHEN
  96. = '\u00AD';
  97. protected static final char LATIN1_DEGREE_SIGN
  98. = '\u00B0';
  99. protected static final char ARABIC_PERCENT_SIGN
  100. = '\u066A';
  101. protected static final char ARABIC_DECIMAL_SEPARATOR
  102. = '\u066B';
  103. protected static final char HANGUL_CHOSEONG_LOW
  104. = '\u1100';
  105. protected static final char HANGUL_CHOSEONG_HIGH
  106. = '\u115f';
  107. protected static final char HANGUL_JUNGSEONG_LOW
  108. = '\u1160';
  109. protected static final char HANGUL_JUNGSEONG_HIGH
  110. = '\u11A7';
  111. protected static final char HANGUL_JONGSEONG_LOW
  112. = '\u11A8';
  113. protected static final char HANGUL_JONGSEONG_HIGH
  114. = '\u11FF';
  115. protected static final char FIGURE_SPACE
  116. = '\u2007';
  117. protected static final char NONBREAKING_HYPHEN
  118. = '\u2011';
  119. protected static final char PUNCTUATION_HYPHENATION_POINT
  120. = '\u2027';
  121. protected static final char PUNCTUATION_LINE_SEPARATOR
  122. = '\u2028';
  123. protected static final char PUNCTUATION_PARAGRAPH_SEPARATOR
  124. = '\u2029';
  125. protected static final char PER_MILLE_SIGN
  126. = '\u2030';
  127. protected static final char PER_TEN_THOUSAND_SIGN
  128. = '\u2031';
  129. protected static final char PRIME
  130. = '\u2032';
  131. protected static final char DOUBLE_PRIME
  132. = '\u2033';
  133. protected static final char TRIPLE_PRIME
  134. = '\u2034';
  135. protected static final char DEGREE_CELSIUS
  136. = '\u2103';
  137. protected static final char DEGREE_FAHRENHEIT
  138. = '\u2109';
  139. protected static final char PUNCTUATION_IDEOGRAPHIC_COMMA
  140. = '\u3001';
  141. protected static final char PUNCTUATION_IDEOGRAPHIC_FULL_STOP
  142. = '\u3002';
  143. protected static final char IDEOGRAPHIC_ITERATION_MARK
  144. = '\u3005';
  145. protected static final char HIRAGANA_LETTER_SMALL_A
  146. = '\u3041';
  147. protected static final char HIRAGANA_LETTER_A
  148. = '\u3042';
  149. protected static final char HIRAGANA_LETTER_SMALL_I
  150. = '\u3043';
  151. protected static final char HIRAGANA_LETTER_I
  152. = '\u3044';
  153. protected static final char HIRAGANA_LETTER_SMALL_U
  154. = '\u3045';
  155. protected static final char HIRAGANA_LETTER_U
  156. = '\u3046';
  157. protected static final char HIRAGANA_LETTER_SMALL_E
  158. = '\u3047';
  159. protected static final char HIRAGANA_LETTER_E
  160. = '\u3048';
  161. protected static final char HIRAGANA_LETTER_SMALL_O
  162. = '\u3049';
  163. protected static final char HIRAGANA_LETTER_O
  164. = '\u304A';
  165. protected static final char HIRAGANA_LETTER_DI
  166. = '\u3062';
  167. protected static final char HIRAGANA_LETTER_SMALL_TU
  168. = '\u3063';
  169. protected static final char HIRAGANA_LETTER_TU
  170. = '\u3064';
  171. protected static final char HIRAGANA_LETTER_MO
  172. = '\u3082';
  173. protected static final char HIRAGANA_LETTER_SMALL_YA
  174. = '\u3083';
  175. protected static final char HIRAGANA_LETTER_YA
  176. = '\u3084';
  177. protected static final char HIRAGANA_LETTER_SMALL_YU
  178. = '\u3085';
  179. protected static final char HIRAGANA_LETTER_YU
  180. = '\u3086';
  181. protected static final char HIRAGANA_LETTER_SMALL_YO
  182. = '\u3087';
  183. protected static final char HIRAGANA_LETTER_YO
  184. = '\u3088';
  185. protected static final char HIRAGANA_LETTER_RO
  186. = '\u308D';
  187. protected static final char HIRAGANA_LETTER_SMALL_WA
  188. = '\u308E';
  189. protected static final char HIRAGANA_LETTER_WA
  190. = '\u308F';
  191. protected static final char HIRAGANA_LETTER_VU
  192. = '\u3094';
  193. protected static final char COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK
  194. = '\u3099';
  195. protected static final char HIRAGANA_SEMIVOICED_SOUND_MARK
  196. = '\u309C';
  197. protected static final char HIRAGANA_ITERATION_MARK
  198. = '\u309D';
  199. protected static final char HIRAGANA_VOICED_ITERATION_MARK
  200. = '\u309E';
  201. protected static final char KATAKANA_LETTER_SMALL_A
  202. = '\u30A1';
  203. protected static final char KATAKANA_LETTER_A
  204. = '\u30A2';
  205. protected static final char KATAKANA_LETTER_SMALL_I
  206. = '\u30A3';
  207. protected static final char KATAKANA_LETTER_I
  208. = '\u30A4';
  209. protected static final char KATAKANA_LETTER_SMALL_U
  210. = '\u30A5';
  211. protected static final char KATAKANA_LETTER_U
  212. = '\u30A6';
  213. protected static final char KATAKANA_LETTER_SMALL_E
  214. = '\u30A7';
  215. protected static final char KATAKANA_LETTER_E
  216. = '\u30A8';
  217. protected static final char KATAKANA_LETTER_SMALL_O
  218. = '\u30A9';
  219. protected static final char KATAKANA_LETTER_O
  220. = '\u30AA';
  221. protected static final char KATAKANA_LETTER_DI
  222. = '\u30C2';
  223. protected static final char KATAKANA_LETTER_SMALL_TU
  224. = '\u30C3';
  225. protected static final char KATAKANA_LETTER_TU
  226. = '\u30C4';
  227. protected static final char KATAKANA_LETTER_MO
  228. = '\u30E2';
  229. protected static final char KATAKANA_LETTER_SMALL_YA
  230. = '\u30E3';
  231. protected static final char KATAKANA_LETTER_YA
  232. = '\u30E4';
  233. protected static final char KATAKANA_LETTER_SMALL_YU
  234. = '\u30E5';
  235. protected static final char KATAKANA_LETTER_YU
  236. = '\u30E6';
  237. protected static final char KATAKANA_LETTER_SMALL_YO
  238. = '\u30E7';
  239. protected static final char KATAKANA_LETTER_YO
  240. = '\u30E8';
  241. protected static final char KATAKANA_LETTER_RO
  242. = '\u30ED';
  243. protected static final char KATAKANA_LETTER_SMALL_WA
  244. = '\u30EE';
  245. protected static final char KATAKANA_LETTER_WA
  246. = '\u30EF';
  247. protected static final char KATAKANA_LETTER_VU
  248. = '\u30F4';
  249. protected static final char KATAKANA_LETTER_SMALL_KA
  250. = '\u30F5';
  251. protected static final char KATAKANA_LETTER_SMALL_KE
  252. = '\u30F6';
  253. protected static final char KATAKANA_LETTER_VA
  254. = '\u30F7';
  255. protected static final char KATAKANA_LETTER_VO
  256. = '\u30FA';
  257. protected static final char KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
  258. = '\u30FC';
  259. protected static final char KATAKANA_ITERATION_MARK
  260. = '\u30FD';
  261. protected static final char KATAKANA_VOICED_ITERATION_MARK
  262. = '\u30FE';
  263. protected static final char UNICODE_LOW_BOUND_HAN
  264. = '\u4E00';
  265. protected static final char UNICODE_HIGH_BOUND_HAN
  266. = '\u9FA5';
  267. protected static final char HANGUL_SYL_LOW
  268. = '\uAC00';
  269. protected static final char HANGUL_SYL_HIGH
  270. = '\uD7A3';
  271. protected static final char CJK_COMPATIBILITY_F900
  272. = '\uF900';
  273. protected static final char CJK_COMPATIBILITY_FA2D
  274. = '\uFA2D';
  275. protected static final char UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE
  276. = '\uFEFF';
  277. protected static final char FULLWIDTH_EXCLAMATION_MARK
  278. = '\uFF01';
  279. protected static final char FULLWIDTH_COMMA
  280. = '\uFF0C';
  281. protected static final char FULLWIDTH_FULL_STOP
  282. = '\uFF0E';
  283. protected static final char FULLWIDTH_QUESTION_MARK
  284. = '\uFF1F';
  285. // SimpleTextBoundary has an internal convention that the not-a-Unicode value
  286. // $FFFF is used to signify the end of the string when looking up a proper state
  287. // transition for the end of the string
  288. protected static final char END_OF_STRING
  289. = '\uFFFF';
  290. }