- /*
- * @(#)TextBoundaryData.java 1.12 00/01/19
- *
- * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
- *
- * This software is the proprietary information of Sun Microsystems, Inc.
- * Use is subject to license terms.
- *
- */
-
- /*
- * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
- * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
- *
- * The original version of this source code and documentation
- * is copyrighted and owned by Taligent, Inc., a wholly-owned
- * subsidiary of IBM. These materials are provided under terms
- * of a License Agreement between Taligent and Sun. This technology
- * is protected by multiple US and International patents.
- *
- * This notice and attribution to Taligent may not be removed.
- * Taligent is a registered trademark of Taligent, Inc.
- *
- */
-
- package java.text;
-
- /**
- * This class wraps up the data tables needed for SimpleTextBoundary.
- * It is subclassed for each type of text boundary.
- */
- abstract class TextBoundaryData
- {
- private WordBreakTable forwardStateTable = null;
- private WordBreakTable backwardStateTable = null;
- private UnicodeClassMapping mappingTable = null;
-
- protected TextBoundaryData(WordBreakTable fwd, WordBreakTable bwd, UnicodeClassMapping map) {
- forwardStateTable = fwd;
- backwardStateTable = bwd;
- mappingTable = map;
- }
-
- public WordBreakTable forward() {
- return forwardStateTable;
- }
-
- public WordBreakTable backward() {
- return backwardStateTable;
- }
-
- public UnicodeClassMapping map() {
- return mappingTable;
- }
-
- // useful Unicode constants
- protected static final char ASCII_END_OF_TEXT
- = '\u0003';
- protected static final char ASCII_HORIZONTAL_TABULATION
- = '\u0009';
- protected static final char ASCII_LINEFEED
- = (char)0x000A;
- protected static final char ASCII_VERTICAL_TABULATION
- = '\u000B';
- protected static final char ASCII_FORM_FEED
- = '\u000C';
- protected static final char ASCII_CARRIAGE_RETURN
- = (char)0x000D;
- protected static final char ASCII_SPACE
- = '\u0020';
- protected static final char ASCII_EXCLAMATION_MARK
- = '\u0021';
- protected static final char ASCII_QUOTATION_MARK
- = '\u0022';
- protected static final char ASCII_NUMBER_SIGN
- = '\u0023';
- protected static final char ASCII_DOLLAR_SIGN
- = '\u0024';
- protected static final char ASCII_PERCENT
- = '\u0025';
- protected static final char ASCII_AMPERSAND
- = '\u0026';
- protected static final char ASCII_APOSTROPHE
- = (char)0x0027;
- protected static final char ASCII_COMMA
- = '\u002C';
- protected static final char ASCII_FULL_STOP
- = '\u002E';
- protected static final char ASCII_COLON
- = '\u003A';
- protected static final char ASCII_SEMICOLON
- = '\u003B';
- protected static final char ASCII_QUESTION_MARK
- = '\u003F';
- protected static final char ASCII_NONBREAKING_SPACE
- = '\u00A0';
- protected static final char ASCII_CENT_SIGN
- = '\u00A2';
- protected static final char ASCII_POUND_SIGN
- = '\u00a3';
- protected static final char ASCII_YEN_SIGN
- = '\u00a5';
- protected static final char LATIN1_SOFTHYPHEN
- = '\u00AD';
- protected static final char LATIN1_DEGREE_SIGN
- = '\u00B0';
- protected static final char ARABIC_PERCENT_SIGN
- = '\u066A';
- protected static final char ARABIC_DECIMAL_SEPARATOR
- = '\u066B';
- protected static final char HANGUL_CHOSEONG_LOW
- = '\u1100';
- protected static final char HANGUL_CHOSEONG_HIGH
- = '\u115f';
- protected static final char HANGUL_JUNGSEONG_LOW
- = '\u1160';
- protected static final char HANGUL_JUNGSEONG_HIGH
- = '\u11A7';
- protected static final char HANGUL_JONGSEONG_LOW
- = '\u11A8';
- protected static final char HANGUL_JONGSEONG_HIGH
- = '\u11FF';
- protected static final char FIGURE_SPACE
- = '\u2007';
- protected static final char NONBREAKING_HYPHEN
- = '\u2011';
- protected static final char PUNCTUATION_HYPHENATION_POINT
- = '\u2027';
- protected static final char PUNCTUATION_LINE_SEPARATOR
- = '\u2028';
- protected static final char PUNCTUATION_PARAGRAPH_SEPARATOR
- = '\u2029';
- protected static final char PER_MILLE_SIGN
- = '\u2030';
- protected static final char PER_TEN_THOUSAND_SIGN
- = '\u2031';
- protected static final char PRIME
- = '\u2032';
- protected static final char DOUBLE_PRIME
- = '\u2033';
- protected static final char TRIPLE_PRIME
- = '\u2034';
- protected static final char DEGREE_CELSIUS
- = '\u2103';
- protected static final char DEGREE_FAHRENHEIT
- = '\u2109';
- protected static final char PUNCTUATION_IDEOGRAPHIC_COMMA
- = '\u3001';
- protected static final char PUNCTUATION_IDEOGRAPHIC_FULL_STOP
- = '\u3002';
- protected static final char IDEOGRAPHIC_ITERATION_MARK
- = '\u3005';
- protected static final char HIRAGANA_LETTER_SMALL_A
- = '\u3041';
- protected static final char HIRAGANA_LETTER_A
- = '\u3042';
- protected static final char HIRAGANA_LETTER_SMALL_I
- = '\u3043';
- protected static final char HIRAGANA_LETTER_I
- = '\u3044';
- protected static final char HIRAGANA_LETTER_SMALL_U
- = '\u3045';
- protected static final char HIRAGANA_LETTER_U
- = '\u3046';
- protected static final char HIRAGANA_LETTER_SMALL_E
- = '\u3047';
- protected static final char HIRAGANA_LETTER_E
- = '\u3048';
- protected static final char HIRAGANA_LETTER_SMALL_O
- = '\u3049';
- protected static final char HIRAGANA_LETTER_O
- = '\u304A';
- protected static final char HIRAGANA_LETTER_DI
- = '\u3062';
- protected static final char HIRAGANA_LETTER_SMALL_TU
- = '\u3063';
- protected static final char HIRAGANA_LETTER_TU
- = '\u3064';
- protected static final char HIRAGANA_LETTER_MO
- = '\u3082';
- protected static final char HIRAGANA_LETTER_SMALL_YA
- = '\u3083';
- protected static final char HIRAGANA_LETTER_YA
- = '\u3084';
- protected static final char HIRAGANA_LETTER_SMALL_YU
- = '\u3085';
- protected static final char HIRAGANA_LETTER_YU
- = '\u3086';
- protected static final char HIRAGANA_LETTER_SMALL_YO
- = '\u3087';
- protected static final char HIRAGANA_LETTER_YO
- = '\u3088';
- protected static final char HIRAGANA_LETTER_RO
- = '\u308D';
- protected static final char HIRAGANA_LETTER_SMALL_WA
- = '\u308E';
- protected static final char HIRAGANA_LETTER_WA
- = '\u308F';
- protected static final char HIRAGANA_LETTER_VU
- = '\u3094';
- protected static final char COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK
- = '\u3099';
- protected static final char HIRAGANA_SEMIVOICED_SOUND_MARK
- = '\u309C';
- protected static final char HIRAGANA_ITERATION_MARK
- = '\u309D';
- protected static final char HIRAGANA_VOICED_ITERATION_MARK
- = '\u309E';
- protected static final char KATAKANA_LETTER_SMALL_A
- = '\u30A1';
- protected static final char KATAKANA_LETTER_A
- = '\u30A2';
- protected static final char KATAKANA_LETTER_SMALL_I
- = '\u30A3';
- protected static final char KATAKANA_LETTER_I
- = '\u30A4';
- protected static final char KATAKANA_LETTER_SMALL_U
- = '\u30A5';
- protected static final char KATAKANA_LETTER_U
- = '\u30A6';
- protected static final char KATAKANA_LETTER_SMALL_E
- = '\u30A7';
- protected static final char KATAKANA_LETTER_E
- = '\u30A8';
- protected static final char KATAKANA_LETTER_SMALL_O
- = '\u30A9';
- protected static final char KATAKANA_LETTER_O
- = '\u30AA';
- protected static final char KATAKANA_LETTER_DI
- = '\u30C2';
- protected static final char KATAKANA_LETTER_SMALL_TU
- = '\u30C3';
- protected static final char KATAKANA_LETTER_TU
- = '\u30C4';
- protected static final char KATAKANA_LETTER_MO
- = '\u30E2';
- protected static final char KATAKANA_LETTER_SMALL_YA
- = '\u30E3';
- protected static final char KATAKANA_LETTER_YA
- = '\u30E4';
- protected static final char KATAKANA_LETTER_SMALL_YU
- = '\u30E5';
- protected static final char KATAKANA_LETTER_YU
- = '\u30E6';
- protected static final char KATAKANA_LETTER_SMALL_YO
- = '\u30E7';
- protected static final char KATAKANA_LETTER_YO
- = '\u30E8';
- protected static final char KATAKANA_LETTER_RO
- = '\u30ED';
- protected static final char KATAKANA_LETTER_SMALL_WA
- = '\u30EE';
- protected static final char KATAKANA_LETTER_WA
- = '\u30EF';
- protected static final char KATAKANA_LETTER_VU
- = '\u30F4';
- protected static final char KATAKANA_LETTER_SMALL_KA
- = '\u30F5';
- protected static final char KATAKANA_LETTER_SMALL_KE
- = '\u30F6';
- protected static final char KATAKANA_LETTER_VA
- = '\u30F7';
- protected static final char KATAKANA_LETTER_VO
- = '\u30FA';
- protected static final char KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK
- = '\u30FC';
- protected static final char KATAKANA_ITERATION_MARK
- = '\u30FD';
- protected static final char KATAKANA_VOICED_ITERATION_MARK
- = '\u30FE';
- protected static final char UNICODE_LOW_BOUND_HAN
- = '\u4E00';
- protected static final char UNICODE_HIGH_BOUND_HAN
- = '\u9FA5';
- protected static final char HANGUL_SYL_LOW
- = '\uAC00';
- protected static final char HANGUL_SYL_HIGH
- = '\uD7A3';
- protected static final char CJK_COMPATIBILITY_F900
- = '\uF900';
- protected static final char CJK_COMPATIBILITY_FA2D
- = '\uFA2D';
- protected static final char UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE
- = '\uFEFF';
- protected static final char FULLWIDTH_EXCLAMATION_MARK
- = '\uFF01';
- protected static final char FULLWIDTH_COMMA
- = '\uFF0C';
- protected static final char FULLWIDTH_FULL_STOP
- = '\uFF0E';
- protected static final char FULLWIDTH_QUESTION_MARK
- = '\uFF1F';
-
- // SimpleTextBoundary has an internal convention that the not-a-Unicode value
- // $FFFF is used to signify the end of the string when looking up a proper state
- // transition for the end of the string
- protected static final char END_OF_STRING
- = '\uFFFF';
- }
-