- /*
- * @(#)LineBreakData.java 1.17 00/01/19
- *
- * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
- *
- * This software is the proprietary information of Sun Microsystems, Inc.
- * Use is subject to license terms.
- *
- */
-
- /*
- * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
- * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
- *
- * The original version of this source code and documentation
- * is copyrighted and owned by Taligent, Inc., a wholly-owned
- * subsidiary of IBM. These materials are provided under terms
- * of a License Agreement between Taligent and Sun. This technology
- * is protected by multiple US and International patents.
- *
- * This notice and attribution to Taligent may not be removed.
- * Taligent is a registered trademark of Taligent, Inc.
- *
- */
-
- package java.text;
-
- /**
- * The LineBreakData contains data used by SimpleTextBoundary
- * to determine line breaks.
- * @see BreakIterator
- */
- final class LineBreakData extends TextBoundaryData
- {
- // THEORY OF OPERATION: This class contains all the tables necessary to do
- // line-break iteration. This class descends from TextBoundaryData, which
- // is abstract. This class doesn't define any non-static members; it inherits the
- // non-static members from TextBoundaryData and fills them in with pointers to
- // the static members defined here.
- // There are two main parts to a TextBoundaryData object: the state-transition
- // tables and the character-mapping tables. The forward state table defines the
- // transitions for a deterministic finite state machine that locates line-break
- // positions. The rows are the states and the columns are character categories.
- // The cell values consist of two parts: The first is the row number of the next
- // state to transition to, or a "stop" value (0). (Because 0 is the stop value
- // rather than a valid state number, row 0 of the array isn't ever looked at; we
- // fill it with STOP values by convention.) The second part is a flag indicating
- // whether the iterator should update its break position on this transition. When
- // the flag is set, the sign bit of the value is turned on (SI is used to represent
- // the flag bit being turned on-- we do it this way rather than just using negative
- // numbers because we still need to see the SI flag when the value of the transition
- // is STOP. SI_STOP is used to denote this.) The starting state in all state tables
- // is 1.
- // The backward state table works the same way as the forward state table, but is
- // usually simplified. The iterator uses the backward state table only to find a
- // "safe place" to start iterating forward. It then seeks forward from the "safe
- // place" to the actual break position using the forward table. A "safe place" is
- // a spot in the text that is guaranteed to be a break position.
- // The character-category mapping tables are split into several pieces, one for
- // each stage of the category-mapping process: 1) kRawMapping maps generic Unicode
- // character categories to the character categories used by this break iterator.
- // The index of the array is the Unicode category number as returned by
- // Character.getType(). 2) The LineExceptionFlags table is a table of Boolean values
- // indicating whether all the characters in the Unicode category have the
- // raw-mapping value. The rows correspond to the rows of the raw-mapping table. If
- // an entry is true, then we find the right category using... 3) The kExceptionChar
- // table. This table is a sorted list of SpecialMapping objects. Each entry defines
- // a range of contiguous characters that share the same category and the category
- // number. This list is binary-searched to find an entry corresponding to the
- // character being mapped. Only characters whose breaking category is different from
- // the raw-mapping value (the breaking category for their Unicode category) are
- // listed in this table. 4) The kLineAsciiValues table is a fast-path table for characters
- // in the Latin1 range. This table maps straight from a character value to a
- // category number, bypassing all the other tables. The programmer must take care
- // that all of the different category-mapping tables are consistent.
- // In the current implementation, all of these tables are created and maintained
- // by hand, not using a tool.
-
- private static final byte BREAK = 0;
- //always breaks: hard line-breaking characters (must be present as first item)
- private static final byte blank = 1;
- //spaces, nulls, and most other control characters (tabs map to BREAK, not blank, in the tables below)
- private static final byte cr = 2;
- //carriage return
- private static final byte nonBlank = 3;
- //everything not included elsewhere
- private static final byte op = 4;
- //hyphens and dashes (the "op" column of the state tables)
- private static final byte jwrd = 5;
- //hiragana, katakana, and kanji
- private static final byte preJwrd = 6;
- //characters that bind to the beginning of a Japanese word
- private static final byte postJwrd = 7;
- //characters that bind to the end of a Japanese word
- private static final byte digit = 8;
- //digits
- private static final byte numPunct = 9;
- //punctuation that can appear within a number
- private static final byte currency = 10;
- //currency symbols that can precede a number
- private static final byte quote = 11;
- // the ASCII quotation mark
- private static final byte nsm = 12;
- // non-spacing marks
- private static final byte nbsp = 13;
- // non-breaking characters
- private static final byte EOS = 14; // end of string (kExceptionChar maps END_OF_STRING to it)
- private static final int COL_COUNT = 15; // number of category columns in each state-table row
-
- private static final byte SI = (byte)0x80; // "update break position" flag: the sign bit of a table cell
- private static final byte STOP = (byte) 0; // stop value; 0 is never a valid state number
- private static final byte SI_STOP = (byte)SI + STOP; // stop, with the update flag set
-
- /**
- * Builds the line-break data set by passing the forward state table, the
- * backward state table, and the character-category map defined in this
- * class to the TextBoundaryData constructor.
- */
- public LineBreakData()
- {
- super(kLineForward, kLineBackward, kLineMap);
- }
-
- // This table locates legal line-break positions. i.e., a process that word-wraps a line of
- // text can use this version of the BreakIterator to tell it where the legal places for
- // breaking a line are.
- // The rules implemented here are as follows:
- // 1) There is always a legal break position after a line or paragraph separator, but
- // one can occur before only when the preceding character is also a line or paragraph
- // separator. (The CR-LF sequence is also kept together.) (states 4 and 7)
- // 2) There is never a break before a non-spacing mark, unless it's preceded by a line
- // or paragraph separator. (the nsm column)
- // 3) There is never a break on either side of a non-breaking space (or other non-breaking
- // characters). (the nbsp column, and state 1)
- // 4) There is always a break before and after Kanji and Kana characters, except for certain
- // punctuation that must be kept with the following character and certain punctuation
- // and diacritic marks that must be kept with the preceding character. (states 5 and 8)
- // 5) There is always a legal break position following a dash, except when it is followed
- // by a digit, a line/paragraph separator, or whitespace. (state 6)
- // 6) There is never a break before a whitespace character. There is a break after a
- // whitespace character, except when it's followed by a line/paragraph separator.
- // (state 2)
- // 7) Breaks don't occur anywhere else. (state 1)
- private static final byte kLineForwardData[] =
- {
- // brk bl cr nBl (columns, in category-number order: BREAK, blank, cr, nonBlank,
- // op kan prJ poJ op, jwrd, preJwrd, postJwrd,
- // dgt np curr quote digit, numPunct, currency, quote,
- // nsm nbsp EOS nsm, nbsp, EOS)
- // 00 - dummy state
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP,
- // 01 - main dispatch state. This state eats pre-Kanji punctuation,
- // non-breaking spaces, and non-spacing diacritics without transitioning
- // to other states.
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), (byte)(SI+3),
- (byte)(SI+6), (byte)(SI+5), (byte)(SI+1), (byte)(SI+8),
- (byte)(SI+9), (byte)(SI+8), (byte)(SI+1), (byte)(SI+3),
- (byte)(SI+1), (byte)(SI+1), SI_STOP,
- // 02 - This state eats whitespace and stops on almost anything else
- // (the exceptions are non-breaking spaces, which go back to 1,
- // and CRs and LFs)
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- (byte)(SI+2), (byte)(SI+1), SI_STOP,
- // 03 - This state eats non-whitespace characters that aren't
- // otherwise accounted for. It differs from state 1 in that it
- // stops on Kanji (you can break between any two Kanji characters)
- // and stays in state 3, rather than going to 1, on non-spacing marks
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), (byte)(SI+3),
- (byte)(SI+6), SI_STOP, (byte)(SI+1), (byte)(SI+8),
- (byte)(SI+9), (byte)(SI+8), (byte)(SI+1), (byte)(SI+3),
- (byte)(SI+3), (byte)(SI+1), SI_STOP,
- // 04 - this is the state you go to when you see a hard line-
- // breaking character. It eats that character and stops.
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP,
- // 05 - this is the state that handles Kanji. It handles
- // post-Kanji punctuation, whitespace, non-breaking spaces,
- // and line terminators, but stops on everything else
- // (including more Kanji)
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, (byte)(SI+8),
- SI_STOP, (byte)(SI+8), SI_STOP, SI_STOP,
- (byte)(SI+5), (byte)(SI+1), SI_STOP,
- // 06 - This state handles dashes. As originally described, it continues on
- // whitespace, more dashes, line terminators, and digits (the dash is a minus
- // sign), but stops on everything else (unless there's an nbsp, a dash is
- // always a legal break position). NOTE(review): the bl and op cells below are
- // SI_STOP, i.e. the state stops on whitespace and on more dashes -- confirm intent.
- (byte)(SI+4), SI_STOP, (byte)(SI+7), SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- (byte)(SI+9), SI_STOP, (byte)(SI+11), SI_STOP,
- (byte)(SI+6), (byte)(SI+1), SI_STOP,
- // 07 - This state handles CRs. A CR is a line terminator
- // when it appears alone, and considered "half" a line
- // terminator when it occurs right before any other line
- // terminator (except another CR).
- (byte)(SI+4), SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, SI_STOP,
- SI_STOP, SI_STOP, SI_STOP,
- // 08 - This state eats post-Kanji punctuation, and passes whitespace,
- // non-breaking characters, line terminators, etc. It stops on almost everything
- // else. NOTE(review): dashes were listed as passed, but the op cell below is SI_STOP.
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, (byte)(SI+8),
- SI_STOP, (byte)(SI+8), SI_STOP, (byte)(SI+3),
- (byte)(SI+8), (byte)(SI+1), SI_STOP,
- // 09 - This state is the main "number" state. It eats
- // digits.
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), (byte)(SI+3),
- (byte)(SI+6), SI_STOP, SI_STOP, (byte)(SI+8),
- (byte)(SI+9), (byte)(SI+10), (byte)(SI+10), (byte)(SI+3),
- (byte)(SI+9), (byte)(SI+1), SI_STOP,
- // 10 - This state is the secondary "number" state. It
- // eats punctuation that can occur inside a number.
- (byte)(SI+4), (byte)(SI+2), (byte)(SI+7), SI_STOP,
- SI_STOP, SI_STOP, SI_STOP, (byte)(SI+8),
- (byte)(SI+9), (byte)(SI+8), SI_STOP, SI_STOP,
- (byte)(SI+10), (byte)(SI+1), SI_STOP,
- // 11 - This state is here to allow a dash to go before a
- // currency symbol and still be treated as a minus sign
- // (if the character after the currency symbol is a digit).
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
- (byte)(SI+9), STOP, STOP, STOP,
- (byte)(11), (byte)(SI+1), STOP
- };
-
- private static final WordBreakTable kLineForward
- = new WordBreakTable(COL_COUNT, kLineForwardData);
-
- // This table locates unambiguous break positions when iterating backward.
- // It implements the following rules:
- // 1) For most characters, there is a break before them if they're preceded
- // by whitespace, Kanji, or a line/paragraph separator. (CR-LF is kept together)
- // 2) There is a break before a Kanji character, except when it's preceded by
- // a Kanji-prefix character. (state 4)
- // 3) There is NOT a break before a Kanji-suffix character, except when preceded
- // by whitespace, a line/paragraph separator, or a dash. (state 3)
- // 4) There is never a break on either side of a non-break character. (the nbsp column)
- // 5) There is never a break before a non-spacing mark (the nsm column)
- // [In this set of rules, "break" means "unambiguous break position". There may sometimes
- // be actual breaks in positions this table always skips.]
- private static final byte kLineBackwardData[] =
- {
- // brk bl cr nBl (columns, in category-number order: BREAK, blank, cr, nonBlank,
- // op kan prJ poJ op, jwrd, preJwrd, postJwrd,
- // dgt np curr quote digit, numPunct, currency, quote,
- // nsm nbsp EOS nsm, nbsp, EOS)
- /*00 - dummy state; filled with STOP*/
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP, STOP,
- STOP, STOP, STOP,
- /*01 - starting state; stays here on brk, bl, cr, and nsm*/
- (byte)(SI+1), (byte)(SI+1), (byte)(SI+1), (byte)(SI+2),
- (byte)(SI+2), (byte)(SI+4), (byte)(SI+2), (byte)(SI+3),
- (byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
- (byte)(SI+1), (byte)(SI+2), STOP,
- /*02 - entered from state 1 on nBl, op, prJ, dgt, curr, quote, or nbsp*/
- STOP, STOP, STOP, (byte)(SI+2),
- (byte)(SI+2), STOP, (byte)(SI+2), (byte)(SI+3),
- (byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
- (byte)(SI+2), (byte)(SI+2), STOP,
- /*03 - entered on poJ or np (Kanji-suffix handling, rule 3)*/
- STOP, STOP, STOP, (byte)(SI+2),
- STOP, (byte)(SI+4), (byte)(SI+2), (byte)(SI+3),
- (byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
- (byte)(SI+3), (byte)(SI+2), STOP,
- /*04 - entered on kan (Kanji handling, rule 2)*/
- STOP, STOP, STOP, STOP,
- STOP, STOP, (byte)(SI+2), STOP,
- STOP, STOP, (byte)(SI+2), STOP,
- (byte)(SI+4), (byte)(SI+4), STOP
- };
-
- private static final WordBreakTable kLineBackward
- = new WordBreakTable(COL_COUNT, kLineBackwardData);
-
- private static final int kRawMapping[] = // indexed by the Unicode category number returned by Character.getType()
- {
- nonBlank, //UNASSIGNED = 0,
- nonBlank, //UPPERCASE_LETTER = 1,
- nonBlank, //LOWERCASE_LETTER = 2,
- nonBlank, //TITLECASE_LETTER = 3,
- nonBlank, //MODIFIER_LETTER = 4,
- nonBlank, //OTHER_LETTER = 5,
- nsm, //NON_SPACING_MARK = 6,
- nsm, //ENCLOSING_MARK = 7,
- nonBlank, //COMBINING_SPACING_MARK = 8,
- digit, //DECIMAL_DIGIT_NUMBER = 9,
- nonBlank, //LETTER_NUMBER = 10,
- digit, //OTHER_NUMBER = 11,
- blank, //SPACE_SEPARATOR = 12,
- blank, //LINE_SEPARATOR = 13,
- blank, //PARAGRAPH_SEPARATOR = 14, (kExceptionChar remaps the line/paragraph separator characters to BREAK)
- blank, //CONTROL = 15,
- nonBlank, //FORMAT = 16,
- nonBlank, //(17 is not a category value defined by java.lang.Character)
- nonBlank, //PRIVATE_USE = 18,
- nonBlank, //SURROGATE = 19,
- op, //DASH_PUNCTUATION = 20,
- preJwrd, //START_PUNCTUATION = 21,
- postJwrd, //END_PUNCTUATION = 22,
- nonBlank, //CONNECTOR_PUNCTUATION = 23,
- nonBlank, //OTHER_PUNCTUATION = 24,
- nonBlank, //MATH_SYMBOL = 25,
- preJwrd, //CURRENCY_SYMBOL = 26,
- nonBlank, //MODIFIER_SYMBOL = 27,
- nonBlank //OTHER_SYMBOL = 28;
- };
-
- // Exceptions to kRawMapping: each entry gives a single character or a contiguous
- // range of characters together with the line-break category to use for it.
- // The array reference is final, matching the other tables in this class; it is
- // only read when kLineMap is constructed.
- private static final SpecialMapping kExceptionChar[] =
- {
- //note: the ranges in this table must be sorted in ascending order as
- // required by the UnicodeClassMapping class.
- new SpecialMapping(ASCII_END_OF_TEXT, BREAK),
- new SpecialMapping(ASCII_HORIZONTAL_TABULATION,
- ASCII_FORM_FEED, BREAK),
- new SpecialMapping(ASCII_CARRIAGE_RETURN, cr),
- new SpecialMapping(ASCII_EXCLAMATION_MARK, postJwrd),
- new SpecialMapping(ASCII_QUOTATION_MARK, quote),
- // NOTE(review): kLineAsciiValues gives '$' the currency category, not preJwrd -- confirm which is intended.
- new SpecialMapping(ASCII_DOLLAR_SIGN, preJwrd),
- new SpecialMapping(ASCII_PERCENT, postJwrd),
- new SpecialMapping(ASCII_COMMA, numPunct),
- new SpecialMapping(ASCII_FULL_STOP, numPunct),
- new SpecialMapping(ASCII_COLON, ASCII_SEMICOLON, postJwrd),
- new SpecialMapping(ASCII_QUESTION_MARK, postJwrd),
- new SpecialMapping(ASCII_NONBREAKING_SPACE, nbsp),
- new SpecialMapping(ASCII_CENT_SIGN, postJwrd),
- new SpecialMapping(LATIN1_SOFTHYPHEN, op),
- new SpecialMapping(LATIN1_DEGREE_SIGN, postJwrd),
- new SpecialMapping(ARABIC_PERCENT_SIGN, postJwrd),
- new SpecialMapping(FIGURE_SPACE, nbsp),
- new SpecialMapping(NONBREAKING_HYPHEN, nbsp),
- new SpecialMapping(PUNCTUATION_LINE_SEPARATOR,
- PUNCTUATION_PARAGRAPH_SEPARATOR, BREAK),
- new SpecialMapping(PER_MILLE_SIGN, postJwrd),
- new SpecialMapping(PER_TEN_THOUSAND_SIGN, postJwrd),
- new SpecialMapping(PRIME, TRIPLE_PRIME, postJwrd),
- new SpecialMapping(DEGREE_CELSIUS, postJwrd),
- new SpecialMapping(DEGREE_FAHRENHEIT, postJwrd),
- new SpecialMapping(PUNCTUATION_IDEOGRAPHIC_COMMA,
- PUNCTUATION_IDEOGRAPHIC_FULL_STOP, postJwrd),
- new SpecialMapping(IDEOGRAPHIC_ITERATION_MARK, postJwrd),
- // Hiragana: small letters and sound/iteration marks bind to the preceding character (postJwrd).
- new SpecialMapping(HIRAGANA_LETTER_SMALL_A, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_A, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_I, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_I, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_U, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_U, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_E, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_E, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_O, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_O, HIRAGANA_LETTER_DI, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_TU, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_TU, HIRAGANA_LETTER_MO, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_YA, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_YA, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_YU, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_YU, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_YO, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_YO, HIRAGANA_LETTER_RO, jwrd),
- new SpecialMapping(HIRAGANA_LETTER_SMALL_WA, postJwrd),
- new SpecialMapping(HIRAGANA_LETTER_WA, HIRAGANA_LETTER_VU, jwrd),
- new SpecialMapping(COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK,
- HIRAGANA_SEMIVOICED_SOUND_MARK, postJwrd),
- new SpecialMapping(HIRAGANA_ITERATION_MARK, HIRAGANA_VOICED_ITERATION_MARK, postJwrd),
- // Katakana: same pattern as the hiragana entries above.
- new SpecialMapping(KATAKANA_LETTER_SMALL_A, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_A, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_I, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_I, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_U, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_U, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_E, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_E, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_O, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_O, KATAKANA_LETTER_DI, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_TU, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_TU, KATAKANA_LETTER_MO, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_YA, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_YA, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_YU, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_YU, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_YO, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_YO, KATAKANA_LETTER_RO, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_WA, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_WA, KATAKANA_LETTER_VU, jwrd),
- new SpecialMapping(KATAKANA_LETTER_SMALL_KA, KATAKANA_LETTER_SMALL_KE, postJwrd),
- new SpecialMapping(KATAKANA_LETTER_VA, KATAKANA_LETTER_VO, jwrd),
- new SpecialMapping(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK, postJwrd),
- new SpecialMapping(KATAKANA_ITERATION_MARK, KATAKANA_VOICED_ITERATION_MARK, postJwrd),
- new SpecialMapping(UNICODE_LOW_BOUND_HAN,UNICODE_HIGH_BOUND_HAN,jwrd),
- new SpecialMapping(CJK_COMPATIBILITY_F900,
- CJK_COMPATIBILITY_FA2D, jwrd),
- new SpecialMapping(UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE, nbsp),
- new SpecialMapping(FULLWIDTH_EXCLAMATION_MARK, postJwrd),
- new SpecialMapping(FULLWIDTH_COMMA, postJwrd),
- new SpecialMapping(FULLWIDTH_FULL_STOP, postJwrd),
- new SpecialMapping(FULLWIDTH_QUESTION_MARK, postJwrd),
- new SpecialMapping(END_OF_STRING, EOS)
- };
-
- private static final boolean LineExceptionFlags[] = { // indexed like kRawMapping; true = the category has characters remapped by kExceptionChar
- false, // kNonCharacter = 0,
- false, // kUppercaseLetter = 1,
- false, // kLowercaseLetter = 2,
- false, // kTitlecaseLetter = 3,
- true, // kModifierLetter = 4,
- true, // kOtherLetter = 5,
- true, // kNonSpacingMark = 6,
- false, // kEnclosingMark = 7,
- false, // kCombiningSpacingMark = 8,
- false, // kDecimalNumber = 9,
- false, // kLetterNumber = 10,
- false, // kOtherNumber = 11,
- true, // kSpaceSeparator = 12,
- true, // kLineSeparator = 13,
- true, // kParagraphSeparator = 14,
- true, // kControlCharacter = 15,
- true, // kFormatCharacter = 16,
- false, // UNDEFINED = 17,
- false, // kPrivateUseCharacter = 18,
- false, // kSurrogate = 19,
- true, // kDashPunctuation = 20,
- false, // kOpenPunctuation = 21,
- false, // kClosePunctuation = 22,
- false, // kConnectorPunctuation = 23,
- true, // kOtherPunctuation = 24,
- false, // kMathSymbol = 25,
- true, // kCurrencySymbol = 26,
- false, // kModifierSymbol = 27,
- true // kOtherSymbol = 28
- };
-
- private static final int kLineAsciiValues[] = { // fast path: 256 entries, one category per character value 0x00-0xFF
- // null soh stx etx eot enq ack bell
- blank, blank, blank, BREAK, blank, blank, blank, blank,
- // bs ht lf vt ff cr so si
- blank, BREAK, BREAK, BREAK, BREAK, cr, blank, blank,
- // dle dc1 dc2 dc3 dc4 nak syn etb
- blank, blank, blank, blank, blank, blank, blank, blank,
- // can em sub esc fs gs rs us
- blank, blank, blank, blank, blank, blank, blank, blank,
- // sp ! " # $ % & '
- blank, postJwrd, quote, nonBlank, currency, postJwrd, nonBlank, nonBlank,
- // ( ) * + , - . /
- preJwrd, postJwrd, nonBlank, nonBlank, numPunct, op, numPunct, nonBlank,
- // 0 1 2 3 4 5 6 7
- digit, digit, digit, digit, digit, digit, digit, digit,
- // 8 9 : ; < = > ?
- digit, digit, postJwrd, postJwrd, nonBlank, nonBlank, nonBlank, postJwrd,
- // @ A B C D E F G
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // H I J K L M N O
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // P Q R S T U V W
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // X Y Z [ \ ] ^ _
- nonBlank, nonBlank, nonBlank, preJwrd, nonBlank, postJwrd, nonBlank, nonBlank,
- // ` a b c d e f g
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // h i j k l m n o
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // p q r s t u v w
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // x y z { | } ~ del
- nonBlank, nonBlank, nonBlank, preJwrd, nonBlank, postJwrd, nonBlank, blank,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- blank, blank, blank, blank, blank, blank, blank, blank,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- blank, blank, blank, blank, blank, blank, blank, blank,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- blank, blank, blank, blank, blank, blank, blank, blank,
- // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
- blank, blank, blank, blank, blank, blank, blank, blank,
- // nbsp inv-! cents pounds currency yen broken-bar section
- nbsp, nonBlank, postJwrd, currency, currency, currency, nonBlank, nonBlank,
- // umlaut copyright super-a gui-left not soft-hyph registered macron
- nonBlank, nonBlank, nonBlank, preJwrd, nonBlank, op, nonBlank, nonBlank,
- // degree +/- super-2 super-3 acute micro paragraph bullet
- postJwrd, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // cedilla super-1 super-o gui-right 1/4 1/2 3/4 inv-?
- nonBlank, nonBlank, nonBlank, postJwrd, digit, digit, digit, nonBlank,
- // A-grave A-acute A-hat A-tilde A-umlaut A-ring AE C-cedilla
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // E-grave E-acute E-hat E-umlaut I-grave I-acute I-hat I-umlaut
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // Edh N-tilde O-grave O-acute O-hat O-tilde O-umlaut times
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // O-slash U-grave U-acute U-hat U-umlaut Y-acute Thorn ess-zed
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // a-grave a-acute a-hat a-tilde a-umlaut a-ring ae c-cedilla
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // e-grave e-acute e-hat e-umlaut i-grave i-acute i-hat i-umlaut
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // edh n-tilde o-grave o-acute o-hat o-tilde o-umlaut divide
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank,
- // o-slash u-grave u-acute u-hat u-umlaut y-acute thorn y-umlaut
- nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank, nonBlank
- };
-
- private static final UnicodeClassMapping kLineMap
- = new UnicodeClassMapping(kRawMapping, kExceptionChar, LineExceptionFlags,
- kLineAsciiValues);
- }