1. // This file was generated AUTOMATICALLY from a template file Sun Feb 22 22:52:17 PST 2004
  2. /* @(#)Character.java.template 1.7 03/01/13
  3. *
  4. * Copyright 1994-2002 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. package java.lang;
  11. /**
  12. * The <code>Character</code> class wraps a value of the primitive
  13. * type <code>char</code> in an object. An object of type
  14. * <code>Character</code> contains a single field whose type is
  15. * <code>char</code>.
  16. * <p>
  17. * In addition, this class provides several methods for determining
  18. * a character's category (lowercase letter, digit, etc.) and for converting
  19. * characters from uppercase to lowercase and vice versa.
  20. * <p>
  21. * Character information is based on the Unicode Standard, version 3.0.
  22. * <p>
  23. * The methods and data of class <code>Character</code> are defined by
  24. * the information in the <i>UnicodeData</i> file that is part of the
  25. * Unicode Character Database maintained by the Unicode
  26. * Consortium. This file specifies various properties including name
  27. * and general category for every defined Unicode code point or
  28. * character range.
  29. * <p>
  30. * The file and its description are available from the Unicode Consortium at:
  31. * <ul>
  32. * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  33. * </ul>
  34. *
  35. * @author Lee Boynton
  36. * @author Guy Steele
  37. * @author Akira Tanaka
  38. * @since 1.0
  39. */
  40. public final
  41. class Character extends Object implements java.io.Serializable, Comparable {
  /**
   * The minimum radix available for conversion to and from strings.
   * The constant value of this field, <code>2</code>, is the smallest
   * value permitted for the radix argument in radix-conversion methods
   * such as the <code>digit</code> method, the <code>forDigit</code>
   * method, and the <code>toString</code> method of class
   * <code>Integer</code>.
   *
   * @see java.lang.Character#digit(char, int)
   * @see java.lang.Character#forDigit(int, int)
   * @see java.lang.Integer#toString(int, int)
   * @see java.lang.Integer#valueOf(java.lang.String)
   */
  public static final int MIN_RADIX = 2;
  /**
   * The maximum radix available for conversion to and from strings.
   * The constant value of this field, <code>36</code>, is the largest
   * value permitted for the radix argument in radix-conversion methods
   * such as the <code>digit</code> method, the <code>forDigit</code>
   * method, and the <code>toString</code> method of class
   * <code>Integer</code>.
   *
   * @see java.lang.Character#digit(char, int)
   * @see java.lang.Character#forDigit(int, int)
   * @see java.lang.Integer#toString(int, int)
   * @see java.lang.Integer#valueOf(java.lang.String)
   */
  public static final int MAX_RADIX = 36;
  /**
   * The constant value of this field is the smallest value of type
   * <code>char</code>, <code>'\u0000'</code>.
   *
   * @since 1.0.2
   */
  public static final char MIN_VALUE = '\u0000';
  /**
   * The constant value of this field is the largest value of type
   * <code>char</code>, <code>'\uFFFF'</code>.
   *
   * @since 1.0.2
   */
  public static final char MAX_VALUE = '\uffff';
  /**
   * The <code>Class</code> instance representing the primitive type
   * <code>char</code>. It is obtained once, during class initialization,
   * from <code>Class.getPrimitiveClass("char")</code>.
   *
   * @since 1.1
   */
  public static final Class TYPE = Class.getPrimitiveClass("char");
  91. /*
  92. * Normative general types
  93. */
  94. /*
  95. * General character types
  96. */
  /**
   * General category "Cn" (Other, Not Assigned) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      UNASSIGNED = 0;
  /**
   * General category "Lu" (Letter, Uppercase) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      UPPERCASE_LETTER = 1;
  /**
   * General category "Ll" (Letter, Lowercase) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      LOWERCASE_LETTER = 2;
  /**
   * General category "Lt" (Letter, Titlecase) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      TITLECASE_LETTER = 3;
  /**
   * General category "Lm" (Letter, Modifier) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      MODIFIER_LETTER = 4;
  /**
   * General category "Lo" (Letter, Other) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      OTHER_LETTER = 5;
  /**
   * General category "Mn" (Mark, Nonspacing) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      NON_SPACING_MARK = 6;
  /**
   * General category "Me" (Mark, Enclosing) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      ENCLOSING_MARK = 7;
  /**
   * General category "Mc" (Mark, Spacing Combining) in the Unicode
   * specification.
   * @since 1.1
   */
  public static final byte
      COMBINING_SPACING_MARK = 8;
  /**
   * General category "Nd" (Number, Decimal Digit) in the Unicode
   * specification.
   * @since 1.1
   */
  public static final byte
      DECIMAL_DIGIT_NUMBER = 9;
  /**
   * General category "Nl" (Number, Letter) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      LETTER_NUMBER = 10;
  /**
   * General category "No" (Number, Other) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      OTHER_NUMBER = 11;
  /**
   * General category "Zs" (Separator, Space) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      SPACE_SEPARATOR = 12;
  /**
   * General category "Zl" (Separator, Line) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      LINE_SEPARATOR = 13;
  /**
   * General category "Zp" (Separator, Paragraph) in the Unicode
   * specification.
   * @since 1.1
   */
  public static final byte
      PARAGRAPH_SEPARATOR = 14;
  /**
   * General category "Cc" (Other, Control) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      CONTROL = 15;
  /**
   * General category "Cf" (Other, Format) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      FORMAT = 16;
  /**
   * General category "Co" (Other, Private Use) in the Unicode specification.
   * <p>
   * Note that the numbering jumps from <code>FORMAT</code> (16) to this
   * constant (18); none of the general category constants declared here
   * has the value 17.
   * @since 1.1
   */
  public static final byte
      PRIVATE_USE = 18;
  /**
   * General category "Cs" (Other, Surrogate) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      SURROGATE = 19;
  /**
   * General category "Pd" (Punctuation, Dash) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      DASH_PUNCTUATION = 20;
  /**
   * General category "Ps" (Punctuation, Open) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      START_PUNCTUATION = 21;
  /**
   * General category "Pe" (Punctuation, Close) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      END_PUNCTUATION = 22;
  /**
   * General category "Pc" (Punctuation, Connector) in the Unicode
   * specification.
   * @since 1.1
   */
  public static final byte
      CONNECTOR_PUNCTUATION = 23;
  /**
   * General category "Po" (Punctuation, Other) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      OTHER_PUNCTUATION = 24;
  /**
   * General category "Sm" (Symbol, Math) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      MATH_SYMBOL = 25;
  /**
   * General category "Sc" (Symbol, Currency) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      CURRENCY_SYMBOL = 26;
  /**
   * General category "Sk" (Symbol, Modifier) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      MODIFIER_SYMBOL = 27;
  /**
   * General category "So" (Symbol, Other) in the Unicode specification.
   * @since 1.1
   */
  public static final byte
      OTHER_SYMBOL = 28;
  /**
   * General category "Pi" (Punctuation, Initial quote) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte
      INITIAL_QUOTE_PUNCTUATION = 29;
  /**
   * General category "Pf" (Punctuation, Final quote) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte
      FINAL_QUOTE_PUNCTUATION = 30;
  /**
   * Error or non-char flag. Package-private; its value,
   * <code>'\uFFFF'</code>, is the same as that of <code>MAX_VALUE</code>.
   * @since 1.4
   */
  static final char CHAR_ERROR = '\uFFFF';
  /**
   * Undefined bidirectional character type. Undefined <code>char</code>
   * values have undefined directionality in the Unicode specification.
   * This is the only directionality constant with a negative value.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_UNDEFINED = -1;
  /**
   * Strong bidirectional character type "L" (left-to-right) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
  /**
   * Strong bidirectional character type "R" (right-to-left) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
  /**
   * Strong bidirectional character type "AL" (right-to-left Arabic) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
  /**
   * Weak bidirectional character type "EN" (European number) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
  /**
   * Weak bidirectional character type "ES" (European number separator) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
  /**
   * Weak bidirectional character type "ET" (European number terminator) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
  /**
   * Weak bidirectional character type "AN" (Arabic number) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
  /**
   * Weak bidirectional character type "CS" (common number separator) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
  /**
   * Weak bidirectional character type "NSM" (nonspacing mark) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
  /**
   * Weak bidirectional character type "BN" (boundary neutral) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
  /**
   * Neutral bidirectional character type "B" (paragraph separator) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
  /**
   * Neutral bidirectional character type "S" (segment separator) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
  /**
   * Neutral bidirectional character type "WS" (whitespace) in the Unicode
   * specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_WHITESPACE = 12;
  /**
   * Neutral bidirectional character type "ON" (other neutrals) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
  /**
   * Strong bidirectional character type "LRE" (left-to-right embedding) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
  /**
   * Strong bidirectional character type "LRO" (left-to-right override) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
  /**
   * Strong bidirectional character type "RLE" (right-to-left embedding) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
  /**
   * Strong bidirectional character type "RLO" (right-to-left override) in
   * the Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
  /**
   * Weak bidirectional character type "PDF" (pop directional format) in the
   * Unicode specification.
   * @since 1.4
   */
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
  // Maximum character handled by internal fast-path code which
  // avoids initializing large tables. Accessors such as isLowerCase
  // and isUpperCase compare their argument against this bound: values
  // <= FAST_PATH_MAX are answered by CharacterDataLatin1, all others
  // by CharacterData.
  // Note: performance of this "fast-path" code may be sub-optimal
  // in negative cases for some accessors due to complicated ranges.
  // Should revisit after optimization of table initialization.
  private static final int FAST_PATH_MAX = 255;
  389. /**
  390. * Instances of this class represent particular subsets of the Unicode
  391. * character set. The only family of subsets defined in the
  392. * <code>Character</code> class is <code>{@link Character.UnicodeBlock
  393. * UnicodeBlock}</code>. Other portions of the Java API may define other
  394. * subsets for their own purposes.
  395. *
  396. * @since 1.2
  397. */
  398. public static class Subset {
  399. private String name;
  400. /**
  401. * Constructs a new <code>Subset</code> instance.
  402. *
  403. * @exception NullPointerException if name is <code>null</code>
  404. * @param name The name of this subset
  405. */
  406. protected Subset(String name) {
  407. if (name == null) {
  408. throw new NullPointerException("name");
  409. }
  410. this.name = name;
  411. }
  412. /**
  413. * Compares two <code>Subset</code> objects for equality.
  414. * This method returns <code>true</code> if and only if
  415. * <code>this</code> and the argument refer to the same
  416. * object; since this method is <code>final</code>, this
  417. * guarantee holds for all subclasses.
  418. */
  419. public final boolean equals(Object obj) {
  420. return (this == obj);
  421. }
  422. /**
  423. * Returns the standard hash code as defined by the
  424. * <code>{@link Object#hashCode}</code> method. This method
  425. * is <code>final</code> in order to ensure that the
  426. * <code>equals</code> and <code>hashCode</code> methods will
  427. * be consistent in all subclasses.
  428. */
  429. public final int hashCode() {
  430. return super.hashCode();
  431. }
  432. /**
  433. * Returns the name of this subset.
  434. */
  435. public final String toString() {
  436. return name;
  437. }
  438. }
  439. /**
  440. * A family of character subsets representing the character blocks in the
  441. * Unicode specification. Character blocks generally define characters
  442. * used for a specific script or purpose. A character is contained by
  443. * at most one Unicode block.
  444. *
  445. * @since 1.2
  446. */
  447. public static final class UnicodeBlock extends Subset {
  // Private constructor: code outside this source file cannot create
  // additional blocks. The name is handed to the Subset constructor,
  // which rejects null with a NullPointerException.
  private UnicodeBlock(String name) {
    super(name);
  }
  // NOTE: the initializer of the blocks table declared later in this class
  // refers to these constants. Static initializers run in textual order, so
  // the constants must stay declared before that table.
  /**
   * Constant for the Unicode character block of the same name.
   */
  public static final UnicodeBlock
      BASIC_LATIN
        = new UnicodeBlock("BASIC_LATIN"),
      LATIN_1_SUPPLEMENT
        = new UnicodeBlock("LATIN_1_SUPPLEMENT"),
      LATIN_EXTENDED_A
        = new UnicodeBlock("LATIN_EXTENDED_A"),
      LATIN_EXTENDED_B
        = new UnicodeBlock("LATIN_EXTENDED_B"),
      IPA_EXTENSIONS
        = new UnicodeBlock("IPA_EXTENSIONS"),
      SPACING_MODIFIER_LETTERS
        = new UnicodeBlock("SPACING_MODIFIER_LETTERS"),
      COMBINING_DIACRITICAL_MARKS
        = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"),
      GREEK
        = new UnicodeBlock("GREEK"),
      CYRILLIC
        = new UnicodeBlock("CYRILLIC"),
      ARMENIAN
        = new UnicodeBlock("ARMENIAN"),
      HEBREW
        = new UnicodeBlock("HEBREW"),
      ARABIC
        = new UnicodeBlock("ARABIC"),
      DEVANAGARI
        = new UnicodeBlock("DEVANAGARI"),
      BENGALI
        = new UnicodeBlock("BENGALI"),
      GURMUKHI
        = new UnicodeBlock("GURMUKHI"),
      GUJARATI
        = new UnicodeBlock("GUJARATI"),
      ORIYA
        = new UnicodeBlock("ORIYA"),
      TAMIL
        = new UnicodeBlock("TAMIL"),
      TELUGU
        = new UnicodeBlock("TELUGU"),
      KANNADA
        = new UnicodeBlock("KANNADA"),
      MALAYALAM
        = new UnicodeBlock("MALAYALAM"),
      THAI
        = new UnicodeBlock("THAI"),
      LAO
        = new UnicodeBlock("LAO"),
      TIBETAN
        = new UnicodeBlock("TIBETAN"),
      GEORGIAN
        = new UnicodeBlock("GEORGIAN"),
      HANGUL_JAMO
        = new UnicodeBlock("HANGUL_JAMO"),
      LATIN_EXTENDED_ADDITIONAL
        = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"),
      GREEK_EXTENDED
        = new UnicodeBlock("GREEK_EXTENDED"),
      GENERAL_PUNCTUATION
        = new UnicodeBlock("GENERAL_PUNCTUATION"),
      SUPERSCRIPTS_AND_SUBSCRIPTS
        = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"),
      CURRENCY_SYMBOLS
        = new UnicodeBlock("CURRENCY_SYMBOLS"),
      COMBINING_MARKS_FOR_SYMBOLS
        = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"),
      LETTERLIKE_SYMBOLS
        = new UnicodeBlock("LETTERLIKE_SYMBOLS"),
      NUMBER_FORMS
        = new UnicodeBlock("NUMBER_FORMS"),
      ARROWS
        = new UnicodeBlock("ARROWS"),
      MATHEMATICAL_OPERATORS
        = new UnicodeBlock("MATHEMATICAL_OPERATORS"),
      MISCELLANEOUS_TECHNICAL
        = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"),
      CONTROL_PICTURES
        = new UnicodeBlock("CONTROL_PICTURES"),
      OPTICAL_CHARACTER_RECOGNITION
        = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"),
      ENCLOSED_ALPHANUMERICS
        = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"),
      BOX_DRAWING
        = new UnicodeBlock("BOX_DRAWING"),
      BLOCK_ELEMENTS
        = new UnicodeBlock("BLOCK_ELEMENTS"),
      GEOMETRIC_SHAPES
        = new UnicodeBlock("GEOMETRIC_SHAPES"),
      MISCELLANEOUS_SYMBOLS
        = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"),
      DINGBATS
        = new UnicodeBlock("DINGBATS"),
      CJK_SYMBOLS_AND_PUNCTUATION
        = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"),
      HIRAGANA
        = new UnicodeBlock("HIRAGANA"),
      KATAKANA
        = new UnicodeBlock("KATAKANA"),
      BOPOMOFO
        = new UnicodeBlock("BOPOMOFO"),
      HANGUL_COMPATIBILITY_JAMO
        = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"),
      KANBUN
        = new UnicodeBlock("KANBUN"),
      ENCLOSED_CJK_LETTERS_AND_MONTHS
        = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"),
      CJK_COMPATIBILITY
        = new UnicodeBlock("CJK_COMPATIBILITY"),
      CJK_UNIFIED_IDEOGRAPHS
        = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"),
      HANGUL_SYLLABLES
        = new UnicodeBlock("HANGUL_SYLLABLES"),
      SURROGATES_AREA
        = new UnicodeBlock("SURROGATES_AREA"),
      PRIVATE_USE_AREA
        = new UnicodeBlock("PRIVATE_USE_AREA"),
      CJK_COMPATIBILITY_IDEOGRAPHS
        = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"),
      ALPHABETIC_PRESENTATION_FORMS
        = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"),
      ARABIC_PRESENTATION_FORMS_A
        = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"),
      COMBINING_HALF_MARKS
        = new UnicodeBlock("COMBINING_HALF_MARKS"),
      CJK_COMPATIBILITY_FORMS
        = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"),
      SMALL_FORM_VARIANTS
        = new UnicodeBlock("SMALL_FORM_VARIANTS"),
      ARABIC_PRESENTATION_FORMS_B
        = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"),
      HALFWIDTH_AND_FULLWIDTH_FORMS
        = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"),
      SPECIALS
        = new UnicodeBlock("SPECIALS");
  // NOTE: like the constants above, these are referenced by the initializer
  // of the blocks table declared later in this class and must stay declared
  // before it (static initializers run in textual order).
  /**
   * Constant for the Unicode character block of the same name.
   *
   * @since 1.4
   */
  public static final UnicodeBlock
      SYRIAC
        = new UnicodeBlock("SYRIAC"),
      THAANA
        = new UnicodeBlock("THAANA"),
      SINHALA
        = new UnicodeBlock("SINHALA"),
      MYANMAR
        = new UnicodeBlock("MYANMAR"),
      ETHIOPIC
        = new UnicodeBlock("ETHIOPIC"),
      CHEROKEE
        = new UnicodeBlock("CHEROKEE"),
      UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
        = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"),
      OGHAM
        = new UnicodeBlock("OGHAM"),
      RUNIC
        = new UnicodeBlock("RUNIC"),
      KHMER
        = new UnicodeBlock("KHMER"),
      MONGOLIAN
        = new UnicodeBlock("MONGOLIAN"),
      BRAILLE_PATTERNS
        = new UnicodeBlock("BRAILLE_PATTERNS"),
      CJK_RADICALS_SUPPLEMENT
        = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"),
      KANGXI_RADICALS
        = new UnicodeBlock("KANGXI_RADICALS"),
      IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
        new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"),
      BOPOMOFO_EXTENDED
        = new UnicodeBlock("BOPOMOFO_EXTENDED"),
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
        = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"),
      YI_SYLLABLES
        = new UnicodeBlock("YI_SYLLABLES"),
      YI_RADICALS
        = new UnicodeBlock("YI_RADICALS");
  // First char of each consecutive range of the char value space, in
  // ascending order. Entry i pairs with blocks[i]: of(char) binary-searches
  // this table for the last start that is <= the argument and returns the
  // blocks entry at the same index. Ranges commented "unassigned" or
  // "non-characters" pair with a null entry in blocks. Both tables must
  // therefore keep the same length and the same order.
  private static final char blockStarts[] = {
      '\u0000', // Basic Latin
      '\u0080', // Latin-1 Supplement
      '\u0100', // Latin Extended-A
      '\u0180', // Latin Extended-B
      '\u0250', // IPA Extensions
      '\u02B0', // Spacing Modifier Letters
      '\u0300', // Combining Diacritical Marks
      '\u0370', // Greek
      '\u0400', // Cyrillic
      '\u0500', // unassigned
      '\u0530', // Armenian
      '\u0590', // Hebrew
      '\u0600', // Arabic
      '\u0700', // Syriac
      '\u0750', // unassigned
      '\u0780', // Thaana
      '\u07C0', // unassigned
      '\u0900', // Devanagari
      '\u0980', // Bengali
      '\u0A00', // Gurmukhi
      '\u0A80', // Gujarati
      '\u0B00', // Oriya
      '\u0B80', // Tamil
      '\u0C00', // Telugu
      '\u0C80', // Kannada
      '\u0D00', // Malayalam
      '\u0D80', // Sinhala
      '\u0E00', // Thai
      '\u0E80', // Lao
      '\u0F00', // Tibetan
      '\u1000', // Myanmar
      '\u10A0', // Georgian
      '\u1100', // Hangul Jamo
      '\u1200', // Ethiopic
      '\u1380', // unassigned
      '\u13A0', // Cherokee
      '\u1400', // Unified Canadian Aboriginal Syllabics
      '\u1680', // Ogham
      '\u16A0', // Runic
      '\u1700', // unassigned
      '\u1780', // Khmer
      '\u1800', // Mongolian
      '\u18B0', // unassigned
      '\u1E00', // Latin Extended Additional
      '\u1F00', // Greek Extended
      '\u2000', // General Punctuation
      '\u2070', // Superscripts and Subscripts
      '\u20A0', // Currency Symbols
      '\u20D0', // Combining Marks for Symbols
      '\u2100', // Letterlike Symbols
      '\u2150', // Number Forms
      '\u2190', // Arrows
      '\u2200', // Mathematical Operators
      '\u2300', // Miscellaneous Technical
      '\u2400', // Control Pictures
      '\u2440', // Optical Character Recognition
      '\u2460', // Enclosed Alphanumerics
      '\u2500', // Box Drawing
      '\u2580', // Block Elements
      '\u25A0', // Geometric Shapes
      '\u2600', // Miscellaneous Symbols
      '\u2700', // Dingbats
      '\u27C0', // unassigned
      '\u2800', // Braille Patterns
      '\u2900', // unassigned
      '\u2E80', // CJK Radicals Supplement
      '\u2F00', // Kangxi Radicals
      '\u2FE0', // unassigned
      '\u2FF0', // Ideographic Description Characters
      '\u3000', // CJK Symbols and Punctuation
      '\u3040', // Hiragana
      '\u30A0', // Katakana
      '\u3100', // Bopomofo
      '\u3130', // Hangul Compatibility Jamo
      '\u3190', // Kanbun
      '\u31A0', // Bopomofo Extended
      '\u31C0', // unassigned
      '\u3200', // Enclosed CJK Letters and Months
      '\u3300', // CJK Compatibility
      '\u3400', // CJK Unified Ideographs Extension A
      '\u4DB6', // unassigned
      '\u4E00', // CJK Unified Ideographs
      '\uA000', // Yi Syllables
      '\uA490', // Yi Radicals
      '\uA4D0', // unassigned
      '\uAC00', // Hangul Syllables
      '\uD7A4', // unassigned
      '\uD800', // Surrogates
      '\uE000', // Private Use
      '\uF900', // CJK Compatibility Ideographs
      '\uFB00', // Alphabetic Presentation Forms
      '\uFB50', // Arabic Presentation Forms-A
      '\uFE00', // unassigned
      '\uFE20', // Combining Half Marks
      '\uFE30', // CJK Compatibility Forms
      '\uFE50', // Small Form Variants
      '\uFE70', // Arabic Presentation Forms-B
      '\uFEFF', // Specials
      '\uFF00', // Halfwidth and Fullwidth Forms
      '\uFFF0', // Specials
      '\uFFFE', // non-characters
  };
  // Block constant for each range listed in blockStarts; entry i covers the
  // chars from blockStarts[i] up to (but not including) blockStarts[i + 1].
  // A null entry marks a range that belongs to no block, which is what
  // of(char) then returns. SPECIALS appears twice because two separate
  // ranges map to it. Keep this table the same length and order as
  // blockStarts.
  private static final UnicodeBlock[] blocks = {
      BASIC_LATIN,
      LATIN_1_SUPPLEMENT,
      LATIN_EXTENDED_A,
      LATIN_EXTENDED_B,
      IPA_EXTENSIONS,
      SPACING_MODIFIER_LETTERS,
      COMBINING_DIACRITICAL_MARKS,
      GREEK,
      CYRILLIC,
      null,
      ARMENIAN,
      HEBREW,
      ARABIC,
      SYRIAC,
      null,
      THAANA,
      null,
      DEVANAGARI,
      BENGALI,
      GURMUKHI,
      GUJARATI,
      ORIYA,
      TAMIL,
      TELUGU,
      KANNADA,
      MALAYALAM,
      SINHALA,
      THAI,
      LAO,
      TIBETAN,
      MYANMAR,
      GEORGIAN,
      HANGUL_JAMO,
      ETHIOPIC,
      null,
      CHEROKEE,
      UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
      OGHAM,
      RUNIC,
      null,
      KHMER,
      MONGOLIAN,
      null,
      LATIN_EXTENDED_ADDITIONAL,
      GREEK_EXTENDED,
      GENERAL_PUNCTUATION,
      SUPERSCRIPTS_AND_SUBSCRIPTS,
      CURRENCY_SYMBOLS,
      COMBINING_MARKS_FOR_SYMBOLS,
      LETTERLIKE_SYMBOLS,
      NUMBER_FORMS,
      ARROWS,
      MATHEMATICAL_OPERATORS,
      MISCELLANEOUS_TECHNICAL,
      CONTROL_PICTURES,
      OPTICAL_CHARACTER_RECOGNITION,
      ENCLOSED_ALPHANUMERICS,
      BOX_DRAWING,
      BLOCK_ELEMENTS,
      GEOMETRIC_SHAPES,
      MISCELLANEOUS_SYMBOLS,
      DINGBATS,
      null,
      BRAILLE_PATTERNS,
      null,
      CJK_RADICALS_SUPPLEMENT,
      KANGXI_RADICALS,
      null,
      IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
      CJK_SYMBOLS_AND_PUNCTUATION,
      HIRAGANA,
      KATAKANA,
      BOPOMOFO,
      HANGUL_COMPATIBILITY_JAMO,
      KANBUN,
      BOPOMOFO_EXTENDED,
      null,
      ENCLOSED_CJK_LETTERS_AND_MONTHS,
      CJK_COMPATIBILITY,
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
      null,
      CJK_UNIFIED_IDEOGRAPHS,
      YI_SYLLABLES,
      YI_RADICALS,
      null,
      HANGUL_SYLLABLES,
      null,
      SURROGATES_AREA,
      PRIVATE_USE_AREA,
      CJK_COMPATIBILITY_IDEOGRAPHS,
      ALPHABETIC_PRESENTATION_FORMS,
      ARABIC_PRESENTATION_FORMS_A,
      null,
      COMBINING_HALF_MARKS,
      CJK_COMPATIBILITY_FORMS,
      SMALL_FORM_VARIANTS,
      ARABIC_PRESENTATION_FORMS_B,
      SPECIALS,
      HALFWIDTH_AND_FULLWIDTH_FORMS,
      SPECIALS,
      null,
  };
  837. /**
  838. * Returns the object representing the Unicode block containing the
  839. * given character, or <code>null</code> if the character is not a
  840. * member of a defined block.
  841. *
  842. * @param c The character in question
  843. * @return The <code>UnicodeBlock</code> instance representing the
  844. * Unicode block of which this character is a member, or
  845. * <code>null</code> if the character is not a member of any
  846. * Unicode block
  847. */
  848. public static UnicodeBlock of(char c) {
  849. int top, bottom, current;
  850. bottom = 0;
  851. top = blockStarts.length;
  852. current = top2;
  853. // invariant: top > current >= bottom && ch >= unicodeBlockStarts[bottom]
  854. while (top - bottom > 1) {
  855. if (c >= blockStarts[current]) {
  856. bottom = current;
  857. } else {
  858. top = current;
  859. }
  860. current = (top + bottom) / 2;
  861. }
  862. return blocks[current];
  863. }
  864. }
  /**
   * The value of the <code>Character</code>. It is assigned by the
   * constructor and returned by <code>charValue()</code>.
   *
   * @serial
   */
  private char value;
  /** use serialVersionUID from JDK 1.0.2 for interoperability */
  private static final long serialVersionUID = 3786198910865385080L;
  /**
   * Constructs a newly allocated <code>Character</code> object that
   * represents the specified <code>char</code> value. Every
   * <code>char</code> value is accepted; no validation is performed.
   *
   * @param value the value to be represented by the
   * <code>Character</code> object.
   */
  public Character(char value) {
    this.value = value;
  }
  883. /**
  884. * Returns the value of this <code>Character</code> object.
  885. * @return the primitive <code>char</code> value represented by
  886. * this object.
  887. */
  888. public char charValue() {
  889. return value;
  890. }
  891. /**
  892. * Returns a hash code for this <code>Character</code>.
  893. * @return a hash code value for this object.
  894. */
  895. public int hashCode() {
  896. return (int)value;
  897. }
  898. /**
  899. * Compares this object against the specified object.
  900. * The result is <code>true</code> if and only if the argument is not
  901. * <code>null</code> and is a <code>Character</code> object that
  902. * represents the same <code>char</code> value as this object.
  903. *
  904. * @param obj the object to compare with.
  905. * @return <code>true</code> if the objects are the same;
  906. * <code>false</code> otherwise.
  907. */
  908. public boolean equals(Object obj) {
  909. if (obj instanceof Character) {
  910. return value == ((Character)obj).charValue();
  911. }
  912. return false;
  913. }
  914. /**
  915. * Returns a <code>String</code> object representing this
  916. * <code>Character</code>'s value. The result is a string of
  917. * length 1 whose sole component is the primitive
  918. * <code>char</code> value represented by this
  919. * <code>Character</code> object.
  920. *
  921. * @return a string representation of this object.
  922. */
  923. public String toString() {
  924. char buf[] = {value};
  925. return String.valueOf(buf);
  926. }
  927. /**
  928. * Returns a <code>String</code> object representing the
  929. * specified <code>char</code>. The result is a string of length
  930. * 1 consisting solely of the specified <code>char</code>.
  931. *
  932. * @param c the <code>char</code> to be converted
  933. * @return the string representation of the specified <code>char</code>
  934. * @since 1.4
  935. */
  936. public static String toString(char c) {
  937. return String.valueOf(c);
  938. }
  939. /**
  940. * Determines if the specified character is a lowercase character.
  941. * <p>
  942. * A character is lowercase if its general category type, provided
  943. * by <code>Character.getType(ch)</code>, is
  944. * <code>LOWERCASE_LETTER</code>.
  945. * <p>
  946. * The following are examples of lowercase characters:
  947. * <p><blockquote><pre>
  948. * a b c d e f g h i j k l m n o p q r s t u v w x y z
  949. * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
  950. * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
  951. * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
  952. * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
  953. * </pre></blockquote>
  954. * <p> Many other Unicode characters are lowercase too.
  955. * <p>
  956. *
  957. * @param ch the character to be tested.
  958. * @return <code>true</code> if the character is lowercase;
  959. * <code>false</code> otherwise.
  960. * @see java.lang.Character#isLowerCase(char)
  961. * @see java.lang.Character#isTitleCase(char)
  962. * @see java.lang.Character#toLowerCase(char)
  963. * @see java.lang.Character#getType(char)
  964. */
  965. public static boolean isLowerCase(char ch) {
  966. if (ch <= FAST_PATH_MAX) {
  967. return CharacterDataLatin1.isLowerCase(ch);
  968. } else {
  969. return CharacterData.isLowerCase(ch);
  970. }
  971. }
  972. /**
  973. * Determines if the specified character is an uppercase character.
  974. * <p>
  975. * A character is uppercase if its general category type, provided by
  976. * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
  977. * <p>
  978. * The following are examples of uppercase characters:
  979. * <p><blockquote><pre>
  980. * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  981. * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
  982. * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
  983. * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
  984. * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
  985. * </pre></blockquote>
  986. * <p> Many other Unicode characters are uppercase too.<p>
  987. *
  988. * @param ch the character to be tested.
  989. * @return <code>true</code> if the character is uppercase;
  990. * <code>false</code> otherwise.
  991. * @see java.lang.Character#isLowerCase(char)
  992. * @see java.lang.Character#isTitleCase(char)
  993. * @see java.lang.Character#toUpperCase(char)
  994. * @see java.lang.Character#getType(char)
  995. * @since 1.0
  996. */
  997. public static boolean isUpperCase(char ch) {
  998. if (ch <= FAST_PATH_MAX) {
  999. return CharacterDataLatin1.isUpperCase(ch);
  1000. } else {
  1001. return CharacterData.isUpperCase(ch);
  1002. }
  1003. }
  1004. /**
  1005. * Determines if the specified character is a titlecase character.
  1006. * <p>
  1007. * A character is a titlecase character if its general
  1008. * category type, provided by <code>Character.getType(ch)</code>,
  1009. * is <code>TITLECASE_LETTER</code>.
  1010. * <p>
  1011. * Some characters look like pairs of Latin letters. For example, there
  1012. * is an uppercase letter that looks like "LJ" and has a corresponding
  1013. * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1014. * is the appropriate form to use when rendering a word in lowercase
  1015. * with initial capitals, as for a book title.
  1016. * <p>
  1017. * These are some of the Unicode characters for which this method returns
  1018. * <code>true</code>:
  1019. * <ul>
  1020. * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
  1021. * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
  1022. * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
  1023. * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
  1024. * </ul>
  1025. * <p> Many other Unicode characters are titlecase too.<p>
  1026. *
  1027. * @param ch the character to be tested.
  1028. * @return <code>true</code> if the character is titlecase;
  1029. * <code>false</code> otherwise.
  1030. * @see java.lang.Character#isLowerCase(char)
  1031. * @see java.lang.Character#isUpperCase(char)
  1032. * @see java.lang.Character#toTitleCase(char)
  1033. * @see java.lang.Character#getType(char)
  1034. * @since 1.0.2
  1035. */
  1036. public static boolean isTitleCase(char ch) {
  1037. if (ch <= FAST_PATH_MAX) {
  1038. return CharacterDataLatin1.isTitleCase(ch);
  1039. } else {
  1040. return CharacterData.isTitleCase(ch);
  1041. }
  1042. }
  1043. /**
  1044. * Determines if the specified character is a digit.
  1045. * <p>
  1046. * A character is a digit if its general category type, provided
  1047. * by <code>Character.getType(ch)</code>, is
  1048. * <code>DECIMAL_DIGIT_NUMBER</code>.
  1049. * <p>
  1050. * Some Unicode character ranges that contain digits:
  1051. * <ul>
  1052. * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
  1053. * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
  1054. * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
  1055. * Arabic-Indic digits
  1056. * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
  1057. * Extended Arabic-Indic digits
  1058. * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
  1059. * Devanagari digits
  1060. * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
  1061. * Fullwidth digits
  1062. * </ul>
  1063. *
  1064. * Many other character ranges contain digits as well.
  1065. *
  1066. * @param ch the character to be tested.
  1067. * @return <code>true</code> if the character is a digit;
  1068. * <code>false</code> otherwise.
  1069. * @see java.lang.Character#digit(char, int)
  1070. * @see java.lang.Character#forDigit(int, int)
  1071. * @see java.lang.Character#getType(char)
  1072. */
  1073. public static boolean isDigit(char ch) {
  1074. if (ch <= FAST_PATH_MAX) {
  1075. return CharacterDataLatin1.isDigit(ch);
  1076. } else {
  1077. return CharacterData.isDigit(ch);
  1078. }
  1079. }
  1080. /**
  1081. * Determines if a character is defined in Unicode.
  1082. * <p>
  1083. * A character is defined if at least one of the following is true:
  1084. * <ul>
  1085. * <li>It has an entry in the UnicodeData file.
  1086. * <li>It has a value in a range defined by the UnicodeData file.
  1087. * </ul>
  1088. *
  1089. * @param ch the character to be tested
  1090. * @return <code>true</code> if the character has a defined meaning
  1091. * in Unicode; <code>false</code> otherwise.
  1092. * @see java.lang.Character#isDigit(char)
  1093. * @see java.lang.Character#isLetter(char)
  1094. * @see java.lang.Character#isLetterOrDigit(char)
  1095. * @see java.lang.Character#isLowerCase(char)
  1096. * @see java.lang.Character#isTitleCase(char)
  1097. * @see java.lang.Character#isUpperCase(char)
  1098. * @since 1.0.2
  1099. */
  1100. public static boolean isDefined(char ch) {
  1101. if (ch <= FAST_PATH_MAX) {
  1102. return CharacterDataLatin1.isDefined(ch);
  1103. } else {
  1104. return CharacterData.isDefined(ch);
  1105. }
  1106. }
  1107. /**
  1108. * Determines if the specified character is a letter.
  1109. * <p>
  1110. * A character is considered to be a letter if its general
  1111. * category type, provided by <code>Character.getType(ch)</code>,
  1112. * is any of the following:
  1113. * <ul>
  1114. * <li> <code>UPPERCASE_LETTER</code>
  1115. * <li> <code>LOWERCASE_LETTER</code>
  1116. * <li> <code>TITLECASE_LETTER</code>
  1117. * <li> <code>MODIFIER_LETTER</code>
  1118. * <li> <code>OTHER_LETTER</code>
  1119. * </ul>
  1120. *
  1121. * Not all letters have case. Many characters are
  1122. * letters but are neither uppercase nor lowercase nor titlecase.
  1123. *
  1124. * @param ch the character to be tested.
  1125. * @return <code>true</code> if the character is a letter;
  1126. * <code>false</code> otherwise.
  1127. * @see java.lang.Character#isDigit(char)
  1128. * @see java.lang.Character#isJavaIdentifierStart(char)
  1129. * @see java.lang.Character#isJavaLetter(char)
  1130. * @see java.lang.Character#isJavaLetterOrDigit(char)
  1131. * @see java.lang.Character#isLetterOrDigit(char)
  1132. * @see java.lang.Character#isLowerCase(char)
  1133. * @see java.lang.Character#isTitleCase(char)
  1134. * @see java.lang.Character#isUnicodeIdentifierStart(char)
  1135. * @see java.lang.Character#isUpperCase(char)
  1136. */
  1137. public static boolean isLetter(char ch) {
  1138. if (ch <= FAST_PATH_MAX) {
  1139. return CharacterDataLatin1.isLetter(ch);
  1140. } else {
  1141. return CharacterData.isLetter(ch);
  1142. }
  1143. }
  1144. /**
  1145. * Determines if the specified character is a letter or digit.
  1146. * <p>
  1147. * A character is considered to be a letter or digit if either
  1148. * <code>Character.isLetter(char ch)</code> or
  1149. * <code>Character.isDigit(char ch)</code> returns
  1150. * <code>true</code> for the character.
  1151. *
  1152. * @param ch the character to be tested.
  1153. * @return <code>true</code> if the character is a letter or digit;
  1154. * <code>false</code> otherwise.
  1155. * @see java.lang.Character#isDigit(char)
  1156. * @see java.lang.Character#isJavaIdentifierPart(char)
  1157. * @see java.lang.Character#isJavaLetter(char)
  1158. * @see java.lang.Character#isJavaLetterOrDigit(char)
  1159. * @see java.lang.Character#isLetter(char)
  1160. * @see java.lang.Character#isUnicodeIdentifierPart(char)
  1161. * @since 1.0.2
  1162. */
  1163. public static boolean isLetterOrDigit(char ch) {
  1164. if (ch <= FAST_PATH_MAX) {
  1165. return CharacterDataLatin1.isLetterOrDigit(ch);
  1166. } else {
  1167. return CharacterData.isLetterOrDigit(ch);
  1168. }
  1169. }
  1170. /**
  1171. * Determines if the specified character is permissible as the first
  1172. * character in a Java identifier.
  1173. * <p>
  1174. * A character may start a Java identifier if and only if
  1175. * one of the following is true:
  1176. * <ul>
  1177. * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
  1178. * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
  1179. * <li> ch is a currency symbol (such as "$")
  1180. * <li> ch is a connecting punctuation character (such as "_").
  1181. * </ul>
  1182. *
  1183. * @param ch the character to be tested.
  1184. * @return <code>true</code> if the character may start a Java
  1185. * identifier; <code>false</code> otherwise.
  1186. * @see java.lang.Character#isJavaLetterOrDigit(char)
  1187. * @see java.lang.Character#isJavaIdentifierStart(char)
  1188. * @see java.lang.Character#isJavaIdentifierPart(char)
  1189. * @see java.lang.Character#isLetter(char)
  1190. * @see java.lang.Character#isLetterOrDigit(char)
  1191. * @see java.lang.Character#isUnicodeIdentifierStart(char)
  1192. * @since 1.0.2
  1193. * @deprecated Replaced by isJavaIdentifierStart(char).
  1194. */
  1195. public static boolean isJavaLetter(char ch) {
  1196. return isJavaIdentifierStart(ch);
  1197. }
  1198. /**
  1199. * Determines if the specified character may be part of a Java
  1200. * identifier as other than the first character.
  1201. * <p>
  1202. * A character may be part of a Java identifier if and only if any
  1203. * of the following are true:
  1204. * <ul>
  1205. * <li> it is a letter
  1206. * <li> it is a currency symbol (such as <code>'$'</code>)
  1207. * <li> it is a connecting punctuation character (such as <code>'_'</code>)
  1208. * <li> it is a digit
  1209. * <li> it is a numeric letter (such as a Roman numeral character)
  1210. * <li> it is a combining mark
  1211. * <li> it is a non-spacing mark
  1212. * <li> <code>isIdentifierIgnorable</code> returns
  1213. * <code>true</code> for the character.
  1214. * </ul>
  1215. *
  1216. * @param ch the character to be tested.
  1217. * @return <code>true</code> if the character may be part of a
  1218. * Java identifier; <code>false</code> otherwise.
  1219. * @see java.lang.Character#isJavaLetter(char)
  1220. * @see java.lang.Character#isJavaIdentifierStart(char)
  1221. * @see java.lang.Character#isJavaIdentifierPart(char)
  1222. * @see java.lang.Character#isLetter(char)
  1223. * @see java.lang.Character#isLetterOrDigit(char)
  1224. * @see java.lang.Character#isUnicodeIdentifierPart(char)
  1225. * @see java.lang.Character#isIdentifierIgnorable(char)
  1226. * @since 1.0.2
  1227. * @deprecated Replaced by isJavaIdentifierPart(char).
  1228. */
  1229. public static boolean isJavaLetterOrDigit(char ch) {
  1230. return isJavaIdentifierPart(ch);
  1231. }
  1232. /**
  1233. * Determines if the specified character is
  1234. * permissible as the first character in a Java identifier.
  1235. * <p>
  1236. * A character may start a Java identifier if and only if
  1237. * one of the following conditions is true:
  1238. * <ul>
  1239. * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
  1240. * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
  1241. * <li> ch is a currency symbol (such as "$")
  1242. * <li> ch is a connecting punctuation character (such as "_").
  1243. * </ul>
  1244. *
  1245. * @param ch the character to be tested.
  1246. * @return <code>true</code> if the character may start a Java identifier;
  1247. * <code>false</code> otherwise.
  1248. * @see java.lang.Character#isJavaIdentifierPart(char)
  1249. * @see java.lang.Character#isLetter(char)
  1250. * @see java.lang.Character#isUnicodeIdentifierStart(char)
  1251. * @since 1.1
  1252. */
  1253. public static boolean isJavaIdentifierStart(char ch) {
  1254. if (ch <= FAST_PATH_MAX) {
  1255. return CharacterDataLatin1.isJavaIdentifierStart(ch);
  1256. } else {
  1257. return CharacterData.isJavaIdentifierStart(ch);
  1258. }
  1259. }
  1260. /**
  1261. * Determines if the specified character may be part of a Java
  1262. * identifier as other than the first character.
  1263. * <p>
  1264. * A character may be part of a Java identifier if any of the following
  1265. * are true:
  1266. * <ul>
  1267. * <li> it is a letter
  1268. * <li> it is a currency symbol (such as <code>'$'</code>)
  1269. * <li> it is a connecting punctuation character (such as <code>'_'</code>)
  1270. * <li> it is a digit
  1271. * <li> it is a numeric letter (such as a Roman numeral character)
  1272. * <li> it is a combining mark
  1273. * <li> it is a non-spacing mark
  1274. * <li> <code>isIdentifierIgnorable</code> returns
  1275. * <code>true</code> for the character
  1276. * </ul>
  1277. *
  1278. * @param ch the character to be tested.
  1279. * @return <code>true</code> if the character may be part of a
  1280. * Java identifier; <code>false</code> otherwise.
  1281. * @see java.lang.Character#isIdentifierIgnorable(char)
  1282. * @see java.lang.Character#isJavaIdentifierStart(char)
  1283. * @see java.lang.Character#isLetterOrDigit(char)
  1284. * @see java.lang.Character#isUnicodeIdentifierPart(char)
  1285. * @since 1.1
  1286. */
  1287. public static boolean isJavaIdentifierPart(char ch) {
  1288. if (ch <= FAST_PATH_MAX) {
  1289. return CharacterDataLatin1.isJavaIdentifierPart(ch);
  1290. } else {
  1291. return CharacterData.isJavaIdentifierPart(ch);
  1292. }
  1293. }
  1294. /**
  1295. * Determines if the specified character is permissible as the
  1296. * first character in a Unicode identifier.
  1297. * <p>
  1298. * A character may start a Unicode identifier if and only if
  1299. * one of the following conditions is true:
  1300. * <ul>
  1301. * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
  1302. * <li> {@link #getType(char) getType(ch)} returns
  1303. * <code>LETTER_NUMBER</code>.
  1304. * </ul>
  1305. * @param ch the character to be tested.
  1306. * @return <code>true</code> if the character may start a Unicode
  1307. * identifier; <code>false</code> otherwise.
  1308. * @see java.lang.Character#isJavaIdentifierStart(char)
  1309. * @see java.lang.Character#isLetter(char)
  1310. * @see java.lang.Character#isUnicodeIdentifierPart(char)
  1311. * @since 1.1
  1312. */
  1313. public static boolean isUnicodeIdentifierStart(char ch) {
  1314. if (ch <= FAST_PATH_MAX) {
  1315. return CharacterDataLatin1.isUnicodeIdentifierStart(ch);
  1316. } else {
  1317. return CharacterData.isUnicodeIdentifierStart(ch);
  1318. }
  1319. }
  1320. /**
  1321. * Determines if the specified character may be part of a Unicode
  1322. * identifier as other than the first character.
  1323. * <p>
  1324. * A character may be part of a Unicode identifier if and only if
  1325. * one of the following statements is true:
  1326. * <ul>
  1327. * <li> it is a letter
  1328. * <li> it is a connecting punctuation character (such as <code>'_'</code>)
  1329. * <li> it is a digit
  1330. * <li> it is a numeric letter (such as a Roman numeral character)
  1331. * <li> it is a combining mark
  1332. * <li> it is a non-spacing mark
  1333. * <li> <code>isIdentifierIgnorable</code> returns
  1334. * <code>true</code> for this character.
  1335. * </ul>
  1336. *
  1337. * @param ch the character to be tested.
  1338. * @return <code>true</code> if the character may be part of a
  1339. * Unicode identifier; <code>false</code> otherwise.
  1340. * @see java.lang.Character#isIdentifierIgnorable(char)
  1341. * @see java.lang.Character#isJavaIdentifierPart(char)
  1342. * @see java.lang.Character#isLetterOrDigit(char)
  1343. * @see java.lang.Character#isUnicodeIdentifierStart(char)
  1344. * @since 1.1
  1345. */
  1346. public static boolean isUnicodeIdentifierPart(char ch) {
  1347. if (ch <= FAST_PATH_MAX) {
  1348. return CharacterDataLatin1.isUnicodeIdentifierPart(ch);
  1349. } else {
  1350. return CharacterData.isUnicodeIdentifierPart(ch);
  1351. }
  1352. }
  1353. /**
  1354. * Determines if the specified character should be regarded as
  1355. * an ignorable character in a Java identifier or a Unicode identifier.
  1356. * <p>
  1357. * The following Unicode characters are ignorable in a Java identifier
  1358. * or a Unicode identifier:
  1359. * <ul>
  1360. * <li>ISO control characters that are not whitespace
  1361. * <ul>
  1362. * <li><code>'\u0000'</code> through <code>'\u0008'</code>
  1363. * <li><code>'\u000E'</code> through <code>'\u001B'</code>
  1364. * <li><code>'\u007F'</code> through <code>'\u009F'</code>
  1365. * </ul>
  1366. *
  1367. * <li>all characters that have the <code>FORMAT</code> general
  1368. * category value
  1369. * </ul>
  1370. *
  1371. * @param ch the character to be tested.
  1372. * @return <code>true</code> if the character is an ignorable control
  1373. * character that may be part of a Java or Unicode identifier;
  1374. * <code>false</code> otherwise.
  1375. * @see java.lang.Character#isJavaIdentifierPart(char)
  1376. * @see java.lang.Character#isUnicodeIdentifierPart(char)
  1377. * @since 1.1
  1378. */
  1379. public static boolean isIdentifierIgnorable(char ch) {
  1380. if (ch <= FAST_PATH_MAX) {
  1381. return CharacterDataLatin1.isIdentifierIgnorable(ch);
  1382. } else {
  1383. return CharacterData.isIdentifierIgnorable(ch);
  1384. }
  1385. }
  1386. /**
  1387. * Converts the character argument to lowercase using case
  1388. * mapping information from the UnicodeData file.
  1389. * <p>
  1390. * Note that
  1391. * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
  1392. * does not always return <code>true</code> for some ranges of
  1393. * characters, particularly those that are symbols or ideographs.
  1394. *
  1395. * @param ch the character to be converted.
  1396. * @return the lowercase equivalent of the character, if any;
  1397. * otherwise, the character itself.
  1398. * @see java.lang.Character#isLowerCase(char)
  1399. * @see java.lang.Character#isUpperCase(char)
  1400. * @see java.lang.Character#toTitleCase(char)
  1401. * @see java.lang.Character#toUpperCase(char)
  1402. */
  1403. public static char toLowerCase(char ch) {
  1404. if (ch <= FAST_PATH_MAX) {
  1405. return CharacterDataLatin1.toLowerCase(ch);
  1406. } else {
  1407. return CharacterData.toLowerCase(ch);
  1408. }
  1409. }
  1410. /**
  1411. * Converts the character argument to uppercase using case mapping
  1412. * information from the UnicodeData file.
  1413. * <p>
  1414. * Note that
  1415. * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
  1416. * does not always return <code>true</code> for some ranges of
  1417. * characters, particularly those that are symbols or ideographs.
  1418. *
  1419. * @param ch the character to be converted.
  1420. * @return the uppercase equivalent of the character, if any;
  1421. * otherwise, the character itself.
  1422. * @see java.lang.Character#isLowerCase(char)
  1423. * @see java.lang.Character#isUpperCase(char)
  1424. * @see java.lang.Character#toLowerCase(char)
  1425. * @see java.lang.Character#toTitleCase(char)
  1426. */
  1427. public static char toUpperCase(char ch) {
  1428. if (ch <= FAST_PATH_MAX) {
  1429. return CharacterDataLatin1.toUpperCase(ch);
  1430. } else {
  1431. return CharacterData.toUpperCase(ch);
  1432. }
  1433. }
  1434. /**
  1435. * Converts the character argument to titlecase using case mapping
  1436. * information from the UnicodeData file. If a character has no
  1437. * explicit titlecase mapping and is not itself a titlecase char
  1438. * according to UnicodeData, then the uppercase mapping is
  1439. * returned as an equivalent titlecase mapping. If the
  1440. * <code>char</code> argument is already a titlecase
  1441. * <code>char</code>, the same <code>char</code> value will be
  1442. * returned.
  1443. * <p>
  1444. * Note that
  1445. * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
  1446. * does not always return <code>true</code> for some ranges of
  1447. * characters.
  1448. *
  1449. * @param ch the character to be converted.
  1450. * @return the titlecase equivalent of the character, if any;
  1451. * otherwise, the character itself.
  1452. * @see java.lang.Character#isTitleCase(char)
  1453. * @see java.lang.Character#toLowerCase(char)
  1454. * @see java.lang.Character#toUpperCase(char)
  1455. * @since 1.0.2
  1456. */
  1457. public static char toTitleCase(char ch) {
  1458. if (ch <= FAST_PATH_MAX) {
  1459. return CharacterDataLatin1.toTitleCase(ch);
  1460. } else {
  1461. return CharacterData.toTitleCase(ch);
  1462. }
  1463. }
  1464. /**
  1465. * Returns the numeric value of the character <code>ch</code> in the
  1466. * specified radix.
  1467. * <p>
  1468. * If the radix is not in the range <code>MIN_RADIX</code> &lt;=
  1469. * <code>radix</code> &lt;= <code>MAX_RADIX</code> or if the
  1470. * value of <code>ch</code> is not a valid digit in the specified
  1471. * radix, <code>-1</code> is returned. A character is a valid digit
  1472. * if at least one of the following is true:
  1473. * <ul>
  1474. * <li>The method <code>isDigit</code> is <code>true</code> of the character
  1475. * and the Unicode decimal digit value of the character (or its
  1476. * single-character decomposition) is less than the specified radix.
  1477. * In this case the decimal digit value is returned.
  1478. * <li>The character is one of the uppercase Latin letters
  1479. * <code>'A'</code> through <code>'Z'</code> and its code is less than
  1480. * <code>radix + 'A' - 10</code>.
  1481. * In this case, <code>ch - 'A' + 10</code>
  1482. * is returned.
  1483. * <li>The character is one of the lowercase Latin letters
  1484. * <code>'a'</code> through <code>'z'</code> and its code is less than
  1485. * <code>radix + 'a' - 10</code>.
  1486. * In this case, <code>ch - 'a' + 10</code>
  1487. * is returned.
  1488. * </ul>
  1489. *
  1490. * @param ch the character to be converted.
  1491. * @param radix the radix.
  1492. * @return the numeric value represented by the character in the
  1493. * specified radix.
  1494. * @see java.lang.Character#forDigit(int, int)
  1495. * @see java.lang.Character#isDigit(char)
  1496. */
  1497. public static int digit(char ch, int radix) {
  1498. if (ch <= FAST_PATH_MAX) {
  1499. return CharacterDataLatin1.digit(ch, radix);
  1500. } else {
  1501. return CharacterData.digit(ch, radix);
  1502. }
  1503. }
  1504. /**
  1505. * Returns the <code>int</code> value that the specified Unicode
  1506. * character represents. For example, the character
  1507. * <code>'\u216C'</code> (the roman numeral fifty) will return
  1508. * an int with a value of 50.
  1509. * <p>
  1510. * The letters A-Z in their uppercase (<code>'\u0041'</code> through
  1511. * <code>'\u005A'</code>), lowercase
  1512. * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
  1513. * full width variant (<code>'\uFF21'</code> through
  1514. * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
  1515. * <code>'\uFF5A'</code>) forms have numeric values from 10
  1516. * through 35. This is independent of the Unicode specification,
  1517. * which does not assign numeric values to these <code>char</code>
  1518. * values.
  1519. * <p>
  1520. * If the character does not have a numeric value, then -1 is returned.
  1521. * If the character has a numeric value that cannot be represented as a
  1522. * nonnegative integer (for example, a fractional value), then -2
  1523. * is returned.
  1524. *
  1525. * @param ch the character to be converted.
  1526. * @return the numeric value of the character, as a nonnegative <code>int</code>
  1527. * value; -2 if the character has a numeric value that is not a
  1528. * nonnegative integer; -1 if the character has no numeric value.
  1529. * @see java.lang.Character#forDigit(int, int)
  1530. * @see java.lang.Character#isDigit(char)
  1531. * @since 1.1
  1532. */
  1533. public static int getNumericValue(char ch) {
  1534. if (ch <= FAST_PATH_MAX) {
  1535. return CharacterDataLatin1.getNumericValue(ch);
  1536. } else {
  1537. return CharacterData.getNumericValue(ch);
  1538. }
  1539. }
  1540. /**
  1541. * Determines if the specified character is ISO-LATIN-1 white space.
  1542. * This method returns <code>true</code> for the following five
  1543. * characters only:
  1544. * <table>
  1545. * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td>
  1546. * <td><code>HORIZONTAL TABULATION</code></td></tr>
  1547. * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td>
  1548. * <td><code>NEW LINE</code></td></tr>
  1549. * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td>
  1550. * <td><code>FORM FEED</code></td></tr>
  1551. * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td>
  1552. * <td><code>CARRIAGE RETURN</code></td></tr>
  1553. * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td>
  1554. * <td><code>SPACE</code></td></tr>
  1555. * </table>
  1556. *
  1557. * @param ch the character to be tested.
  1558. * @return <code>true</code> if the character is ISO-LATIN-1 white
  1559. * space; <code>false</code> otherwise.
  1560. * @see java.lang.Character#isSpaceChar(char)
  1561. * @see java.lang.Character#isWhitespace(char)
  1562. * @deprecated Replaced by isWhitespace(char).
  1563. */
  1564. public static boolean isSpace(char ch) {
  1565. return (ch <= 0x0020) &&
  1566. (((((1L << 0x0009) |
  1567. (1L << 0x000A) |
  1568. (1L << 0x000C) |
  1569. (1L << 0x000D) |
  1570. (1L << 0x0020)) >> ch) & 1L) != 0);
  1571. }
  1572. /**
  1573. * Determines if the specified character is a Unicode space character.
  1574. * A character is considered to be a space character if and only if
  1575. * it is specified to be a space character by the Unicode standard. This
  1576. * method returns true if the character's general category type is any of
  1577. * the following:
  1578. * <ul>
  1579. * <li> <code>SPACE_SEPARATOR</code>
  1580. * <li> <code>LINE_SEPARATOR</code>
  1581. * <li> <code>PARAGRAPH_SEPARATOR</code>
  1582. * </ul>
  1583. *
  1584. * @param ch the character to be tested.
  1585. * @return <code>true</code> if the character is a space character;
  1586. * <code>false</code> otherwise.
  1587. * @see java.lang.Character#isWhitespace(char)
  1588. * @since 1.1
  1589. */
  1590. public static boolean isSpaceChar(char ch) {
  1591. if (ch <= FAST_PATH_MAX) {
  1592. return CharacterDataLatin1.isSpaceChar(ch);
  1593. } else {
  1594. return CharacterData.isSpaceChar(ch);
  1595. }
  1596. }
  1597. /**
  1598. * Determines if the specified character is white space according to Java.
  1599. * A character is a Java whitespace character if and only if it satisfies
  1600. * one of the following criteria:
  1601. * <ul>
  1602. * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
  1603. * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
  1604. * but is not also a non-breaking space (<code>'\u00A0'</code>,
  1605. * <code>'\u2007'</code>, <code>'\u202F'</code>).
  1606. * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
  1607. * <li> It is <code>'\u000A'</code>, LINE FEED.
  1608. * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
  1609. * <li> It is <code>'\u000C'</code>, FORM FEED.
  1610. * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
  1611. * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
  1612. * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
  1613. * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
  1614. * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
  1615. * </ul>
  1616. *
  1617. * @param ch the character to be tested.
  1618. * @return <code>true</code> if the character is a Java whitespace
  1619. * character; <code>false</code> otherwise.
  1620. * @see java.lang.Character#isSpaceChar(char)
  1621. * @since 1.1
  1622. */
  1623. public static boolean isWhitespace(char ch) {
  1624. if (ch <= FAST_PATH_MAX) {
  1625. return CharacterDataLatin1.isWhitespace(ch);
  1626. } else {
  1627. return CharacterData.isWhitespace(ch);
  1628. }
  1629. }
  1630. /**
  1631. * Determines if the specified character is an ISO control
  1632. * character. A character is considered to be an ISO control
  1633. * character if its code is in the range <code>'\u0000'</code>
  1634. * through <code>'\u001F'</code> or in the range
  1635. * <code>'\u007F'</code> through <code>'\u009F'</code>.
  1636. *
  1637. * @param ch the character to be tested.
  1638. * @return <code>true</code> if the character is an ISO control character;
  1639. * <code>false</code> otherwise.
  1640. *
  1641. * @see java.lang.Character#isSpaceChar(char)
  1642. * @see java.lang.Character#isWhitespace(char)
  1643. * @since 1.1
  1644. */
  1645. public static boolean isISOControl(char ch) {
  1646. return (ch <= 0x009F) && ((ch <= 0x001F) || (ch >= 0x007F));
  1647. }
  1648. /**
  1649. * Returns a value indicating a character's general category.
  1650. *
  1651. * @param ch the character to be tested.
  1652. * @return a value of type <code>int</code> representing the
  1653. * character's general category.
  1654. * @see java.lang.Character#COMBINING_SPACING_MARK
  1655. * @see java.lang.Character#CONNECTOR_PUNCTUATION
  1656. * @see java.lang.Character#CONTROL
  1657. * @see java.lang.Character#CURRENCY_SYMBOL
  1658. * @see java.lang.Character#DASH_PUNCTUATION
  1659. * @see java.lang.Character#DECIMAL_DIGIT_NUMBER
  1660. * @see java.lang.Character#ENCLOSING_MARK
  1661. * @see java.lang.Character#END_PUNCTUATION
  1662. * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION
  1663. * @see java.lang.Character#FORMAT
  1664. * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION
  1665. * @see java.lang.Character#LETTER_NUMBER
  1666. * @see java.lang.Character#LINE_SEPARATOR
  1667. * @see java.lang.Character#LOWERCASE_LETTER
  1668. * @see java.lang.Character#MATH_SYMBOL
  1669. * @see java.lang.Character#MODIFIER_LETTER
  1670. * @see java.lang.Character#MODIFIER_SYMBOL
  1671. * @see java.lang.Character#NON_SPACING_MARK
  1672. * @see java.lang.Character#OTHER_LETTER
  1673. * @see java.lang.Character#OTHER_NUMBER
  1674. * @see java.lang.Character#OTHER_PUNCTUATION
  1675. * @see java.lang.Character#OTHER_SYMBOL
  1676. * @see java.lang.Character#PARAGRAPH_SEPARATOR
  1677. * @see java.lang.Character#PRIVATE_USE
  1678. * @see java.lang.Character#SPACE_SEPARATOR
  1679. * @see java.lang.Character#START_PUNCTUATION
  1680. * @see java.lang.Character#SURROGATE
  1681. * @see java.lang.Character#TITLECASE_LETTER
  1682. * @see java.lang.Character#UNASSIGNED
  1683. * @see java.lang.Character#UPPERCASE_LETTER
  1684. * @since 1.1
  1685. */
  1686. public static int getType(char ch) {
  1687. if (ch <= FAST_PATH_MAX) {
  1688. return CharacterDataLatin1.getType(ch);
  1689. } else {
  1690. return CharacterData.getType(ch);
  1691. }
  1692. }
  1693. /**
  1694. * Determines the character representation for a specific digit in
  1695. * the specified radix. If the value of <code>radix</code> is not a
  1696. * valid radix, or the value of <code>digit</code> is not a valid
  1697. * digit in the specified radix, the null character
  1698. * (<code>'\u0000'</code>) is returned.
  1699. * <p>
  1700. * The <code>radix</code> argument is valid if it is greater than or
  1701. * equal to <code>MIN_RADIX</code> and less than or equal to
  1702. * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
  1703. * <code>0 <=digit < radix</code>.
  1704. * <p>
  1705. * If the digit is less than 10, then
  1706. * <code>'0' + digit</code> is returned. Otherwise, the value
  1707. * <code>'a' + digit - 10</code> is returned.
  1708. *
  1709. * @param digit the number to convert to a character.
  1710. * @param radix the radix.
  1711. * @return the <code>char</code> representation of the specified digit
  1712. * in the specified radix.
  1713. * @see java.lang.Character#MIN_RADIX
  1714. * @see java.lang.Character#MAX_RADIX
  1715. * @see java.lang.Character#digit(char, int)
  1716. */
  1717. public static char forDigit(int digit, int radix) {
  1718. if ((digit >= radix) || (digit < 0)) {
  1719. return '\0';
  1720. }
  1721. if ((radix < MIN_RADIX) || (radix > MAX_RADIX)) {
  1722. return '\0';
  1723. }
  1724. if (digit < 10) {
  1725. return (char)('0' + digit);
  1726. }
  1727. return (char)('a' - 10 + digit);
  1728. }
  1729. /**
  1730. * Returns the Unicode directionality property for the given
  1731. * character. Character directionality is used to calculate the
  1732. * visual ordering of text. The directionality value of undefined
  1733. * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
  1734. *
  1735. * @param ch <code>char</code> for which the directionality property
  1736. * is requested.
  1737. * @return the directionality property of the <code>char</code> value.
  1738. *
  1739. * @see Character#DIRECTIONALITY_UNDEFINED
  1740. * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
  1741. * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
  1742. * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
  1743. * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
  1744. * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
  1745. * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
  1746. * @see Character#DIRECTIONALITY_ARABIC_NUMBER
  1747. * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
  1748. * @see Character#DIRECTIONALITY_NONSPACING_MARK
  1749. * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
  1750. * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
  1751. * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
  1752. * @see Character#DIRECTIONALITY_WHITESPACE
  1753. * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
  1754. * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
  1755. * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
  1756. * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
  1757. * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
  1758. * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
  1759. * @since 1.4
  1760. */
  1761. public static byte getDirectionality(char ch) {
  1762. if (ch <= FAST_PATH_MAX) {
  1763. return CharacterDataLatin1.getDirectionality(ch);
  1764. } else {
  1765. return CharacterData.getDirectionality(ch);
  1766. }
  1767. }
  1768. /**
  1769. * Determines whether the character is mirrored according to the
  1770. * Unicode specification. Mirrored characters should have their
  1771. * glyphs horizontally mirrored when displayed in text that is
  1772. * right-to-left. For example, <code>'\u0028'</code> LEFT
  1773. * PARENTHESIS is semantically defined to be an <i>opening
  1774. * parenthesis</i>. This will appear as a "(" in text that is
  1775. * left-to-right but as a ")" in text that is right-to-left.
  1776. *
  1777. * @param ch <code>char</code> for which the mirrored property is requested
  1778. * @return <code>true</code> if the char is mirrored, <code>false</code>
  1779. * if the <code>char</code> is not mirrored or is not defined.
  1780. * @since 1.4
  1781. */
  1782. public static boolean isMirrored(char ch) {
  1783. if (ch <= FAST_PATH_MAX) {
  1784. return CharacterDataLatin1.isMirrored(ch);
  1785. } else {
  1786. return CharacterData.isMirrored(ch);
  1787. }
  1788. }
  1789. /**
  1790. * Compares two <code>Character</code> objects numerically.
  1791. *
  1792. * @param anotherCharacter the <code>Character</code> to be compared.
  1793. * @return the value <code>0</code> if the argument <code>Character</code>
  1794. * is equal to this <code>Character</code> a value less than
  1795. * <code>0</code> if this <code>Character</code> is numerically less
  1796. * than the <code>Character</code> argument; and a value greater than
  1797. * <code>0</code> if this <code>Character</code> is numerically greater
  1798. * than the <code>Character</code> argument (unsigned comparison).
  1799. * Note that this is strictly a numerical comparison; it is not
  1800. * locale-dependent.
  1801. * @since 1.2
  1802. */
  1803. public int compareTo(Character anotherCharacter) {
  1804. return this.value - anotherCharacter.value;
  1805. }
  1806. /**
  1807. * Compares this <code>Character</code> object to another object.
  1808. * If the object is a <code>Character</code>, this function
  1809. * behaves like <code>compareTo(Character)</code>. Otherwise, it
  1810. * throws a <code>ClassCastException</code> (as
  1811. * <code>Character</code> objects are comparable only to other
  1812. * <code>Character</code> objects).
  1813. *
  1814. * @param o the <code>Object</code> to be compared.
  1815. * @return the value <code>0</code> if the argument is a <code>Character</code>
  1816. * numerically equal to this <code>Character</code> a value less than
  1817. * <code>0</code> if the argument is a <code>Character</code> numerically
  1818. * greater than this <code>Character</code> and a value greater than
  1819. * <code>0</code> if the argument is a <code>Character</code> numerically
  1820. * less than this <code>Character</code>.
  1821. * @exception <code>ClassCastException</code> if the argument is not a
  1822. * <code>Character</code>.
  1823. * @see java.lang.Comparable
  1824. * @since 1.2 */
  1825. public int compareTo(Object o) {
  1826. return compareTo((Character)o);
  1827. }
  1828. /**
  1829. * Converts the character argument to uppercase using case mapping
  1830. * information from the UnicodeData file.
  1831. * <p>
  1832. *
  1833. * @param ch the <code>char</code> to be converted.
  1834. * @return either the uppercase equivalent of the character, if
  1835. * any, or an error flag (<code>Character.CHAR_ERROR</code>)
  1836. * that indicates that a 1:M <code>char</code> mapping exists.
  1837. * @see java.lang.Character#isLowerCase(char)
  1838. * @see java.lang.Character#isUpperCase(char)
  1839. * @see java.lang.Character#toLowerCase(char)
  1840. * @see java.lang.Character#toTitleCase(char)
  1841. * @since 1.4
  1842. */
  1843. static char toUpperCaseEx(char ch) {
  1844. if (ch <= FAST_PATH_MAX) {
  1845. return CharacterDataLatin1.toUpperCaseEx(ch);
  1846. } else {
  1847. return CharacterData.toUpperCaseEx(ch);
  1848. }
  1849. }
  1850. /**
  1851. * Converts the <code>char</code> argument to uppercase using case
  1852. * mapping information from the SpecialCasing file in the Unicode
  1853. * specification. If a character has no explicit uppercase
  1854. * mapping, then the <code>char</code> itself is returned in the
  1855. * <code>char[]</code>.
  1856. *
  1857. * @param ch the <code>char</code> to uppercase
  1858. * @return a <code>char[]</code> with the uppercased character.
  1859. * @since 1.4
  1860. */
  1861. static char[] sharpsMap = new char[] {'S', 'S'};
  1862. static char[] toUpperCaseCharArray(char ch) {
  1863. char[] upperMap = {ch};
  1864. if (ch <= FAST_PATH_MAX) {
  1865. if (ch == '\u00DF') {
  1866. upperMap = sharpsMap;
  1867. }
  1868. // else ch -> ch
  1869. } else {
  1870. int location = findInCharMap(ch);
  1871. if (location != -1) {
  1872. upperMap = CharacterData.charMap[location][1];
  1873. }
  1874. }
  1875. return upperMap;
  1876. }
  1877. /**
  1878. * Finds the character in the uppercase mapping table.
  1879. *
  1880. * @param ch the <code>char</code> to search
  1881. * @return the index location ch in the table or -1 if not found
  1882. * @since 1.4
  1883. */
  1884. static int findInCharMap(char ch) {
  1885. int top, bottom, current;
  1886. bottom = 0;
  1887. top = CharacterData.charMap.length;
  1888. current = top2;
  1889. // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0]
  1890. while (top - bottom > 1) {
  1891. if (ch >= CharacterData.charMap[current][0][0]) {
  1892. bottom = current;
  1893. } else {
  1894. top = current;
  1895. }
  1896. current = (top + bottom) / 2;
  1897. }
  1898. if (ch == CharacterData.charMap[current][0][0]) return current;
  1899. else return -1;
  1900. }
  1901. }