1. /*
  2. * @(#)UnicodeClassMapping.java 1.13 00/01/19
  3. *
  4. * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation
  15. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  16. * subsidiary of IBM. These materials are provided under terms
  17. * of a License Agreement between Taligent and Sun. This technology
  18. * is protected by multiple US and International patents.
  19. *
  20. * This notice and attribution to Taligent may not be removed.
  21. * Taligent is a registered trademark of Taligent, Inc.
  22. *
  23. */
  24. package java.text;
  25. import java.lang.Character;
  26. /**
  27. * This class maps categories to state change inputs for the
  28. * WordBreakTable. An entire category is mapped to the same
  29. * value unless the character in question appears in the exception list.
  30. */
  31. final class UnicodeClassMapping
  32. {
  33. private int mappedValue[];
  34. private SpecialMapping exceptionChars[];
  35. private boolean hasException[];
  36. private int asciiValues[];
  37. /**
  38. * Create a mapping given a mapping from categories and a list
  39. * of exceptions. Both the mapping list and exceptionChars list must
  40. * be sorted in ascending order.
  41. */
  42. public UnicodeClassMapping(int mappedValue[],
  43. SpecialMapping exceptionChars[],
  44. boolean hasException[],
  45. int asciiValues[])
  46. {
  47. this.mappedValue = mappedValue;
  48. this.exceptionChars = exceptionChars;
  49. this.hasException = hasException;
  50. this.asciiValues = asciiValues;
  51. }
  52. /**
  53. * Map a character to a stage change input for WordBreakTable
  54. * @param ch The character to map.
  55. * @return The mapped value.
  56. */
  57. public int mappedChar(char ch)
  58. {
  59. if (ch <= 255)
  60. return asciiValues[ch];
  61. // get an appropriate category based on the character's Unicode class
  62. // if there's no entry in the exception table for that Unicode class,
  63. // we're done; otherwise we have to look in the exception table for
  64. // the character's category (\uffff is treated here as a sentinel
  65. // value meaning "end of the string"-- we always look in the exception
  66. // table for its category)
  67. int charType = Character.getType(ch);
  68. if ((exceptionChars.length == 0) //|| (ch > '\u003f' && ch < '\u00a0')
  69. || (!hasException[charType] && ch != '\uffff')) {
  70. return mappedValue[charType];
  71. }
  72. //do binary search of exceptionChars table
  73. int min = 0;
  74. int max = exceptionChars.length - 1;
  75. while (max > min) {
  76. int pos = (max + min) >> 1;
  77. if (ch > exceptionChars[pos].endChar)
  78. min = pos + 1;
  79. else
  80. max = pos;
  81. }
  82. SpecialMapping sm = exceptionChars[min];
  83. if (sm.startChar <= ch && ch <= sm.endChar)
  84. return sm.newValue;
  85. else
  86. return mappedValue[charType];
  87. }
  88. }