1. /*
  2. * @(#)UnicodeClassMapping.java 1.15 03/01/23
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation
  12. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  13. * subsidiary of IBM. These materials are provided under terms
  14. * of a License Agreement between Taligent and Sun. This technology
  15. * is protected by multiple US and International patents.
  16. *
  17. * This notice and attribution to Taligent may not be removed.
  18. * Taligent is a registered trademark of Taligent, Inc.
  19. *
  20. */
  21. package java.text;
  22. import java.lang.Character;
  23. /**
  24. * This class maps categories to state change inputs for the
  25. * WordBreakTable. An entire category is mapped to the same
  26. * value unless the character in question appears in the exception list.
  27. */
  28. final class UnicodeClassMapping
  29. {
  30. private int mappedValue[];
  31. private SpecialMapping exceptionChars[];
  32. private boolean hasException[];
  33. private int asciiValues[];
  34. /**
  35. * Create a mapping given a mapping from categories and a list
  36. * of exceptions. Both the mapping list and exceptionChars list must
  37. * be sorted in ascending order.
  38. */
  39. public UnicodeClassMapping(int mappedValue[],
  40. SpecialMapping exceptionChars[],
  41. boolean hasException[],
  42. int asciiValues[])
  43. {
  44. this.mappedValue = mappedValue;
  45. this.exceptionChars = exceptionChars;
  46. this.hasException = hasException;
  47. this.asciiValues = asciiValues;
  48. }
  49. /**
  50. * Map a character to a stage change input for WordBreakTable
  51. * @param ch The character to map.
  52. * @return The mapped value.
  53. */
  54. public int mappedChar(char ch)
  55. {
  56. if (ch <= 255)
  57. return asciiValues[ch];
  58. // get an appropriate category based on the character's Unicode class
  59. // if there's no entry in the exception table for that Unicode class,
  60. // we're done; otherwise we have to look in the exception table for
  61. // the character's category (\uffff is treated here as a sentinel
  62. // value meaning "end of the string"-- we always look in the exception
  63. // table for its category)
  64. int charType = Character.getType(ch);
  65. if ((exceptionChars.length == 0) //|| (ch > '\u003f' && ch < '\u00a0')
  66. || (!hasException[charType] && ch != '\uffff')) {
  67. return mappedValue[charType];
  68. }
  69. //do binary search of exceptionChars table
  70. int min = 0;
  71. int max = exceptionChars.length - 1;
  72. while (max > min) {
  73. int pos = (max + min) >> 1;
  74. if (ch > exceptionChars[pos].endChar)
  75. min = pos + 1;
  76. else
  77. max = pos;
  78. }
  79. SpecialMapping sm = exceptionChars[min];
  80. if (sm.startChar <= ch && ch <= sm.endChar)
  81. return sm.newValue;
  82. else
  83. return mappedValue[charType];
  84. }
  85. }