1. /*
  2. * @(#)UnicodeClassMapping.java 1.13 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)UnicodeClassMapping.java 1.13 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc.
  14. * All Rights Reserved.
  15. *
  16. * The original version of this source code and documentation
  17. * is copyrighted and owned by Taligent, Inc., a wholly-owned
  18. * subsidiary of IBM. These materials are provided under terms
  19. * of a License Agreement between Taligent and Sun. This technology
  20. * is protected by multiple US and International patents.
  21. *
  22. * This notice and attribution to Taligent may not be removed.
  23. * Taligent is a registered trademark of Taligent, Inc.
  24. *
  25. * Permission to use, copy, modify, and distribute this software
  26. * and its documentation for NON-COMMERCIAL purposes and without
  27. * fee is hereby granted provided that this copyright notice
  28. * appears in all copies. Please refer to the file "copyright.html"
  29. * for further important copyright and licensing information.
  30. *
  31. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  32. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  33. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  34. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  35. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  36. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  37. *
  38. */
  39. package java.text;
  40. import java.lang.Character;
  41. /**
  42. * This class maps categories to state change inputs for the
  43. * WordBreakTable. An entire category is mapped to the same
  44. * value unless the character in question appears in the exception list.
  45. */
  46. final class UnicodeClassMapping
  47. {
  48. private int mappedValue[];
  49. private SpecialMapping exceptionChars[];
  50. private boolean hasException[];
  51. private int asciiValues[];
  52. /**
  53. * Create a mapping given a mapping from categories and a list
  54. * of exceptions. Both the mapping list and exceptionChars list must
  55. * be sorted in ascending order.
  56. */
  57. public UnicodeClassMapping(int mappedValue[],
  58. SpecialMapping exceptionChars[],
  59. boolean hasException[],
  60. int asciiValues[])
  61. {
  62. this.mappedValue = mappedValue;
  63. this.exceptionChars = exceptionChars;
  64. this.hasException = hasException;
  65. this.asciiValues = asciiValues;
  66. }
  67. /**
  68. * Map a character to a stage change input for WordBreakTable
  69. * @param ch The character to map.
  70. * @return The mapped value.
  71. */
  72. public int mappedChar(char ch)
  73. {
  74. if (ch <= 255)
  75. return asciiValues[ch];
  76. // get an appropriate category based on the character's Unicode class
  77. // if there's no entry in the exception table for that Unicode class,
  78. // we're done; otherwise we have to look in the exception table for
  79. // the character's category (\uffff is treated here as a sentinel
  80. // value meaning "end of the string"-- we always look in the exception
  81. // table for its category)
  82. int charType = Character.getType(ch);
  83. if ((exceptionChars.length == 0) //|| (ch > '\u003f' && ch < '\u00a0')
  84. || (!hasException[charType] && ch != '\uffff')) {
  85. return mappedValue[charType];
  86. }
  87. //do binary search of exceptionChars table
  88. int min = 0;
  89. int max = exceptionChars.length - 1;
  90. while (max > min) {
  91. int pos = (max + min) >> 1;
  92. if (ch > exceptionChars[pos].endChar)
  93. min = pos + 1;
  94. else
  95. max = pos;
  96. }
  97. SpecialMapping sm = exceptionChars[min];
  98. if (sm.startChar <= ch && ch <= sm.endChar)
  99. return sm.newValue;
  100. else
  101. return mappedValue[charType];
  102. }
  103. }