1. /*
  2. * @(#)NewArabicShaping.java 1.3 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)NewArabicShaping.java 1.1 98/07/31
  9. *
  10. * (C) Copyright IBM Corp. 1998 - All Rights Reserved
  11. */
  12. package javax.swing.text;
  /*
   * The new ligature model performs ligatures after shaping. This is mainly because there
   * doesn't seem to be a full complement of shapes for all the ligatures. For example, there
   * are ligatures with only medial forms -- presumably these ligatures don't form in initial
   * or final positions. Therefore we need to know the position before trying to form the
   * ligature. We do this by shaping the component parts first, and then matching the shaped
   * versions when trying to fit a ligature. This makes for more ligature rules, though.
   *
   * Therefore this version does not do ligatures at all, just shaping.
   */
  23. class NewArabicShaping {
  24. // arabic shaping type code
  25. // shaping bit masks
  26. static final int MASK_SHAPE_RIGHT = 1; // if this bit set, shapes to right
  27. static final int MASK_SHAPE_LEFT = 2; // if this bit set, shapes to left
  28. static final int MASK_TRANSPARENT = 4; // if this bit set, is transparent (ignore other bits)
  29. static final int MASK_NOSHAPE = 8; // if this bit set, don't shape this char, i.e. tatweel
  30. // shaping values
  31. static final int VALUE_NONE = 0;
  32. static final int VALUE_RIGHT = MASK_SHAPE_RIGHT;
  33. static final int VALUE_LEFT = MASK_SHAPE_LEFT;
  34. static final int VALUE_DUAL = MASK_SHAPE_RIGHT | MASK_SHAPE_LEFT;
  35. static final int VALUE_TRANSPARENT = MASK_TRANSPARENT;
  36. static final int VALUE_NOSHAPE_DUAL = MASK_NOSHAPE | VALUE_DUAL;
  37. static final int VALUE_NOSHAPE_NONE = MASK_NOSHAPE;
  38. // shape types for 0622 to 06d5 inclusive from unicode std 2.1
  39. // correction to table in std, 0671 is r, not u.
  40. // everything below 0622 is non-shaping.
  41. // 06d6 to 06f9 either n or t based on description in std, but
  42. // not listed as such.
  43. // n - non-joining
  44. // r - right-joining
  45. // d - dual-joining
  46. // c - join-causing (tatweel), dual-joining and non-shaping
  47. // t - transparent
  48. // . - undefined code point (non-joining and non-shaping)
  49. static final String shapeTypes =
  50. // 2 3 4 5 6
  51. // 123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0
  52. "nrrrrdrdrdddddrrrrdddddddd.....cdddddddrrdtttttttt.............n" +
  53. // 6 7 8 9 a
  54. // 123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0
  55. "nnnnnnnnnnnnn..trrrurrrddddddddddddddddrrrrrrrrrrrrrrrrrrddddddd" +
  56. // a b c d e
  57. // 123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0
  58. "ddddddddddddddddddddddd..ddddd.rdrrrrrrrrrrdrd.ddrr.untttttttttt" +
  59. // e f
  60. // 123456789abcdef0123456789abcdef
  61. "ttttnnttntttt..nnnnnnnnnn......";
  62. static final byte[] shapeVals = new byte[shapeTypes.length()];
  63. static {
  64. for (int i = 0; i < shapeVals.length; i++) {
  65. byte v = VALUE_NOSHAPE_NONE;
  66. switch (shapeTypes.charAt(i)) {
  67. case 'c': v = VALUE_NOSHAPE_DUAL; break;
  68. case 'd': v = VALUE_DUAL; break;
  69. case 'n': v = VALUE_NONE; break;
  70. case 'r': v = VALUE_RIGHT; break;
  71. case 't': v = VALUE_TRANSPARENT; break;
  72. default: break;
  73. }
  74. shapeVals[i] = v;
  75. }
  76. }
  77. static boolean isCombiningOrFormat(char ch) {
  78. return ((((1 << Character.NON_SPACING_MARK) |
  79. (1 << Character.FORMAT) |
  80. (1 << Character.ENCLOSING_MARK) |
  81. (1 << Character.COMBINING_SPACING_MARK)) >> Character.getType(ch)) & 1) != 0;
  82. }
  83. static int getShapeType(char c) {
  84. // shaping array holds types for arabic chars between 0621 and 0700
  85. // other values are either unshaped, or transparent if a mark or format
  86. // coce, except for format codes 200c (zero-width non-joiner) and 200d
  87. // (dual-width joiner) which are both unshaped and non_joining or
  88. // dual-joining, respectively.
  89. if (c >= '\u0621' && c <= '\u200d') {
  90. if (c < '\u0700') {
  91. return shapeVals[c - '\u0621'];
  92. } else if (c == '\u200c') {
  93. return VALUE_NOSHAPE_NONE;
  94. } else if (c == '\u200d') {
  95. return VALUE_NOSHAPE_DUAL;
  96. }
  97. }
  98. return isCombiningOrFormat(c) ? VALUE_TRANSPARENT : VALUE_NOSHAPE_NONE;
  99. }
  100. /*
  101. * Chars in visual order.
  102. * leftshape is shaping code of char to visual left of range
  103. * rightshape is shaping code of char to visual right of range
  104. */
  105. static void shape(char[] chars, int leftType, int rightType) {
  106. // iterate in visual order from left to right
  107. //
  108. // the effective left char is the most recently encountered
  109. // non-transparent char
  110. //
  111. // four boolean states:
  112. // the effective left char shapes
  113. // the effective left char causes right shaping
  114. // the current char shapes
  115. // the current char causes left shaping
  116. //
  117. // if both cause shaping, then
  118. // left += 1 (isolate to final, or initial to medial)
  119. // cur += 2 (isolate to initial)
  120. // eln is effective left logical index
  121. int eln = -1;
  122. boolean leftShapes = false;
  123. boolean leftCauses = (leftType & MASK_SHAPE_RIGHT) != 0;
  124. for (int n = 0; n < chars.length; n++) {
  125. char c = chars[n];
  126. int t = getShapeType(c);
  127. if ((t & MASK_TRANSPARENT) != 0) {
  128. continue;
  129. }
  130. boolean curShapes = (t & MASK_NOSHAPE) == 0;
  131. boolean curCauses = (t & MASK_SHAPE_LEFT) != 0;
  132. if (leftCauses && curCauses) {
  133. if (leftShapes) {
  134. chars[eln] += 1;
  135. }
  136. if (curShapes) {
  137. chars[n] = (char)(getToIsolateShape(c) + 2);
  138. }
  139. } else {
  140. if (curShapes) {
  141. chars[n] = getToIsolateShape(c);
  142. }
  143. }
  144. leftShapes = curShapes;
  145. leftCauses = (t & MASK_SHAPE_RIGHT) != 0;
  146. eln = n;
  147. }
  148. if (leftShapes && leftCauses && (rightType & MASK_SHAPE_LEFT) != 0) {
  149. chars[eln] += 1;
  150. }
  151. }
  152. /*
  153. static void dumpIsoTable() {
  154. char[] out = { '\\', 'u', 'x', 'x', 'x', 'x' };
  155. System.out.print("static String iso = \"");
  156. for (char c = '\u0621'; c < '\u06d4'; c++) {
  157. charHex(getToIsolateShape(c), out, 2, 4);
  158. System.out.print(new String(out));
  159. }
  160. System.out.println( "\";");
  161. }
  162. */
  163. static char[] iso = "\ufe80\ufe81\ufe83\ufe85\ufe87\ufe89\ufe8d\ufe8f\ufe93\ufe95\ufe99\ufe9d\ufea1\ufea5\ufea9\ufeab\ufead\ufeaf\ufeb1\ufeb5\ufeb9\ufebd\ufec1\ufec5\ufec9\ufecd\u063b\u063c\u063d\u063e\u063f\u0640\ufed1\ufed5\ufed9\ufedd\ufee1\ufee5\ufee9\ufeed\ufeef\ufef1\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u0653\u0654\u0655\u0656\u0657\u0658\u0659\u065a\u065b\u065c\u065d\u065e\u065f\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669\u066a\u066b\u066c\u066d\u066e\u066f\u0670\ufb50\u0672\u0673\u0674\u0675\u0676\ufbdd\u0678\ufb66\ufb5e\ufb52\u067c\u067d\ufb56\ufb62\ufb5a\u0681\u0682\ufb76\ufb72\u0685\ufb7a\ufb7e\ufb88\u0689\u068a\u068b\ufb84\ufb82\ufb86\u068f\u0690\ufb8c\u0692\u0693\u0694\u0695\u0696\u0697\ufb8a\u0699\u069a\u069b\u069c\u069d\u069e\u069f\u06a0\u06a1\u06a2\u06a3\ufb6a\u06a5\ufb6e\u06a7\u06a8\ufb8e\u06aa\u06ab\u06ac\ufbd3\u06ae\ufb92\u06b0\ufb9a\u06b2\ufb96\u06b4\u06b5\u06b6\u06b7\u06b8\u06b9\ufb9e\ufba0\u06bc\u06bd\ufbaa\u06bf\ufba4\ufba6\u06c2\u06c3\u06c4\ufbe0\ufbd9\ufbd7\ufbdb\ufbe2\u06ca\ufbde\ufbfc\u06cd\u06ce\u06cf\ufbe4\u06d1\ufbae\ufbb0".toCharArray();
  164. static char getToIsolateShape(char ch) {
  165. if (ch < '\u0621' || ch > '\u06d3') {
  166. return ch;
  167. }
  168. return iso[ch - '\u0621'];
  169. }
  170. }