1. /*
  2. * @(#)Bidi.java 1.13 03/03/19
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright IBM Corp. 1999-2000 - All Rights Reserved
  9. *
  10. * The original version of this source code and documentation is
  11. * copyrighted and owned by IBM. These materials are provided
  12. * under terms of a License Agreement between IBM and Sun.
  13. * This technology is protected by multiple US and International
  14. * patents. This notice and attribution to IBM may not be removed.
  15. */
  16. package java.text;
  17. import java.awt.Toolkit;
  18. import java.awt.font.TextAttribute;
  19. import java.awt.font.NumericShaper;
  20. /**
  21. * This class implements the Unicode Version 3.0 Bidirectional Algorithm.
  22. * <p>
  23. * A Bidi object provides information on the bidirectional reordering of the text
  24. * used to create it. This is required, for example, to properly display Arabic
  25. * or Hebrew text. These languages are inherently mixed directional, as they order
  26. * numbers from left-to-right while ordering most other text from right-to-left.
  27. * <p>
  28. * Once created, a Bidi object can be queried to see if the text it represents is
  29. * all left-to-right or all right-to-left. Such objects are very lightweight and
  30. * this text is relatively easy to process.
  31. * <p>
  32. * If there are multiple runs of text, information about the runs can be accessed
  33. * by indexing to get the start, limit, and level of a run. The level represents
  34. * both the direction and the 'nesting level' of a directional run. Odd levels
  35. * are right-to-left, while even levels are left-to-right. So for example level
  36. * 0 represents left-to-right text, while level 1 represents right-to-left text, and
  37. * level 2 represents left-to-right text embedded in a right-to-left run.
  38. *
  39. * @since 1.4
  40. */
  41. public final class Bidi {
  42. byte dir;
  43. byte baselevel;
  44. int length;
  45. int[] runs;
  46. int[] cws;
  47. static {
  48. java.security.AccessController.doPrivileged(
  49. new sun.security.action.LoadLibraryAction("awt"));
  50. java.security.AccessController.doPrivileged(
  51. new sun.security.action.LoadLibraryAction("fontmanager"));
  52. }
  53. /** Constant indicating base direction is left-to-right. */
  54. public static final int DIRECTION_LEFT_TO_RIGHT = 0;
  55. /** Constant indicating base direction is right-to-left. */
  56. public static final int DIRECTION_RIGHT_TO_LEFT = 1;
  57. /**
  58. * Constant indicating that the base direction depends on the first strong
  59. * directional character in the text according to the Unicode
  60. * Bidirectional Algorithm. If no strong directional character is present,
  61. * the base direction is left-to-right.
  62. */
  63. public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
  64. /**
  65. * Constant indicating that the base direction depends on the first strong
  66. * directional character in the text according to the Unicode
  67. * Bidirectional Algorithm. If no strong directional character is present,
  68. * the base direction is right-to-left.
  69. */
  70. public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
  71. private static final int DIR_MIXED = 2;
  72. /**
  73. * Create Bidi from the given paragraph of text and base direction.
  74. * @param paragraph a paragraph of text
  75. * @param flags a collection of flags that control the algorithm. The
  76. * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
  77. * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
  78. * Other values are reserved.
  79. */
  80. public Bidi(String paragraph, int flags) {
  81. if (paragraph == null) {
  82. throw new IllegalArgumentException("paragraph is null");
  83. }
  84. nativeBidiChars(this, paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
  85. }
  86. /**
  87. * Create Bidi from the given paragraph of text.
  88. * <p>
  89. * The RUN_DIRECTION attribute in the text, if present, determines the base
  90. * direction (left-to-right or right-to-left). If not present, the base
  91. * direction is computes using the Unicode Bidirectional Algorithm, defaulting to left-to-right
  92. * if there are no strong directional characters in the text. This attribute, if
  93. * present, must be applied to all the text in the paragraph.
  94. * <p>
  95. * The BIDI_EMBEDDING attribute in the text, if present, represents embedding level
  96. * information. Negative values from -1 to -62 indicate overrides at the absolute value
  97. * of the level. Positive values from 1 to 62 indicate embeddings. Where values are
  98. * zero or not defined, the base embedding level as determined by the base direction
  99. * is assumed.
  100. * <p>
  101. * The NUMERIC_SHAPING attribute in the text, if present, converts European digits to
  102. * other decimal digits before running the bidi algorithm. This attribute, if present,
  103. * must be applied to all the text in the paragraph.
  104. *
  105. * @param paragraph a paragraph of text with optional character and paragraph attribute information
  106. *
  107. * @see TextAttribute#BIDI_EMBEDDING
  108. * @see TextAttribute#NUMERIC_SHAPING
  109. * @see TextAttribute#RUN_DIRECTION
  110. */
  111. public Bidi(AttributedCharacterIterator paragraph) {
  112. if (paragraph == null) {
  113. throw new IllegalArgumentException("paragraph is null");
  114. }
  115. int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
  116. byte[] embeddings = null;
  117. int start = paragraph.getBeginIndex();
  118. int limit = paragraph.getEndIndex();
  119. int length = limit - start;
  120. int n = 0;
  121. char[] text = new char[length];
  122. for (char c = paragraph.first(); c != paragraph.DONE; c = paragraph.next()) {
  123. text[n++] = c;
  124. }
  125. paragraph.first();
  126. try {
  127. Boolean runDirection = (Boolean)paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
  128. if (runDirection != null) {
  129. if (TextAttribute.RUN_DIRECTION_LTR.equals(runDirection)) {
  130. flags = DIRECTION_LEFT_TO_RIGHT; // clears default setting
  131. } else {
  132. flags = DIRECTION_RIGHT_TO_LEFT;
  133. }
  134. }
  135. }
  136. catch (ClassCastException e) {
  137. }
  138. try {
  139. NumericShaper shaper = (NumericShaper)paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
  140. if (shaper != null) {
  141. shaper.shape(text, 0, text.length);
  142. }
  143. }
  144. catch (ClassCastException e) {
  145. }
  146. int pos = start;
  147. do {
  148. paragraph.setIndex(pos);
  149. Object embeddingLevel = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
  150. int newpos = paragraph.getRunLimit(TextAttribute.BIDI_EMBEDDING);
  151. if (embeddingLevel != null) {
  152. try {
  153. int intLevel = ((Integer)embeddingLevel).intValue();
  154. if (intLevel >= -61 && intLevel < 61) {
  155. byte level = (byte)(intLevel < 0 ? (-intLevel | 0x80) : intLevel);
  156. if (embeddings == null) {
  157. embeddings = new byte[length];
  158. }
  159. for (int i = pos - start; i < newpos - start; ++i) {
  160. embeddings[i] = level;
  161. }
  162. }
  163. }
  164. catch (ClassCastException e) {
  165. }
  166. }
  167. pos = newpos;
  168. } while (pos < limit);
  169. nativeBidiChars(this, text, 0, embeddings, 0, text.length, flags);
  170. }
  171. /**
  172. * Create Bidi from the given text, embedding, and direction information.
  173. * The embeddings array may be null. If present, the values represent embedding level
  174. * information. Negative values from -1 to -61 indicate overrides at the absolute value
  175. * of the level. Positive values from 1 to 61 indicate embeddings. Where values are
  176. * zero, the base embedding level as determined by the base direction is assumed.
  177. * @param text an array containing the paragraph of text to process.
  178. * @param textStart the index into the text array of the start of the paragraph.
  179. * @param embeddings an array containing embedding values for each character in the paragraph.
  180. * This can be null, in which case it is assumed that there is no external embedding information.
  181. * @param embStart the index into the embedding array of the start of the paragraph.
  182. * @param paragraphLength the length of the paragraph in the text and embeddings arrays.
  183. * @param flags a collection of flags that control the algorithm. The
  184. * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
  185. * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
  186. * Other values are reserved.
  187. */
  188. public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags) {
  189. if (text == null) {
  190. throw new IllegalArgumentException("text is null");
  191. }
  192. if (paragraphLength < 0) {
  193. throw new IllegalArgumentException("bad length: " + paragraphLength);
  194. }
  195. if (textStart < 0 || paragraphLength > text.length - textStart) {
  196. throw new IllegalArgumentException("bad range: " + textStart +
  197. " length: " + paragraphLength +
  198. " for text of length: " + text.length);
  199. }
  200. if (embeddings != null && (embStart < 0 || paragraphLength > embeddings.length - embStart)) {
  201. throw new IllegalArgumentException("bad range: " + embStart +
  202. " length: " + paragraphLength +
  203. " for embeddings of length: " + text.length);
  204. }
  205. if (embeddings != null) {
  206. // native uses high bit to indicate override, not negative value, sigh
  207. for (int i = embStart, embLimit = embStart + paragraphLength; i < embLimit; ++i) {
  208. if (embeddings[i] < 0) {
  209. byte[] temp = new byte[paragraphLength];
  210. System.arraycopy(embeddings, embStart, temp, 0, paragraphLength);
  211. for (i -= embStart; i < paragraphLength; ++i) {
  212. if (temp[i] < 0) {
  213. temp[i] = (byte)(-temp[i] | 0x80);
  214. }
  215. }
  216. embeddings = temp;
  217. embStart = 0;
  218. break;
  219. }
  220. }
  221. }
  222. nativeBidiChars(this, text, textStart, embeddings, embStart, paragraphLength, flags);
  223. }
  224. /**
  225. * Private constructor used by line bidi.
  226. */
  227. private Bidi(int dir, int baseLevel, int length, int[] data, int[] cws) {
  228. reset(dir, baseLevel, length, data, cws);
  229. }
  230. /**
  231. * Private mutator used by native code.
  232. */
  233. private void reset(int dir, int baselevel, int length, int[] data, int[] cws) {
  234. this.dir = (byte)dir;
  235. this.baselevel = (byte)baselevel;
  236. this.length = length;
  237. this.runs = data;
  238. this.cws = cws;
  239. }
  240. /**
  241. * Create a Bidi object representing the bidi information on a line of text within
  242. * the paragraph represented by the current Bidi. This call is not required if the
  243. * entire paragraph fits on one line.
  244. * @param lineStart the offset from the start of the paragraph to the start of the line.
  245. * @param lineLimit the offset from the start of the paragraph to the limit of the line.
  246. */
  247. public Bidi createLineBidi(int lineStart, int lineLimit) {
  248. if (lineStart == 0 && lineLimit == length) {
  249. return this;
  250. }
  251. int lineLength = lineLimit - lineStart;
  252. if (lineStart < 0 ||
  253. lineLimit < lineStart ||
  254. lineLimit > length) {
  255. throw new IllegalArgumentException("range " + lineStart +
  256. " to " + lineLimit +
  257. " is invalid for paragraph of length " + length);
  258. }
  259. if (runs == null) {
  260. return new Bidi(dir, baselevel, lineLength, null, null);
  261. } else {
  262. int cwspos = -1;
  263. int[] ncws = null;
  264. if (cws != null) {
  265. int cwss = 0;
  266. int cwsl = cws.length;
  267. while (cwss < cwsl) {
  268. if (cws[cwss] >= lineStart) {
  269. cwsl = cwss;
  270. while (cwsl < cws.length && cws[cwsl] < lineLimit) {
  271. cwsl++;
  272. }
  273. int ll = lineLimit-1;
  274. while (cwsl > cwss && cws[cwsl-1] == ll) {
  275. cwspos = ll; // record start of counter-directional whitespace
  276. --cwsl;
  277. --ll;
  278. }
  279. if (cwspos == lineStart) { // entire line is cws, so ignore
  280. return new Bidi(dir, baselevel, lineLength, null, null);
  281. }
  282. int ncwslen = cwsl - cwss;
  283. if (ncwslen > 0) {
  284. ncws = new int[ncwslen];
  285. for (int i = 0; i < ncwslen; ++i) {
  286. ncws[i] = cws[cwss+i] - lineStart;
  287. }
  288. }
  289. break;
  290. }
  291. ++cwss;
  292. }
  293. }
  294. int[] nruns = null;
  295. int nlevel = baselevel;
  296. int limit = cwspos == -1 ? lineLimit : cwspos;
  297. int rs = 0;
  298. int rl = runs.length;
  299. int ndir = dir;
  300. for (; rs < runs.length; rs += 2) {
  301. if (runs[rs] > lineStart) {
  302. rl = rs;
  303. while (rl < runs.length && runs[rl] < limit) {
  304. rl += 2;
  305. }
  306. if ((rl > rs) || (runs[rs+1] != baselevel)) {
  307. rl += 2;
  308. if (cwspos != -1 && rl > rs && runs[rl-1] != baselevel) { // add level for cws
  309. nruns = new int[rl - rs + 2];
  310. nruns[rl - rs] = lineLength;
  311. nruns[rl - rs + 1] = baselevel;
  312. } else {
  313. limit = lineLimit;
  314. nruns = new int[rl - rs];
  315. }
  316. int n = 0;
  317. for (int i = rs; i < rl; i += 2) {
  318. nruns[n++] = runs[i] - lineStart;
  319. nruns[n++] = runs[i+1];
  320. }
  321. nruns[n-2] = limit - lineStart;
  322. } else {
  323. ndir = (runs[rs+1] & 0x1) == 0 ? DIRECTION_LEFT_TO_RIGHT : DIRECTION_RIGHT_TO_LEFT;
  324. }
  325. break;
  326. }
  327. }
  328. return new Bidi(ndir, baselevel, lineLength, nruns, ncws);
  329. }
  330. }
  331. /**
  332. * Return true if the line is not left-to-right or right-to-left. This means it either has mixed runs of left-to-right
  333. * and right-to-left text, or the base direction differs from the direction of the only run of text.
  334. * @return true if the line is not left-to-right or right-to-left.
  335. */
  336. public boolean isMixed() {
  337. return dir == DIR_MIXED;
  338. }
  339. /**
  340. * Return true if the line is all left-to-right text and the base direction is left-to-right.
  341. * @return true if the line is all left-to-right text and the base direction is left-to-right
  342. */
  343. public boolean isLeftToRight() {
  344. return dir == DIRECTION_LEFT_TO_RIGHT;
  345. }
  346. /**
  347. * Return true if the line is all right-to-left text, and the base direction is right-to-left.
  348. * @return true if the line is all right-to-left text, and the base direction is right-to-left
  349. */
  350. public boolean isRightToLeft() {
  351. return dir == DIRECTION_RIGHT_TO_LEFT;
  352. }
  353. /**
  354. * Return the length of text in the line.
  355. * @return the length of text in the line
  356. */
  357. public int getLength() {
  358. return length;
  359. }
  360. /**
  361. * Return true if the base direction is left-to-right.
  362. * @return true if the base direction is left-to-right
  363. */
  364. public boolean baseIsLeftToRight() {
  365. return (baselevel & 0x1) == 0;
  366. }
  367. /**
  368. * Return the base level (0 if left-to-right, 1 if right-to-left).
  369. * @return the base level
  370. */
  371. public int getBaseLevel() {
  372. return baselevel;
  373. }
  374. /**
  375. * Return the resolved level of the character at offset. If offset is <0 or >=
  376. * the length of the line, return the base direction level.
  377. * @param offset the index of the character for which to return the level
  378. * @return the resolved level of the character at offset
  379. */
  380. public int getLevelAt(int offset) {
  381. if (runs == null || offset < 0 || offset >= length) {
  382. return baselevel;
  383. } else {
  384. int i = 0;
  385. do {
  386. if (offset < runs[i]) {
  387. return runs[i+1];
  388. }
  389. i += 2;
  390. } while (true);
  391. }
  392. }
  393. /**
  394. * Return the number of level runs.
  395. * @return the number of level runs
  396. */
  397. public int getRunCount() {
  398. return runs == null ? 1 : runs.length / 2;
  399. }
  400. /**
  401. * Return the level of the nth logical run in this line.
  402. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  403. * @return the level of the run
  404. */
  405. public int getRunLevel(int run) {
  406. return runs == null ? baselevel : runs[run * 2 + 1];
  407. }
  408. /**
  409. * Return the index of the character at the start of the nth logical run in this line, as
  410. * an offset from the start of the line.
  411. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  412. * @return the start of the run
  413. */
  414. public int getRunStart(int run) {
  415. return (runs == null || run == 0) ? 0 : runs[run * 2 - 2];
  416. }
  417. /**
  418. * Return the index of the character past the end of the nth logical run in this line, as
  419. * an offset from the start of the line. For example, this will return the length
  420. * of the line for the last run on the line.
  421. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  422. * @return limit the limit of the run
  423. */
  424. public int getRunLimit(int run) {
  425. return runs == null ? length : runs[run * 2];
  426. }
  427. /**
  428. * Return true if the specified text requires bidi analysis. If this returns false,
  429. * the text will display left-to-right. Clients can then avoid constructing a Bidi object.
  430. * Text in the Arabic Presentation Forms area of Unicode is presumed to already be shaped
  431. * and ordered for display, and so will not cause this function to return true.
  432. *
  433. * @param text the text containing the characters to test
  434. * @param start the start of the range of characters to test
  435. * @param limit the limit of the range of characters to test
  436. * @return true if the range of characters requires bidi analysis
  437. */
  438. public static boolean requiresBidi(char[] text, int start, int limit) {
  439. for (int i = start; i < limit; ++i) {
  440. char c = text[i];
  441. if (c < '\u0591') continue;
  442. if (c > '\u202e') continue; // if contains arabic extended data, presume already ordered
  443. int dc = nativeGetDirectionCode(c);
  444. if ((RMASK & (1 << dc)) != 0) {
  445. return true;
  446. }
  447. }
  448. return false;
  449. }
  450. /**
  451. * Reorder the objects in the array into visual order based on their levels.
  452. * This is a utility function to use when you have a collection of objects
  453. * representing runs of text in logical order, each run containing text
  454. * at a single level. The elements at <code>index</code> from
  455. * <code>objectStart</code> up to <code>objectStart + count</code>
  456. * in the objects array will be reordered into visual order assuming
  457. * each run of text has the level indicated by the corresponding element
  458. * in the levels array (at <code>index - objectStart + levelStart</code>).
  459. *
  460. * @param levels an array representing the bidi level of each object
  461. * @param levelStart the start position in the levels array
  462. * @param objects the array of objects to be reordered into visual order
  463. * @param objectStart the start position in the objects array
  464. * @param count the number of objects to reorder
  465. */
  466. public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) {
  467. if (count < 0) {
  468. throw new IllegalArgumentException("count " + count + " must be >= 0");
  469. }
  470. if (levelStart < 0 || levelStart + count > levels.length) {
  471. throw new IllegalArgumentException("levelStart " + levelStart + " and count " + count +
  472. " out of range [0, " + levels.length + "]");
  473. }
  474. if (objectStart < 0 || objectStart + count > objects.length) {
  475. throw new IllegalArgumentException("objectStart " + objectStart + " and count " + count +
  476. " out of range [0, " + objects.length + "]");
  477. }
  478. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  479. byte highestLevel = 0;
  480. // initialize mapping and levels
  481. int levelLimit = levelStart + count;
  482. for (int i = levelStart; i < levelLimit; i++) {
  483. byte level = levels[i];
  484. if (level > highestLevel) {
  485. highestLevel = level;
  486. }
  487. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  488. lowestOddLevel = level;
  489. }
  490. }
  491. int delta = objectStart - levelStart;
  492. while (highestLevel >= lowestOddLevel) {
  493. int i = levelStart;
  494. for (;;) {
  495. while (i < levelLimit && levels[i] < highestLevel) {
  496. i++;
  497. }
  498. int begin = i++;
  499. if (begin == levelLimit) {
  500. break; // no more runs at this level
  501. }
  502. while (i < levelLimit && levels[i] >= highestLevel) {
  503. i++;
  504. }
  505. int end = i - 1;
  506. begin += delta;
  507. end += delta;
  508. while (begin < end) {
  509. Object temp = objects[begin];
  510. objects[begin] = objects[end];
  511. objects[end] = temp;
  512. ++begin;
  513. --end;
  514. }
  515. }
  516. --highestLevel;
  517. }
  518. }
  519. private static final char NUMLEVELS = 62;
  520. private static final int RMASK =
  521. (1 << 1 /* U_RIGHT_TO_LEFT */) |
  522. (1 << 5 /* U_ARABIC_NUMBER */) |
  523. (1 << 13 /* U_RIGHT_TO_LEFT_ARABIC */) |
  524. (1 << 14 /* U_RIGHT_TO_LEFT_EMBEDDING */) |
  525. (1 << 15 /* U_RIGHT_TO_LEFT_OVERRIDE */);
  526. /** Access native bidi implementation. */
  527. private static native int nativeGetDirectionCode(char c);
  528. /** Access native bidi implementation. */
  529. private static synchronized native void nativeBidiChars(Bidi bidi, char[] text, int textStart,
  530. byte[] embeddings, int embeddingStart,
  531. int length, int flags);
  532. /**
  533. * Display the bidi internal state, used in debugging.
  534. */
  535. public String toString() {
  536. StringBuffer buf = new StringBuffer(super.toString());
  537. buf.append("[dir: " + dir);
  538. buf.append(" baselevel: " + baselevel);
  539. buf.append(" length: " + length);
  540. if (runs == null) {
  541. buf.append(" runs: null");
  542. } else {
  543. buf.append(" runs: [");
  544. for (int i = 0; i < runs.length; i += 2) {
  545. if (i != 0) {
  546. buf.append(' ');
  547. }
  548. buf.append(runs[i]); // limit
  549. buf.append('/');
  550. buf.append(runs[i+1]); // level
  551. }
  552. buf.append(']');
  553. }
  554. if (cws == null) {
  555. buf.append(" cws: null");
  556. } else {
  557. buf.append(" cws: [");
  558. for (int i = 0; i < cws.length; ++i) {
  559. if (i != 0) {
  560. buf.append(' ');
  561. }
  562. buf.append(Integer.toHexString(cws[i]));
  563. }
  564. buf.append(']');
  565. }
  566. buf.append(']');
  567. return buf.toString();
  568. }
  569. }