1. /*
  2. * @(#)Bidi.java 1.16 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved
  9. *
  10. * The original version of this source code and documentation is
  11. * copyrighted and owned by IBM. These materials are provided
  12. * under terms of a License Agreement between IBM and Sun.
  13. * This technology is protected by multiple US and International
  14. * patents. This notice and attribution to IBM may not be removed.
  15. */
  16. package java.text;
  17. import java.awt.Toolkit;
  18. import java.awt.font.TextAttribute;
  19. import java.awt.font.NumericShaper;
  20. import sun.text.CodePointIterator;
  21. /**
  22. * This class implements the Unicode Bidirectional Algorithm.
  23. * <p>
  24. * A Bidi object provides information on the bidirectional reordering of the text
  25. * used to create it. This is required, for example, to properly display Arabic
  26. * or Hebrew text. These languages are inherently mixed directional, as they order
  27. * numbers from left-to-right while ordering most other text from right-to-left.
  28. * <p>
  29. * Once created, a Bidi object can be queried to see if the text it represents is
  30. * all left-to-right or all right-to-left. Such objects are very lightweight and
  31. * this text is relatively easy to process.
  32. * <p>
  33. * If there are multiple runs of text, information about the runs can be accessed
  34. * by indexing to get the start, limit, and level of a run. The level represents
  35. * both the direction and the 'nesting level' of a directional run. Odd levels
  36. * are right-to-left, while even levels are left-to-right. So for example level
  37. * 0 represents left-to-right text, while level 1 represents right-to-left text, and
  38. * level 2 represents left-to-right text embedded in a right-to-left run.
  39. *
  40. * @since 1.4
  41. */
  42. public final class Bidi {
  43. byte dir;
  44. byte baselevel;
  45. int length;
  46. int[] runs;
  47. int[] cws;
  48. static {
  49. java.security.AccessController.doPrivileged(
  50. new sun.security.action.LoadLibraryAction("awt"));
  51. java.security.AccessController.doPrivileged(
  52. new sun.security.action.LoadLibraryAction("fontmanager"));
  53. }
  54. /** Constant indicating base direction is left-to-right. */
  55. public static final int DIRECTION_LEFT_TO_RIGHT = 0;
  56. /** Constant indicating base direction is right-to-left. */
  57. public static final int DIRECTION_RIGHT_TO_LEFT = 1;
  58. /**
  59. * Constant indicating that the base direction depends on the first strong
  60. * directional character in the text according to the Unicode
  61. * Bidirectional Algorithm. If no strong directional character is present,
  62. * the base direction is left-to-right.
  63. */
  64. public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
  65. /**
  66. * Constant indicating that the base direction depends on the first strong
  67. * directional character in the text according to the Unicode
  68. * Bidirectional Algorithm. If no strong directional character is present,
  69. * the base direction is right-to-left.
  70. */
  71. public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
  72. private static final int DIR_MIXED = 2;
  73. /**
  74. * Create Bidi from the given paragraph of text and base direction.
  75. * @param paragraph a paragraph of text
  76. * @param flags a collection of flags that control the algorithm. The
  77. * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
  78. * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
  79. * Other values are reserved.
  80. */
  81. public Bidi(String paragraph, int flags) {
  82. if (paragraph == null) {
  83. throw new IllegalArgumentException("paragraph is null");
  84. }
  85. nativeBidiChars(this, paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
  86. }
  87. /**
  88. * Create Bidi from the given paragraph of text.
  89. * <p>
  90. * The RUN_DIRECTION attribute in the text, if present, determines the base
  91. * direction (left-to-right or right-to-left). If not present, the base
  92. * direction is computes using the Unicode Bidirectional Algorithm, defaulting to left-to-right
  93. * if there are no strong directional characters in the text. This attribute, if
  94. * present, must be applied to all the text in the paragraph.
  95. * <p>
  96. * The BIDI_EMBEDDING attribute in the text, if present, represents embedding level
  97. * information. Negative values from -1 to -62 indicate overrides at the absolute value
  98. * of the level. Positive values from 1 to 62 indicate embeddings. Where values are
  99. * zero or not defined, the base embedding level as determined by the base direction
  100. * is assumed.
  101. * <p>
  102. * The NUMERIC_SHAPING attribute in the text, if present, converts European digits to
  103. * other decimal digits before running the bidi algorithm. This attribute, if present,
  104. * must be applied to all the text in the paragraph.
  105. *
  106. * @param paragraph a paragraph of text with optional character and paragraph attribute information
  107. *
  108. * @see TextAttribute#BIDI_EMBEDDING
  109. * @see TextAttribute#NUMERIC_SHAPING
  110. * @see TextAttribute#RUN_DIRECTION
  111. */
  112. public Bidi(AttributedCharacterIterator paragraph) {
  113. if (paragraph == null) {
  114. throw new IllegalArgumentException("paragraph is null");
  115. }
  116. int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
  117. byte[] embeddings = null;
  118. int start = paragraph.getBeginIndex();
  119. int limit = paragraph.getEndIndex();
  120. int length = limit - start;
  121. int n = 0;
  122. char[] text = new char[length];
  123. for (char c = paragraph.first(); c != paragraph.DONE; c = paragraph.next()) {
  124. text[n++] = c;
  125. }
  126. paragraph.first();
  127. try {
  128. Boolean runDirection = (Boolean)paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
  129. if (runDirection != null) {
  130. if (TextAttribute.RUN_DIRECTION_LTR.equals(runDirection)) {
  131. flags = DIRECTION_LEFT_TO_RIGHT; // clears default setting
  132. } else {
  133. flags = DIRECTION_RIGHT_TO_LEFT;
  134. }
  135. }
  136. }
  137. catch (ClassCastException e) {
  138. }
  139. try {
  140. NumericShaper shaper = (NumericShaper)paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
  141. if (shaper != null) {
  142. shaper.shape(text, 0, text.length);
  143. }
  144. }
  145. catch (ClassCastException e) {
  146. }
  147. int pos = start;
  148. do {
  149. paragraph.setIndex(pos);
  150. Object embeddingLevel = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
  151. int newpos = paragraph.getRunLimit(TextAttribute.BIDI_EMBEDDING);
  152. if (embeddingLevel != null) {
  153. try {
  154. int intLevel = ((Integer)embeddingLevel).intValue();
  155. if (intLevel >= -61 && intLevel < 61) {
  156. byte level = (byte)(intLevel < 0 ? (-intLevel | 0x80) : intLevel);
  157. if (embeddings == null) {
  158. embeddings = new byte[length];
  159. }
  160. for (int i = pos - start; i < newpos - start; ++i) {
  161. embeddings[i] = level;
  162. }
  163. }
  164. }
  165. catch (ClassCastException e) {
  166. }
  167. }
  168. pos = newpos;
  169. } while (pos < limit);
  170. nativeBidiChars(this, text, 0, embeddings, 0, text.length, flags);
  171. }
  172. /**
  173. * Create Bidi from the given text, embedding, and direction information.
  174. * The embeddings array may be null. If present, the values represent embedding level
  175. * information. Negative values from -1 to -61 indicate overrides at the absolute value
  176. * of the level. Positive values from 1 to 61 indicate embeddings. Where values are
  177. * zero, the base embedding level as determined by the base direction is assumed.
  178. * @param text an array containing the paragraph of text to process.
  179. * @param textStart the index into the text array of the start of the paragraph.
  180. * @param embeddings an array containing embedding values for each character in the paragraph.
  181. * This can be null, in which case it is assumed that there is no external embedding information.
  182. * @param embStart the index into the embedding array of the start of the paragraph.
  183. * @param paragraphLength the length of the paragraph in the text and embeddings arrays.
  184. * @param flags a collection of flags that control the algorithm. The
  185. * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
  186. * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
  187. * Other values are reserved.
  188. */
  189. public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags) {
  190. if (text == null) {
  191. throw new IllegalArgumentException("text is null");
  192. }
  193. if (paragraphLength < 0) {
  194. throw new IllegalArgumentException("bad length: " + paragraphLength);
  195. }
  196. if (textStart < 0 || paragraphLength > text.length - textStart) {
  197. throw new IllegalArgumentException("bad range: " + textStart +
  198. " length: " + paragraphLength +
  199. " for text of length: " + text.length);
  200. }
  201. if (embeddings != null && (embStart < 0 || paragraphLength > embeddings.length - embStart)) {
  202. throw new IllegalArgumentException("bad range: " + embStart +
  203. " length: " + paragraphLength +
  204. " for embeddings of length: " + text.length);
  205. }
  206. if (embeddings != null) {
  207. // native uses high bit to indicate override, not negative value, sigh
  208. for (int i = embStart, embLimit = embStart + paragraphLength; i < embLimit; ++i) {
  209. if (embeddings[i] < 0) {
  210. byte[] temp = new byte[paragraphLength];
  211. System.arraycopy(embeddings, embStart, temp, 0, paragraphLength);
  212. for (i -= embStart; i < paragraphLength; ++i) {
  213. if (temp[i] < 0) {
  214. temp[i] = (byte)(-temp[i] | 0x80);
  215. }
  216. }
  217. embeddings = temp;
  218. embStart = 0;
  219. break;
  220. }
  221. }
  222. }
  223. nativeBidiChars(this, text, textStart, embeddings, embStart, paragraphLength, flags);
  224. }
  225. /**
  226. * Private constructor used by line bidi.
  227. */
  228. private Bidi(int dir, int baseLevel, int length, int[] data, int[] cws) {
  229. reset(dir, baseLevel, length, data, cws);
  230. }
  231. /**
  232. * Private mutator used by native code.
  233. */
  234. private void reset(int dir, int baselevel, int length, int[] data, int[] cws) {
  235. this.dir = (byte)dir;
  236. this.baselevel = (byte)baselevel;
  237. this.length = length;
  238. this.runs = data;
  239. this.cws = cws;
  240. }
  241. /**
  242. * Create a Bidi object representing the bidi information on a line of text within
  243. * the paragraph represented by the current Bidi. This call is not required if the
  244. * entire paragraph fits on one line.
  245. * @param lineStart the offset from the start of the paragraph to the start of the line.
  246. * @param lineLimit the offset from the start of the paragraph to the limit of the line.
  247. */
  248. public Bidi createLineBidi(int lineStart, int lineLimit) {
  249. if (lineStart == 0 && lineLimit == length) {
  250. return this;
  251. }
  252. int lineLength = lineLimit - lineStart;
  253. if (lineStart < 0 ||
  254. lineLimit < lineStart ||
  255. lineLimit > length) {
  256. throw new IllegalArgumentException("range " + lineStart +
  257. " to " + lineLimit +
  258. " is invalid for paragraph of length " + length);
  259. }
  260. if (runs == null) {
  261. return new Bidi(dir, baselevel, lineLength, null, null);
  262. } else {
  263. int cwspos = -1;
  264. int[] ncws = null;
  265. if (cws != null) {
  266. int cwss = 0;
  267. int cwsl = cws.length;
  268. while (cwss < cwsl) {
  269. if (cws[cwss] >= lineStart) {
  270. cwsl = cwss;
  271. while (cwsl < cws.length && cws[cwsl] < lineLimit) {
  272. cwsl++;
  273. }
  274. int ll = lineLimit-1;
  275. while (cwsl > cwss && cws[cwsl-1] == ll) {
  276. cwspos = ll; // record start of counter-directional whitespace
  277. --cwsl;
  278. --ll;
  279. }
  280. if (cwspos == lineStart) { // entire line is cws, so ignore
  281. return new Bidi(dir, baselevel, lineLength, null, null);
  282. }
  283. int ncwslen = cwsl - cwss;
  284. if (ncwslen > 0) {
  285. ncws = new int[ncwslen];
  286. for (int i = 0; i < ncwslen; ++i) {
  287. ncws[i] = cws[cwss+i] - lineStart;
  288. }
  289. }
  290. break;
  291. }
  292. ++cwss;
  293. }
  294. }
  295. int[] nruns = null;
  296. int nlevel = baselevel;
  297. int limit = cwspos == -1 ? lineLimit : cwspos;
  298. int rs = 0;
  299. int rl = runs.length;
  300. int ndir = dir;
  301. for (; rs < runs.length; rs += 2) {
  302. if (runs[rs] > lineStart) {
  303. rl = rs;
  304. while (rl < runs.length && runs[rl] < limit) {
  305. rl += 2;
  306. }
  307. if ((rl > rs) || (runs[rs+1] != baselevel)) {
  308. rl += 2;
  309. if (cwspos != -1 && rl > rs && runs[rl-1] != baselevel) { // add level for cws
  310. nruns = new int[rl - rs + 2];
  311. nruns[rl - rs] = lineLength;
  312. nruns[rl - rs + 1] = baselevel;
  313. } else {
  314. limit = lineLimit;
  315. nruns = new int[rl - rs];
  316. }
  317. int n = 0;
  318. for (int i = rs; i < rl; i += 2) {
  319. nruns[n++] = runs[i] - lineStart;
  320. nruns[n++] = runs[i+1];
  321. }
  322. nruns[n-2] = limit - lineStart;
  323. } else {
  324. ndir = (runs[rs+1] & 0x1) == 0 ? DIRECTION_LEFT_TO_RIGHT : DIRECTION_RIGHT_TO_LEFT;
  325. }
  326. break;
  327. }
  328. }
  329. return new Bidi(ndir, baselevel, lineLength, nruns, ncws);
  330. }
  331. }
  332. /**
  333. * Return true if the line is not left-to-right or right-to-left. This means it either has mixed runs of left-to-right
  334. * and right-to-left text, or the base direction differs from the direction of the only run of text.
  335. * @return true if the line is not left-to-right or right-to-left.
  336. */
  337. public boolean isMixed() {
  338. return dir == DIR_MIXED;
  339. }
  340. /**
  341. * Return true if the line is all left-to-right text and the base direction is left-to-right.
  342. * @return true if the line is all left-to-right text and the base direction is left-to-right
  343. */
  344. public boolean isLeftToRight() {
  345. return dir == DIRECTION_LEFT_TO_RIGHT;
  346. }
  347. /**
  348. * Return true if the line is all right-to-left text, and the base direction is right-to-left.
  349. * @return true if the line is all right-to-left text, and the base direction is right-to-left
  350. */
  351. public boolean isRightToLeft() {
  352. return dir == DIRECTION_RIGHT_TO_LEFT;
  353. }
  354. /**
  355. * Return the length of text in the line.
  356. * @return the length of text in the line
  357. */
  358. public int getLength() {
  359. return length;
  360. }
  361. /**
  362. * Return true if the base direction is left-to-right.
  363. * @return true if the base direction is left-to-right
  364. */
  365. public boolean baseIsLeftToRight() {
  366. return (baselevel & 0x1) == 0;
  367. }
  368. /**
  369. * Return the base level (0 if left-to-right, 1 if right-to-left).
  370. * @return the base level
  371. */
  372. public int getBaseLevel() {
  373. return baselevel;
  374. }
  375. /**
  376. * Return the resolved level of the character at offset. If offset is <0 or >=
  377. * the length of the line, return the base direction level.
  378. * @param offset the index of the character for which to return the level
  379. * @return the resolved level of the character at offset
  380. */
  381. public int getLevelAt(int offset) {
  382. if (runs == null || offset < 0 || offset >= length) {
  383. return baselevel;
  384. } else {
  385. int i = 0;
  386. do {
  387. if (offset < runs[i]) {
  388. return runs[i+1];
  389. }
  390. i += 2;
  391. } while (true);
  392. }
  393. }
  394. /**
  395. * Return the number of level runs.
  396. * @return the number of level runs
  397. */
  398. public int getRunCount() {
  399. return runs == null ? 1 : runs.length / 2;
  400. }
  401. /**
  402. * Return the level of the nth logical run in this line.
  403. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  404. * @return the level of the run
  405. */
  406. public int getRunLevel(int run) {
  407. return runs == null ? baselevel : runs[run * 2 + 1];
  408. }
  409. /**
  410. * Return the index of the character at the start of the nth logical run in this line, as
  411. * an offset from the start of the line.
  412. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  413. * @return the start of the run
  414. */
  415. public int getRunStart(int run) {
  416. return (runs == null || run == 0) ? 0 : runs[run * 2 - 2];
  417. }
  418. /**
  419. * Return the index of the character past the end of the nth logical run in this line, as
  420. * an offset from the start of the line. For example, this will return the length
  421. * of the line for the last run on the line.
  422. * @param run the index of the run, between 0 and <code>getRunCount()</code>
  423. * @return limit the limit of the run
  424. */
  425. public int getRunLimit(int run) {
  426. return runs == null ? length : runs[run * 2];
  427. }
  428. /**
  429. * Return true if the specified text requires bidi analysis. If this returns false,
  430. * the text will display left-to-right. Clients can then avoid constructing a Bidi object.
  431. * Text in the Arabic Presentation Forms area of Unicode is presumed to already be shaped
  432. * and ordered for display, and so will not cause this function to return true.
  433. *
  434. * @param text the text containing the characters to test
  435. * @param start the start of the range of characters to test
  436. * @param limit the limit of the range of characters to test
  437. * @return true if the range of characters requires bidi analysis
  438. */
  439. public static boolean requiresBidi(char[] text, int start, int limit) {
  440. CodePointIterator cpi = CodePointIterator.create(text, start, limit);
  441. for (int cp = cpi.next(); cp != CodePointIterator.DONE; cp = cpi.next()) {
  442. if (cp > 0x0590) {
  443. int dc = nativeGetDirectionCode(cp);
  444. if ((RMASK & (1 << dc)) != 0) {
  445. return true;
  446. }
  447. }
  448. }
  449. return false;
  450. }
  451. /**
  452. * Reorder the objects in the array into visual order based on their levels.
  453. * This is a utility function to use when you have a collection of objects
  454. * representing runs of text in logical order, each run containing text
  455. * at a single level. The elements at <code>index</code> from
  456. * <code>objectStart</code> up to <code>objectStart + count</code>
  457. * in the objects array will be reordered into visual order assuming
  458. * each run of text has the level indicated by the corresponding element
  459. * in the levels array (at <code>index - objectStart + levelStart</code>).
  460. *
  461. * @param levels an array representing the bidi level of each object
  462. * @param levelStart the start position in the levels array
  463. * @param objects the array of objects to be reordered into visual order
  464. * @param objectStart the start position in the objects array
  465. * @param count the number of objects to reorder
  466. */
  467. public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) {
  468. if (count < 0) {
  469. throw new IllegalArgumentException("count " + count + " must be >= 0");
  470. }
  471. if (levelStart < 0 || levelStart + count > levels.length) {
  472. throw new IllegalArgumentException("levelStart " + levelStart + " and count " + count +
  473. " out of range [0, " + levels.length + "]");
  474. }
  475. if (objectStart < 0 || objectStart + count > objects.length) {
  476. throw new IllegalArgumentException("objectStart " + objectStart + " and count " + count +
  477. " out of range [0, " + objects.length + "]");
  478. }
  479. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  480. byte highestLevel = 0;
  481. // initialize mapping and levels
  482. int levelLimit = levelStart + count;
  483. for (int i = levelStart; i < levelLimit; i++) {
  484. byte level = levels[i];
  485. if (level > highestLevel) {
  486. highestLevel = level;
  487. }
  488. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  489. lowestOddLevel = level;
  490. }
  491. }
  492. int delta = objectStart - levelStart;
  493. while (highestLevel >= lowestOddLevel) {
  494. int i = levelStart;
  495. for (;;) {
  496. while (i < levelLimit && levels[i] < highestLevel) {
  497. i++;
  498. }
  499. int begin = i++;
  500. if (begin == levelLimit) {
  501. break; // no more runs at this level
  502. }
  503. while (i < levelLimit && levels[i] >= highestLevel) {
  504. i++;
  505. }
  506. int end = i - 1;
  507. begin += delta;
  508. end += delta;
  509. while (begin < end) {
  510. Object temp = objects[begin];
  511. objects[begin] = objects[end];
  512. objects[end] = temp;
  513. ++begin;
  514. --end;
  515. }
  516. }
  517. --highestLevel;
  518. }
  519. }
  520. private static final char NUMLEVELS = 62;
  521. private static final int RMASK =
  522. (1 << 1 /* U_RIGHT_TO_LEFT */) |
  523. (1 << 5 /* U_ARABIC_NUMBER */) |
  524. (1 << 13 /* U_RIGHT_TO_LEFT_ARABIC */) |
  525. (1 << 14 /* U_RIGHT_TO_LEFT_EMBEDDING */) |
  526. (1 << 15 /* U_RIGHT_TO_LEFT_OVERRIDE */);
  527. /** Access native bidi implementation. */
  528. private static native int nativeGetDirectionCode(int cp);
  529. /** Access native bidi implementation. */
  530. private static synchronized native void nativeBidiChars(Bidi bidi, char[] text, int textStart,
  531. byte[] embeddings, int embeddingStart,
  532. int length, int flags);
  533. /**
  534. * Display the bidi internal state, used in debugging.
  535. */
  536. public String toString() {
  537. StringBuffer buf = new StringBuffer(super.toString());
  538. buf.append("[dir: " + dir);
  539. buf.append(" baselevel: " + baselevel);
  540. buf.append(" length: " + length);
  541. if (runs == null) {
  542. buf.append(" runs: null");
  543. } else {
  544. buf.append(" runs: [");
  545. for (int i = 0; i < runs.length; i += 2) {
  546. if (i != 0) {
  547. buf.append(' ');
  548. }
  549. buf.append(runs[i]); // limit
  550. buf.append('/');
  551. buf.append(runs[i+1]); // level
  552. }
  553. buf.append(']');
  554. }
  555. if (cws == null) {
  556. buf.append(" cws: null");
  557. } else {
  558. buf.append(" cws: [");
  559. for (int i = 0; i < cws.length; ++i) {
  560. if (i != 0) {
  561. buf.append(' ');
  562. }
  563. buf.append(Integer.toHexString(cws[i]));
  564. }
  565. buf.append(']');
  566. }
  567. buf.append(']');
  568. return buf.toString();
  569. }
  570. }