1. /*
  2. * @(#)Bidi.java 1.5 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)Bidi.java 1.5 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1997, 1998 - All Rights Reserved
  12. *
  13. * The original version of this source code and documentation is copyrighted
  14. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  15. * materials are provided under terms of a License Agreement between Taligent
  16. * and Sun. This technology is protected by multiple US and International
  17. * patents. This notice and attribution to Taligent may not be removed.
  18. * Taligent is a registered trademark of Taligent, Inc.
  19. */
  20. package javax.swing.text;
  21. import java.io.*;
  22. /**
  23. * Bidi implementation.
  24. */
  25. final class Bidi {
  26. private boolean ltr;
  27. private byte[] dirs;
  28. private byte[] levels;
  29. private int[] l2vMap;
  30. private int[] v2lMap;
  31. /**
  32. * Return the number of elements the bidi represents.
  33. */
  34. int getLength() {
  35. return levels.length;
  36. }
  37. /**
  38. * Return true if the base run direction defined for this bidi is
  39. * left to right, false if it is right to left.
  40. */
  41. boolean isDirectionLTR() {
  42. return ltr;
  43. }
  44. /**
  45. * Return a mapping of the bidirectional information from
  46. * logical to visual position. If mapping is canonical, return
  47. * null;
  48. */
  49. int[] getLogicalToVisualMap() {
  50. if (l2vMap == null) {
  51. l2vMap = getInverseOrder(getVisualToLogicalMap());
  52. }
  53. return l2vMap;
  54. }
  55. /**
  56. * Return a mapping of the bidirectional information from
  57. * visual to logical position. If mapping is canonical, return
  58. * null;
  59. */
  60. int[] getVisualToLogicalMap() {
  61. if (v2lMap == null) {
  62. v2lMap = createVisualToLogicalMap(levels);
  63. }
  64. return v2lMap;
  65. }
  66. /**
  67. * Return the resolved level array.
  68. */
  69. byte[] getLevels() {
  70. return levels;
  71. }
  72. /**
  73. * Return a bidi representing the given subrange of this bidi, with
  74. * line reordering applied to counterdirectional trailing whitespace.
  75. */
  76. Bidi createLineBidi(int start, int limit) {
  77. byte[] newLevels = new byte[limit - start];
  78. System.arraycopy(levels, start, newLevels, 0, newLevels.length);
  79. if (dirs != null) {
  80. byte x = (byte)(ltr ? 0 : 1);
  81. for (int i = newLevels.length; --i >= 0;) {
  82. if ((newLevels[i] % 2) == x || dirs[start + i] != WS) {
  83. break;
  84. }
  85. newLevels[i] = x;
  86. }
  87. }
  88. return new Bidi(newLevels, ltr);
  89. }
  90. /**
  91. * Return the level of the element at pos.
  92. */
  93. int getLevelAt(int pos) {
  94. return levels[pos];
  95. }
  96. /**
  97. * Get the limit of the level run starting at start.
  98. */
  99. int getLevelLimit(int start) {
  100. byte l = levels[start];
  101. while (++start < levels.length && levels[start] == l) {}
  102. return start;
  103. }
  104. /**
  105. * Create bidi information about the provided paragraph of text.
  106. * Use bidi rules to default line direction.
  107. */
  108. Bidi(char[] text) {
  109. this(text, defaultIsLTR(text, 0, text.length));
  110. }
  111. /**
  112. * Create bidi information about the provided paragraph of text.
  113. * Embedding codes in the text will be processed.
  114. * @param text the characters in the paragraph.
  115. * @param ltr true if the paragraph run direction is LTR,
  116. * false if it is RTL.
  117. */
  118. Bidi(char[] text, boolean ltr) {
  119. this(text, getEmbeddingArray(text, ltr), ltr);
  120. }
  121. /**
  122. * Create bidi information about the provided paragraph of text, using the
  123. * provided embedding information. Explicit embedding codes in the text, if
  124. * any, are not processed.
  125. * @param text the characters in the paragraph.
  126. * @param embs the embedding data. Values from 0-15 are embeddings at the
  127. * indicated level. Values from 16-31 are overrides at the corresponding levels.
  128. * This may modify the data in embs.
  129. * @param ltr true if the run direction for the paragraph is LTR,
  130. * false if it is RTL.
  131. */
  132. Bidi(char[] text, byte[] embs, boolean ltr) {
  133. byte[] dirs = getDirectionCodeArray(text, embs);
  134. for (int i = 0; i < embs.length; i++) {
  135. if ((embs[i] & 0x10) != 0) {
  136. embs[i] &= 0x0f;
  137. dirs[i] = (byte)(embs[i] & 0x01); // L, R
  138. }
  139. }
  140. applyBidiRules(dirs, embs, ltr); // save the input dirs
  141. this.ltr = ltr;
  142. this.dirs = dirs;
  143. this.levels = embs;
  144. }
  145. /**
  146. * Create bidi information, using the provided data on the direction
  147. * codes and levels of the text in the paragraph. These dirs and levels
  148. * arrays should not be subsequently modified by the caller.
  149. *
  150. * @param dirs an array of bidi direction codes. Arabic rtl text is
  151. * encoded using AR. Embedding overrides have already been
  152. * processed into this array.
  153. * @param levels an array of embedding levels, 0-15 inclusive.
  154. * @param ltr true if the run direction for the paragraph is LTR,
  155. * false if it is RTL.
  156. */
  157. Bidi(byte[] dirs, byte[] levels, boolean ltr) {
  158. applyBidiRules(dirs, levels, ltr); // save the input dirs
  159. this.ltr = ltr;
  160. this.dirs = dirs;
  161. this.levels = levels;
  162. }
  163. protected Bidi(byte[] levels, boolean ltr) {
  164. this.ltr = ltr;
  165. this.dirs = null;
  166. this.levels = levels;
  167. }
  168. /* so clients can construct any direction array. */
  169. static final byte L = 0; /* left to right (strong) */
  170. static final byte R = 1; /* right to left (strong) */
  171. static final byte EN = 2; /* european number (weak) */
  172. static final byte ES = 3; /* european number separator (weak) */
  173. static final byte ET = 4; /* european number terminator (weak) */
  174. static final byte AN = 5; /* arabic number (weak) */
  175. static final byte CS = 6; /* common number separator (weak) */
  176. static final byte B = 7; /* block separator */
  177. static final byte S = 8; /* segment separator */
  178. static final byte WS = 9; /* whitespace */
  179. static final byte ON = 10; /* other neutral */
  180. static final byte AR = 11; /* arabic strong rtl character, not in 2.1 spec */
  181. static final byte CM = 12; /* neutral combining mark, not in 2.1 spec */
  182. static final byte F = 13; /* directional formatting code, not in 2.1 spec */
  183. static final char LRM = 0x200E; /* left to right mark */
  184. static final char RLM = 0x200F; /* right to left mark */
  185. static final char LRE = 0x202A; /* left to right embedding */
  186. static final char RLE = 0x202B; /* right to left embedding */
  187. static final char PDF = 0x202C; /* pop directional formatting */
  188. static final char LRO = 0x202D; /* left to right override */
  189. static final char RLO = 0x202E; /* right to left override */
  190. /* The embedding/override level stack limit */
  191. static final char NUMLEVELS = 16;
  192. /**
  193. * Modify the dir and levels arrays by applying the bidi algorithm to the arrays, changing
  194. * them in place. On output, the direction array contains only L, R, or AR codes, and
  195. * the levels array contains the resolved levels.
  196. */
  197. static void applyBidiRules(byte[] dirs, byte[] levels, boolean ltr) {
  198. byte[] wdirs = (byte[])dirs.clone(); // working dir array, can mutate
  199. resolveWeakTypes(wdirs, levels, ltr);
  200. resolveNeutralTypes(wdirs, levels, ltr);
  201. resolveImplicitLevels(wdirs, dirs, levels, ltr);
  202. }
  203. /*
  204. * This resolves serially in order from left to right, with the results
  205. * of previous changes taken into account for later characters. So, for
  206. * example, a series of ET's after an EN will all change to EN, since
  207. * once the first ET changes to EN, it is then treated as EN for
  208. * transforming the following ET, and so on. It will also process ETs
  209. * before EN by scanning forward across runs of ET and checking the
  210. * following character.
  211. *
  212. * !!! This does not take embedded levels into account. Should it?
  213. * yes it should, lastStrongWasArabic should be unaffected by text within a previous embedding
  214. */
  215. private static void resolveWeakTypes(byte[] dirs, byte[] levels, boolean ltr) {
  216. int i = 0;
  217. int limit = 0;
  218. while (limit < dirs.length) {
  219. byte level = levels[limit++];
  220. while (limit < dirs.length && levels[limit] == level) {
  221. ++limit;
  222. }
  223. byte prev = -1;
  224. byte cur = dirs[i];
  225. boolean lastStrongWasArabic = cur == AR;
  226. while (i < limit) {
  227. int ii = i + 1;
  228. byte next = (ii == limit) ? -1 : dirs[ii];
  229. if (next == EN && lastStrongWasArabic) {
  230. next = AN;
  231. }
  232. byte ncur = cur;
  233. switch (cur) {
  234. case L:
  235. case R:
  236. lastStrongWasArabic = false;
  237. break;
  238. case AR:
  239. lastStrongWasArabic = true;
  240. break;
  241. case ES:
  242. if (prev == EN && next == EN)
  243. ncur = EN;
  244. else
  245. ncur = ON;
  246. break;
  247. case CS:
  248. if (prev == EN && next == EN)
  249. ncur = EN;
  250. else if (prev == AN && next == AN)
  251. ncur = AN;
  252. else
  253. ncur = ON;
  254. break;
  255. case ET:
  256. if (prev == EN || next == EN) {
  257. ncur = EN;
  258. } else if (next == ET && !lastStrongWasArabic) {
  259. // forward scan to handle ET ET EN
  260. for (int j = ii + 1; j < limit; ++j) {
  261. byte dir = dirs[j];
  262. if (dir == ET) {
  263. continue;
  264. }
  265. byte nval = dir == EN ? EN : ON;
  266. while (ii < j) {
  267. dirs[ii++] = nval;
  268. }
  269. ncur = nval;
  270. next = dir;
  271. break;
  272. }
  273. } else {
  274. ncur = ON;
  275. }
  276. break;
  277. default:
  278. break;
  279. }
  280. dirs[i] = ncur;
  281. i = ii;
  282. prev = ncur;
  283. cur = next;
  284. }
  285. }
  286. }
  287. /*
  288. * According to Mark, this operation should never span a level boundary.
  289. * The start and end of the level should be treated like sot and eot,
  290. * with the base direction the direction of the level.
  291. */
  292. // new version fixes #4173251
  293. private static void resolveNeutralTypes(byte[] dirs, byte[] levels, boolean ltr) {
  294. int i = 0;
  295. int limit = dirs.length;
  296. while (i < limit) {
  297. byte tempBaseLevel = levels[i];
  298. byte tempBaseDir = ((tempBaseLevel & 0x1) == 0) ? L : R;
  299. int eot = i + 1;
  300. while (eot < limit && levels[eot] == tempBaseLevel) {
  301. eot++;
  302. }
  303. byte last = tempBaseDir;
  304. byte nval = tempBaseDir;
  305. int nexti = i - 1;
  306. while (i < eot) {
  307. byte dir = dirs[i];
  308. switch (dir) {
  309. case L:
  310. last = L; break;
  311. case R:
  312. case AR:
  313. last = R;
  314. break;
  315. case EN:
  316. case ES:
  317. case ET:
  318. case AN:
  319. case CS:
  320. break;
  321. case B:
  322. case S:
  323. last = tempBaseDir;
  324. break;
  325. case WS:
  326. case ON:
  327. case CM:
  328. if (i > nexti) {
  329. nval = tempBaseDir;
  330. nexti = i + 1;
  331. loop:
  332. while (nexti < eot) {
  333. byte ndir = dirs[nexti];
  334. switch (ndir) {
  335. case L:
  336. nval = last == L ? L : tempBaseDir;
  337. break loop;
  338. case R:
  339. case AR:
  340. case AN:
  341. nval = last == L ? tempBaseDir : R;
  342. break loop;
  343. case EN:
  344. nval = last;
  345. break loop;
  346. case S:
  347. nval = tempBaseDir;
  348. break loop;
  349. }
  350. ++nexti;
  351. }
  352. }
  353. dirs[i] = nval;
  354. }
  355. ++i;
  356. }
  357. }
  358. }
  359. /*
  360. * This does not use "global direction" but instead uses the
  361. * resolved level. EN processing is influenced by level boundaries.
  362. * This also processes segment and paragraph separator directions
  363. *
  364. * This interprets 'end of line' in L1 to mean end of segment.
  365. * Runs of whitespace at the end of the paragraph, and before a
  366. * block or segment separator, are set to the base level.
  367. */
  368. private static void resolveImplicitLevels(byte[] dirs, byte[] odirs, byte[] levels, boolean ltr) {
  369. byte baselevel = (byte)(ltr ? 0 : 1);
  370. int limit = dirs.length;
  371. byte prevlevel = -1;
  372. for (int i = 0; i < limit; i++) {
  373. byte level = levels[i];
  374. byte nlevel = level;
  375. switch (dirs[i]) {
  376. case L: nlevel = (byte)((level + 1) & 0x1e); break;
  377. case AR:
  378. case R: nlevel = (byte)(level | 0x1); break;
  379. case AN: nlevel = (byte)((level & 0xe) + 2); break;
  380. case EN: if ((level & 0x1) != 0) {
  381. nlevel += 1;
  382. } else if (i == 0 || prevlevel != level) {
  383. // start of ltr level, leave it as is
  384. } else {
  385. byte dir = dirs[i-1];
  386. if (dir == EN) {
  387. nlevel = levels[i-1];
  388. } else if (dir != L) {
  389. nlevel += 2;
  390. }
  391. }
  392. break;
  393. case B:
  394. case S: nlevel = baselevel;
  395. // set preceeding whitespace to baselevel too
  396. for (int j = i - 1; j >= 0 && odirs[j] == WS; --j) {
  397. levels[j] = baselevel;
  398. }
  399. break;
  400. }
  401. if (nlevel != level) {
  402. levels[i] = nlevel;
  403. }
  404. prevlevel = level;
  405. }
  406. for (int j = limit - 1; j >= 0 && odirs[j] == WS; --j) {
  407. levels[j] = baselevel;
  408. }
  409. }
  410. Bidi(Bidi paragraphBidi, int start, int limit) {
  411. byte[] indirs = paragraphBidi.dirs;
  412. byte[] newLevels = createLineLevels(indirs, paragraphBidi.levels, paragraphBidi.ltr, start, limit);
  413. this.ltr = paragraphBidi.ltr;
  414. this.dirs = null;
  415. this.levels = newLevels;
  416. }
  417. /**
  418. * Return a level array representing the levels between lineStart and lineLimit using
  419. * the direction information to identify trailing whitespace that might need to switch
  420. * levels.
  421. */
  422. static byte[] createLineLevels(byte[] dirs, byte[] levels, boolean ltr, int lineStart, int lineLimit) {
  423. byte[] lineLevels = new byte[lineLimit - lineStart];
  424. System.arraycopy(levels, lineStart, lineLevels, 0, lineLevels.length);
  425. byte x = (byte)(ltr ? 0 : 1);
  426. for (int i = lineLimit - lineStart - 1; i >= 0; --i) {
  427. if (lineLevels[i] == x || dirs[lineStart + i] != WS) {
  428. break;
  429. }
  430. lineLevels[i] = x;
  431. }
  432. return lineLevels;
  433. }
  434. /**
  435. * Return true if the default bidi rules indicate a run direction of LTR for the
  436. * provided range of the char array.
  437. */
  438. static boolean defaultIsLTR(char[] chars, int start, int limit) {
  439. while (start < limit) {
  440. char c = chars[start++];
  441. byte dir = getDirectionCode(c);
  442. switch (dir) {
  443. case L:
  444. return true;
  445. case AR:
  446. case R:
  447. return false;
  448. case F:
  449. return c == LRO || c == LRE;
  450. default:
  451. break;
  452. }
  453. }
  454. return true;
  455. }
  456. /**
  457. * Return true if the character suggests a need for bidi processing.
  458. * Characters in the arabic extended blocks return false by default.
  459. * Other rtl characters and rtl explicit codes return true.
  460. */
  461. static boolean requiresBidi(char c) {
  462. if (c < '\u0591') return false;
  463. if (c > '\u202e') return false; // if contains arabic extended data, presume already ordered
  464. byte dc = getDirectionCode(c);
  465. return dc == R || dc == AR || dc == F;
  466. }
  467. /**
  468. * Return the bidirectional direction code of the provided character.
  469. * Includes AR, CM, F direction codes.
  470. */
  471. static byte getDirectionCode(char c) {
  472. return dirValues[(dirIndices[c >> 7] << 7) + (c & 0x7f)];
  473. }
  474. /**
  475. * Return an array of the bidirectional direction codes for the
  476. * provided characters. Includes the AR direction code. Combining
  477. * characters take the direction code of the previous strong
  478. * directional character, if present, otherwise they take the
  479. * direction code ON.
  480. */
  481. static byte[] getDirectionCodeArray(char[] chars, byte[] embs) {
  482. byte sc = ON;
  483. byte olevel = -1;
  484. byte[] dirs = new byte[chars.length];
  485. for (int i = 0; i < chars.length; i++) {
  486. if (embs[i] != olevel) {
  487. sc = ON;
  488. olevel = embs[i];
  489. }
  490. char c = chars[i];
  491. byte dc = getDirectionCode(c);
  492. switch (dc) {
  493. case L:
  494. case R:
  495. case AR:
  496. sc = dc;
  497. break;
  498. case B:
  499. sc = ON;
  500. break;
  501. case CM:
  502. dc = sc;
  503. break;
  504. case F:
  505. dc = ON;
  506. break;
  507. default:
  508. break;
  509. }
  510. dirs[i] = dc;
  511. }
  512. return dirs;
  513. }
  514. /**
  515. * Process the text for embedding codes and return the embedding array.
  516. * The text represents a single paragraph using the provided run direction.
  517. * The embedding array represents simple embeddings as values from 0-15 and
  518. * overrides as values from 16-31.
  519. */
  520. static byte[] getEmbeddingArray(char[] chars, boolean ltr) {
  521. byte[] embeddings = new byte[chars.length];
  522. byte baseLevel = (byte)(ltr ? 0 : 1);
  523. byte level = baseLevel;
  524. int s = 0; // stack counter
  525. int skip = 0; // skip counter when codes don't affect the stack
  526. byte levelStack[] = new byte[NUMLEVELS]; // stack of levels
  527. char charStack[] = new char[NUMLEVELS]; // stack of format chars
  528. for (int i = 0; i < chars.length; i++) {
  529. char c = chars[i];
  530. switch (c) {
  531. case LRE:
  532. case LRO: {
  533. if (skip > 0) {
  534. ++skip;
  535. } else {
  536. byte newlevel = (byte)((level & 0x0e) + 2);
  537. if (newlevel >= NUMLEVELS) {
  538. ++skip;
  539. } else {
  540. charStack[s] = c;
  541. levelStack[s++] = level;
  542. embeddings[i] = level;
  543. if (c == LRO) {
  544. level = (byte)(newlevel + 0x10);
  545. } else {
  546. level = newlevel;
  547. }
  548. continue;
  549. }
  550. }
  551. } break;
  552. case RLE:
  553. case RLO: {
  554. if (skip > 0) {
  555. ++skip;
  556. } else {
  557. byte newlevel = (byte)(((level & 0xf) + 1) | 0x01);
  558. if (newlevel >= NUMLEVELS) {
  559. ++skip;
  560. } else {
  561. charStack[s] = c;
  562. levelStack[s++] = level;
  563. embeddings[i] = level;
  564. if (c == RLO) {
  565. level = (byte)(newlevel + 0x10);
  566. } else {
  567. level = newlevel;
  568. }
  569. continue;
  570. }
  571. }
  572. } break;
  573. case PDF:
  574. if (skip > 0) {
  575. --skip;
  576. } else if (s > 0) {
  577. // lookahead to coalesce level pairs
  578. if ((i < chars.length-1) && (chars[i+1] == charStack[s-1])) {
  579. embeddings[i] = level;
  580. embeddings[i+1] = level;
  581. i += 1;
  582. continue;
  583. }
  584. level = levelStack[--s];
  585. }
  586. break;
  587. default:
  588. break;
  589. }
  590. embeddings[i] = level;
  591. }
  592. return embeddings;
  593. }
  594. /**
  595. * Given a level array, compute a a visual to logical ordering.
  596. */
  597. static int[] createVisualToLogicalMap(byte[] levels) {
  598. int len = levels.length;
  599. int[] mapping = new int[len];
  600. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  601. byte highestLevel = 0;
  602. // initialize mapping and levels
  603. for (int i = 0; i < len; i++) {
  604. mapping[i] = i;
  605. byte level = levels[i];
  606. if (level > highestLevel) {
  607. highestLevel = level;
  608. }
  609. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  610. lowestOddLevel = level;
  611. }
  612. }
  613. while (highestLevel >= lowestOddLevel) {
  614. int i = 0;
  615. for (;;) {
  616. while (i < len && levels[i] < highestLevel) {
  617. i++;
  618. }
  619. int begin = i++;
  620. if (begin == levels.length) {
  621. break; // no more runs at this level
  622. }
  623. while (i < len && levels[i] >= highestLevel) {
  624. i++;
  625. }
  626. int end = i - 1;
  627. while (begin < end) {
  628. int temp = mapping[begin];
  629. mapping[begin] = mapping[end];
  630. mapping[end] = temp;
  631. ++begin;
  632. --end;
  633. }
  634. }
  635. --highestLevel;
  636. }
  637. return mapping;
  638. }
  639. /**
  640. * Reorder the objects in the array into visual order based on their levels.
  641. */
  642. static void reorderVisually(byte[] levels, Object[] objects) {
  643. int len = levels.length;
  644. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  645. byte highestLevel = 0;
  646. // initialize mapping and levels
  647. for (int i = 0; i < len; i++) {
  648. byte level = levels[i];
  649. if (level > highestLevel) {
  650. highestLevel = level;
  651. }
  652. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  653. lowestOddLevel = level;
  654. }
  655. }
  656. while (highestLevel >= lowestOddLevel) {
  657. int i = 0;
  658. for (;;) {
  659. while (i < len && levels[i] < highestLevel) {
  660. i++;
  661. }
  662. int begin = i++;
  663. if (begin == levels.length) {
  664. break; // no more runs at this level
  665. }
  666. while (i < len && levels[i] >= highestLevel) {
  667. i++;
  668. }
  669. int end = i - 1;
  670. while (begin < end) {
  671. Object temp = objects[begin];
  672. objects[begin] = objects[end];
  673. objects[end] = temp;
  674. ++begin;
  675. --end;
  676. }
  677. }
  678. --highestLevel;
  679. }
  680. }
  681. /**
  682. * Return the inverse array, source array must map 1-1
  683. *
  684. * i.e. if values[i] = j, then inverse[j] = i.
  685. */
  686. static int[] getInverseOrder(int[] values) {
  687. if (values == null) {
  688. return null;
  689. }
  690. int[] result = new int[values.length];
  691. for (int i = 0; i < values.length; i++) {
  692. result[values[i]] = i;
  693. }
  694. return result;
  695. }
  696. /**
  697. * Compute a contiguous order for the range start, limit.
  698. */
  699. private static int[] computeContiguousOrder(int[] values, int start,
  700. int limit) {
  701. int[] result = new int[limit-start];
  702. for (int i=0; i < result.length; i++) {
  703. result[i] = i + start;
  704. }
  705. // now we'll sort result[], with the following comparison:
  706. // result[i] lessthan result[j] iff values[result[i]] < values[result[j]]
  707. // selection sort for now; use more elaborate sorts if desired
  708. for (int i=0; i < result.length-1; i++) {
  709. int minIndex = i;
  710. int currentValue = values[result[minIndex]];
  711. for (int j=i; j < result.length; j++) {
  712. if (values[result[j]] < currentValue) {
  713. minIndex = j;
  714. currentValue = values[result[minIndex]];
  715. }
  716. }
  717. int temp = result[i];
  718. result[i] = result[minIndex];
  719. result[minIndex] = temp;
  720. }
  721. // shift result by start:
  722. if (start != 0) {
  723. for (int i=0; i < result.length; i++) {
  724. result[i] -= start;
  725. }
  726. }
  727. // next, check for canonical order:
  728. int k;
  729. for (k=0; k < result.length; k++) {
  730. if (result[k] != k) {
  731. break;
  732. }
  733. }
  734. if (k == result.length) {
  735. return null;
  736. }
  737. // now return inverse of result:
  738. return getInverseOrder(result);
  739. }
  740. /**
  741. * Return an array containing contiguous values from 0 to length
  742. * having the same ordering as the source array. If this would be
  743. * a canonical ltr ordering, return null. values[] is NOT
  744. * required to be a permutation.
  745. */
  746. static int[] getContiguousOrder(int[] values) {
  747. if (values != null) {
  748. return computeContiguousOrder(values, 0, values.length);
  749. }
  750. return null;
  751. }
  752. /**
  753. * Return an array containing the values from start up to limit,
  754. * normalized to fall within the range from 0 up to limit - start.
  755. * If this would be a canonical ltr ordering, return null.
  756. * NOTE: This method assumes that values[] is a permutation
  757. * generated from levels[].
  758. */
  759. static int[] getNormalizedOrder(int[] values, byte[] levels,
  760. int start, int limit) {
  761. if (values != null) {
  762. if (start != 0 || limit != values.length) {
  763. // levels optimization
  764. boolean copyRange, canonical;
  765. byte primaryLevel;
  766. if (levels == null) {
  767. primaryLevel = (byte) 0x0;
  768. copyRange = true;
  769. canonical = true;
  770. }
  771. else {
  772. if (levels[start] == levels[limit-1]) {
  773. primaryLevel = levels[start];
  774. canonical = (primaryLevel & (byte)0x1) == 0;
  775. // scan for levels below primary
  776. int i;
  777. for (i=start; i < limit; i++) {
  778. if (levels[i] < primaryLevel) {
  779. break;
  780. }
  781. if (canonical) {
  782. canonical = levels[i] == primaryLevel;
  783. }
  784. }
  785. copyRange = (i == limit);
  786. }
  787. else {
  788. copyRange = false;
  789. // these don't matter; but the compiler cares:
  790. primaryLevel = (byte) 0x0;
  791. canonical = false;
  792. }
  793. }
  794. if (copyRange) {
  795. if (canonical) {
  796. return null;
  797. }
  798. int[] result = new int[limit-start];
  799. int baseValue;
  800. if ((primaryLevel & (byte)0x1) != 0) {
  801. baseValue = values[limit-1];
  802. } else {
  803. baseValue = values[start];
  804. }
  805. if (baseValue == 0) {
  806. System.arraycopy(values, start, result, 0, limit-start);
  807. }
  808. else {
  809. for (int j=0; j < result.length; j++) {
  810. result[j] = values[j+start] - baseValue;
  811. }
  812. }
  813. return result;
  814. }
  815. else {
  816. return computeContiguousOrder(values, start, limit);
  817. }
  818. }
  819. else {
  820. return values;
  821. }
  822. }
  823. return null;
  824. }
  825. // convenience method for compatibility with old tests
  826. static Bidi createBidi(char[] text) {
  827. return new Bidi(text);
  828. }
  829. // from Unicode Data 2.1.8
  830. // gets reset in static init
  831. private static byte[] dirIndices = {
  832. 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -124,
  833. 14, 18, 15, 16, 17, 18, 19, 20, 21, 22, 23, 14, 24, 25, 26, 27,
  834. 14, 28, 29, 30, -104, 14, 14, 2, 31, 32, 33, 34, 35, 36, 37, 38,
  835. 14, 39, 14, 40, 41, -106, 14, 8, 42, 43, 44, 45, 46, 47, 48, 49,
  836. -76, 14, -2, 2, -91, 2, 1, 50, -104, 14, -41, 2, 1, 51, -60, 2,
  837. 12, 52, 14, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61,
  838. };
  839. // gets reset in static init
  840. private static byte[] dirValues = {
  841. -119, 10, 5, 8, 7, 8, 7, 7, -114, 10, -125, 7, 4, 8, 9, 10,
  842. 10, -125, 4, -123, 10, 5, 4, 6, 4, 6, 3, -118, 2, 1, 6, -122,
  843. 10, -102, 0, -122, 10, -102, 0, -91, 10, 2, 9, 10, -124, 4, -124, 10,
  844. 1, 0, -123, 10, 6, 4, 4, 2, 2, 10, 0, -125, 10, 2, 2, 0,
  845. -123, 10, -105, 0, 1, 10, -97, 0, 1, 10, -2, 0, -2, 0, 2, 0,
  846. 0, -124, 10, -98, 0, -72, 10, -39, 0, -121, 10, -119, 0, 2, 10, 10,
  847. -121, 0, -98, 10, -123, 0, -101, 10, -58, 12, -102, 10, 2, 12, 12, -110,
  848. 10, 2, 0, 0, -124, 10, 1, 0, -117, 10, 2, 0, 10, -125, 0, 3,
  849. 10, 0, 10, -108, 0, 1, 10, -84, 0, 1, 10, -121, 0, -125, 10, 8,
  850. 0, 10, 0, 10, 0, 10, 0, 10, -110, 0, -115, 10, -116, 0, 1, 10,
  851. -62, 0, 1, 10, -116, 0, 1, 10, -91, 0, -124, 12, -119, 10, -75, 0,
  852. 8, 10, 10, 0, 0, 10, 10, 0, 0, -125, 10, -100, 0, 2, 10, 10,
  853. -120, 0, 4, 10, 10, 0, 0, -73, 10, -90, 0, 2, 10, 10, -121, 0,
  854. 1, 10, -89, 0, 2, 10, 0, -121, 10, -111, 12, 1, 10, -105, 12, 1,
  855. 10, -125, 12, 7, 1, 12, 1, 12, 12, 1, 12, -117, 10, -101, 1, -123,
  856. 10, -123, 1, -105, 10, 1, 6, -114, 10, 1, 11, -125, 10, 2, 11, 10,
  857. -102, 11, -123, 10, -117, 11, -120, 12, -115, 10, -118, 5, 7, 4, 5, 5,
  858. 11, 10, 10, 12, -57, 11, 2, 10, 10, -123, 11, 1, 10, -113, 11, 1,
  859. 10, -122, 11, -113, 12, 5, 11, 11, 12, 12, 10, -124, 12, 2, 10, 10,
  860. -118, 2, -2, 10, -119, 10, 4, 12, 12, 0, 10, -75, 0, 3, 10, 10,
  861. 12, -124, 0, -120, 12, -124, 0, 4, 12, 10, 10, 0, -124, 12, -125, 10,
  862. -118, 0, 2, 12, 12, -115, 0, -112, 10, 4, 12, 0, 0, 10, -120, 0,
  863. 6, 10, 10, 0, 0, 10, 10, -106, 0, 1, 10, -121, 0, 2, 10, 0,
  864. -125, 10, -124, 0, 4, 10, 10, 12, 10, -125, 0, -124, 12, 9, 10, 10,
  865. 0, 0, 10, 10, 0, 0, 12, -119, 10, 1, 0, -124, 10, 3, 0, 0,
  866. 10, -125, 0, 4, 12, 12, 10, 10, -116, 0, 2, 4, 4, -121, 0, -121,
  867. 10, 3, 12, 10, 10, -122, 0, -124, 10, 4, 0, 0, 10, 10, -106, 0,
  868. 1, 10, -121, 0, 13, 10, 0, 0, 10, 0, 0, 10, 0, 0, 10, 10,
  869. 12, 10, -125, 0, 2, 12, 12, -124, 10, 4, 12, 12, 10, 10, -125, 12,
  870. -117, 10, -124, 0, 2, 10, 0, -121, 10, -118, 0, 2, 12, 12, -125, 0,
  871. -116, 10, 4, 12, 12, 0, 10, -121, 0, 3, 10, 0, 10, -125, 0, 1,
  872. 10, -106, 0, 1, 10, -121, 0, 4, 10, 0, 0, 10, -123, 0, 3, 10,
  873. 10, 12, -124, 0, -123, 12, 11, 10, 12, 12, 0, 10, 0, 0, 12, 10,
  874. 10, 0, -113, 10, 1, 0, -123, 10, -118, 0, -111, 10, 4, 12, 0, 0,
  875. 10, -120, 0, 6, 10, 10, 0, 0, 10, 10, -106, 0, 1, 10, -121, 0,
  876. 5, 10, 0, 0, 10, 10, -124, 0, 7, 10, 10, 12, 0, 0, 12, 0,
  877. -125, 12, -125, 10, 7, 0, 0, 10, 10, 0, 0, 12, -120, 10, 2, 12,
  878. 0, -124, 10, 3, 0, 0, 10, -125, 0, -124, 10, -117, 0, -111, 10, 3,
  879. 12, 0, 10, -122, 0, -125, 10, -125, 0, 1, 10, -124, 0, -125, 10, 7,
  880. 0, 0, 10, 0, 10, 0, 0, -125, 10, 2, 0, 0, -125, 10, -125, 0,
  881. -125, 10, -120, 0, 1, 10, -125, 0, -124, 10, 5, 0, 0, 12, 0, 0,
  882. -125, 10, -125, 0, 1, 10, -125, 0, 1, 12, -119, 10, 1, 0, -113, 10,
  883. -116, 0, -114, 10, -125, 0, 1, 10, -120, 0, 1, 10, -125, 0, 1, 10,
  884. -105, 0, 1, 10, -118, 0, 1, 10, -123, 0, -124, 10, -125, 12, -124, 0,
  885. 1, 10, -125, 12, 1, 10, -124, 12, -121, 10, 2, 12, 12, -119, 10, 2,
  886. 0, 0, -124, 10, -118, 0, -110, 10, 3, 0, 0, 10, -120, 0, 1, 10,
  887. -125, 0, 1, 10, -105, 0, 1, 10, -118, 0, 1, 10, -123, 0, -124, 10,
  888. 2, 0, 12, -123, 0, 9, 10, 12, 0, 0, 10, 0, 0, 12, 12, -121,
  889. 10, 2, 0, 0, -121, 10, 4, 0, 10, 0, 0, -124, 10, -118, 0, -110,
  890. 10, 3, 0, 0, 10, -120, 0, 1, 10, -125, 0, 1, 10, -105, 0, 1,
  891. 10, -112, 0, -124, 10, -125, 0, -125, 12, 2, 10, 10, -125, 0, 1, 10,
  892. -125, 0, 1, 12, -119, 10, 1, 0, -120, 10, 2, 0, 0, -124, 10, -118,
  893. 0, -111, 10, -80, 0, 3, 12, 0, 0, -121, 12, -124, 10, 1, 4, -121,
  894. 0, -120, 12, -115, 0, -91, 10, 13, 0, 0, 10, 0, 10, 10, 0, 0,
  895. 10, 0, 10, 10, 0, -122, 10, -124, 0, 1, 10, -121, 0, 1, 10, -125,
  896. 0, 9, 10, 0, 10, 0, 10, 10, 0, 0, 10, -124, 0, 3, 12, 0,
  897. 0, -122, 12, 6, 10, 12, 12, 0, 10, 10, -123, 0, 3, 10, 0, 10,
  898. -122, 12, 2, 10, 10, -118, 0, 4, 10, 10, 0, 0, -94, 10, -104, 0,
  899. 2, 12, 12, -101, 0, 5, 12, 0, 12, 0, 12, -124, 10, 2, 12, 12,
  900. -120, 0, 1, 10, -95, 0, -121, 10, -114, 12, 1, 0, -123, 12, 3, 0,
  901. 12, 12, -124, 0, -124, 10, -122, 12, 3, 10, 12, 10, -107, 12, -125, 10,
  902. -121, 12, 2, 10, 12, -26, 10, -90, 0, -118, 10, -89, 0, -124, 10, 1,
  903. 0, -124, 10, -38, 0, -123, 10, -60, 0, -123, 10, -46, 0, -122, 10, -100,
  904. 0, -124, 10, -38, 0, -122, 10, -106, 0, 2, 10, 10, -122, 0, 2, 10,
  905. 10, -90, 0, 2, 10, 10, -122, 0, 2, 10, 10, -120, 0, 7, 10, 0,
  906. 10, 0, 10, 0, 10, -97, 0, 2, 10, 10, -75, 0, 1, 10, -121, 0,
  907. 2, 10, 0, -125, 10, -125, 0, 1, 10, -121, 0, -125, 10, -124, 0, 2,
  908. 10, 10, -122, 0, -124, 10, -115, 0, -123, 10, -125, 0, 1, 10, -121, 0,
  909. -125, 10, -121, 9, 1, 6, -124, 9, 4, 10, 10, 0, 1, -104, 10, 8,
  910. 7, 7, 13, 13, 10, 13, 13, 10, -123, 4, -69, 10, 1, 2, -125, 10,
  911. -122, 2, 2, 4, 4, -125, 10, 1, 0, -118, 2, 2, 4, 4, -108, 10,
  912. -115, 4, -93, 10, -110, 12, -96, 10, 1, 0, -124, 10, 3, 0, 10, 10,
  913. -118, 0, 4, 10, 0, 10, 10, -122, 0, -122, 10, 6, 0, 10, 0, 10,
  914. 0, 10, -120, 0, 1, 10, -122, 0, -89, 10, -93, 0, -2, 10, -111, 10,
  915. 2, 4, 4, -2, 10, -92, 10, -59, 0, -27, 10, -68, 2, -50, 0, 1,
  916. 2, -107, 10, 1, 9, -123, 10, 2, 0, 0, -103, 10, -119, 0, -122, 12,
  917. -111, 10, -44, 0, -124, 10, 8, 12, 12, 10, 10, 0, 0, 10, 10, -34,
  918. 0, -122, 10, -88, 0, -124, 10, -34, 0, 1, 10, -112, 0, -32, 10, -99,
  919. 0, -125, 10, -92, 0, -100, 10, -100, 0, -125, 10, -78, 0, -113, 10, -116,
  920. 0, -124, 10, -81, 0, 1, 10, -9, 0, -124, 10, -29, 0, 2, 10, 10,
  921. -97, 0, 1, 10, -90, 0, -38, 10, -92, 0, -36, 10, -82, 0, -46, 10,
  922. -121, 0, -116, 10, -123, 0, -122, 10, 1, 12, -118, 1, 1, 4, -115, 1,
  923. 1, 10, -123, 1, 9, 10, 1, 10, 1, 1, 10, 1, 1, 10, -20, 1,
  924. -95, 10, -2, 1, -19, 1, -110, 10, -64, 1, 2, 10, 10, -74, 1, -88,
  925. 10, -116, 1, -92, 10, -124, 12, -84, 10, 6, 6, 10, 6, 10, 10, 6,
  926. -119, 10, 5, 4, 10, 10, 4, 4, -123, 10, 2, 4, 4, -123, 10, -125,
  927. 1, 3, 10, 1, 10, -2, 1, -119, 1, -122, 10, -125, 4, -123, 10, 5,
  928. 4, 6, 4, 6, 3, -118, 2, 1, 6, -122, 10, -102, 0, -122, 10, -102,
  929. 0, -118, 10, -71, 0, 2, 10, 10, -97, 0, -125, 10, -122, 0, 2, 10,
  930. 10, -122, 0, 2, 10, 10, -122, 0, 2, 10, 10, -125, 0, -125, 10, 2,
  931. 4, 4, -125, 10, 2, 4, 4, -103, 10,
  932. };
  933. static {
  934. dirIndices = RLEUtilities.readRLE(dirIndices);
  935. dirValues = RLEUtilities.readRLE(dirValues);
  936. }
  937. }