1. /*
  2. * @(#)Bidi.java 1.5 00/02/02
  3. *
  4. * Copyright 1998-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1997, 1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation is copyrighted
  15. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  16. * materials are provided under terms of a License Agreement between Taligent
  17. * and Sun. This technology is protected by multiple US and International
  18. * patents. This notice and attribution to Taligent may not be removed.
  19. * Taligent is a registered trademark of Taligent, Inc.
  20. */
  21. package javax.swing.text;
  22. import java.io.*;
  23. /**
  24. * Bidi implementation.
  25. */
  26. final class Bidi {
  27. private boolean ltr;
  28. private byte[] dirs;
  29. private byte[] levels;
  30. private int[] l2vMap;
  31. private int[] v2lMap;
  32. /**
  33. * Return the number of elements the bidi represents.
  34. */
  35. int getLength() {
  36. return levels.length;
  37. }
  38. /**
  39. * Return true if the base run direction defined for this bidi is
  40. * left to right, false if it is right to left.
  41. */
  42. boolean isDirectionLTR() {
  43. return ltr;
  44. }
  45. /**
  46. * Return a mapping of the bidirectional information from
  47. * logical to visual position. If mapping is canonical, return
  48. * null;
  49. */
  50. int[] getLogicalToVisualMap() {
  51. if (l2vMap == null) {
  52. l2vMap = getInverseOrder(getVisualToLogicalMap());
  53. }
  54. return l2vMap;
  55. }
  56. /**
  57. * Return a mapping of the bidirectional information from
  58. * visual to logical position. If mapping is canonical, return
  59. * null;
  60. */
  61. int[] getVisualToLogicalMap() {
  62. if (v2lMap == null) {
  63. v2lMap = createVisualToLogicalMap(levels);
  64. }
  65. return v2lMap;
  66. }
  67. /**
  68. * Return the resolved level array.
  69. */
  70. byte[] getLevels() {
  71. return levels;
  72. }
  73. /**
  74. * Return a bidi representing the given subrange of this bidi, with
  75. * line reordering applied to counterdirectional trailing whitespace.
  76. */
  77. Bidi createLineBidi(int start, int limit) {
  78. byte[] newLevels = new byte[limit - start];
  79. System.arraycopy(levels, start, newLevels, 0, newLevels.length);
  80. if (dirs != null) {
  81. byte x = (byte)(ltr ? 0 : 1);
  82. for (int i = newLevels.length; --i >= 0;) {
  83. if ((newLevels[i] % 2) == x || dirs[start + i] != WS) {
  84. break;
  85. }
  86. newLevels[i] = x;
  87. }
  88. }
  89. return new Bidi(newLevels, ltr);
  90. }
  91. /**
  92. * Return the level of the element at pos.
  93. */
  94. int getLevelAt(int pos) {
  95. return levels[pos];
  96. }
  97. /**
  98. * Get the limit of the level run starting at start.
  99. */
  100. int getLevelLimit(int start) {
  101. byte l = levels[start];
  102. while (++start < levels.length && levels[start] == l) {}
  103. return start;
  104. }
  105. /**
  106. * Create bidi information about the provided paragraph of text.
  107. * Use bidi rules to default line direction.
  108. */
  109. Bidi(char[] text) {
  110. this(text, defaultIsLTR(text, 0, text.length));
  111. }
  112. /**
  113. * Create bidi information about the provided paragraph of text.
  114. * Embedding codes in the text will be processed.
  115. * @param text the characters in the paragraph.
  116. * @param ltr true if the paragraph run direction is LTR,
  117. * false if it is RTL.
  118. */
  119. Bidi(char[] text, boolean ltr) {
  120. this(text, getEmbeddingArray(text, ltr), ltr);
  121. }
  122. /**
  123. * Create bidi information about the provided paragraph of text, using the
  124. * provided embedding information. Explicit embedding codes in the text, if
  125. * any, are not processed.
  126. * @param text the characters in the paragraph.
  127. * @param embs the embedding data. Values from 0-15 are embeddings at the
  128. * indicated level. Values from 16-31 are overrides at the corresponding levels.
  129. * This may modify the data in embs.
  130. * @param ltr true if the run direction for the paragraph is LTR,
  131. * false if it is RTL.
  132. */
  133. Bidi(char[] text, byte[] embs, boolean ltr) {
  134. byte[] dirs = getDirectionCodeArray(text, embs);
  135. for (int i = 0; i < embs.length; i++) {
  136. if ((embs[i] & 0x10) != 0) {
  137. embs[i] &= 0x0f;
  138. dirs[i] = (byte)(embs[i] & 0x01); // L, R
  139. }
  140. }
  141. applyBidiRules(dirs, embs, ltr); // save the input dirs
  142. this.ltr = ltr;
  143. this.dirs = dirs;
  144. this.levels = embs;
  145. }
  146. /**
  147. * Create bidi information, using the provided data on the direction
  148. * codes and levels of the text in the paragraph. These dirs and levels
  149. * arrays should not be subsequently modified by the caller.
  150. *
  151. * @param dirs an array of bidi direction codes. Arabic rtl text is
  152. * encoded using AR. Embedding overrides have already been
  153. * processed into this array.
  154. * @param levels an array of embedding levels, 0-15 inclusive.
  155. * @param ltr true if the run direction for the paragraph is LTR,
  156. * false if it is RTL.
  157. */
  158. Bidi(byte[] dirs, byte[] levels, boolean ltr) {
  159. applyBidiRules(dirs, levels, ltr); // save the input dirs
  160. this.ltr = ltr;
  161. this.dirs = dirs;
  162. this.levels = levels;
  163. }
  164. protected Bidi(byte[] levels, boolean ltr) {
  165. this.ltr = ltr;
  166. this.dirs = null;
  167. this.levels = levels;
  168. }
  169. /* so clients can construct any direction array. */
  170. static final byte L = 0; /* left to right (strong) */
  171. static final byte R = 1; /* right to left (strong) */
  172. static final byte EN = 2; /* european number (weak) */
  173. static final byte ES = 3; /* european number separator (weak) */
  174. static final byte ET = 4; /* european number terminator (weak) */
  175. static final byte AN = 5; /* arabic number (weak) */
  176. static final byte CS = 6; /* common number separator (weak) */
  177. static final byte B = 7; /* block separator */
  178. static final byte S = 8; /* segment separator */
  179. static final byte WS = 9; /* whitespace */
  180. static final byte ON = 10; /* other neutral */
  181. static final byte AR = 11; /* arabic strong rtl character, not in 2.1 spec */
  182. static final byte CM = 12; /* neutral combining mark, not in 2.1 spec */
  183. static final byte F = 13; /* directional formatting code, not in 2.1 spec */
  184. static final char LRM = 0x200E; /* left to right mark */
  185. static final char RLM = 0x200F; /* right to left mark */
  186. static final char LRE = 0x202A; /* left to right embedding */
  187. static final char RLE = 0x202B; /* right to left embedding */
  188. static final char PDF = 0x202C; /* pop directional formatting */
  189. static final char LRO = 0x202D; /* left to right override */
  190. static final char RLO = 0x202E; /* right to left override */
  191. /* The embedding/override level stack limit */
  192. static final char NUMLEVELS = 16;
  193. /**
  194. * Modify the dir and levels arrays by applying the bidi algorithm to the arrays, changing
  195. * them in place. On output, the direction array contains only L, R, or AR codes, and
  196. * the levels array contains the resolved levels.
  197. */
  198. static void applyBidiRules(byte[] dirs, byte[] levels, boolean ltr) {
  199. byte[] wdirs = (byte[])dirs.clone(); // working dir array, can mutate
  200. resolveWeakTypes(wdirs, levels, ltr);
  201. resolveNeutralTypes(wdirs, levels, ltr);
  202. resolveImplicitLevels(wdirs, dirs, levels, ltr);
  203. }
  204. /*
  205. * This resolves serially in order from left to right, with the results
  206. * of previous changes taken into account for later characters. So, for
  207. * example, a series of ET's after an EN will all change to EN, since
  208. * once the first ET changes to EN, it is then treated as EN for
  209. * transforming the following ET, and so on. It will also process ETs
  210. * before EN by scanning forward across runs of ET and checking the
  211. * following character.
  212. *
  213. * !!! This does not take embedded levels into account. Should it?
  214. * yes it should, lastStrongWasArabic should be unaffected by text within a previous embedding
  215. */
  216. private static void resolveWeakTypes(byte[] dirs, byte[] levels, boolean ltr) {
  217. int i = 0;
  218. int limit = 0;
  219. while (limit < dirs.length) {
  220. byte level = levels[limit++];
  221. while (limit < dirs.length && levels[limit] == level) {
  222. ++limit;
  223. }
  224. byte prev = -1;
  225. byte cur = dirs[i];
  226. boolean lastStrongWasArabic = cur == AR;
  227. while (i < limit) {
  228. int ii = i + 1;
  229. byte next = (ii == limit) ? -1 : dirs[ii];
  230. if (next == EN && lastStrongWasArabic) {
  231. next = AN;
  232. }
  233. byte ncur = cur;
  234. switch (cur) {
  235. case L:
  236. case R:
  237. lastStrongWasArabic = false;
  238. break;
  239. case AR:
  240. lastStrongWasArabic = true;
  241. break;
  242. case ES:
  243. if (prev == EN && next == EN)
  244. ncur = EN;
  245. else
  246. ncur = ON;
  247. break;
  248. case CS:
  249. if (prev == EN && next == EN)
  250. ncur = EN;
  251. else if (prev == AN && next == AN)
  252. ncur = AN;
  253. else
  254. ncur = ON;
  255. break;
  256. case ET:
  257. if (prev == EN || next == EN) {
  258. ncur = EN;
  259. } else if (next == ET && !lastStrongWasArabic) {
  260. // forward scan to handle ET ET EN
  261. for (int j = ii + 1; j < limit; ++j) {
  262. byte dir = dirs[j];
  263. if (dir == ET) {
  264. continue;
  265. }
  266. byte nval = dir == EN ? EN : ON;
  267. while (ii < j) {
  268. dirs[ii++] = nval;
  269. }
  270. ncur = nval;
  271. next = dir;
  272. break;
  273. }
  274. } else {
  275. ncur = ON;
  276. }
  277. break;
  278. default:
  279. break;
  280. }
  281. dirs[i] = ncur;
  282. i = ii;
  283. prev = ncur;
  284. cur = next;
  285. }
  286. }
  287. }
  288. /*
  289. * According to Mark, this operation should never span a level boundary.
  290. * The start and end of the level should be treated like sot and eot,
  291. * with the base direction the direction of the level.
  292. */
  293. // new version fixes #4173251
  294. private static void resolveNeutralTypes(byte[] dirs, byte[] levels, boolean ltr) {
  295. int i = 0;
  296. int limit = dirs.length;
  297. while (i < limit) {
  298. byte tempBaseLevel = levels[i];
  299. byte tempBaseDir = ((tempBaseLevel & 0x1) == 0) ? L : R;
  300. int eot = i + 1;
  301. while (eot < limit && levels[eot] == tempBaseLevel) {
  302. eot++;
  303. }
  304. byte last = tempBaseDir;
  305. byte nval = tempBaseDir;
  306. int nexti = i - 1;
  307. while (i < eot) {
  308. byte dir = dirs[i];
  309. switch (dir) {
  310. case L:
  311. last = L; break;
  312. case R:
  313. case AR:
  314. last = R;
  315. break;
  316. case EN:
  317. case ES:
  318. case ET:
  319. case AN:
  320. case CS:
  321. break;
  322. case B:
  323. case S:
  324. last = tempBaseDir;
  325. break;
  326. case WS:
  327. case ON:
  328. case CM:
  329. if (i > nexti) {
  330. nval = tempBaseDir;
  331. nexti = i + 1;
  332. loop:
  333. while (nexti < eot) {
  334. byte ndir = dirs[nexti];
  335. switch (ndir) {
  336. case L:
  337. nval = last == L ? L : tempBaseDir;
  338. break loop;
  339. case R:
  340. case AR:
  341. case AN:
  342. nval = last == L ? tempBaseDir : R;
  343. break loop;
  344. case EN:
  345. nval = last;
  346. break loop;
  347. case S:
  348. nval = tempBaseDir;
  349. break loop;
  350. }
  351. ++nexti;
  352. }
  353. }
  354. dirs[i] = nval;
  355. }
  356. ++i;
  357. }
  358. }
  359. }
  360. /*
  361. * This does not use "global direction" but instead uses the
  362. * resolved level. EN processing is influenced by level boundaries.
  363. * This also processes segment and paragraph separator directions
  364. *
  365. * This interprets 'end of line' in L1 to mean end of segment.
  366. * Runs of whitespace at the end of the paragraph, and before a
  367. * block or segment separator, are set to the base level.
  368. */
  369. private static void resolveImplicitLevels(byte[] dirs, byte[] odirs, byte[] levels, boolean ltr) {
  370. byte baselevel = (byte)(ltr ? 0 : 1);
  371. int limit = dirs.length;
  372. byte prevlevel = -1;
  373. for (int i = 0; i < limit; i++) {
  374. byte level = levels[i];
  375. byte nlevel = level;
  376. switch (dirs[i]) {
  377. case L: nlevel = (byte)((level + 1) & 0x1e); break;
  378. case AR:
  379. case R: nlevel = (byte)(level | 0x1); break;
  380. case AN: nlevel = (byte)((level & 0xe) + 2); break;
  381. case EN: if ((level & 0x1) != 0) {
  382. nlevel += 1;
  383. } else if (i == 0 || prevlevel != level) {
  384. // start of ltr level, leave it as is
  385. } else {
  386. byte dir = dirs[i-1];
  387. if (dir == EN) {
  388. nlevel = levels[i-1];
  389. } else if (dir != L) {
  390. nlevel += 2;
  391. }
  392. }
  393. break;
  394. case B:
  395. case S: nlevel = baselevel;
  396. // set preceeding whitespace to baselevel too
  397. for (int j = i - 1; j >= 0 && odirs[j] == WS; --j) {
  398. levels[j] = baselevel;
  399. }
  400. break;
  401. }
  402. if (nlevel != level) {
  403. levels[i] = nlevel;
  404. }
  405. prevlevel = level;
  406. }
  407. for (int j = limit - 1; j >= 0 && odirs[j] == WS; --j) {
  408. levels[j] = baselevel;
  409. }
  410. }
  411. Bidi(Bidi paragraphBidi, int start, int limit) {
  412. byte[] indirs = paragraphBidi.dirs;
  413. byte[] newLevels = createLineLevels(indirs, paragraphBidi.levels, paragraphBidi.ltr, start, limit);
  414. this.ltr = paragraphBidi.ltr;
  415. this.dirs = null;
  416. this.levels = newLevels;
  417. }
  418. /**
  419. * Return a level array representing the levels between lineStart and lineLimit using
  420. * the direction information to identify trailing whitespace that might need to switch
  421. * levels.
  422. */
  423. static byte[] createLineLevels(byte[] dirs, byte[] levels, boolean ltr, int lineStart, int lineLimit) {
  424. byte[] lineLevels = new byte[lineLimit - lineStart];
  425. System.arraycopy(levels, lineStart, lineLevels, 0, lineLevels.length);
  426. byte x = (byte)(ltr ? 0 : 1);
  427. for (int i = lineLimit - lineStart - 1; i >= 0; --i) {
  428. if (lineLevels[i] == x || dirs[lineStart + i] != WS) {
  429. break;
  430. }
  431. lineLevels[i] = x;
  432. }
  433. return lineLevels;
  434. }
  435. /**
  436. * Return true if the default bidi rules indicate a run direction of LTR for the
  437. * provided range of the char array.
  438. */
  439. static boolean defaultIsLTR(char[] chars, int start, int limit) {
  440. while (start < limit) {
  441. char c = chars[start++];
  442. byte dir = getDirectionCode(c);
  443. switch (dir) {
  444. case L:
  445. return true;
  446. case AR:
  447. case R:
  448. return false;
  449. case F:
  450. return c == LRO || c == LRE;
  451. default:
  452. break;
  453. }
  454. }
  455. return true;
  456. }
  457. /**
  458. * Return true if the character suggests a need for bidi processing.
  459. * Characters in the arabic extended blocks return false by default.
  460. * Other rtl characters and rtl explicit codes return true.
  461. */
  462. static boolean requiresBidi(char c) {
  463. if (c < '\u0591') return false;
  464. if (c > '\u202e') return false; // if contains arabic extended data, presume already ordered
  465. byte dc = getDirectionCode(c);
  466. return dc == R || dc == AR || dc == F;
  467. }
  468. /**
  469. * Return the bidirectional direction code of the provided character.
  470. * Includes AR, CM, F direction codes.
  471. */
  472. static byte getDirectionCode(char c) {
  473. return dirValues[(dirIndices[c >> 7] << 7) + (c & 0x7f)];
  474. }
  475. /**
  476. * Return an array of the bidirectional direction codes for the
  477. * provided characters. Includes the AR direction code. Combining
  478. * characters take the direction code of the previous strong
  479. * directional character, if present, otherwise they take the
  480. * direction code ON.
  481. */
  482. static byte[] getDirectionCodeArray(char[] chars, byte[] embs) {
  483. byte sc = ON;
  484. byte olevel = -1;
  485. byte[] dirs = new byte[chars.length];
  486. for (int i = 0; i < chars.length; i++) {
  487. if (embs[i] != olevel) {
  488. sc = ON;
  489. olevel = embs[i];
  490. }
  491. char c = chars[i];
  492. byte dc = getDirectionCode(c);
  493. switch (dc) {
  494. case L:
  495. case R:
  496. case AR:
  497. sc = dc;
  498. break;
  499. case B:
  500. sc = ON;
  501. break;
  502. case CM:
  503. dc = sc;
  504. break;
  505. case F:
  506. dc = ON;
  507. break;
  508. default:
  509. break;
  510. }
  511. dirs[i] = dc;
  512. }
  513. return dirs;
  514. }
  515. /**
  516. * Process the text for embedding codes and return the embedding array.
  517. * The text represents a single paragraph using the provided run direction.
  518. * The embedding array represents simple embeddings as values from 0-15 and
  519. * overrides as values from 16-31.
  520. */
  521. static byte[] getEmbeddingArray(char[] chars, boolean ltr) {
  522. byte[] embeddings = new byte[chars.length];
  523. byte baseLevel = (byte)(ltr ? 0 : 1);
  524. byte level = baseLevel;
  525. int s = 0; // stack counter
  526. int skip = 0; // skip counter when codes don't affect the stack
  527. byte levelStack[] = new byte[NUMLEVELS]; // stack of levels
  528. char charStack[] = new char[NUMLEVELS]; // stack of format chars
  529. for (int i = 0; i < chars.length; i++) {
  530. char c = chars[i];
  531. switch (c) {
  532. case LRE:
  533. case LRO: {
  534. if (skip > 0) {
  535. ++skip;
  536. } else {
  537. byte newlevel = (byte)((level & 0x0e) + 2);
  538. if (newlevel >= NUMLEVELS) {
  539. ++skip;
  540. } else {
  541. charStack[s] = c;
  542. levelStack[s++] = level;
  543. embeddings[i] = level;
  544. if (c == LRO) {
  545. level = (byte)(newlevel + 0x10);
  546. } else {
  547. level = newlevel;
  548. }
  549. continue;
  550. }
  551. }
  552. } break;
  553. case RLE:
  554. case RLO: {
  555. if (skip > 0) {
  556. ++skip;
  557. } else {
  558. byte newlevel = (byte)(((level & 0xf) + 1) | 0x01);
  559. if (newlevel >= NUMLEVELS) {
  560. ++skip;
  561. } else {
  562. charStack[s] = c;
  563. levelStack[s++] = level;
  564. embeddings[i] = level;
  565. if (c == RLO) {
  566. level = (byte)(newlevel + 0x10);
  567. } else {
  568. level = newlevel;
  569. }
  570. continue;
  571. }
  572. }
  573. } break;
  574. case PDF:
  575. if (skip > 0) {
  576. --skip;
  577. } else if (s > 0) {
  578. // lookahead to coalesce level pairs
  579. if ((i < chars.length-1) && (chars[i+1] == charStack[s-1])) {
  580. embeddings[i] = level;
  581. embeddings[i+1] = level;
  582. i += 1;
  583. continue;
  584. }
  585. level = levelStack[--s];
  586. }
  587. break;
  588. default:
  589. break;
  590. }
  591. embeddings[i] = level;
  592. }
  593. return embeddings;
  594. }
  595. /**
  596. * Given a level array, compute a a visual to logical ordering.
  597. */
  598. static int[] createVisualToLogicalMap(byte[] levels) {
  599. int len = levels.length;
  600. int[] mapping = new int[len];
  601. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  602. byte highestLevel = 0;
  603. // initialize mapping and levels
  604. for (int i = 0; i < len; i++) {
  605. mapping[i] = i;
  606. byte level = levels[i];
  607. if (level > highestLevel) {
  608. highestLevel = level;
  609. }
  610. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  611. lowestOddLevel = level;
  612. }
  613. }
  614. while (highestLevel >= lowestOddLevel) {
  615. int i = 0;
  616. for (;;) {
  617. while (i < len && levels[i] < highestLevel) {
  618. i++;
  619. }
  620. int begin = i++;
  621. if (begin == levels.length) {
  622. break; // no more runs at this level
  623. }
  624. while (i < len && levels[i] >= highestLevel) {
  625. i++;
  626. }
  627. int end = i - 1;
  628. while (begin < end) {
  629. int temp = mapping[begin];
  630. mapping[begin] = mapping[end];
  631. mapping[end] = temp;
  632. ++begin;
  633. --end;
  634. }
  635. }
  636. --highestLevel;
  637. }
  638. return mapping;
  639. }
  640. /**
  641. * Reorder the objects in the array into visual order based on their levels.
  642. */
  643. static void reorderVisually(byte[] levels, Object[] objects) {
  644. int len = levels.length;
  645. byte lowestOddLevel = (byte)(NUMLEVELS + 1);
  646. byte highestLevel = 0;
  647. // initialize mapping and levels
  648. for (int i = 0; i < len; i++) {
  649. byte level = levels[i];
  650. if (level > highestLevel) {
  651. highestLevel = level;
  652. }
  653. if ((level & 0x01) != 0 && level < lowestOddLevel) {
  654. lowestOddLevel = level;
  655. }
  656. }
  657. while (highestLevel >= lowestOddLevel) {
  658. int i = 0;
  659. for (;;) {
  660. while (i < len && levels[i] < highestLevel) {
  661. i++;
  662. }
  663. int begin = i++;
  664. if (begin == levels.length) {
  665. break; // no more runs at this level
  666. }
  667. while (i < len && levels[i] >= highestLevel) {
  668. i++;
  669. }
  670. int end = i - 1;
  671. while (begin < end) {
  672. Object temp = objects[begin];
  673. objects[begin] = objects[end];
  674. objects[end] = temp;
  675. ++begin;
  676. --end;
  677. }
  678. }
  679. --highestLevel;
  680. }
  681. }
  682. /**
  683. * Return the inverse array, source array must map 1-1
  684. *
  685. * i.e. if values[i] = j, then inverse[j] = i.
  686. */
  687. static int[] getInverseOrder(int[] values) {
  688. if (values == null) {
  689. return null;
  690. }
  691. int[] result = new int[values.length];
  692. for (int i = 0; i < values.length; i++) {
  693. result[values[i]] = i;
  694. }
  695. return result;
  696. }
  697. /**
  698. * Compute a contiguous order for the range start, limit.
  699. */
  700. private static int[] computeContiguousOrder(int[] values, int start,
  701. int limit) {
  702. int[] result = new int[limit-start];
  703. for (int i=0; i < result.length; i++) {
  704. result[i] = i + start;
  705. }
  706. // now we'll sort result[], with the following comparison:
  707. // result[i] lessthan result[j] iff values[result[i]] < values[result[j]]
  708. // selection sort for now; use more elaborate sorts if desired
  709. for (int i=0; i < result.length-1; i++) {
  710. int minIndex = i;
  711. int currentValue = values[result[minIndex]];
  712. for (int j=i; j < result.length; j++) {
  713. if (values[result[j]] < currentValue) {
  714. minIndex = j;
  715. currentValue = values[result[minIndex]];
  716. }
  717. }
  718. int temp = result[i];
  719. result[i] = result[minIndex];
  720. result[minIndex] = temp;
  721. }
  722. // shift result by start:
  723. if (start != 0) {
  724. for (int i=0; i < result.length; i++) {
  725. result[i] -= start;
  726. }
  727. }
  728. // next, check for canonical order:
  729. int k;
  730. for (k=0; k < result.length; k++) {
  731. if (result[k] != k) {
  732. break;
  733. }
  734. }
  735. if (k == result.length) {
  736. return null;
  737. }
  738. // now return inverse of result:
  739. return getInverseOrder(result);
  740. }
  741. /**
  742. * Return an array containing contiguous values from 0 to length
  743. * having the same ordering as the source array. If this would be
  744. * a canonical ltr ordering, return null. values[] is NOT
  745. * required to be a permutation.
  746. */
  747. static int[] getContiguousOrder(int[] values) {
  748. if (values != null) {
  749. return computeContiguousOrder(values, 0, values.length);
  750. }
  751. return null;
  752. }
  753. /**
  754. * Return an array containing the values from start up to limit,
  755. * normalized to fall within the range from 0 up to limit - start.
  756. * If this would be a canonical ltr ordering, return null.
  757. * NOTE: This method assumes that values[] is a permutation
  758. * generated from levels[].
  759. */
  760. static int[] getNormalizedOrder(int[] values, byte[] levels,
  761. int start, int limit) {
  762. if (values != null) {
  763. if (start != 0 || limit != values.length) {
  764. // levels optimization
  765. boolean copyRange, canonical;
  766. byte primaryLevel;
  767. if (levels == null) {
  768. primaryLevel = (byte) 0x0;
  769. copyRange = true;
  770. canonical = true;
  771. }
  772. else {
  773. if (levels[start] == levels[limit-1]) {
  774. primaryLevel = levels[start];
  775. canonical = (primaryLevel & (byte)0x1) == 0;
  776. // scan for levels below primary
  777. int i;
  778. for (i=start; i < limit; i++) {
  779. if (levels[i] < primaryLevel) {
  780. break;
  781. }
  782. if (canonical) {
  783. canonical = levels[i] == primaryLevel;
  784. }
  785. }
  786. copyRange = (i == limit);
  787. }
  788. else {
  789. copyRange = false;
  790. // these don't matter; but the compiler cares:
  791. primaryLevel = (byte) 0x0;
  792. canonical = false;
  793. }
  794. }
  795. if (copyRange) {
  796. if (canonical) {
  797. return null;
  798. }
  799. int[] result = new int[limit-start];
  800. int baseValue;
  801. if ((primaryLevel & (byte)0x1) != 0) {
  802. baseValue = values[limit-1];
  803. } else {
  804. baseValue = values[start];
  805. }
  806. if (baseValue == 0) {
  807. System.arraycopy(values, start, result, 0, limit-start);
  808. }
  809. else {
  810. for (int j=0; j < result.length; j++) {
  811. result[j] = values[j+start] - baseValue;
  812. }
  813. }
  814. return result;
  815. }
  816. else {
  817. return computeContiguousOrder(values, start, limit);
  818. }
  819. }
  820. else {
  821. return values;
  822. }
  823. }
  824. return null;
  825. }
  826. // convenience method for compatibility with old tests
  827. static Bidi createBidi(char[] text) {
  828. return new Bidi(text);
  829. }
  830. // from Unicode Data 2.1.8
  831. // gets reset in static init
  832. private static byte[] dirIndices = {
  833. 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -124,
  834. 14, 18, 15, 16, 17, 18, 19, 20, 21, 22, 23, 14, 24, 25, 26, 27,
  835. 14, 28, 29, 30, -104, 14, 14, 2, 31, 32, 33, 34, 35, 36, 37, 38,
  836. 14, 39, 14, 40, 41, -106, 14, 8, 42, 43, 44, 45, 46, 47, 48, 49,
  837. -76, 14, -2, 2, -91, 2, 1, 50, -104, 14, -41, 2, 1, 51, -60, 2,
  838. 12, 52, 14, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61,
  839. };
  840. // gets reset in static init
  841. private static byte[] dirValues = {
  842. -119, 10, 5, 8, 7, 8, 7, 7, -114, 10, -125, 7, 4, 8, 9, 10,
  843. 10, -125, 4, -123, 10, 5, 4, 6, 4, 6, 3, -118, 2, 1, 6, -122,
  844. 10, -102, 0, -122, 10, -102, 0, -91, 10, 2, 9, 10, -124, 4, -124, 10,
  845. 1, 0, -123, 10, 6, 4, 4, 2, 2, 10, 0, -125, 10, 2, 2, 0,
  846. -123, 10, -105, 0, 1, 10, -97, 0, 1, 10, -2, 0, -2, 0, 2, 0,
  847. 0, -124, 10, -98, 0, -72, 10, -39, 0, -121, 10, -119, 0, 2, 10, 10,
  848. -121, 0, -98, 10, -123, 0, -101, 10, -58, 12, -102, 10, 2, 12, 12, -110,
  849. 10, 2, 0, 0, -124, 10, 1, 0, -117, 10, 2, 0, 10, -125, 0, 3,
  850. 10, 0, 10, -108, 0, 1, 10, -84, 0, 1, 10, -121, 0, -125, 10, 8,
  851. 0, 10, 0, 10, 0, 10, 0, 10, -110, 0, -115, 10, -116, 0, 1, 10,
  852. -62, 0, 1, 10, -116, 0, 1, 10, -91, 0, -124, 12, -119, 10, -75, 0,
  853. 8, 10, 10, 0, 0, 10, 10, 0, 0, -125, 10, -100, 0, 2, 10, 10,
  854. -120, 0, 4, 10, 10, 0, 0, -73, 10, -90, 0, 2, 10, 10, -121, 0,
  855. 1, 10, -89, 0, 2, 10, 0, -121, 10, -111, 12, 1, 10, -105, 12, 1,
  856. 10, -125, 12, 7, 1, 12, 1, 12, 12, 1, 12, -117, 10, -101, 1, -123,
  857. 10, -123, 1, -105, 10, 1, 6, -114, 10, 1, 11, -125, 10, 2, 11, 10,
  858. -102, 11, -123, 10, -117, 11, -120, 12, -115, 10, -118, 5, 7, 4, 5, 5,
  859. 11, 10, 10, 12, -57, 11, 2, 10, 10, -123, 11, 1, 10, -113, 11, 1,
  860. 10, -122, 11, -113, 12, 5, 11, 11, 12, 12, 10, -124, 12, 2, 10, 10,
  861. -118, 2, -2, 10, -119, 10, 4, 12, 12, 0, 10, -75, 0, 3, 10, 10,
  862. 12, -124, 0, -120, 12, -124, 0, 4, 12, 10, 10, 0, -124, 12, -125, 10,
  863. -118, 0, 2, 12, 12, -115, 0, -112, 10, 4, 12, 0, 0, 10, -120, 0,
  864. 6, 10, 10, 0, 0, 10, 10, -106, 0, 1, 10, -121, 0, 2, 10, 0,
  865. -125, 10, -124, 0, 4, 10, 10, 12, 10, -125, 0, -124, 12, 9, 10, 10,
  866. 0, 0, 10, 10, 0, 0, 12, -119, 10, 1, 0, -124, 10, 3, 0, 0,
  867. 10, -125, 0, 4, 12, 12, 10, 10, -116, 0, 2, 4, 4, -121, 0, -121,
  868. 10, 3, 12, 10, 10, -122, 0, -124, 10, 4, 0, 0, 10, 10, -106, 0,
  869. 1, 10, -121, 0, 13, 10, 0, 0, 10, 0, 0, 10, 0, 0, 10, 10,
  870. 12, 10, -125, 0, 2, 12, 12, -124, 10, 4, 12, 12, 10, 10, -125, 12,
  871. -117, 10, -124, 0, 2, 10, 0, -121, 10, -118, 0, 2, 12, 12, -125, 0,
  872. -116, 10, 4, 12, 12, 0, 10, -121, 0, 3, 10, 0, 10, -125, 0, 1,
  873. 10, -106, 0, 1, 10, -121, 0, 4, 10, 0, 0, 10, -123, 0, 3, 10,
  874. 10, 12, -124, 0, -123, 12, 11, 10, 12, 12, 0, 10, 0, 0, 12, 10,
  875. 10, 0, -113, 10, 1, 0, -123, 10, -118, 0, -111, 10, 4, 12, 0, 0,
  876. 10, -120, 0, 6, 10, 10, 0, 0, 10, 10, -106, 0, 1, 10, -121, 0,
  877. 5, 10, 0, 0, 10, 10, -124, 0, 7, 10, 10, 12, 0, 0, 12, 0,
  878. -125, 12, -125, 10, 7, 0, 0, 10, 10, 0, 0, 12, -120, 10, 2, 12,
  879. 0, -124, 10, 3, 0, 0, 10, -125, 0, -124, 10, -117, 0, -111, 10, 3,
  880. 12, 0, 10, -122, 0, -125, 10, -125, 0, 1, 10, -124, 0, -125, 10, 7,
  881. 0, 0, 10, 0, 10, 0, 0, -125, 10, 2, 0, 0, -125, 10, -125, 0,
  882. -125, 10, -120, 0, 1, 10, -125, 0, -124, 10, 5, 0, 0, 12, 0, 0,
  883. -125, 10, -125, 0, 1, 10, -125, 0, 1, 12, -119, 10, 1, 0, -113, 10,
  884. -116, 0, -114, 10, -125, 0, 1, 10, -120, 0, 1, 10, -125, 0, 1, 10,
  885. -105, 0, 1, 10, -118, 0, 1, 10, -123, 0, -124, 10, -125, 12, -124, 0,
  886. 1, 10, -125, 12, 1, 10, -124, 12, -121, 10, 2, 12, 12, -119, 10, 2,
  887. 0, 0, -124, 10, -118, 0, -110, 10, 3, 0, 0, 10, -120, 0, 1, 10,
  888. -125, 0, 1, 10, -105, 0, 1, 10, -118, 0, 1, 10, -123, 0, -124, 10,
  889. 2, 0, 12, -123, 0, 9, 10, 12, 0, 0, 10, 0, 0, 12, 12, -121,
  890. 10, 2, 0, 0, -121, 10, 4, 0, 10, 0, 0, -124, 10, -118, 0, -110,
  891. 10, 3, 0, 0, 10, -120, 0, 1, 10, -125, 0, 1, 10, -105, 0, 1,
  892. 10, -112, 0, -124, 10, -125, 0, -125, 12, 2, 10, 10, -125, 0, 1, 10,
  893. -125, 0, 1, 12, -119, 10, 1, 0, -120, 10, 2, 0, 0, -124, 10, -118,
  894. 0, -111, 10, -80, 0, 3, 12, 0, 0, -121, 12, -124, 10, 1, 4, -121,
  895. 0, -120, 12, -115, 0, -91, 10, 13, 0, 0, 10, 0, 10, 10, 0, 0,
  896. 10, 0, 10, 10, 0, -122, 10, -124, 0, 1, 10, -121, 0, 1, 10, -125,
  897. 0, 9, 10, 0, 10, 0, 10, 10, 0, 0, 10, -124, 0, 3, 12, 0,
  898. 0, -122, 12, 6, 10, 12, 12, 0, 10, 10, -123, 0, 3, 10, 0, 10,
  899. -122, 12, 2, 10, 10, -118, 0, 4, 10, 10, 0, 0, -94, 10, -104, 0,
  900. 2, 12, 12, -101, 0, 5, 12, 0, 12, 0, 12, -124, 10, 2, 12, 12,
  901. -120, 0, 1, 10, -95, 0, -121, 10, -114, 12, 1, 0, -123, 12, 3, 0,
  902. 12, 12, -124, 0, -124, 10, -122, 12, 3, 10, 12, 10, -107, 12, -125, 10,
  903. -121, 12, 2, 10, 12, -26, 10, -90, 0, -118, 10, -89, 0, -124, 10, 1,
  904. 0, -124, 10, -38, 0, -123, 10, -60, 0, -123, 10, -46, 0, -122, 10, -100,
  905. 0, -124, 10, -38, 0, -122, 10, -106, 0, 2, 10, 10, -122, 0, 2, 10,
  906. 10, -90, 0, 2, 10, 10, -122, 0, 2, 10, 10, -120, 0, 7, 10, 0,
  907. 10, 0, 10, 0, 10, -97, 0, 2, 10, 10, -75, 0, 1, 10, -121, 0,
  908. 2, 10, 0, -125, 10, -125, 0, 1, 10, -121, 0, -125, 10, -124, 0, 2,
  909. 10, 10, -122, 0, -124, 10, -115, 0, -123, 10, -125, 0, 1, 10, -121, 0,
  910. -125, 10, -121, 9, 1, 6, -124, 9, 4, 10, 10, 0, 1, -104, 10, 8,
  911. 7, 7, 13, 13, 10, 13, 13, 10, -123, 4, -69, 10, 1, 2, -125, 10,
  912. -122, 2, 2, 4, 4, -125, 10, 1, 0, -118, 2, 2, 4, 4, -108, 10,
  913. -115, 4, -93, 10, -110, 12, -96, 10, 1, 0, -124, 10, 3, 0, 10, 10,
  914. -118, 0, 4, 10, 0, 10, 10, -122, 0, -122, 10, 6, 0, 10, 0, 10,
  915. 0, 10, -120, 0, 1, 10, -122, 0, -89, 10, -93, 0, -2, 10, -111, 10,
  916. 2, 4, 4, -2, 10, -92, 10, -59, 0, -27, 10, -68, 2, -50, 0, 1,
  917. 2, -107, 10, 1, 9, -123, 10, 2, 0, 0, -103, 10, -119, 0, -122, 12,
  918. -111, 10, -44, 0, -124, 10, 8, 12, 12, 10, 10, 0, 0, 10, 10, -34,
  919. 0, -122, 10, -88, 0, -124, 10, -34, 0, 1, 10, -112, 0, -32, 10, -99,
  920. 0, -125, 10, -92, 0, -100, 10, -100, 0, -125, 10, -78, 0, -113, 10, -116,
  921. 0, -124, 10, -81, 0, 1, 10, -9, 0, -124, 10, -29, 0, 2, 10, 10,
  922. -97, 0, 1, 10, -90, 0, -38, 10, -92, 0, -36, 10, -82, 0, -46, 10,
  923. -121, 0, -116, 10, -123, 0, -122, 10, 1, 12, -118, 1, 1, 4, -115, 1,
  924. 1, 10, -123, 1, 9, 10, 1, 10, 1, 1, 10, 1, 1, 10, -20, 1,
  925. -95, 10, -2, 1, -19, 1, -110, 10, -64, 1, 2, 10, 10, -74, 1, -88,
  926. 10, -116, 1, -92, 10, -124, 12, -84, 10, 6, 6, 10, 6, 10, 10, 6,
  927. -119, 10, 5, 4, 10, 10, 4, 4, -123, 10, 2, 4, 4, -123, 10, -125,
  928. 1, 3, 10, 1, 10, -2, 1, -119, 1, -122, 10, -125, 4, -123, 10, 5,
  929. 4, 6, 4, 6, 3, -118, 2, 1, 6, -122, 10, -102, 0, -122, 10, -102,
  930. 0, -118, 10, -71, 0, 2, 10, 10, -97, 0, -125, 10, -122, 0, 2, 10,
  931. 10, -122, 0, 2, 10, 10, -122, 0, 2, 10, 10, -125, 0, -125, 10, 2,
  932. 4, 4, -125, 10, 2, 4, 4, -103, 10,
  933. };
  934. static {
  935. dirIndices = RLEUtilities.readRLE(dirIndices);
  936. dirValues = RLEUtilities.readRLE(dirValues);
  937. }
  938. }