1. /*
  2. * @(#)NumericShaper.java 1.10 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.awt.font;
  8. /**
  9. * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
  10. * digits to other Unicode decimal digits. Users of this class will
  11. * primarily be people who wish to present data using
  12. * national digit shapes, but find it more convenient to represent the
  13. * data internally using Latin-1 (European) digits. This does not
  14. * interpret the deprecated numeric shape selector character (U+206E).
  15. * <p>
  16. * Instances of <code>NumericShaper</code> are typically applied
  17. * as attributes to text with the
  18. * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
  19. * of the <code>TextAttribute</code> class.
  20. * For example, this code snippet causes a <code>TextLayout</code> to
  21. * shape European digits to Arabic in an Arabic context:<br>
  22. * <blockquote><pre>
  23. * Map map = new HashMap();
  24. * map.put(TextAttribute.NUMERIC_SHAPING,
  25. * NumericShaper.getContextualShaper(NumericShaper.ARABIC));
  26. * FontRenderContext frc = ...;
  27. * TextLayout layout = new TextLayout(text, map, frc);
  28. * layout.draw(g2d, x, y);
  29. * </pre></blockquote>
  30. * <br>
  31. * It is also possible to perform numeric shaping explicitly using instances
  32. * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
  33. * <blockquote><pre>
  34. * char[] text = ...;
  35. * // shape all EUROPEAN digits to ARABIC digits
  36. * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
  37. * shaper.shape(text, start, count);
  38. *
  39. * // shape European digits to ARABIC digits if preceding text is Arabic, or
  40. * // shape European digits to TAMIL digits if preceding text is Tamil, or
  41. * // leave European digits alone if there is no preceding text, or
  42. * // preceding text is neither Arabic nor Tamil
  43. * NumericShaper shaper =
  44. * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
  45. * NumericShaper.TAMIL,
  46. * NumericShaper.EUROPEAN);
  47. * shaper.shape(text, start, count);
  48. * </pre></blockquote>
  49. *
  50. * @since 1.4
  51. */
  52. public final class NumericShaper implements java.io.Serializable {
  53. /** index of context for contextual shaping - values range from 0 to 18 */
  54. private int key;
  55. /** flag indicating whether to shape contextually (high bit) and which
  56. * digit ranges to shape (bits 0-18)
  57. */
  58. private int mask;
  59. /** Identifies the Latin-1 (European) and extended range, and
  60. * Latin-1 (European) decimal base.
  61. */
  62. public static final int EUROPEAN = 1<<0;
  63. /** Identifies the ARABIC range and decimal base. */
  64. public static final int ARABIC = 1<<1;
  65. /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
  66. public static final int EASTERN_ARABIC = 1<<2;
  67. /** Identifies the DEVANAGARI range and decimal base. */
  68. public static final int DEVANAGARI = 1<<3;
  69. /** Identifies the BENGALI range and decimal base. */
  70. public static final int BENGALI = 1<<4;
  71. /** Identifies the GURMUKHI range and decimal base. */
  72. public static final int GURMUKHI = 1<<5;
  73. /** Identifies the GUJARATI range and decimal base. */
  74. public static final int GUJARATI = 1<<6;
  75. /** Identifies the ORIYA range and decimal base. */
  76. public static final int ORIYA = 1<<7;
  77. /** Identifies the TAMIL range and decimal base. Tamil does not have a
  78. * decimal digit 0 so Latin-1 (European) 0 is used.
  79. */
  80. public static final int TAMIL = 1<<8;
  81. /** Identifies the TELUGU range and decimal base. */
  82. public static final int TELUGU = 1<<9;
  83. /** Identifies the KANNADA range and decimal base. */
  84. public static final int KANNADA = 1<<10;
  85. /** Identifies the MALAYALAM range and decimal base. */
  86. public static final int MALAYALAM = 1<<11;
  87. /** Identifies the THAI range and decimal base. */
  88. public static final int THAI = 1<<12;
  89. /** Identifies the LAO range and decimal base. */
  90. public static final int LAO = 1<<13;
  91. /** Identifies the TIBETAN range and decimal base. */
  92. public static final int TIBETAN = 1<<14;
  93. /** Identifies the MYANMAR range and decimal base. */
  94. public static final int MYANMAR = 1<<15;
  95. /** Identifies the ETHIOPIC range and decimal base. */
  96. public static final int ETHIOPIC = 1<<16;
  97. /** Identifies the KHMER range and decimal base. */
  98. public static final int KHMER = 1<<17;
  99. /** Identifies the MONGOLIAN range and decimal base. */
  100. public static final int MONGOLIAN = 1<<18;
  101. /** Identifies all ranges, for full contextual shaping. */
  102. public static final int ALL_RANGES = 0x0007ffff;
  103. private static final int EUROPEAN_KEY = 0;
  104. private static final int ARABIC_KEY = 1;
  105. private static final int EASTERN_ARABIC_KEY = 2;
  106. private static final int DEVANAGARI_KEY = 3;
  107. private static final int BENGALI_KEY = 4;
  108. private static final int GURMUKHI_KEY = 5;
  109. private static final int GUJARATI_KEY = 6;
  110. private static final int ORIYA_KEY = 7;
  111. private static final int TAMIL_KEY = 8;
  112. private static final int TELUGU_KEY = 9;
  113. private static final int KANNADA_KEY = 10;
  114. private static final int MALAYALAM_KEY = 11;
  115. private static final int THAI_KEY = 12;
  116. private static final int LAO_KEY = 13;
  117. private static final int TIBETAN_KEY = 14;
  118. private static final int MYANMAR_KEY = 15;
  119. private static final int ETHIOPIC_KEY = 16;
  120. private static final int KHMER_KEY = 17;
  121. private static final int MONGOLIAN_KEY = 18;
  122. private static final int NUM_KEYS = 19;
  123. private static final String[] keyNames = {
  124. "EUROPEAN",
  125. "ARABIC",
  126. "EASTERN_ARABIC",
  127. "DEVANAGARI",
  128. "BENGALI",
  129. "GURMUKHI",
  130. "GUJARATI",
  131. "ORIYA",
  132. "TAMIL",
  133. "TELUGU",
  134. "KANNADA",
  135. "MALAYALAM",
  136. "THAI",
  137. "LAO",
  138. "TIBETAN",
  139. "MYANMAR",
  140. "ETHIOPIC",
  141. "KHMER",
  142. "MONGOLIAN"
  143. };
  144. private static final int CONTEXTUAL_MASK = 1<<31;
  145. private static final char[] bases = {
  146. '\u0030' - '\u0030', // EUROPEAN
  147. '\u0660' - '\u0030', // ARABIC
  148. '\u06f0' - '\u0030', // EASTERN_ARABIC
  149. '\u0966' - '\u0030', // DEVANAGARI
  150. '\u09e6' - '\u0030', // BENGALI
  151. '\u0a66' - '\u0030', // GURMUKHI
  152. '\u0ae6' - '\u0030', // GUJARATI
  153. '\u0b66' - '\u0030', // ORIYA
  154. '\u0be7' - '\u0030', // TAMIL - note missing zero
  155. '\u0c66' - '\u0030', // TELUGU
  156. '\u0ce6' - '\u0030', // KANNADA
  157. '\u0d66' - '\u0030', // MALAYALAM
  158. '\u0e50' - '\u0030', // THAI
  159. '\u0ed0' - '\u0030', // LAO
  160. '\u0f20' - '\u0030', // TIBETAN
  161. '\u1040' - '\u0030', // MYANMAR
  162. '\u1369' - '\u0030', // ETHIOPIC
  163. '\u17e0' - '\u0030', // KHMER
  164. '\u1810' - '\u0030', // MONGOLIAN
  165. };
  166. // some ranges adjoin or overlap, rethink if we want to do a binary search on this
  167. private static final char[] contexts = {
  168. '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
  169. '\u0600', '\u0700', // ARABIC
  170. '\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
  171. '\u0900', '\u0980', // DEVANAGARI
  172. '\u0980', '\u0a00', // BENGALI
  173. '\u0a00', '\u0a80', // GURMUKHI
  174. '\u0a80', '\u0b00', // GUJARATI
  175. '\u0b00', '\u0b80', // ORIYA
  176. '\u0b80', '\u0c00', // TAMIL - note missing zero
  177. '\u0c00', '\u0c80', // TELUGU
  178. '\u0c80', '\u0d00', // KANNADA
  179. '\u0d00', '\u0d80', // MALAYALAM
  180. '\u0e00', '\u0e80', // THAI
  181. '\u0e80', '\u0f00', // LAO
  182. '\u0f00', '\u1000', // TIBETAN
  183. '\u1000', '\u1080', // MYANMAR
  184. '\u1200', '\u1380', // ETHIOPIC
  185. '\u1780', '\u1800', // KHMER
  186. '\u1800', '\u1900', // MONGOLIAN
  187. '\uffff',
  188. };
  189. // assume most characters are near each other so probing the cache is infrequent,
  190. // and a linear probe is ok.
  191. private static int ctCache = 0;
  192. private static int ctCacheLimit = contexts.length - 2;
  193. // warning, synchronize access to this as it modifies state
  194. private static int getContextKey(char c) {
  195. if (c < contexts[ctCache]) {
  196. while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
  197. } else if (c >= contexts[ctCache + 1]) {
  198. while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
  199. }
  200. // if we're not in a known range, then return EUROPEAN as the range key
  201. return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
  202. }
  203. /*
  204. * A range table of strong directional characters (types L, R, AL).
  205. * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
  206. * characters, odd (right) indexes are starts of ranges of strong directional
  207. * characters.
  208. */
  209. private static char[] strongTable = {
  210. '\u0000', '\u0041',
  211. '\u005b', '\u0061',
  212. '\u007b', '\u00aa',
  213. '\u00ab', '\u00b5',
  214. '\u00b6', '\u00ba',
  215. '\u00bb', '\u00c0',
  216. '\u00d7', '\u00d8',
  217. '\u00f7', '\u00f8',
  218. '\u0220', '\u0222',
  219. '\u0234', '\u0250',
  220. '\u02ae', '\u02b0',
  221. '\u02b9', '\u02bb',
  222. '\u02c2', '\u02d0',
  223. '\u02d2', '\u02e0',
  224. '\u02e5', '\u02ee',
  225. '\u02ef', '\u037a',
  226. '\u037b', '\u0386',
  227. '\u0387', '\u0388',
  228. '\u038b', '\u038c',
  229. '\u038d', '\u038e',
  230. '\u03a2', '\u03a3',
  231. '\u03cf', '\u03d0',
  232. '\u03d8', '\u03da',
  233. '\u03f4', '\u0400',
  234. '\u0483', '\u048c',
  235. '\u04c5', '\u04c7',
  236. '\u04c9', '\u04cb',
  237. '\u04cd', '\u04d0',
  238. '\u04f6', '\u04f8',
  239. '\u04fa', '\u0531',
  240. '\u0557', '\u0559',
  241. '\u0560', '\u0561',
  242. '\u0588', '\u0589',
  243. '\u058a', '\u05be',
  244. '\u05bf', '\u05c0',
  245. '\u05c1', '\u05c3',
  246. '\u05c4', '\u05d0',
  247. '\u05eb', '\u05f0',
  248. '\u05f5', '\u061b',
  249. '\u061c', '\u061f',
  250. '\u0620', '\u0621',
  251. '\u063b', '\u0640',
  252. '\u064b', '\u066d',
  253. '\u066e', '\u0671',
  254. '\u06d6', '\u06e5',
  255. '\u06e7', '\u06fa',
  256. '\u06ff', '\u0700',
  257. '\u070e', '\u0710',
  258. '\u0711', '\u0712',
  259. '\u072d', '\u0780',
  260. '\u07a6', '\u0903',
  261. '\u0904', '\u0905',
  262. '\u093a', '\u093d',
  263. '\u0941', '\u0949',
  264. '\u094d', '\u0950',
  265. '\u0951', '\u0958',
  266. '\u0962', '\u0964',
  267. '\u0971', '\u0982',
  268. '\u0984', '\u0985',
  269. '\u098d', '\u098f',
  270. '\u0991', '\u0993',
  271. '\u09a9', '\u09aa',
  272. '\u09b1', '\u09b2',
  273. '\u09b3', '\u09b6',
  274. '\u09ba', '\u09be',
  275. '\u09c1', '\u09c7',
  276. '\u09c9', '\u09cb',
  277. '\u09cd', '\u09d7',
  278. '\u09d8', '\u09dc',
  279. '\u09de', '\u09df',
  280. '\u09e2', '\u09e6',
  281. '\u09f2', '\u09f4',
  282. '\u09fb', '\u0a05',
  283. '\u0a0b', '\u0a0f',
  284. '\u0a11', '\u0a13',
  285. '\u0a29', '\u0a2a',
  286. '\u0a31', '\u0a32',
  287. '\u0a34', '\u0a35',
  288. '\u0a37', '\u0a38',
  289. '\u0a3a', '\u0a3e',
  290. '\u0a41', '\u0a59',
  291. '\u0a5d', '\u0a5e',
  292. '\u0a5f', '\u0a66',
  293. '\u0a70', '\u0a72',
  294. '\u0a75', '\u0a83',
  295. '\u0a84', '\u0a85',
  296. '\u0a8c', '\u0a8d',
  297. '\u0a8e', '\u0a8f',
  298. '\u0a92', '\u0a93',
  299. '\u0aa9', '\u0aaa',
  300. '\u0ab1', '\u0ab2',
  301. '\u0ab4', '\u0ab5',
  302. '\u0aba', '\u0abd',
  303. '\u0ac1', '\u0ac9',
  304. '\u0aca', '\u0acb',
  305. '\u0acd', '\u0ad0',
  306. '\u0ad1', '\u0ae0',
  307. '\u0ae1', '\u0ae6',
  308. '\u0af0', '\u0b02',
  309. '\u0b04', '\u0b05',
  310. '\u0b0d', '\u0b0f',
  311. '\u0b11', '\u0b13',
  312. '\u0b29', '\u0b2a',
  313. '\u0b31', '\u0b32',
  314. '\u0b34', '\u0b36',
  315. '\u0b3a', '\u0b3d',
  316. '\u0b3f', '\u0b40',
  317. '\u0b41', '\u0b47',
  318. '\u0b49', '\u0b4b',
  319. '\u0b4d', '\u0b57',
  320. '\u0b58', '\u0b5c',
  321. '\u0b5e', '\u0b5f',
  322. '\u0b62', '\u0b66',
  323. '\u0b71', '\u0b83',
  324. '\u0b84', '\u0b85',
  325. '\u0b8b', '\u0b8e',
  326. '\u0b91', '\u0b92',
  327. '\u0b96', '\u0b99',
  328. '\u0b9b', '\u0b9c',
  329. '\u0b9d', '\u0b9e',
  330. '\u0ba0', '\u0ba3',
  331. '\u0ba5', '\u0ba8',
  332. '\u0bab', '\u0bae',
  333. '\u0bb6', '\u0bb7',
  334. '\u0bba', '\u0bbe',
  335. '\u0bc0', '\u0bc1',
  336. '\u0bc3', '\u0bc6',
  337. '\u0bc9', '\u0bca',
  338. '\u0bcd', '\u0bd7',
  339. '\u0bd8', '\u0be7',
  340. '\u0bf3', '\u0c01',
  341. '\u0c04', '\u0c05',
  342. '\u0c0d', '\u0c0e',
  343. '\u0c11', '\u0c12',
  344. '\u0c29', '\u0c2a',
  345. '\u0c34', '\u0c35',
  346. '\u0c3a', '\u0c41',
  347. '\u0c45', '\u0c60',
  348. '\u0c62', '\u0c66',
  349. '\u0c70', '\u0c82',
  350. '\u0c84', '\u0c85',
  351. '\u0c8d', '\u0c8e',
  352. '\u0c91', '\u0c92',
  353. '\u0ca9', '\u0caa',
  354. '\u0cb4', '\u0cb5',
  355. '\u0cba', '\u0cbe',
  356. '\u0cbf', '\u0cc0',
  357. '\u0cc5', '\u0cc7',
  358. '\u0cc9', '\u0cca',
  359. '\u0ccc', '\u0cd5',
  360. '\u0cd7', '\u0cde',
  361. '\u0cdf', '\u0ce0',
  362. '\u0ce2', '\u0ce6',
  363. '\u0cf0', '\u0d02',
  364. '\u0d04', '\u0d05',
  365. '\u0d0d', '\u0d0e',
  366. '\u0d11', '\u0d12',
  367. '\u0d29', '\u0d2a',
  368. '\u0d3a', '\u0d3e',
  369. '\u0d41', '\u0d46',
  370. '\u0d49', '\u0d4a',
  371. '\u0d4d', '\u0d57',
  372. '\u0d58', '\u0d60',
  373. '\u0d62', '\u0d66',
  374. '\u0d70', '\u0d82',
  375. '\u0d84', '\u0d85',
  376. '\u0d97', '\u0d9a',
  377. '\u0db2', '\u0db3',
  378. '\u0dbc', '\u0dbd',
  379. '\u0dbe', '\u0dc0',
  380. '\u0dc7', '\u0dcf',
  381. '\u0dd2', '\u0dd8',
  382. '\u0de0', '\u0df2',
  383. '\u0df5', '\u0e01',
  384. '\u0e31', '\u0e32',
  385. '\u0e34', '\u0e40',
  386. '\u0e47', '\u0e4f',
  387. '\u0e5c', '\u0e81',
  388. '\u0e83', '\u0e84',
  389. '\u0e85', '\u0e87',
  390. '\u0e89', '\u0e8a',
  391. '\u0e8b', '\u0e8d',
  392. '\u0e8e', '\u0e94',
  393. '\u0e98', '\u0e99',
  394. '\u0ea0', '\u0ea1',
  395. '\u0ea4', '\u0ea5',
  396. '\u0ea6', '\u0ea7',
  397. '\u0ea8', '\u0eaa',
  398. '\u0eac', '\u0ead',
  399. '\u0eb1', '\u0eb2',
  400. '\u0eb4', '\u0ebd',
  401. '\u0ebe', '\u0ec0',
  402. '\u0ec5', '\u0ec6',
  403. '\u0ec7', '\u0ed0',
  404. '\u0eda', '\u0edc',
  405. '\u0ede', '\u0f00',
  406. '\u0f18', '\u0f1a',
  407. '\u0f35', '\u0f36',
  408. '\u0f37', '\u0f38',
  409. '\u0f39', '\u0f3e',
  410. '\u0f48', '\u0f49',
  411. '\u0f6b', '\u0f7f',
  412. '\u0f80', '\u0f85',
  413. '\u0f86', '\u0f88',
  414. '\u0f8c', '\u0fbe',
  415. '\u0fc6', '\u0fc7',
  416. '\u0fcd', '\u0fcf',
  417. '\u0fd0', '\u1000',
  418. '\u1022', '\u1023',
  419. '\u1028', '\u1029',
  420. '\u102b', '\u102c',
  421. '\u102d', '\u1031',
  422. '\u1032', '\u1038',
  423. '\u1039', '\u1040',
  424. '\u1058', '\u10a0',
  425. '\u10c6', '\u10d0',
  426. '\u10f7', '\u10fb',
  427. '\u10fc', '\u1100',
  428. '\u115a', '\u115f',
  429. '\u11a3', '\u11a8',
  430. '\u11fa', '\u1200',
  431. '\u1207', '\u1208',
  432. '\u1247', '\u1248',
  433. '\u1249', '\u124a',
  434. '\u124e', '\u1250',
  435. '\u1257', '\u1258',
  436. '\u1259', '\u125a',
  437. '\u125e', '\u1260',
  438. '\u1287', '\u1288',
  439. '\u1289', '\u128a',
  440. '\u128e', '\u1290',
  441. '\u12af', '\u12b0',
  442. '\u12b1', '\u12b2',
  443. '\u12b6', '\u12b8',
  444. '\u12bf', '\u12c0',
  445. '\u12c1', '\u12c2',
  446. '\u12c6', '\u12c8',
  447. '\u12cf', '\u12d0',
  448. '\u12d7', '\u12d8',
  449. '\u12ef', '\u12f0',
  450. '\u130f', '\u1310',
  451. '\u1311', '\u1312',
  452. '\u1316', '\u1318',
  453. '\u131f', '\u1320',
  454. '\u1347', '\u1348',
  455. '\u135b', '\u1361',
  456. '\u137d', '\u13a0',
  457. '\u13f5', '\u1401',
  458. '\u1677', '\u1681',
  459. '\u169b', '\u16a0',
  460. '\u16f1', '\u1780',
  461. '\u17b7', '\u17be',
  462. '\u17c6', '\u17c7',
  463. '\u17c9', '\u17d4',
  464. '\u17db', '\u17dc',
  465. '\u17dd', '\u17e0',
  466. '\u17ea', '\u1810',
  467. '\u181a', '\u1820',
  468. '\u1878', '\u1880',
  469. '\u18a9', '\u1e00',
  470. '\u1e9c', '\u1ea0',
  471. '\u1efa', '\u1f00',
  472. '\u1f16', '\u1f18',
  473. '\u1f1e', '\u1f20',
  474. '\u1f46', '\u1f48',
  475. '\u1f4e', '\u1f50',
  476. '\u1f58', '\u1f59',
  477. '\u1f5a', '\u1f5b',
  478. '\u1f5c', '\u1f5d',
  479. '\u1f5e', '\u1f5f',
  480. '\u1f7e', '\u1f80',
  481. '\u1fb5', '\u1fb6',
  482. '\u1fbd', '\u1fbe',
  483. '\u1fbf', '\u1fc2',
  484. '\u1fc5', '\u1fc6',
  485. '\u1fcd', '\u1fd0',
  486. '\u1fd4', '\u1fd6',
  487. '\u1fdc', '\u1fe0',
  488. '\u1fed', '\u1ff2',
  489. '\u1ff5', '\u1ff6',
  490. '\u1ffd', '\u200e',
  491. '\u2010', '\u207f',
  492. '\u2080', '\u2102',
  493. '\u2103', '\u2107',
  494. '\u2108', '\u210a',
  495. '\u2114', '\u2115',
  496. '\u2116', '\u2119',
  497. '\u211e', '\u2124',
  498. '\u2125', '\u2126',
  499. '\u2127', '\u2128',
  500. '\u2129', '\u212a',
  501. '\u212e', '\u212f',
  502. '\u2132', '\u2133',
  503. '\u213a', '\u2160',
  504. '\u2184', '\u2336',
  505. '\u237b', '\u2395',
  506. '\u2396', '\u249c',
  507. '\u24ea', '\u3005',
  508. '\u3008', '\u3021',
  509. '\u302a', '\u3031',
  510. '\u3036', '\u3038',
  511. '\u303b', '\u3041',
  512. '\u3095', '\u309d',
  513. '\u309f', '\u30a1',
  514. '\u30fb', '\u30fc',
  515. '\u30ff', '\u3105',
  516. '\u312d', '\u3131',
  517. '\u318f', '\u3190',
  518. '\u31b8', '\u3200',
  519. '\u321d', '\u3220',
  520. '\u3244', '\u3260',
  521. '\u327c', '\u327f',
  522. '\u32b1', '\u32c0',
  523. '\u32cc', '\u32d0',
  524. '\u32ff', '\u3300',
  525. '\u3377', '\u337b',
  526. '\u33de', '\u33e0',
  527. '\u33ff', '\u3400',
  528. '\u4db6', '\u4e00',
  529. '\u9fa6', '\ua000',
  530. '\ua48d', '\uac00',
  531. '\ud7a4', '\uf900',
  532. '\ufa2e', '\ufb00',
  533. '\ufb07', '\ufb13',
  534. '\ufb18', '\ufb1d',
  535. '\ufb1e', '\ufb1f',
  536. '\ufb29', '\ufb2a',
  537. '\ufb37', '\ufb38',
  538. '\ufb3d', '\ufb3e',
  539. '\ufb3f', '\ufb40',
  540. '\ufb42', '\ufb43',
  541. '\ufb45', '\ufb46',
  542. '\ufbb2', '\ufbd3',
  543. '\ufd3e', '\ufd50',
  544. '\ufd90', '\ufd92',
  545. '\ufdc8', '\ufdf0',
  546. '\ufdfc', '\ufe70',
  547. '\ufe73', '\ufe74',
  548. '\ufe75', '\ufe76',
  549. '\ufefd', '\uff21',
  550. '\uff3b', '\uff41',
  551. '\uff5b', '\uff66',
  552. '\uffbf', '\uffc2',
  553. '\uffc8', '\uffca',
  554. '\uffd0', '\uffd2',
  555. '\uffd8', '\uffda',
  556. '\uffdd', '\uffff' // last entry is sentinel, actually never checked
  557. };
  558. // use a binary search with a cache
  559. private static int stCache = 0;
  560. // warning, synchronize access to this as it modifies state
  561. private static boolean isStrongDirectional(char c) {
  562. if (c < strongTable[stCache]) {
  563. stCache = search(c, strongTable, 0, stCache);
  564. } else if (c >= strongTable[stCache + 1]) {
  565. stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1);
  566. }
  567. return (stCache & 0x1) == 1;
  568. }
  569. static private int getKeyFromMask(int mask) {
  570. int key = 0;
  571. while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
  572. ++key;
  573. }
  574. if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
  575. throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
  576. }
  577. return key;
  578. }
  579. /**
  580. * Returns a shaper for the provided unicode range. All
  581. * Latin-1 (EUROPEAN) digits are converted
  582. * to the corresponding decimal unicode digits.
  583. * @param singleRange the specified Unicode range
  584. * @return a non-contextual numeric shaper
  585. * @throws IllegalArgumentException if the range is not a single range
  586. */
  587. static public NumericShaper getShaper(int singleRange) {
  588. int key = getKeyFromMask(singleRange);
  589. return new NumericShaper(key, singleRange);
  590. }
  591. /**
  592. * Returns a contextual shaper for the provided unicode range(s).
  593. * Latin-1 (EUROPEAN) digits are converted to the decimal digits
  594. * corresponding to the range of the preceeding text, if the
  595. * range is one of the provided ranges. Multiple ranges are
  596. * represented by or-ing the values together, such as,
  597. * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
  598. * shaper assumes EUROPEAN as the starting context, that is, if
  599. * EUROPEAN digits are encountered before any strong directional
  600. * text in the string, the context is presumed to be EUROPEAN, and
  601. * so the digits will not shape.
  602. * @param ranges the specified Unicode ranges
  603. * @return a shaper for the specified ranges
  604. */
  605. static public NumericShaper getContextualShaper(int ranges) {
  606. ranges |= CONTEXTUAL_MASK;
  607. return new NumericShaper(EUROPEAN_KEY, ranges);
  608. }
  609. /**
  610. * Returns a contextual shaper for the provided unicode range(s).
  611. * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
  612. * corresponding to the range of the preceeding text, if the
  613. * range is one of the provided ranges. Multiple ranges are
  614. * represented by or-ing the values together, for example,
  615. * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
  616. * shaper uses defaultContext as the starting context.
  617. * @param ranges the specified Unicode ranges
  618. * @param defaultContext the starting context, such as
  619. * <code>NumericShaper.EUROPEAN</code>
  620. * @return a shaper for the specified Unicode ranges.
  621. */
  622. static public NumericShaper getContextualShaper(int ranges, int defaultContext) {
  623. int key = getKeyFromMask(defaultContext);
  624. ranges |= CONTEXTUAL_MASK;
  625. return new NumericShaper(key, ranges);
  626. }
  627. /**
  628. * Private constructor.
  629. */
  630. private NumericShaper(int key, int mask) {
  631. this.key = key;
  632. this.mask = mask;
  633. }
  634. /**
  635. * Converts the digits in the text that occur between start and
  636. * start + count.
  637. * @param text an array of characters to convert
  638. * @param start the index into <code>text</code> to start
  639. * converting
  640. * @param count the number of characters in <code>text</code>
  641. * to convert
  642. */
  643. public void shape(char[] text, int start, int count) {
  644. if (isContextual()) {
  645. shapeContextually(text, start, count, key);
  646. } else {
  647. shapeNonContextually(text, start, count);
  648. }
  649. }
  650. /**
  651. * Converts the digits in the text that occur between start and
  652. * start + count, using the provided context.
  653. * Context is ignored if the shaper is not a contextual shaper.
  654. * @param text an array of characters
  655. * @param start the index into <code>text</code> to start
  656. * converting
  657. * @param count the number of characters in <code>text</code>
  658. * to convert
  659. * @param context the context to which to convert the
  660. * characters, such as <code>NumericShaper.EUROPEAN</code>
  661. */
  662. public void shape(char[] text, int start, int count, int context) {
  663. if (isContextual()) {
  664. int ctxKey = getKeyFromMask(context);
  665. shapeContextually(text, start, count, ctxKey);
  666. } else {
  667. shapeNonContextually(text, start, count);
  668. }
  669. }
  670. /**
  671. * Returns a <code>boolean</code> indicating whether or not
  672. * this shaper shapes contextually.
  673. * @return <code>true</code> if this shaper is contextual;
  674. * <code>false</code> otherwise.
  675. */
  676. public boolean isContextual() {
  677. return (mask & CONTEXTUAL_MASK) != 0;
  678. }
  679. /**
  680. * Returns an <code>int</code> that ORs together the values for
  681. * all the ranges that will be shaped.
  682. * <p>
  683. * For example, to check if a shaper shapes to Arabic, you would use the
  684. * following:
  685. * <blockquote>
  686. * <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
  687. * </blockquote>
  688. * @return the values for all the ranges to be shaped.
  689. */
  690. public int getRanges() {
  691. return mask & ~CONTEXTUAL_MASK;
  692. }
  693. /**
  694. * Perform non-contextual shaping.
  695. */
  696. private void shapeNonContextually(char[] text, int start, int count) {
  697. int base = bases[key];
  698. char minDigit = key == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
  699. for (int i = start, e = start + count; i < e; ++i) {
  700. char c = text[i];
  701. if (c >= minDigit && c <= '\u0039') {
  702. text[i] = (char)(c + base);
  703. }
  704. }
  705. }
  706. /**
  707. * Perform contextual shaping.
  708. * Synchronized to protect caches used in getContextKey and isStrongDirectional.
  709. */
  710. private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
  711. // if we don't support this context, then don't shape
  712. if ((mask & (1<<ctxKey)) == 0) {
  713. ctxKey = EUROPEAN_KEY;
  714. }
  715. int lastkey = ctxKey;
  716. int base = bases[ctxKey];
  717. char minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
  718. for (int i = start, e = start + count; i < e; ++i) {
  719. char c = text[i];
  720. if (c >= minDigit && c <= '\u0039') {
  721. text[i] = (char)(c + base);
  722. }
  723. if (isStrongDirectional(c)) {
  724. int newkey = getContextKey(c);
  725. if (newkey != lastkey) {
  726. lastkey = newkey;
  727. ctxKey = newkey;
  728. if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY)) {
  729. ctxKey = EASTERN_ARABIC_KEY;
  730. } else if ((mask & (1<<ctxKey)) == 0) {
  731. ctxKey = EUROPEAN_KEY;
  732. }
  733. base = bases[ctxKey];
  734. minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
  735. }
  736. }
  737. }
  738. }
  739. /**
  740. * Returns a hash code for this shaper.
  741. * @return this shaper's hash code.
  742. * @see java.lang.Object#hashCode
  743. */
  744. public int hashCode() {
  745. return mask;
  746. }
  747. /**
  748. * Returns true if the specified object is an instance of
  749. * <code>NumericShaper</code> and shapes identically to this one.
  750. * @param o the specified object to compare to this
  751. * <code>NumericShaper</code>
  752. * @return <code>true</code> if <code>o</code> is an instance
  753. * of <code>NumericShaper</code> and shapes in the same way;
  754. * <code>false</code> otherwise.
  755. * @see java.lang.Object#equals(java.lang.Object)
  756. */
  757. public boolean equals(Object o) {
  758. if (o != null) {
  759. try {
  760. NumericShaper rhs = (NumericShaper)o;
  761. return rhs.mask == mask && rhs.key == key;
  762. }
  763. catch (ClassCastException e) {
  764. }
  765. }
  766. return false;
  767. }
  768. /**
  769. * Returns a <code>String</code> that describes this shaper. This method
  770. * is used for debugging purposes only.
  771. * @return a <code>String</code> describing this shaper.
  772. */
  773. public String toString() {
  774. StringBuffer buf = new StringBuffer(super.toString());
  775. buf.append("[contextual:" + isContextual());
  776. if (isContextual()) {
  777. buf.append(", context:" + keyNames[key]);
  778. }
  779. buf.append(", range(s): ");
  780. boolean first = true;
  781. for (int i = 0; i < NUM_KEYS; ++i) {
  782. if ((mask & (1 << i)) != 0) {
  783. if (first) {
  784. first = false;
  785. } else {
  786. buf.append(", ");
  787. }
  788. buf.append(keyNames[i]);
  789. }
  790. }
  791. buf.append(']');
  792. return buf.toString();
  793. }
  794. /**
  795. * Returns the index of the high bit in value (assuming le, actually
  796. * power of 2 >= value). value must be positive.
  797. */
  798. private static int getHighBit(int value) {
  799. if (value <= 0) {
  800. return -32;
  801. }
  802. int bit = 0;
  803. if (value >= 1 << 16) {
  804. value >>= 16;
  805. bit += 16;
  806. }
  807. if (value >= 1 << 8) {
  808. value >>= 8;
  809. bit += 8;
  810. }
  811. if (value >= 1 << 4) {
  812. value >>= 4;
  813. bit += 4;
  814. }
  815. if (value >= 1 << 2) {
  816. value >>= 2;
  817. bit += 2;
  818. }
  819. if (value >= 1 << 1) {
  820. value >>= 1;
  821. bit += 1;
  822. }
  823. return bit;
  824. }
  825. /**
  826. * fast binary search over subrange of array.
  827. */
  828. private static int search(char value, char[] array, int start, int length)
  829. {
  830. int power = 1 << getHighBit(length);
  831. int extra = length - power;
  832. int probe = power;
  833. int index = start;
  834. if (value >= array[index + extra]) {
  835. index += extra;
  836. }
  837. while (probe > 1) {
  838. probe >>= 1;
  839. if (value >= array[index + probe]) {
  840. index += probe;
  841. }
  842. }
  843. return index;
  844. }
  845. }